On 3/12/06, Andreas Gohr <andi@xxxxxxxxxxxxxx> wrote: > > On Sun, 12 Mar 2006 19:55:26 +0100 > Yann <yann.hamon@xxxxxxxxx> wrote: > > > // common.php, line 943 > > function getRevisions($id){ > > > > > > There is the : > > > > while (($file = readdir($dh)) !== false) { > > if (is_dir($revd.'/'.$file)) continue; > > if > > (preg_match('/^'.$clid.'\.(\d+)\.txt(\.gz)?$/',$file,$match)){ > > $revs[]=$match[1]; > > } > > } > > > > part, that collects all revisions of a given file. > > > > /htdocs/data/attic$ ls -l | wc -l > > 2766 > > > > That part of the code is doing the preg_match 2800 time if I want the > > list of revisions of a file that is in the / of my wiki. I propose to replace the getRevisionInfo code with this: /** * Compare the logline $a to the timestamp $b * @author Yann Hamon <yann.hamon@xxxxxxxxxxxxx> * @returns integer 0 if the logline has timestamp $b, <0 if the timestamp of $a is greater than $b, >0 else. */ function hasTimestamp($a, $b) { if (strpos($a, $b) === 0) return 0; else return strcmp ($a, $b); } /** * performs a dichotomic search on an array using * a custom compare function * * @author Yann Hamon <yann.hamon@xxxxxxxxxxxxx< */ function array_dichotomic_search($ar, $value, $compareFunc) { $value = trim($value); if (!$ar || !$value || !$compareFunc) return (null); $len = count($ar); $l = 0; $r = $len-1; do { $i = floor(($l+$r)/2); if ($compareFunc($ar[$i], $value)<0) $l = $i+1; else $r = $i-1; } while ($compareFunc($ar[$i], $value)!=0 && $l<=$r); if ($compareFunc($ar[$i], $value)==0) return $i; else return -1; } /** * gets additonal informations for a certain pagerevison * from the changelog * * @author Andreas Gohr <andi@xxxxxxxxxxxxxx> * @author Yann Hamon <yann.hamon@xxxxxxxxxxxxx> */ function getRevisionInfo($id,$rev){ global $conf; if(!$rev) return(null); $info = array(); if(!@is_readable($conf['changelog'])){ msg($conf['changelog'].' is not readable',-1); return $recent; } $loglines = file($conf['changelog']); // Search for a line with a matching timestamp $index = array_dichotomic_search ($loglines, $rev, hasTimestamp); if ($index == -1) return; // The following code is necessary when there is more than // one line with one same timestamp $loglines_matching = array(); $loglines_matching[] = $loglines[$index]; for ($i=$index-1;hasTimestamp($loglines[$i], $rev) == 0; $i--) $loglines_matching[] = $loglines[$i]; for ($i=$index+1;hasTimestamp($loglines[$i], $rev) == 0; $i++) $loglines_matching[] = $loglines[$i]; // Match only lines concerning the document $id $loglines_matching = preg_grep("/$rev\t\d+\.\d+\.\d+\.\d+\t$id\t/",$loglines_matching); $loglines_matching = array_reverse($loglines_matching); //reverse sort on timestamp $line = split("\t",$loglines_matching[0]); $info['date'] = $line[0]; $info['ip'] = $line[1]; $info['user'] = $line[3]; $info['sum'] = $line[4]; $info['minor'] = isMinor($info['sum']); return $info; } Instead of doing a preg_match on every line of the changes.log, it assumes the lines ar ordered by timestamp; that way it can do a dichotomic search, which I think should be better (complexity of o(log(n)) instead of o(n)). There is a bit more code in case there is more than one line with the same timestamp. The code might not be perfect... Take it as a suggestion :) Cordially, Yann Hamon