diff options
author | Guy Brand <gb@unistra.fr> | 2015-08-10 10:03:27 +0200 |
---|---|---|
committer | Guy Brand <gb@unistra.fr> | 2015-08-10 10:03:27 +0200 |
commit | 53a57d16b9c741bb44099fd93bf79efa06796341 (patch) | |
tree | 24a90a50afe9325926c8ebaa2ed90f9fa093e5b9 /inc/indexer.php | |
parent | cf6e6645c31a9f185cef3fb9452fb188882ede47 (diff) | |
parent | a060d9973e7c1d5051f2cc426937881826e4972e (diff) | |
download | rpg-53a57d16b9c741bb44099fd93bf79efa06796341.tar.gz rpg-53a57d16b9c741bb44099fd93bf79efa06796341.tar.bz2 |
Merge branch master into stable
Diffstat (limited to 'inc/indexer.php')
-rw-r--r-- | inc/indexer.php | 89 |
1 files changed, 73 insertions, 16 deletions
diff --git a/inc/indexer.php b/inc/indexer.php index 5ca2f0bb1..a86bfc656 100644 --- a/inc/indexer.php +++ b/inc/indexer.php @@ -61,6 +61,8 @@ define('IDX_ASIAN', '(?:'.IDX_ASIAN1.'|'.IDX_ASIAN2.'|'.IDX_ASIAN3.')'); * * @author Tom N Harris <tnharris@whoopdedo.org> * @author Michael Hamann <michael@content-space.de> + * + * @return int|string */ function idx_get_version(){ static $indexer_version = null; @@ -84,6 +86,9 @@ function idx_get_version(){ * Differs from strlen in handling of asian characters. * * @author Tom N Harris <tnharris@whoopdedo.org> + * + * @param string $w + * @return int */ function wordlen($w){ $l = strlen($w); @@ -115,7 +120,8 @@ class Doku_Indexer { * * @param string $page a page name * @param string $text the body of the page - * @return boolean the function completed successfully + * @return string|boolean the function completed successfully + * * @author Tom N Harris <tnharris@whoopdedo.org> * @author Andreas Gohr <andi@splitbrain.org> */ @@ -190,6 +196,7 @@ class Doku_Indexer { * * @param string $text content of the page * @return array list of word IDs and number of times used + * * @author Andreas Gohr <andi@splitbrain.org> * @author Christopher Smith <chris@jalakai.co.uk> * @author Tom N Harris <tnharris@whoopdedo.org> @@ -245,7 +252,8 @@ class Doku_Indexer { * @param string $page a page name * @param mixed $key a key string or array of key=>value pairs * @param mixed $value the value or list of values - * @return boolean the function completed successfully + * @return boolean|string the function completed successfully + * * @author Tom N Harris <tnharris@whoopdedo.org> * @author Michael Hamann <michael@content-space.de> */ @@ -453,7 +461,8 @@ class Doku_Indexer { * Erases entries in all known indexes. * * @param string $page a page name - * @return boolean the function completed successfully + * @return string|boolean the function completed successfully + * * @author Tom N Harris <tnharris@whoopdedo.org> */ public function deletePage($page) { @@ -474,6 +483,7 @@ class Doku_Indexer { * * @param string $page a page name * @return boolean the function completed successfully + * * @author Tom N Harris <tnharris@whoopdedo.org> */ protected function deletePageNoLock($page) { @@ -568,6 +578,7 @@ class Doku_Indexer { * @param string $text plain text * @param boolean $wc are wildcards allowed? * @return array list of words in the text + * * @author Tom N Harris <tnharris@whoopdedo.org> * @author Andreas Gohr <andi@splitbrain.org> */ @@ -676,6 +687,7 @@ class Doku_Indexer { * * @param array $tokens list of words to search for * @return array list of page names with usage counts + * * @author Tom N Harris <tnharris@whoopdedo.org> * @author Andreas Gohr <andi@splitbrain.org> */ @@ -730,6 +742,7 @@ class Doku_Indexer { * @param string $value search term to look for, must be a string or array of strings * @param callback $func comparison function * @return array lists with page names, keys are query values if $value is array + * * @author Tom N Harris <tnharris@whoopdedo.org> * @author Michael Hamann <michael@content-space.de> */ @@ -829,6 +842,7 @@ class Doku_Indexer { * @param array $words The query terms. * @param array $result Set to word => array("length*id" ...) * @return array Set to length => array(id ...) + * * @author Tom N Harris <tnharris@whoopdedo.org> */ protected function getIndexWords(&$words, &$result) { @@ -909,6 +923,7 @@ class Doku_Indexer { * * @param string $key list only pages containing the metadata key (optional) * @return array list of page names + * * @author Tom N Harris <tnharris@whoopdedo.org> */ public function getPages($key=null) { @@ -942,6 +957,7 @@ class Doku_Indexer { * @param int $minlen minimum length of words to count * @param string $key metadata key to list. Uses the fulltext index if not given * @return array list of words as the keys and frequency as values + * * @author Tom N Harris <tnharris@whoopdedo.org> */ public function histogram($min=1, $max=0, $minlen=3, $key=null) { @@ -1002,6 +1018,8 @@ class Doku_Indexer { * Lock the indexer. * * @author Tom N Harris <tnharris@whoopdedo.org> + * + * @return bool|string */ protected function lock() { global $conf; @@ -1033,6 +1051,8 @@ class Doku_Indexer { * Release the indexer lock. * * @author Tom N Harris <tnharris@whoopdedo.org> + * + * @return bool */ protected function unlock() { global $conf; @@ -1050,12 +1070,13 @@ class Doku_Indexer { * @param string $idx name of the index * @param string $suffix subpart identifier * @return array list of lines without CR or LF + * * @author Tom N Harris <tnharris@whoopdedo.org> */ protected function getIndex($idx, $suffix) { global $conf; $fn = $conf['indexdir'].'/'.$idx.$suffix.'.idx'; - if (!@file_exists($fn)) return array(); + if (!file_exists($fn)) return array(); return file($fn, FILE_IGNORE_NEW_LINES); } @@ -1066,6 +1087,7 @@ class Doku_Indexer { * @param string $suffix subpart identifier * @param array $lines list of lines without LF * @return bool If saving succeeded + * * @author Tom N Harris <tnharris@whoopdedo.org> */ protected function saveIndex($idx, $suffix, &$lines) { @@ -1090,12 +1112,13 @@ class Doku_Indexer { * @param string $suffix subpart identifier * @param int $id the line number * @return string a line with trailing whitespace removed + * * @author Tom N Harris <tnharris@whoopdedo.org> */ protected function getIndexKey($idx, $suffix, $id) { global $conf; $fn = $conf['indexdir'].'/'.$idx.$suffix.'.idx'; - if (!@file_exists($fn)) return ''; + if (!file_exists($fn)) return ''; $fh = @fopen($fn, 'r'); if (!$fh) return ''; $ln = -1; @@ -1114,6 +1137,7 @@ class Doku_Indexer { * @param int $id the line number * @param string $line line to write * @return bool If saving succeeded + * * @author Tom N Harris <tnharris@whoopdedo.org> */ protected function saveIndexKey($idx, $suffix, $id, $line) { @@ -1155,6 +1179,7 @@ class Doku_Indexer { * @param string $suffix subpart identifier * @param string $value line to find in the index * @return int|bool line number of the value in the index or false if writing the index failed + * * @author Tom N Harris <tnharris@whoopdedo.org> */ protected function addIndexKey($idx, $suffix, $value) { @@ -1178,6 +1203,8 @@ class Doku_Indexer { * a sorted array of lengths of the words used in the wiki. * * @author YoBoY <yoboy.leguesh@gmail.com> + * + * @return array */ protected function listIndexLengths() { return idx_listIndexLengths(); @@ -1190,6 +1217,9 @@ class Doku_Indexer { * that there are indices for. * * @author YoBoY <yoboy.leguesh@gmail.com> + * + * @param array|int $filter + * @return array */ protected function indexLengths($filter) { global $conf; @@ -1198,7 +1228,7 @@ class Doku_Indexer { // testing if index files exist only $path = $conf['indexdir']."/i"; foreach ($filter as $key => $value) { - if (@file_exists($path.$key.'.idx')) + if (file_exists($path.$key.'.idx')) $idx[] = $key; } } else { @@ -1216,6 +1246,11 @@ class Doku_Indexer { * Insert or replace a tuple in a line. * * @author Tom N Harris <tnharris@whoopdedo.org> + * + * @param string $line + * @param string|int $id + * @param int $count + * @return string */ protected function updateTuple($line, $id, $count) { if ($line != ''){ @@ -1237,6 +1272,10 @@ class Doku_Indexer { * * @author Tom N Harris <tnharris@whoopdedo.org> * @author Andreas Gohr <andi@splitbrain.org> + * + * @param array $keys + * @param string $line + * @return array */ protected function parseTuples(&$keys, $line) { $result = array(); @@ -1257,13 +1296,16 @@ class Doku_Indexer { * Sum the counts in a list of tuples. * * @author Tom N Harris <tnharris@whoopdedo.org> + * + * @param string $line + * @return int */ protected function countTuples($line) { $freq = 0; $parts = explode(':', $line); foreach ($parts as $tuple) { if ($tuple === '') continue; - list($pid, $cnt) = explode('*', $tuple); + list(/* $pid */, $cnt) = explode('*', $tuple); $freq += (int)$cnt; } return $freq; @@ -1273,7 +1315,8 @@ class Doku_Indexer { /** * Create an instance of the indexer. * - * @return Doku_Indexer a Doku_Indexer + * @return Doku_Indexer a Doku_Indexer + * * @author Tom N Harris <tnharris@whoopdedo.org> */ function idx_get_indexer() { @@ -1288,6 +1331,7 @@ function idx_get_indexer() { * Returns words that will be ignored. * * @return array list of stop words + * * @author Tom N Harris <tnharris@whoopdedo.org> */ function & idx_get_stopwords() { @@ -1295,7 +1339,7 @@ function & idx_get_stopwords() { if (is_null($stopwords)) { global $conf; $swfile = DOKU_INC.'inc/lang/'.$conf['lang'].'/stopwords.txt'; - if(@file_exists($swfile)){ + if(file_exists($swfile)){ $stopwords = file($swfile, FILE_IGNORE_NEW_LINES); }else{ $stopwords = array(); @@ -1312,14 +1356,15 @@ function & idx_get_stopwords() { * @param string $page name of the page to index * @param boolean $verbose print status messages * @param boolean $force force reindexing even when the index is up to date - * @return boolean the function completed successfully + * @return string|boolean the function completed successfully + * * @author Tom N Harris <tnharris@whoopdedo.org> */ function idx_addPage($page, $verbose=false, $force=false) { $idxtag = metaFN($page,'.indexed'); // check if page was deleted but is still in the index if (!page_exists($page)) { - if (!@file_exists($idxtag)) { + if (!file_exists($idxtag)) { if ($verbose) print("Indexer: $page does not exist, ignoring".DOKU_LF); return false; } @@ -1334,7 +1379,7 @@ function idx_addPage($page, $verbose=false, $force=false) { } // check if indexing needed - if(!$force && @file_exists($idxtag)){ + if(!$force && file_exists($idxtag)){ if(trim(io_readFile($idxtag)) == idx_get_version()){ $last = @filemtime($idxtag); if($last > @filemtime(wikiFN($page))){ @@ -1347,7 +1392,7 @@ function idx_addPage($page, $verbose=false, $force=false) { $indexenabled = p_get_metadata($page, 'internal index', METADATA_RENDER_UNLIMITED); if ($indexenabled === false) { $result = false; - if (@file_exists($idxtag)) { + if (file_exists($idxtag)) { $Indexer = idx_get_indexer(); $result = $Indexer->deletePage($page); if ($result === "locked") { @@ -1441,11 +1486,15 @@ function idx_tokenizer($string, $wc=false) { * Read the list of words in an index (if it exists). * * @author Tom N Harris <tnharris@whoopdedo.org> + * + * @param string $idx + * @param string $suffix + * @return array */ function idx_getIndex($idx, $suffix) { global $conf; $fn = $conf['indexdir'].'/'.$idx.$suffix.'.idx'; - if (!@file_exists($fn)) return array(); + if (!file_exists($fn)) return array(); return file($fn); } @@ -1456,6 +1505,8 @@ function idx_getIndex($idx, $suffix) { * a sorted array of lengths of the words used in the wiki. * * @author YoBoY <yoboy.leguesh@gmail.com> + * + * @return array */ function idx_listIndexLengths() { global $conf; @@ -1464,7 +1515,7 @@ function idx_listIndexLengths() { $docache = false; } else { clearstatcache(); - if (@file_exists($conf['indexdir'].'/lengths.idx') + if (file_exists($conf['indexdir'].'/lengths.idx') && (time() < @filemtime($conf['indexdir'].'/lengths.idx') + $conf['readdircache'])) { if (($lengths = @file($conf['indexdir'].'/lengths.idx', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES)) !== false) { $idx = array(); @@ -1510,6 +1561,9 @@ function idx_listIndexLengths() { * that there are indices for. * * @author YoBoY <yoboy.leguesh@gmail.com> + * + * @param array|int $filter + * @return array */ function idx_indexLengths($filter) { global $conf; @@ -1518,7 +1572,7 @@ function idx_indexLengths($filter) { // testing if index files exist only $path = $conf['indexdir']."/i"; foreach ($filter as $key => $value) { - if (@file_exists($path.$key.'.idx')) + if (file_exists($path.$key.'.idx')) $idx[] = $key; } } else { @@ -1539,6 +1593,9 @@ function idx_indexLengths($filter) { * not a letter, number, or underscore. * * @author Tom N Harris <tnharris@whoopdedo.org> + * + * @param string $name + * @return string */ function idx_cleanName($name) { $name = utf8_romanize(trim((string)$name)); |