summary | refs | log | tree | commit | diff
path: root/inc/indexer.php
diff options
context:
space:
mode:
author Guy Brand <gb@unistra.fr> 2015-08-10 10:03:27 +0200
committer Guy Brand <gb@unistra.fr> 2015-08-10 10:03:27 +0200
commit 53a57d16b9c741bb44099fd93bf79efa06796341 (patch)
tree 24a90a50afe9325926c8ebaa2ed90f9fa093e5b9 /inc/indexer.php
parent cf6e6645c31a9f185cef3fb9452fb188882ede47 (diff)
parent a060d9973e7c1d5051f2cc426937881826e4972e (diff)
download rpg-53a57d16b9c741bb44099fd93bf79efa06796341.tar.gz
rpg-53a57d16b9c741bb44099fd93bf79efa06796341.tar.bz2
Merge branch master into stable
Diffstat (limited to 'inc/indexer.php')
-rw-r--r-- inc/indexer.php 89
1 files changed, 73 insertions, 16 deletions
diff --git a/inc/indexer.php b/inc/indexer.php
index 5ca2f0bb1..a86bfc656 100644
--- a/inc/indexer.php
+++ b/inc/indexer.php
@@ -61,6 +61,8 @@ define('IDX_ASIAN', '(?:'.IDX_ASIAN1.'|'.IDX_ASIAN2.'|'.IDX_ASIAN3.')');
*
* @author Tom N Harris <tnharris@whoopdedo.org>
* @author Michael Hamann <michael@content-space.de>
+ *
+ * @return int|string
*/
function idx_get_version(){
static $indexer_version = null;
@@ -84,6 +86,9 @@ function idx_get_version(){
* Differs from strlen in handling of asian characters.
*
* @author Tom N Harris <tnharris@whoopdedo.org>
+ *
+ * @param string $w
+ * @return int
*/
function wordlen($w){
$l = strlen($w);
@@ -115,7 +120,8 @@ class Doku_Indexer {
*
* @param string $page a page name
* @param string $text the body of the page
- * @return boolean the function completed successfully
+ * @return string|boolean the function completed successfully
+ *
* @author Tom N Harris <tnharris@whoopdedo.org>
* @author Andreas Gohr <andi@splitbrain.org>
*/
@@ -190,6 +196,7 @@ class Doku_Indexer {
*
* @param string $text content of the page
* @return array list of word IDs and number of times used
+ *
* @author Andreas Gohr <andi@splitbrain.org>
* @author Christopher Smith <chris@jalakai.co.uk>
* @author Tom N Harris <tnharris@whoopdedo.org>
@@ -245,7 +252,8 @@ class Doku_Indexer {
* @param string $page a page name
* @param mixed $key a key string or array of key=>value pairs
* @param mixed $value the value or list of values
- * @return boolean the function completed successfully
+ * @return boolean|string the function completed successfully
+ *
* @author Tom N Harris <tnharris@whoopdedo.org>
* @author Michael Hamann <michael@content-space.de>
*/
@@ -453,7 +461,8 @@ class Doku_Indexer {
* Erases entries in all known indexes.
*
* @param string $page a page name
- * @return boolean the function completed successfully
+ * @return string|boolean the function completed successfully
+ *
* @author Tom N Harris <tnharris@whoopdedo.org>
*/
public function deletePage($page) {
@@ -474,6 +483,7 @@ class Doku_Indexer {
*
* @param string $page a page name
* @return boolean the function completed successfully
+ *
* @author Tom N Harris <tnharris@whoopdedo.org>
*/
protected function deletePageNoLock($page) {
@@ -568,6 +578,7 @@ class Doku_Indexer {
* @param string $text plain text
* @param boolean $wc are wildcards allowed?
* @return array list of words in the text
+ *
* @author Tom N Harris <tnharris@whoopdedo.org>
* @author Andreas Gohr <andi@splitbrain.org>
*/
@@ -676,6 +687,7 @@ class Doku_Indexer {
*
* @param array $tokens list of words to search for
* @return array list of page names with usage counts
+ *
* @author Tom N Harris <tnharris@whoopdedo.org>
* @author Andreas Gohr <andi@splitbrain.org>
*/
@@ -730,6 +742,7 @@ class Doku_Indexer {
* @param string $value search term to look for, must be a string or array of strings
* @param callback $func comparison function
* @return array lists with page names, keys are query values if $value is array
+ *
* @author Tom N Harris <tnharris@whoopdedo.org>
* @author Michael Hamann <michael@content-space.de>
*/
@@ -829,6 +842,7 @@ class Doku_Indexer {
* @param array $words The query terms.
* @param array $result Set to word => array("length*id" ...)
* @return array Set to length => array(id ...)
+ *
* @author Tom N Harris <tnharris@whoopdedo.org>
*/
protected function getIndexWords(&$words, &$result) {
@@ -909,6 +923,7 @@ class Doku_Indexer {
*
* @param string $key list only pages containing the metadata key (optional)
* @return array list of page names
+ *
* @author Tom N Harris <tnharris@whoopdedo.org>
*/
public function getPages($key=null) {
@@ -942,6 +957,7 @@ class Doku_Indexer {
* @param int $minlen minimum length of words to count
* @param string $key metadata key to list. Uses the fulltext index if not given
* @return array list of words as the keys and frequency as values
+ *
* @author Tom N Harris <tnharris@whoopdedo.org>
*/
public function histogram($min=1, $max=0, $minlen=3, $key=null) {
@@ -1002,6 +1018,8 @@ class Doku_Indexer {
* Lock the indexer.
*
* @author Tom N Harris <tnharris@whoopdedo.org>
+ *
+ * @return bool|string
*/
protected function lock() {
global $conf;
@@ -1033,6 +1051,8 @@ class Doku_Indexer {
* Release the indexer lock.
*
* @author Tom N Harris <tnharris@whoopdedo.org>
+ *
+ * @return bool
*/
protected function unlock() {
global $conf;
@@ -1050,12 +1070,13 @@ class Doku_Indexer {
* @param string $idx name of the index
* @param string $suffix subpart identifier
* @return array list of lines without CR or LF
+ *
* @author Tom N Harris <tnharris@whoopdedo.org>
*/
protected function getIndex($idx, $suffix) {
global $conf;
$fn = $conf['indexdir'].'/'.$idx.$suffix.'.idx';
- if (!@file_exists($fn)) return array();
+ if (!file_exists($fn)) return array();
return file($fn, FILE_IGNORE_NEW_LINES);
}
@@ -1066,6 +1087,7 @@ class Doku_Indexer {
* @param string $suffix subpart identifier
* @param array $lines list of lines without LF
* @return bool If saving succeeded
+ *
* @author Tom N Harris <tnharris@whoopdedo.org>
*/
protected function saveIndex($idx, $suffix, &$lines) {
@@ -1090,12 +1112,13 @@ class Doku_Indexer {
* @param string $suffix subpart identifier
* @param int $id the line number
* @return string a line with trailing whitespace removed
+ *
* @author Tom N Harris <tnharris@whoopdedo.org>
*/
protected function getIndexKey($idx, $suffix, $id) {
global $conf;
$fn = $conf['indexdir'].'/'.$idx.$suffix.'.idx';
- if (!@file_exists($fn)) return '';
+ if (!file_exists($fn)) return '';
$fh = @fopen($fn, 'r');
if (!$fh) return '';
$ln = -1;
@@ -1114,6 +1137,7 @@ class Doku_Indexer {
* @param int $id the line number
* @param string $line line to write
* @return bool If saving succeeded
+ *
* @author Tom N Harris <tnharris@whoopdedo.org>
*/
protected function saveIndexKey($idx, $suffix, $id, $line) {
@@ -1155,6 +1179,7 @@ class Doku_Indexer {
* @param string $suffix subpart identifier
* @param string $value line to find in the index
* @return int|bool line number of the value in the index or false if writing the index failed
+ *
* @author Tom N Harris <tnharris@whoopdedo.org>
*/
protected function addIndexKey($idx, $suffix, $value) {
@@ -1178,6 +1203,8 @@ class Doku_Indexer {
* a sorted array of lengths of the words used in the wiki.
*
* @author YoBoY <yoboy.leguesh@gmail.com>
+ *
+ * @return array
*/
protected function listIndexLengths() {
return idx_listIndexLengths();
@@ -1190,6 +1217,9 @@ class Doku_Indexer {
* that there are indices for.
*
* @author YoBoY <yoboy.leguesh@gmail.com>
+ *
+ * @param array|int $filter
+ * @return array
*/
protected function indexLengths($filter) {
global $conf;
@@ -1198,7 +1228,7 @@ class Doku_Indexer {
// testing if index files exist only
$path = $conf['indexdir']."/i";
foreach ($filter as $key => $value) {
- if (@file_exists($path.$key.'.idx'))
+ if (file_exists($path.$key.'.idx'))
$idx[] = $key;
}
} else {
@@ -1216,6 +1246,11 @@ class Doku_Indexer {
* Insert or replace a tuple in a line.
*
* @author Tom N Harris <tnharris@whoopdedo.org>
+ *
+ * @param string $line
+ * @param string|int $id
+ * @param int $count
+ * @return string
*/
protected function updateTuple($line, $id, $count) {
if ($line != ''){
@@ -1237,6 +1272,10 @@ class Doku_Indexer {
*
* @author Tom N Harris <tnharris@whoopdedo.org>
* @author Andreas Gohr <andi@splitbrain.org>
+ *
+ * @param array $keys
+ * @param string $line
+ * @return array
*/
protected function parseTuples(&$keys, $line) {
$result = array();
@@ -1257,13 +1296,16 @@ class Doku_Indexer {
* Sum the counts in a list of tuples.
*
* @author Tom N Harris <tnharris@whoopdedo.org>
+ *
+ * @param string $line
+ * @return int
*/
protected function countTuples($line) {
$freq = 0;
$parts = explode(':', $line);
foreach ($parts as $tuple) {
if ($tuple === '') continue;
- list($pid, $cnt) = explode('*', $tuple);
+ list(/* $pid */, $cnt) = explode('*', $tuple);
$freq += (int)$cnt;
}
return $freq;
@@ -1273,7 +1315,8 @@ class Doku_Indexer {
/**
* Create an instance of the indexer.
*
- * @return Doku_Indexer a Doku_Indexer
+ * @return Doku_Indexer a Doku_Indexer
+ *
* @author Tom N Harris <tnharris@whoopdedo.org>
*/
function idx_get_indexer() {
@@ -1288,6 +1331,7 @@ function idx_get_indexer() {
* Returns words that will be ignored.
*
* @return array list of stop words
+ *
* @author Tom N Harris <tnharris@whoopdedo.org>
*/
function & idx_get_stopwords() {
@@ -1295,7 +1339,7 @@ function & idx_get_stopwords() {
if (is_null($stopwords)) {
global $conf;
$swfile = DOKU_INC.'inc/lang/'.$conf['lang'].'/stopwords.txt';
- if(@file_exists($swfile)){
+ if(file_exists($swfile)){
$stopwords = file($swfile, FILE_IGNORE_NEW_LINES);
}else{
$stopwords = array();
@@ -1312,14 +1356,15 @@ function & idx_get_stopwords() {
* @param string $page name of the page to index
* @param boolean $verbose print status messages
* @param boolean $force force reindexing even when the index is up to date
- * @return boolean the function completed successfully
+ * @return string|boolean the function completed successfully
+ *
* @author Tom N Harris <tnharris@whoopdedo.org>
*/
function idx_addPage($page, $verbose=false, $force=false) {
$idxtag = metaFN($page,'.indexed');
// check if page was deleted but is still in the index
if (!page_exists($page)) {
- if (!@file_exists($idxtag)) {
+ if (!file_exists($idxtag)) {
if ($verbose) print("Indexer: $page does not exist, ignoring".DOKU_LF);
return false;
}
@@ -1334,7 +1379,7 @@ function idx_addPage($page, $verbose=false, $force=false) {
}
// check if indexing needed
- if(!$force && @file_exists($idxtag)){
+ if(!$force && file_exists($idxtag)){
if(trim(io_readFile($idxtag)) == idx_get_version()){
$last = @filemtime($idxtag);
if($last > @filemtime(wikiFN($page))){
@@ -1347,7 +1392,7 @@ function idx_addPage($page, $verbose=false, $force=false) {
$indexenabled = p_get_metadata($page, 'internal index', METADATA_RENDER_UNLIMITED);
if ($indexenabled === false) {
$result = false;
- if (@file_exists($idxtag)) {
+ if (file_exists($idxtag)) {
$Indexer = idx_get_indexer();
$result = $Indexer->deletePage($page);
if ($result === "locked") {
@@ -1441,11 +1486,15 @@ function idx_tokenizer($string, $wc=false) {
* Read the list of words in an index (if it exists).
*
* @author Tom N Harris <tnharris@whoopdedo.org>
+ *
+ * @param string $idx
+ * @param string $suffix
+ * @return array
*/
function idx_getIndex($idx, $suffix) {
global $conf;
$fn = $conf['indexdir'].'/'.$idx.$suffix.'.idx';
- if (!@file_exists($fn)) return array();
+ if (!file_exists($fn)) return array();
return file($fn);
}
@@ -1456,6 +1505,8 @@ function idx_getIndex($idx, $suffix) {
* a sorted array of lengths of the words used in the wiki.
*
* @author YoBoY <yoboy.leguesh@gmail.com>
+ *
+ * @return array
*/
function idx_listIndexLengths() {
global $conf;
@@ -1464,7 +1515,7 @@ function idx_listIndexLengths() {
$docache = false;
} else {
clearstatcache();
- if (@file_exists($conf['indexdir'].'/lengths.idx')
+ if (file_exists($conf['indexdir'].'/lengths.idx')
&& (time() < @filemtime($conf['indexdir'].'/lengths.idx') + $conf['readdircache'])) {
if (($lengths = @file($conf['indexdir'].'/lengths.idx', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES)) !== false) {
$idx = array();
@@ -1510,6 +1561,9 @@ function idx_listIndexLengths() {
* that there are indices for.
*
* @author YoBoY <yoboy.leguesh@gmail.com>
+ *
+ * @param array|int $filter
+ * @return array
*/
function idx_indexLengths($filter) {
global $conf;
@@ -1518,7 +1572,7 @@ function idx_indexLengths($filter) {
// testing if index files exist only
$path = $conf['indexdir']."/i";
foreach ($filter as $key => $value) {
- if (@file_exists($path.$key.'.idx'))
+ if (file_exists($path.$key.'.idx'))
$idx[] = $key;
}
} else {
@@ -1539,6 +1593,9 @@ function idx_indexLengths($filter) {
* not a letter, number, or underscore.
*
* @author Tom N Harris <tnharris@whoopdedo.org>
+ *
+ * @param string $name
+ * @return string
*/
function idx_cleanName($name) {
$name = utf8_romanize(trim((string)$name));