diff options
author | Tom N Harris <tnharris@whoopdedo.org> | 2011-02-22 03:38:22 -0500 |
---|---|---|
committer | Tom N Harris <tnharris@whoopdedo.org> | 2011-02-22 03:38:22 -0500 |
commit | 175193d28ead34dd3a45395407813c080f1b2f25 (patch) | |
tree | fff1e26b948f31718bed23b85fbc123ab874c9b8 | |
parent | d0d6fe1be56ef474d844b3556af7ba2a5961d798 (diff) | |
download | rpg-175193d28ead34dd3a45395407813c080f1b2f25.tar.gz rpg-175193d28ead34dd3a45395407813c080f1b2f25.tar.bz2 |
Implement histogram method of indexer
-rw-r--r-- | inc/indexer.php | 64 |
1 files changed, 64 insertions, 0 deletions
```diff
diff --git a/inc/indexer.php b/inc/indexer.php
index bcda2a9b9..c28f24f75 100644
--- a/inc/indexer.php
+++ b/inc/indexer.php
@@ -712,6 +712,54 @@ class Doku_Indexer {
      * @author Tom N Harris <tnharris@whoopdedo.org>
      */
     public function histogram($min=1, $max=0, $key=null) {
+        if ($min < 1)
+            $min = 1;
+        if ($max < $min)
+            $max = 0;
+
+        $result = array();
+
+        if ($key == 'title') {
+            $index = $this->_getIndex('title', '');
+            $index = array_count_values($index);
+            foreach ($index as $val => $cnt) {
+                if ($cnt >= $min && (!$max || $cnt <= $max))
+                    $result[$val] = $cnt;
+            }
+        }
+        elseif (!is_null($key)) {
+            $metaname = idx_cleanName($key);
+            $index = $this->_getIndex($metaname.'_i', '');
+            $val_idx = array();
+            foreach ($index as $wid => $line) {
+                $freq = $this->_countTuples($line);
+                if ($freq >= $min && (!$max || $freq <= $max))
+                    $val_idx[$wid] = $freq;
+            }
+            if (!empty($val_idx)) {
+                $words = $this->_getIndex($metaname.'_w', '');
+                foreach ($val_idx as $wid => $freq)
+                    $result[$words[$wid]] = $freq;
+            }
+        }
+        else {
+            $lengths = idx_listIndexLengths();
+            foreach ($lengths as $length) {
+                $index = $this->_getIndex('i', $length);
+                $words = null;
+                foreach ($index as $wid => $line) {
+                    $freq = $this->_countTuples($line);
+                    if ($freq >= $min && (!$max || $freq <= $max)) {
+                        if ($words === null)
+                            $words = $this->_getIndex('w', $length);
+                        $result[$words[$wid]] = $freq;
+                    }
+                }
+            }
+        }
+
+        arsort($result);
+        return $result;
     }
 
     /**
@@ -1005,6 +1053,22 @@ class Doku_Indexer {
         }
         return $result;
     }
+
+    /**
+     * Sum the counts in a list of tuples.
+     *
+     * @author Tom N Harris <tnharris@whoopdedo.org>
+     */
+    private function _countTuples($line) {
+        $freq = 0;
+        $parts = explode(':', $line);
+        foreach ($parts as $tuple) {
+            if ($tuple == '') continue;
+            list($pid, $cnt) = explode('*', $tuple);
+            $freq += (int)$cnt;
+        }
+        return $freq;
+    }
 }
 
 /**
```