summaryrefslogtreecommitdiff
path: root/inc/indexer.php
diff options
context:
space:
mode:
author Tom N Harris <tnharris@whoopdedo.org> 2011-02-22 03:38:22 -0500
committer Tom N Harris <tnharris@whoopdedo.org> 2011-02-22 03:38:22 -0500
commit 175193d28ead34dd3a45395407813c080f1b2f25 (patch)
tree fff1e26b948f31718bed23b85fbc123ab874c9b8 /inc/indexer.php
parent d0d6fe1be56ef474d844b3556af7ba2a5961d798 (diff)
downloadrpg-175193d28ead34dd3a45395407813c080f1b2f25.tar.gz
rpg-175193d28ead34dd3a45395407813c080f1b2f25.tar.bz2
Implement histogram method of indexer
Diffstat (limited to 'inc/indexer.php')
-rw-r--r-- inc/indexer.php 64
1 file changed, 64 insertions, 0 deletions
diff --git a/inc/indexer.php b/inc/indexer.php
index bcda2a9b9..c28f24f75 100644
--- a/inc/indexer.php
+++ b/inc/indexer.php
@@ -712,6 +712,54 @@ class Doku_Indexer {
* @author Tom N Harris <tnharris@whoopdedo.org>
*/
public function histogram($min=1, $max=0, $key=null) {
    // Builds a map of value => occurrence count, keeps only counts inside
    // the requested range and returns it sorted by descending count.
    //
    //   $min  lowest count to report; anything below 1 is raised to 1
    //   $max  highest count to report; 0, or any value below $min,
    //         switches the upper bound off
    //   $key  'title'  counts identical lines of the 'title' index
    //         other    uses the '<name>_i' / '<name>_w' index pair, where
    //                  <name> is idx_cleanName($key)
    //         null     uses the 'i' / 'w' indexes for every length that
    //                  idx_listIndexLengths() reports
    if ($min < 1) {
        $min = 1;
    }
    // Normalise the upper bound once so the three branches below share a
    // single, simple range test. After this $limit is always >= $min.
    $limit = ($max < $min) ? PHP_INT_MAX : $max;

    $result = array();

    // Strict comparison: with '==' a key of 0 or true would be treated as
    // 'title' on PHP versions before 8.
    if ($key === 'title') {
        $titles = array_count_values($this->_getIndex('title', ''));
        foreach ($titles as $val => $cnt) {
            if ($cnt >= $min && $cnt <= $limit) {
                $result[$val] = $cnt;
            }
        }
    } elseif ($key !== null) {
        $metaname = idx_cleanName($key);

        // First pass: find the ids whose summed frequency is in range.
        $matches = array();
        foreach ($this->_getIndex($metaname.'_i', '') as $wid => $line) {
            $freq = $this->_countTuples($line);
            if ($freq >= $min && $freq <= $limit) {
                $matches[$wid] = $freq;
            }
        }

        // Second pass: only load the value list when something matched.
        if (!empty($matches)) {
            $words = $this->_getIndex($metaname.'_w', '');
            foreach ($matches as $wid => $freq) {
                // Skip ids with no corresponding value line instead of
                // raising a notice and filing the count under ''.
                if (isset($words[$wid])) {
                    $result[$words[$wid]] = $freq;
                }
            }
        }
    } else {
        foreach (idx_listIndexLengths() as $length) {
            $words = null;
            foreach ($this->_getIndex('i', $length) as $wid => $line) {
                $freq = $this->_countTuples($line);
                if ($freq < $min || $freq > $limit) {
                    continue;
                }
                // The word list for this length is read lazily, on the
                // first frequency that falls inside the range.
                if ($words === null) {
                    $words = $this->_getIndex('w', $length);
                }
                if (isset($words[$wid])) {
                    $result[$words[$wid]] = $freq;
                }
            }
        }
    }

    // Highest counts first, keys preserved.
    arsort($result);
    return $result;
}
/**
@@ -1005,6 +1053,22 @@ class Doku_Indexer {
}
return $result;
}
+
/**
 * Sum the counts in a list of tuples.
 *
 * The line is split on ':' into tuples and every tuple on '*'. The field
 * following the first '*' is cast to int and added to the total. Empty
 * tuples and tuples that contain no '*' contribute nothing.
 *
 * @author Tom N Harris <tnharris@whoopdedo.org>
 * @param string $line  ':' separated list of "id*count" tuples
 * @return int          sum of all counts found in the line
 */
private function _countTuples($line) {
    $freq = 0;
    foreach (explode(':', $line) as $tuple) {
        $fields = explode('*', $tuple);
        // No second field means there is no count to add; checking here
        // avoids the undefined-offset notice list() would raise.
        if (isset($fields[1])) {
            $freq += (int)$fields[1];
        }
    }
    return $freq;
}
}
/**