summaryrefslogtreecommitdiff
path: root/inc/indexer.php
diff options
context:
space:
mode:
author: Tom N Harris <tnharris@whoopdedo.org> 2011-02-23 15:01:10 -0500
committer: Tom N Harris <tnharris@whoopdedo.org> 2011-02-23 15:01:10 -0500
commit: b8c040db1fdc0eee80963e57d95a15fd3813912d (patch)
tree: 1da4069bafa2a732c20ef43aa78c85cb2fe53d90 /inc/indexer.php
parent: 287bc2877f03f25914816a266c305c6cf6c8c772 (diff)
download: rpg-b8c040db1fdc0eee80963e57d95a15fd3813912d.tar.gz
rpg-b8c040db1fdc0eee80963e57d95a15fd3813912d.tar.bz2
Add minimum length option to index histogram
Diffstat (limited to 'inc/indexer.php')
-rw-r--r-- inc/indexer.php 8
1 file changed, 5 insertions, 3 deletions
diff --git a/inc/indexer.php b/inc/indexer.php
index 2e36b6ed7..6b21797af 100644
--- a/inc/indexer.php
+++ b/inc/indexer.php
@@ -707,11 +707,12 @@ class Doku_Indexer {
*
* @param int $min bottom frequency threshold
* @param int $max upper frequency limit. No limit if $max<$min
+ * @param int $minlen minimum length of words to count
* @param string $key metadata key to list. Uses the fulltext index if not given
* @return array list of words as the keys and frequency as values
* @author Tom N Harris <tnharris@whoopdedo.org>
*/
- public function histogram($min=1, $max=0, $key=null) {
+ public function histogram($min=1, $max=0, $minlen=3, $key=null) {
if ($min < 1)
$min = 1;
if ($max < $min)
@@ -723,7 +724,7 @@ class Doku_Indexer {
$index = $this->_getIndex('title', '');
$index = array_count_values($index);
foreach ($index as $val => $cnt) {
- if ($cnt >= $min && (!$max || $cnt <= $max))
+ if ($cnt >= $min && (!$max || $cnt <= $max) && strlen($val) >= $minlen)
$result[$val] = $cnt;
}
}
@@ -733,7 +734,7 @@ class Doku_Indexer {
$val_idx = array();
foreach ($index as $wid => $line) {
$freq = $this->_countTuples($line);
- if ($freq >= $min && (!$max || $freq <= $max))
+ if ($freq >= $min && (!$max || $freq <= $max) && strlen($val) >= $minlen)
$val_idx[$wid] = $freq;
}
if (!empty($val_idx)) {
@@ -745,6 +746,7 @@ class Doku_Indexer {
else {
$lengths = idx_listIndexLengths();
foreach ($lengths as $length) {
+ if ($length < $minlen) continue;
$index = $this->_getIndex('i', $length);
$words = null;
foreach ($index as $wid => $line) {