From grdetil@scrc.umanitoba.ca Mon Mar 22 19:12:05 1999
Date: Mon, 22 Mar 1999 16:47:11 -0600 (CST)
From: Gilles Detillieux
To: htdig@htdig.org
Subject: [htdig] Patch: use minimum_word_length for plain text documents

Hi again. If any of you have been wondering why the minimum_word_length
attribute seems to have no effect on words indexed in plain text files,
here's the fix:

--- htdig/Plaintext.cc.nominword Tue Feb 16 23:03:53 1999
+++ htdig/Plaintext.cc Fri Mar 19 10:39:54 1999
@@ -67,6 +67,7 @@ Plaintext::parse(Retriever &retriever, U
     unsigned char *position = (unsigned char *) contents->get();
     unsigned char *start = position;
+    static int minimumWordLength = config.Value("minimum_word_length", 3);
     int offset = 0;
     int in_space = 0;
     String word;
@@ -94,11 +95,11 @@ Plaintext::parse(Retriever &retriever, U
                 head << word;
             }
-            if (word.length() > 2)
+            if (word.length() >= minimumWordLength)
             {
                 word.lowercase();
                 word.remove(valid_punctuation);
-                if (word.length() > 2)
+                if (word.length() >= minimumWordLength)
                 {
                     retriever.got_word(word,
                                        int(offset * 1000 / contents->length()),

--
Gilles R. Detillieux              E-mail:
Spinal Cord Research Centre       WWW:    http://www.scrc.umanitoba.ca/~grdetil
Dept. Physiology, U. of Manitoba  Phone:  (204)789-3766
Winnipeg, MB R3E 3J7 (Canada)     Fax:    (204)789-3930

------------------------------------
To unsubscribe from the htdig mailing list, send a message to
htdig@htdig.org containing the single word "unsubscribe" in
the SUBJECT of the message.