FAQ
For those who were wondering about the performance of the Phrase/Span
Query Highlighter vs. the standard contrib Highlighter, here is some
basic benchmark info.

Best viewed in a monospace font. Highlighter is the original; HighlighterSpan
is the position-sensitive version.

[echo] Working Directory: work
[java] Running algorithm from:
C:\workspace\ApacheLucene\contrib\benchmark\conf\highlight.alg
[java] ------------> config properties:
[java] log.queries = true
[java] task.max.depth.log = 2
[java] doc.maker =
org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker
[java] analyzer = org.apache.lucene.analysis.standard.StandardAnalyzer
[java] doc.add.log.step = 500
[java] doc.term.vector = false
[java] directory = FSDirectory
[java] query.maker =
org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
[java] doc.stored = true
[java] work.dir = work
[java] docs.dir = reuters-out
[java] compound = true
[java] doc.tokenized = true
[java] -------------------------------
[java] ------------> ReutersDocMaker statistics (0):
[java] total count of unique texts: 21,578
[java] total bytes of unique texts: 17,673,672
[java] ------------> queries:
[java] 0. TermQuery - body:salomon
[java] 1. TermQuery - body:comex
[java] 2. BooleanQuery - body:night body:trading
[java] 3. BooleanQuery - body:japan body:sony
[java] 4. PhraseQuery - body:"sony japan"
[java] 5. PhraseQuery - body:"food needs"~3
[java] 6. BooleanQuery - +body:"world bank"^2.0 +body:nigeria
[java] 7. BooleanQuery - body:"world bank" -body:nigeria
[java] 8. PhraseQuery - body:"ford credit"~5
[java] 9. BooleanQuery - body:airline body:europe body:canada
body:destination
[java] 10. BooleanQuery - body:long body:term body:pressure
body:trade body:ministers body:necessary body:current body:uruguay
body:round body:talks body:general body:agreement body:trade
body:tariffs body:gatt body:succeed
[java] 11. SpanFirstQuery - spanFirst(body:ford, 5)
[java] 12. SpanNearQuery - spanNear([body:night, body:trading], 4,
false)
[java] 13. SpanNearQuery - spanNear([spanFirst(body:ford, 10),
body:credit], 10, false)
[java] 14. WildcardQuery - body:fo*
[java] ------------> algorithm:
[java] Seq {
[java] Populate {
[java] CreateIndex
[java] MAddDocs_4000 {
[java] AddDoc
[java] > * 4000
[java] Optimize
[java] CloseIndex
[java] }
[java] Rounds_4 {
[java] Highlight_200 {
[java] Highlight
[java] > * 200
[java] HighlightSpan_200 {
[java] HighlightSpan
[java] > * 200
[java] NewRound
[java] } * 4
[java] RepSumByName
[java] RepSumByPrefRound Highlight
[java] }
[java] ------------> starting task: Populate
[java] ------------> starting task: CreateIndex
[java] ------------> starting task: MAddDocs_4000
[java] --> main processed (add) 500 docs
[java] --> main processed (add) 1000 docs
[java] --> main processed (add) 1500 docs
[java] --> main processed (add) 2000 docs
[java] --> main processed (add) 2500 docs
[java] --> main processed (add) 3000 docs
[java] --> main processed (add) 3500 docs
[java] --> main processed (add) 4000 docs
[java] ------------> starting task: Optimize
[java] ------------> starting task: CloseIndex
[java] ------------> starting task: Rounds_4
[java] ------------> starting task: Highlight_200
[java] ------------> starting task: HighlightSpan_200
[java] ------------> starting task: NewRound
[java] --> Round 0-->1
[java] ------------> starting task: Highlight_200
[java] ------------> starting task: HighlightSpan_200
[java] ------------> starting task: NewRound
[java] --> Round 1-->2
[java] ------------> starting task: Highlight_200
[java] ------------> starting task: HighlightSpan_200
[java] ------------> starting task: NewRound
[java] --> Round 2-->3
[java] ------------> starting task: Highlight_200
[java] ------------> starting task: HighlightSpan_200
[java] ------------> starting task: NewRound
[java] --> Round 3-->4
[java] ------------> Report Sum By (any) Name (8 about 14 out of 14)
[java] Operation round runCnt recsPerRun rec/s elapsedSec avgUsedMem avgTotalMem
[java] Populate 0 1 4003 150.3 26.62 6,153,584 10,395,648
[java] CreateIndex - - - 0 - - 1 - - - - 1 - - - 7.1 - - 0.14 - 3,901,352 - - 8,179,712
[java] MAddDocs_4000 0 1 4000 154.7 25.86 6,346,824 10,395,648
[java] Optimize - - - - 0 - - 1 - - - - 1 - - - 1.6 - - 0.62 - 6,346,824 - 10,395,648
[java] CloseIndex 0 1 1 1,000.0 0.00 6,153,584 10,395,648
[java] Rounds_4 - - - - 0 - - 1 - - 346980 - - 461.4 - - 751.97 - 8,834,520 - 11,456,512
[java] Highlight_200 - 4 43372 468.4 370.41 8,328,474 11,358,208
[java] HighlightSpan_200 - - - - 4 - - 43372 - - 454.7 - - 381.56 - 8,233,914 - 11,358,208
[java] ------------> Report sum by Prefix (Highlight) and Round (8 about 8 out of 14)
[java] Operation round runCnt recsPerRun rec/s elapsedSec avgUsedMem avgTotalMem
[java] Highlight_200 0 1 42890 519.7 82.53 8,417,256 11,325,440
[java] HighlightSpan_200 - 0 - - 1 - - 42890 - - 465.5 - - 92.14 - 8,417,256 - 11,325,440
[java] Highlight_200 1 1 42731 470.4 90.84 7,547,592 11,325,440
[java] HighlightSpan_200 - 1 - - 1 - - 42731 - - 449.2 - - 95.12 - 7,169,352 - 11,325,440
[java] Highlight_200 2 1 44979 463.5 97.05 6,540,944 11,325,440
[java] HighlightSpan_200 - 2 - - 1 - - 44979 - - 468.2 - - 96.08 - 6,540,944 - 11,325,440
[java] Highlight_200 3 1 42890 429.0 99.98 10,808,104 11,456,512
[java] HighlightSpan_200 - 3 - - 1 - - 42890 - - 436.7 - - 98.22 - 10,808,104 - 11,456,512
[java] ####################
[java] ### D O N E !!! ###
[java] ####################

---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-user-help@lucene.apache.org

Search Discussions

Related Discussions

Discussion Navigation
viewthread | post
Discussion Overview
group: java-user @
categories: lucene
posted: Jul 5, '07 at 9:18p
active: Jul 5, '07 at 9:18p
posts: 1
users: 1
website: lucene.apache.org

1 user in discussion

Mark Miller: 1 post

People

Translate

site design / logo © 2022 Grokbase