Hi,
I am trying to count, for each term, how many of the documents returned
by a query contain it, using a TermVectorMapper.
Does anyone know of a faster way to do this than calling
getTermFreqVector() with a TermVectorMapper that stores the
term counts in a HashMap?
I do not require the term frequency within a document.
Thanks,
Thomas
// Tally filled in by DocTermVectorMapper.map(): Term (field + text) -> Int counter,
// incremented once for every term vector visited below that contains the term.
HashMap termDocCount = new HashMap();
TermQuery tagQuery = new TermQuery(tagTerm);
TopDocs docs = searcher.search(tagQuery, numDocs);

// A single mapper is enough for every (document, field) pair: its only state is
// the shared tally plus the current field name, which it overwrites in
// setExpectations() each time a new term vector is read.
DocTermVectorMapper vMapper = new DocTermVectorMapper(termDocCount);

for (int i = 0; i < docs.scoreDocs.length; ++i) {
    // Only the document id is needed here. The stored fields are deliberately
    // NOT loaded (no ir.document(...) call): nothing below reads them.
    int docId = docs.scoreDocs[i].doc;

    // iterate over a subset of index fields
    for (int j = 0; j < fieldNames.length; ++j) {
        ir.getTermFreqVector(docId, fieldNames[j], vMapper);
    }
}
/**
 * TermVectorMapper that tallies terms into a caller-supplied map.
 *
 * Each call to {@link #map} adds one to the counter stored under the
 * Term built from the current field and the reported term text. The
 * within-document frequency, offsets and positions are ignored.
 */
private class DocTermVectorMapper extends TermVectorMapper {
    /** Caller-owned tally: Term -> Int. Never replaced after construction. */
    private final HashMap termDocCount;

    /** Field name passed to the most recent setExpectations() call. */
    private String currField;

    /**
     * @param termDocCount map that receives the counts; it is updated in
     *                     place and may already contain entries
     */
    DocTermVectorMapper(HashMap termDocCount) {
        this.termDocCount = termDocCount;
    }

    /** Offsets are never used by this mapper. */
    public boolean isIgnoringOffsets() {
        return true;
    }

    /** Positions are never used by this mapper. */
    public boolean isIgnoringPositions() {
        return true;
    }

    /**
     * Records one occurrence of {@code term} for the current field.
     *
     * @param term      term text
     * @param frequency ignored
     * @param offsets   ignored
     * @param positions ignored
     */
    public void map(String term, int frequency, TermVectorOffsetInfo[] offsets,
            int[] positions) {
        Term t = new Term(currField, term);
        // One lookup instead of containsKey() followed by get(): the key is
        // hashed and probed only once on the common "already seen" path.
        Int count = (Int) termDocCount.get(t);
        if (count == null) {
            termDocCount.put(t, new Int()); // a fresh Int starts at 1
        } else {
            count.x++;
        }
    }

    /**
     * Remembers which field the following map() calls belong to.
     *
     * @param field          name of the field whose vector is being mapped
     * @param numTerms       ignored
     * @param storeOffsets   ignored
     * @param storePositions ignored
     */
    public void setExpectations(String field, int numTerms, boolean storeOffsets,
            boolean storePositions) {
        currField = field;
    }
}
/**
 * Mutable integer holder used as a map value so a count can be bumped
 * in place. A new instance starts at 1.
 */
private class Int {
    /** Current count. */
    int x = 1;

    Int() {
    }
}