FAQ
Hi all,

say I have an Index with one field named "category". There are two documents one with value "(testvalue)" and one with value "test value".
Now someone searches for "test". My search engine uses org.apache.lucene.search.PrefixQuery and finds 2 documents. Maybe he expected only one hit; however, if he searches for "(test" and the search engine uses QueryParser.escape to clean the request and passes the result to a PrefixQuery, the search returns nothing.

How can I search for the document "(testvalue)" and only this one?

Thx!





package foo.bar;

import java.io.IOException;

import junit.framework.TestCase;

import org.apache.log4j.Logger;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;



/**
 * Reproduction case for the question in this thread: prefix searches on a
 * "category" field whose values contain special characters such as "(".
 *
 * Two documents are indexed, "(testvalue)" and "test value", and two
 * PrefixQuery searches are run against them. The second assertion is the
 * one the poster reports as failing.
 */
public class TestPrefixQuery extends TestCase {
/**
 * Indexes the two sample documents with StandardAnalyzer, then searches the
 * "category" field with the prefix "test" and with the escaped form of
 * "(test".
 *
 * NOTE(review): per the reply in this thread, StandardAnalyzer strips the
 * parentheses at index time (WhitespaceAnalyzer does not), which is the
 * suggested reason both documents match the prefix "test".
 *
 * NOTE(review): per the poster's follow-up, QueryParser.escape(...) should
 * not be used when building a PrefixQuery directly.
 *
 * @throws CorruptIndexException declared for the index write/search calls
 * @throws LockObtainFailedException declared for opening the IndexWriter
 * @throws IOException declared for the index write/search calls
 */
public void testEscapeAndPrefix() throws CorruptIndexException,
LockObtainFailedException, IOException {

// Build the index in a RAMDirectory, analyzing field values with StandardAnalyzer.
RAMDirectory directory = new RAMDirectory();
IndexWriter writer = new IndexWriter(directory, new StandardAnalyzer(),
true, IndexWriter.MaxFieldLength.LIMITED);
// First document: value wrapped in parentheses.
Document doc = new Document();
doc.add(new Field("category", "(testvalue)", Field.Store.YES,
Field.Index.ANALYZED));
writer.addDocument(doc);
// Second document: two plain words.
doc = new Document();
doc.add(new Field("category", "test value", Field.Store.YES,
Field.Index.ANALYZED));
writer.addDocument(doc);
writer.close();

// Case 1: plain prefix "test". The assertion expects 2 hits even though
// its message says "One for"; the poster would prefer a single hit here.
String value= "test";
PrefixQuery query = new PrefixQuery(new Term("category", value));
//log.debug(query.toString());
IndexSearcher searcher = new IndexSearcher(directory);
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("One for " + value , 2, hits.length); //I want one for this?!

// Case 2: prefix "(test", passed through QueryParser.escape before being
// placed into the Term. The escaped string is used as the raw prefix,
// not parsed by a QueryParser.
value= "(test";
String escaped = QueryParser.escape(value);
query = new PrefixQuery(new Term("category", escaped));
//log.debug(query.toString());
hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("One for " + value + "/" + escaped, 1, hits.length); //FAILS!
}
}

--
GRATIS für alle GMX-Mitglieder: Die maxdome Movie-FLAT!
Jetzt freischalten unter http://portal.gmx.net/de/go/maxdome01

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Search Discussions

  • Steven A Rowe at Apr 14, 2010 at 12:29 pm
    Hi Franz,

    The likely problem is that you're using an index-time analyzer that strips out the parentheses. StandardAnalyzer, for example, does this; WhitespaceAnalyzer does not.

    Remember that hits are the result of matches between index-analyzed terms and query-analyzed terms. Except in the case of synonyms, most people will want their index and query analyzers to be the same.

    Steve

    ________________________________________
    From: Franz Roth [[email protected]]
    Sent: Wednesday, April 14, 2010 7:42 AM
    To: [email protected]
    Subject: PrefixQuery and special characters

    Hi all,

    say I have an Index with one field named "category". There are two documents one with value "(testvalue)" and one with value "test value".
    Now someone searches for "test". My search engine uses org.apache.lucene.search.PrefixQuery and finds 2 documents. Maybe he expected only one hit; however, if he searches for "(test" and the search engine uses QueryParser.escape to clean the request and passes the result to a PrefixQuery, the search returns nothing.

    How can I search for the document "(testvalue)" and only this one?

    Thx!





    package foo.bar;

    import java.io.IOException;

    import junit.framework.TestCase;

    import org.apache.log4j.Logger;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.CorruptIndexException;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.queryParser.QueryParser;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.PrefixQuery;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.store.LockObtainFailedException;
    import org.apache.lucene.store.RAMDirectory;



    public class TestPrefixQuery extends TestCase {
    public void testEscapeAndPrefix() throws CorruptIndexException,
    LockObtainFailedException, IOException {

    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new StandardAnalyzer(),
    true, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    doc.add(new Field("category", "(testvalue)", Field.Store.YES,
    Field.Index.ANALYZED));
    writer.addDocument(doc);
    doc = new Document();
    doc.add(new Field("category", "test value", Field.Store.YES,
    Field.Index.ANALYZED));
    writer.addDocument(doc);
    writer.close();

    String value= "test";
    PrefixQuery query = new PrefixQuery(new Term("category", value));
    //log.debug(query.toString());
    IndexSearcher searcher = new IndexSearcher(directory);
    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
    assertEquals("One for " + value , 2, hits.length); //I want one for this?!

    value= "(test";
    String escaped = QueryParser.escape(value);
    query = new PrefixQuery(new Term("category", escaped));
    //log.debug(query.toString());
    hits = searcher.search(query, null, 1000).scoreDocs;
    assertEquals("One for " + value + "/" + escaped, 1, hits.length); //FAILS!
    }
    }

    --
    GRATIS für alle GMX-Mitglieder: Die maxdome Movie-FLAT!
    Jetzt freischalten unter http://portal.gmx.net/de/go/maxdome01

    ---------------------------------------------------------------------
    To unsubscribe, e-mail: [email protected]
    For additional commands, e-mail: [email protected]
    ---------------------------------------------------------------------
    To unsubscribe, e-mail: [email protected]
    For additional commands, e-mail: [email protected]
  • Franz Roth at Apr 15, 2010 at 1:04 pm
    Tnx Steve,
    this solved the minor problem (finding two documents).
    The major problem (and the customer's bug report) was that searching for "(test" found nothing. Now solved: indeed, I should not use QueryParser.escape(...) when building a PrefixQuery.
    kind regards


    -------- Original-Nachricht --------
    Datum: Wed, 14 Apr 2010 08:24:38 -0400
    Von: Steven A Rowe <[email protected]>
    An: "java-[email protected]" <[email protected]>
    Betreff: RE: PrefixQuery and special characters
    Hi Franz,

    The likely problem is that you're using an index-time analyzer that strips
    out the parentheses. StandardAnalyzer, for example, does this;
    WhitespaceAnalyzer does not.

    Remember that hits are the result of matches between index-analyzed terms
    and query-analyzed terms. Except in the case of synonyms, most people will
    want their index and query analyzers to be the same.

    Steve

    ________________________________________
    From: Franz Roth [[email protected]]
    Sent: Wednesday, April 14, 2010 7:42 AM
    To: [email protected]
    Subject: PrefixQuery and special characters

    Hi all,

    say I have an Index with one field named "category". There are two
    documents one with value "(testvalue)" and one with value "test value".
    Now someone searches for "test". My search engine uses
    org.apache.lucene.search.PrefixQuery and finds 2 documents. Maybe he expected only one hit;
    however, if he searches for "(test" and the search engine uses
    QueryParser.escape to clean the request and passes the result to a PrefixQuery, the search
    returns nothing.

    How can I search for the document "(testvalue)" and only this one?

    Thx!





    package foo.bar;

    import java.io.IOException;

    import junit.framework.TestCase;

    import org.apache.log4j.Logger;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.CorruptIndexException;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.queryParser.QueryParser;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.PrefixQuery;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.store.LockObtainFailedException;
    import org.apache.lucene.store.RAMDirectory;



    public class TestPrefixQuery extends TestCase {
    public void testEscapeAndPrefix() throws CorruptIndexException,
    LockObtainFailedException, IOException {

    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new
    StandardAnalyzer(),
    true, IndexWriter.MaxFieldLength.LIMITED);
    Document doc = new Document();
    doc.add(new Field("category", "(testvalue)", Field.Store.YES,
    Field.Index.ANALYZED));
    writer.addDocument(doc);
    doc = new Document();
    doc.add(new Field("category", "test value", Field.Store.YES,
    Field.Index.ANALYZED));
    writer.addDocument(doc);
    writer.close();

    String value= "test";
    PrefixQuery query = new PrefixQuery(new Term("category",
    value));
    //log.debug(query.toString());
    IndexSearcher searcher = new IndexSearcher(directory);
    ScoreDoc[] hits = searcher.search(query, null,
    1000).scoreDocs;
    assertEquals("One for " + value , 2, hits.length); //I want
    one for this?!

    value= "(test";
    String escaped = QueryParser.escape(value);
    query = new PrefixQuery(new Term("category", escaped));
    //log.debug(query.toString());
    hits = searcher.search(query, null, 1000).scoreDocs;
    assertEquals("One for " + value + "/" + escaped, 1,
    hits.length); //FAILS!
    }
    }

    --
    GRATIS für alle GMX-Mitglieder: Die maxdome Movie-FLAT!
    Jetzt freischalten unter http://portal.gmx.net/de/go/maxdome01

    ---------------------------------------------------------------------
    To unsubscribe, e-mail: [email protected]
    For additional commands, e-mail: [email protected]
    ---------------------------------------------------------------------
    To unsubscribe, e-mail: [email protected]
    For additional commands, e-mail: [email protected]
    --
    GRATIS für alle GMX-Mitglieder: Die maxdome Movie-FLAT!
    Jetzt freischalten unter http://portal.gmx.net/de/go/maxdome01

    ---------------------------------------------------------------------
    To unsubscribe, e-mail: [email protected]
    For additional commands, e-mail: [email protected]

Related Discussions

Discussion Navigation
viewthread | post
Discussion Overview
groupjava-user @
categorieslucene
postedApr 14, '10 at 11:42a
activeApr 15, '10 at 1:04p
posts3
users2
websitelucene.apache.org

2 users in discussion

Franz Roth: 2 posts Steven A Rowe: 1 post

People

Translate

site design / logo © 2023 Grokbase