full-text-search lucene.net

Lucene.net Negation clause is not working


I am very new to Lucene.net, and I am not able to achieve a basic piece of functionality, i.e. a "not in" (negation) search.

My requirement is to search for "road?construction" while excluding results that contain the word "Works".

e.g.

Main Road Construction Works -- Invalid

Road Construction And Maintenance Services -- Valid (doesn't contain the word Works)

Please refer to my code below.

// Phrase query text: "road?construction" in quotes, followed by a trailing '*'.
string searchQuery = "\"road?construction\"*";
// Outer boolean query that combines the parsed phrase with the exclusion clause.
BooleanQuery query2 = new BooleanQuery();
Query query;

// Parse the raw text first; if the parser rejects the syntax, retry with the
// query-syntax special characters escaped so they are treated literally.
try
{
  query = parser.Parse(searchQuery.Trim());
}
catch (ParseException)
{
  query = parser.Parse(QueryParser.Escape(searchQuery.Trim()));
                    }
  // The parsed query is added as an optional (SHOULD) clause.
  query2.Add(query,Occur.SHOULD);
  // Exclusion clause: prohibit documents whose "Name" field contains the exact term "Works".
  // NOTE(review): TermQuery is not analyzed, so the term text is matched as written. If the
  // "Name" field is indexed with a lower-casing analyzer (e.g. StandardAnalyzer), the indexed
  // term would be "works" and this capitalized term would never match — confirm against the
  // indexing code, which is not shown here.
  query2.Add(new BooleanClause(new TermQuery (new Term("Name", "Works")), Occur.MUST_NOT));

This still returns both of the above-mentioned records in the search result. I want to exclude the invalid record (the first one).

enter image description here

Here is the result query generated in backend.

Please suggest a workaround.

Thanks in advance.


Solution

  • Not sure why you're putting wildcard characters into the phrase. If you're looking for "road construction" then that's all you need. If you want to allow some variations, then maybe a "slop phrase" is what you need, i.e. "road construction"~2. The number part allows for n "operations", like n additional words in between.

    Here's a set of tests that show your examples (TestExpr2, TestExpr3) and some working variations (TestExpr1 and TestQuery).

    Hope this helps

    /// <summary>
    /// Tests that search a two-document in-memory index for the phrase "road construction"
    /// while excluding documents containing the term "works". Every test expects exactly one
    /// hit: the document without "Works".
    /// </summary>
    [TestClass]
    public class UnitTest7
    {
        /// <summary>Name of the single indexed and stored field.</summary>
        private const string FieldName = "name";

        /// <summary>Document that must be excluded because it contains "Works".</summary>
        private const string InvalidDocument = "Main Road Construction Works";

        /// <summary>Document that must be the only search result.</summary>
        private const string ValidDocument = "Road Construction And Maintenance Services";

        /// <summary>Maximum number of hits collected per search.</summary>
        private const int MaxHits = 10;

        /// <summary>Plain phrase plus a prohibited term, using query parser syntax.</summary>
        [TestMethod]
        public void TestExpr1()
        {
            TestExpr("\"road construction\" -works");
        }

        /// <summary>
        /// The question's expression (wildcard characters in and after the phrase) with the
        /// prohibited term appended. The accompanying text lists this as one of the question's
        /// examples, not as one of the working variations.
        /// </summary>
        [TestMethod]
        public void TestExpr2()
        {
            TestExpr("\"road?construction\"* -works");
        }

        /// <summary>
        /// Same expression as <see cref="TestExpr2"/>, passed through
        /// <c>QueryParser.Escape</c> first. Also listed as one of the question's examples.
        /// </summary>
        [TestMethod]
        public void TestExpr3()
        {
            TestExpr(QueryParser.Escape("\"road?construction\"* -works"));
        }

        /// <summary>
        /// Builds the fixture index, runs <paramref name="expr"/> through the query parser and
        /// asserts that only the valid document is returned.
        /// </summary>
        /// <param name="expr">Query expression in query parser syntax.</param>
        private void TestExpr(string expr)
        {
            // 'using' guarantees writer, reader and searcher are released even when the
            // parser throws or an assertion fails.
            using (var writer = CreateIndex())
            {
                Seed(writer);
                using (var reader = writer.GetReader())
                using (var searcher = new IndexSearcher(reader))
                {
                    var result = Search(searcher, expr);

                    AssertOnlyValidDocument(result);
                }
            }
        }

        /// <summary>
        /// Programmatic equivalent of <see cref="TestExpr1"/>: a required phrase query combined
        /// with a prohibited term query.
        /// </summary>
        [TestMethod]
        public void TestQuery()
        {
            using (var writer = CreateIndex())
            {
                Seed(writer);
                using (var reader = writer.GetReader())
                using (var searcher = new IndexSearcher(reader))
                {
                    // Terms are given in lower case: they are not analyzed here, so they must
                    // match the lower-cased terms StandardAnalyzer wrote to the index.
                    var phrase = new PhraseQuery();
                    phrase.Add(new Term(FieldName, "road"));
                    phrase.Add(new Term(FieldName, "construction"));

                    var query = new BooleanQuery();
                    query.Add(phrase, Occur.MUST);
                    query.Add(new TermQuery(new Term(FieldName, "works")), Occur.MUST_NOT);

                    var result = Search(searcher, query);

                    AssertOnlyValidDocument(result);
                }
            }
        }

        /// <summary>Asserts that the result holds exactly the valid fixture document.</summary>
        /// <param name="result">Stored field values of the matching documents.</param>
        private static void AssertOnlyValidDocument(List<string> result)
        {
            Assert.AreEqual(1, result.Count);
            Assert.IsTrue(result.Contains(ValidDocument));
        }

        /// <summary>Adds both fixture documents to the index and flushes them.</summary>
        /// <param name="writer">Writer of the index to populate.</param>
        private void Seed(IndexWriter writer)
        {
            Add(writer, InvalidDocument);
            Add(writer, ValidDocument);
            writer.Flush(true, true, true);
        }

        /// <summary>
        /// Parses <paramref name="expr"/> against the default field and runs the search.
        /// </summary>
        /// <param name="searcher">Searcher over the fixture index.</param>
        /// <param name="expr">Query expression in query parser syntax.</param>
        /// <returns>Stored field values of the matching documents.</returns>
        private List<string> Search(IndexSearcher searcher, string expr)
        {
            var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
            var queryParser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, FieldName, analyzer);
            var query = queryParser.Parse(expr);
            return Search(searcher, query);
        }

        /// <summary>
        /// Runs <paramref name="query"/> and collects the stored field value of each hit,
        /// up to <see cref="MaxHits"/> hits.
        /// </summary>
        /// <param name="searcher">Searcher over the fixture index.</param>
        /// <param name="query">Query to execute.</param>
        /// <returns>Stored field values of the matching documents.</returns>
        private List<string> Search(IndexSearcher searcher, Query query)
        {
            var collector = TopScoreDocCollector.Create(MaxHits, true);
            searcher.Search(query, collector);

            var result = new List<string>();
            foreach (var hit in collector.TopDocs().ScoreDocs)
            {
                var doc = searcher.Doc(hit.Doc);
                result.Add(doc.GetField(FieldName).StringValue);
            }

            return result;
        }

        /// <summary>Creates a writer over a fresh in-memory index. The caller disposes it.</summary>
        /// <returns>A new <c>IndexWriter</c> backed by a <c>RAMDirectory</c>.</returns>
        private IndexWriter CreateIndex()
        {
            var directory = new RAMDirectory();
            var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
            return new IndexWriter(directory, analyzer, new IndexWriter.MaxFieldLength(1000));
        }

        /// <summary>Indexes <paramref name="text"/> as one document with a stored, analyzed field.</summary>
        /// <param name="writer">Writer of the target index.</param>
        /// <param name="text">Field content to index.</param>
        private void Add(IndexWriter writer, string text)
        {
            var document = new Document();
            document.Add(new Field(FieldName, text, Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(document);
        }
    }