In Java it can be done using "MatchAllDocsQuery()", but there is no PyLucene documentation that explains how it can be done.
This is the Python code that runs individual queries and then extracts all the fields from the retrieved documents.
# Filesystem path of the Lucene index directory to open (placeholder value;
# replace it with the real location before running the script).
INDEX_DIR = "directory/where/the/document/index/is/stored"
import sys, os, lucene
from java.nio.file import Paths
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.index import DirectoryReader
from org.apache.lucene.queryparser.classic import QueryParser
from org.apache.lucene.store import SimpleFSDirectory
from org.apache.lucene.search import IndexSearcher
def run(searcher, analyzer):
    """Interactively read queries and print the ``doi`` of every hit.

    Prompts on standard input until an empty line is entered. Each query is
    parsed against the ``contents`` field and at most 50 hits are fetched.

    Args:
        searcher: IndexSearcher used to execute queries and load documents.
        analyzer: Analyzer handed to the QueryParser.
    """
    # The parser does not depend on the user input, so build it only once.
    parser = QueryParser("contents", analyzer)
    while True:
        # A bare ``print`` is a no-op expression in Python 3; it has to be
        # called to actually emit the blank separator line.
        print()
        print("Hit enter with no input to quit.")
        command = input("Query:")
        if command == '':
            return
        print()
        print("Searching for:", command)
        query = parser.parse(command)
        # len(scoreDocs) is the number of hits returned, capped at 50.
        scoreDocs = searcher.search(query, 50).scoreDocs
        print("%s total matching documents." % len(scoreDocs))
        for scoreDoc in scoreDocs:
            doc = searcher.doc(scoreDoc.doc)
            # Collect every stored field of the hit as name -> string value.
            table = {field.name(): field.stringValue() for field in doc.getFields()}
            # A hit is not guaranteed to carry a 'doi' field; .get() prints
            # None for such a document instead of aborting the session.
            print(table.get('doi'))
if __name__ == '__main__':
    # PyLucene requires the JVM to be started before any Lucene class is used.
    lucene.initVM()
    print('lucene', lucene.VERSION)
    # Directory containing this script (not used by the code below).
    base_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
    directory = SimpleFSDirectory.open(Paths.get(INDEX_DIR))
    try:
        print("Directory name is given below")
        print(directory)
        reader = DirectoryReader.open(directory)
        try:
            searcher = IndexSearcher(reader)
            print(searcher)
            analyzer = StandardAnalyzer()
            # Calling the run function for execution
            run(searcher, analyzer)
        finally:
            # Dropping the searcher reference does not release the index
            # files; the reader has to be closed explicitly.
            reader.close()
    finally:
        directory.close()
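A minor change to the query can make Lucene retrieve the indexed documents without asking the user for input: replace the command variable with a fixed wildcard pattern (command = ".*."), where the asterisk is Lucene's wildcard character. Note that QueryParser applies this pattern only to its default field ("contents" here), not to every field of every document; the dedicated query for matching every document is MatchAllDocsQuery, which PyLucene exposes in org.apache.lucene.search.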
# Filesystem path of the Lucene index directory to open (placeholder value;
# replace it with the real location before running the script).
INDEX_DIR = "directory/where/the/document/index/is/stored"
import sys, os, lucene
from java.nio.file import Paths
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.index import DirectoryReader
from org.apache.lucene.queryparser.classic import QueryParser
from org.apache.lucene.store import SimpleFSDirectory
from org.apache.lucene.search import IndexSearcher
def run(searcher, analyzer):
    """Print the ``doi`` of every document stored in the index.

    Args:
        searcher: IndexSearcher over the index whose documents are listed.
        analyzer: Unused; kept so the call signature stays unchanged.
    """
    # Imported here so the function carries its own dependency.
    from org.apache.lucene.search import MatchAllDocsQuery

    # Parsing the string ".*." yields a wildcard term on the default field
    # only; MatchAllDocsQuery matches every document whatever its fields are.
    query = MatchAllDocsQuery()
    print("Searching for:", query)
    # Ask for as many hits as there are documents instead of a fixed 50.
    # search() rejects a non-positive count, hence the floor of 1 for an
    # empty index.
    limit = max(1, searcher.getIndexReader().numDocs())
    scoreDocs = searcher.search(query, limit).scoreDocs
    print("%s total matching documents." % len(scoreDocs))
    for scoreDoc in scoreDocs:
        doc = searcher.doc(scoreDoc.doc)
        # Collect every stored field of the document as name -> string value.
        table = {field.name(): field.stringValue() for field in doc.getFields()}
        # A document is not guaranteed to carry a 'doi' field; .get() prints
        # None for it instead of raising KeyError part-way through the dump.
        print(table.get('doi'))
if __name__ == '__main__':
    # PyLucene requires the JVM to be started before any Lucene class is used.
    lucene.initVM()
    print('lucene', lucene.VERSION)
    # Directory containing this script (not used by the code below).
    base_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
    directory = SimpleFSDirectory.open(Paths.get(INDEX_DIR))
    try:
        print("Directory name is given below")
        print(directory)
        reader = DirectoryReader.open(directory)
        try:
            searcher = IndexSearcher(reader)
            print(searcher)
            analyzer = StandardAnalyzer()
            # Calling the run function for execution
            run(searcher, analyzer)
        finally:
            # Dropping the searcher reference does not release the index
            # files; the reader has to be closed explicitly.
            reader.close()
    finally:
        directory.close()