#!/usr/bin/env python

# Name of the on-disk Lucene index; it is looked up in the directory that
# contains this script (see the index_dir setup below).
INDEX_FILENAME = "IndexFiles.index"

import sys, os, lucene

from java.io import File
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.index import DirectoryReader
from org.apache.lucene.queryparser.classic import QueryParser
from org.apache.lucene.store import SimpleFSDirectory
from org.apache.lucene.search import IndexSearcher
from org.apache.lucene.util import Version
from org.apache.lucene.search import ScoreDoc
from org.apache.lucene.search.similarities import BM25Similarity
from org.apache.lucene.search.similarities import TFIDFSimilarity

def run(searcher, analyzer, runtag):
    # open the standard input to read queries
    fin = open('/dev/stdin','r')
    # read the first queryes
    line = fin.readline()
    # while the file is not finished
    while len(line)>0:
        # split in query id and query text
        string = line.split("\t");
        # strip newline from query text
        string[len(string)-1] = string[len(string)-1].strip()
        query_text = string[1]
        query = QueryParser(Version.LUCENE_CURRENT, 
                            "contents",
                            analyzer).parse(query_text)
        # get the top 1000 documents
        topDocs = searcher.search(query, 1000)
        rank = 1
        # for each retrieved doc
        scoreDocs = topDocs.scoreDocs
        for scoreDoc in scoreDocs:
            # get doc info
            doc = searcher.doc(scoreDoc.doc)
            # remove ".txt" from the doc name
            docname = (doc.get("name").split("."))[0]
            print string[0],'\tQ0\t',docname,'\t',rank,'\t',scoreDoc.score,'\t',runtag
            rank = rank+1
        # read next query
        line = fin.readline()

# The run tag is the third command-line argument.  Check for it up front so
# that a missing argument is reported clearly, before the JVM is started and
# the index is opened.
# NOTE(review): the first two arguments are not read anywhere in this part
# of the file - confirm what the calling scripts pass there.
if len(sys.argv) < 4:
    sys.exit("error: missing run tag (expected as the third command-line "
             "argument); queries are read from standard input")
runtag = sys.argv[3]

# init Java Virtual Machine
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
# get the absolute path of this script
base_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
# get the absolute path of the index
index_dir = SimpleFSDirectory(File(os.path.join(base_dir, INDEX_FILENAME)))
# open the index; keep a handle on the reader so it can be closed afterwards
reader = DirectoryReader.open(index_dir)
# define a searcher for the index
searcher = IndexSearcher(reader)
# set the standard query analyzer
analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
try:
    # run searcher using the analyzer
    run(searcher, analyzer, runtag)
finally:
    # Dropping the searcher does not close the underlying reader or
    # directory, so release them explicitly, even if a query failed.
    del searcher
    reader.close()
    index_dir.close()