File size: 2,381 Bytes
dc75be1
8b52ce3
dc75be1
 
 
 
8b52ce3
 
 
 
 
 
dc75be1
8b52ce3
 
 
 
 
 
 
 
dc75be1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8b52ce3
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import streamlit as st
import pickle
from model.questionAnsweringBot import QuestionAnsweringBot
from model.retriever import Retriever

def _get_retriever():
    """Return the session's retriever, loading ``retriever.pkl`` on first use.

    The loaded object is cached in ``st.session_state.retriever`` so the
    pickle is only read when the key is absent from the session state.

    Raises:
        Exception: Whatever opening or unpickling ``retriever.pkl`` raised.
            The error is shown via ``st.error`` and then re-raised, so callers
            never continue with a missing retriever.
    """
    if "retriever" not in st.session_state:
        with st.spinner("Loading precomputed retriever..."):
            try:
                # SECURITY: unpickling executes code embedded in the file.
                # Only load a retriever.pkl that this project produced itself
                # (see prepare_retriever), never one from an untrusted source.
                with open("retriever.pkl", "rb") as f:
                    loaded = pickle.load(f)
            except Exception as e:
                st.error(f"Failed to load precomputed retriever: {e}")
                # Propagate the real cause; falling through would only fail
                # later on the unset ``st.session_state.retriever``.
                raise
            st.session_state.retriever = loaded
            st.success("Preloaded retriever successfully!")

    return st.session_state.retriever


def process_query(llm_key, query, retrieval_method):
    """Retrieve documents for *query* and generate an answer from them.

    Args:
        llm_key: Passed unchanged to ``QuestionAnsweringBot`` (presumably the
            LLM API key; confirm against that class).
        query: The user's question; used both for retrieval and in the prompt.
        retrieval_method: ``"BM25"`` selects BM25 retrieval; any other value
            selects semantic search.

    Returns:
        A ``(retrieved_docs, answer)`` tuple: the retriever's result and the
        value returned by ``QuestionAnsweringBot.generate_answer``.

    Raises:
        Exception: If the precomputed retriever cannot be loaded (the error is
            also reported in the UI via ``st.error``).
    """
    retriever = _get_retriever()

    if retrieval_method == "BM25":
        print("Retrieving documents using BM25...")
        retrieved_docs = retriever.retrieve_documents_bm25(query)
    else:
        print("Retrieving documents using Semantic Search...")
        retrieved_docs = retriever.retrieve_documents_semantic(query)

    bot = QuestionAnsweringBot(llm_key)
    prompt = getPrompt(retrieved_docs, query)
    answer = bot.generate_answer(prompt)

    return retrieved_docs, answer

def getPrompt(retrieved_docs, query):
    prompt = (
        "You are an LM integrated into an RAG system that answers questions based on provided documents.\n"
        "Rules:\n"
        "- Reply with the answer only and nothing but the answer.\n"
        "- Say 'I don't know' if you don't know the answer.\n"
        "- Use only the provided documents.\n"
        "- Citations are required. Include the document and chunk number in square brackets after the information (e.g., [Document 1, Chunk 2]).\n\n"
        "Documents:\n"
    )

    for i, doc in enumerate(retrieved_docs):
        prompt += f"Document {i + 1}: {doc}\n"

    prompt += f"\nQuery: {query}\n"

    return prompt

def prepare_retriever():
    """Build a ``Retriever`` from scratch and pickle it to ``retriever.pkl``.

    Runs the retriever's preparation steps in order (dataset loading, BM25
    preparation, embedding computation) and then serialises the resulting
    object to ``retriever.pkl`` in the current working directory,
    overwriting any existing file.
    """
    built = Retriever()

    # Preparation steps, in the order the retriever expects them.
    built.load_and_prepare_dataset()
    built.prepare_bm25()
    built.compute_embeddings()

    with open("retriever.pkl", "wb") as sink:
        pickle.dump(built, sink)