File size: 3,646 Bytes
3430157
fada25c
 
bd45407
fada25c
 
 
bd45407
fada25c
bd45407
fada25c
 
 
 
 
 
 
 
 
 
 
 
3430157
fada25c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3794682
fada25c
 
 
 
ab182a7
bd45407
fada25c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bd45407
fada25c
bd45407
fada25c
bd45407
 
 
 
bc16553
bd45407
3794682
bd45407
 
 
 
fada25c
 
 
bd45407
fada25c
 
3794682
fada25c
3794682
 
 
 
 
 
 
 
fada25c
bd45407
 
 
 
 
 
3794682
 
bd45407
3794682
 
fada25c
bd45407
fada25c
 
3794682
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import os
from dotenv import load_dotenv
import gradio as gr
from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate, Settings
from llama_index.llms.huggingface import HuggingFaceInferenceAPI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from sentence_transformers import SentenceTransformer

# Load environment variables from a local .env file (if any) before they are
# read below; HF_TOKEN is looked up via os.getenv when the LLM is configured.
load_dotenv()

# Configure the global LlamaIndex settings: the LLM used to generate answers
# and the embedding model used when building and querying the index.
Settings.llm = HuggingFaceInferenceAPI(
    model_name="google/gemma-1.1-7b-it",
    tokenizer_name="google/gemma-1.1-7b-it",
    context_window=3000,
    # os.getenv returns None when HF_TOKEN is not set.
    token=os.getenv("HF_TOKEN"),
    max_new_tokens=512,
    generate_kwargs={"temperature": 0.1},
)
Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)

# Directories (relative to the current working directory):
# PERSIST_DIR holds the persisted index, PDF_DIRECTORY holds the source files.
PERSIST_DIR = "db"
PDF_DIRECTORY = 'data'  # Directory read by SimpleDirectoryReader during ingestion

# Create both directories up front; exist_ok=True makes this a no-op when
# they are already present.
os.makedirs(PDF_DIRECTORY, exist_ok=True)
os.makedirs(PERSIST_DIR, exist_ok=True)

def data_ingestion_from_directory():
    """Build a vector index from the files in PDF_DIRECTORY and persist it.

    Every file found by ``SimpleDirectoryReader`` in ``PDF_DIRECTORY`` is
    loaded, indexed with ``VectorStoreIndex.from_documents`` and the index's
    storage context is written to ``PERSIST_DIR``.

    Returns:
        None. The result is the persisted index on disk.
    """
    # NOTE(review): SimpleDirectoryReader presumably raises when the
    # directory contains no readable files -- confirm and decide whether the
    # caller should handle that case.
    documents = SimpleDirectoryReader(PDF_DIRECTORY).load_data()

    # from_documents creates its own storage context, so no separate
    # StorageContext needs to be built here.
    index = VectorStoreIndex.from_documents(documents)
    index.storage_context.persist(persist_dir=PERSIST_DIR)

def handle_query(query):
    """Answer ``query`` from the persisted index and record the exchange.

    The index is loaded from ``PERSIST_DIR``, queried with a custom chat
    prompt, and the resulting text is appended to the module-level
    ``chat_history`` list as a ``(query, response)`` tuple.

    Args:
        query: The user's question, passed unchanged to the query engine.

    Returns:
        The answer text, or a fixed apology message when the query engine
        returns no usable text (missing, ``None`` or empty response).
    """
    fallback_message = "Sorry, I couldn't find an answer."

    chat_text_qa_msgs = [
        (
            "user",
            """
            You are a RedfernsTech chatbot whose aim is to provide better service to the user, utilizing provided context to deliver answers.
            and collect the some basic information first also name, email, company name
            {context_str}
            Question:
            {query_str}
            """
        )
    ]
    text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)

    # Load index from storage (re-read on every call, so a re-ingested index
    # is picked up without restarting the app).
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)

    query_engine = index.as_query_engine(text_qa_template=text_qa_template)
    answer = query_engine.query(query)

    # Accept either an object exposing ``.response`` or a dict with a
    # "response" key, in that order of preference.
    response = getattr(answer, "response", None)
    if response is None and isinstance(answer, dict):
        response = answer.get("response")

    # The response text may be None or empty; never hand that back to the
    # UI or store it in the history -- use the fallback message instead.
    if not response:
        response = fallback_message

    # Append the query and response to chat history
    chat_history.append((query, response))

    return response

# Module-level chat history: a list of (query, response) tuples that
# handle_query appends to on every call.
chat_history = []

# Build and persist the index from PDF_DIRECTORY once at startup, so that
# handle_query can load it from PERSIST_DIR.
print("Processing PDF ingestion from directory:", PDF_DIRECTORY)
data_ingestion_from_directory()

# Example query, executed once at startup.
# NOTE(review): because it goes through handle_query, this exchange is also
# appended to chat_history and will therefore appear in the history shown to
# users -- confirm this is intended.
query = "How do I use the RedfernsTech Q&A assistant?"
print("Query:", query)
response = handle_query(query)
print("Answer:", response)

# Define the input and output components for the Gradio interface.
# Input: a single unlabeled textbox with a placeholder hint for the question.
input_component = gr.Textbox(
    show_label=False,
    placeholder="Ask me anything about the document..."
)

# Output: a plain textbox that receives the string returned by the handler.
output_component = gr.Textbox()

# Function to add chat history to output
def chat_with_history(query):
    """Answer ``query`` and return the answer followed by the full history.

    The query is answered via ``handle_query`` (which also records it in
    ``chat_history``); the returned string contains the latest answer, a
    "Chat History:" heading, and every recorded exchange in order.
    """
    latest_answer = handle_query(query)

    # Render each recorded (query, answer) pair as its own paragraph.
    rendered_turns = (
        f"Query:\n{q}\nAnswer:\n{a}" for q, a in chat_history
    )
    transcript = "\n\n".join(rendered_turns)

    return f"{latest_answer}\n\nChat History:\n\n{transcript}"

# Create the Gradio interface: each submission of the input textbox is passed
# to chat_with_history and the returned string is shown in the output textbox.
interface = gr.Interface(
    fn=chat_with_history,
    inputs=input_component,
    outputs=output_component,
    title="RedfernsTech Q&A Chatbot",
    description="Ask me anything about the uploaded document. View chat history below."
)

# Launch the Gradio interface.
# NOTE(review): this runs at module top level (no `if __name__ == "__main__"`
# guard), so importing this module also starts the app -- confirm intended.
interface.launch()