from dotenv import load_dotenv
import os
import datetime
from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate, Settings
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from fastapi import FastAPI
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles

# Load environment variables
load_dotenv()

app = FastAPI()

# Serve static files (HTML, CSS, JS)
app.mount("/static", StaticFiles(directory="static"), name="static")

# Configure LlamaIndex settings
Settings.llm = HuggingFaceInferenceAPI(
    model_name="meta-llama/Meta-Llama-3-8B-Instruct",
    tokenizer_name="meta-llama/Meta-Llama-3-8B-Instruct",
    context_window=3000,
    token=os.getenv("HF_TOKEN"),
    max_new_tokens=512,
    generate_kwargs={"temperature": 0.1},
)
Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)
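# Note: the Inference API call needs HF_TOKEN in the environment (e.g. via the .env
# file loaded above), and queries must use the same embedding model that was used
# when the index was built and persisted.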

PERSIST_DIR = "db"
PDF_DIRECTORY = 'data'

# Ensure directories exist
os.makedirs(PDF_DIRECTORY, exist_ok=True)
os.makedirs(PERSIST_DIR, exist_ok=True)

def data_ingestion_from_directory():
    # Read every document in the data directory, build a vector index, and persist it
    documents = SimpleDirectoryReader(PDF_DIRECTORY).load_data()
    storage_context = StorageContext.from_defaults()
    index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
    index.storage_context.persist(persist_dir=PERSIST_DIR)

data_ingestion_from_directory()  # Rebuild the index from the PDFs at startup

# Store chat history in-memory (you could also use a database)
chat_history = []

def handle_query(query):
    # Build a summary of previous turns so answers stay consistent across the session.
    # (The original referenced context_str before it was defined, which raised a NameError.)
    history_str = "\n".join(
        f"User asked: '{msg['message']}'\nBot answered: '{msg['response']}'"
        for msg in chat_history
    )

    # {context_str} and {query_str} are left as literal placeholders; the query engine
    # fills them with the retrieved document context and the user's question.
    chat_text_qa_msgs = [
        (
            "user",
            f"""
            You are the Clara Redfernstech chatbot. Your goal is to provide accurate, professional, and helpful answers to user queries based on the company's data. Always ensure your responses are clear and concise. Give a response within 10-15 words only.
            Previous conversation:
            {history_str}
            Context:
            {{context_str}}
            Question:
            {{query_str}}
            """
        )
    ]
    text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)

    # Load the persisted index from storage
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)

    query_engine = index.as_query_engine(text_qa_template=text_qa_template)
    answer = query_engine.query(query)

    if hasattr(answer, 'response'):
        return answer.response
    elif isinstance(answer, dict) and 'response' in answer:
        return answer['response']
    else:
        return "Sorry, I couldn't find an answer."

@app.get("/", response_class=HTMLResponse)
async def read_root():
    with open("static/index.html") as f:
        return f.read()

@app.post("/chat/")
async def chat(message: str):
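    # With a plain `str` parameter, FastAPI reads `message` from the query string
    # (e.g. POST /chat/?message=...), not from the request body.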
    response = handle_query(message)
    
    message_data = {
        "sender": "User",
        "message": message,
        "response": response,
        "timestamp": datetime.datetime.now().isoformat()
    }

    # Store the interaction in chat history
    chat_history.append(message_data)

    return {"response": response}
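
# A minimal local entry point, assuming the app is launched directly rather than via
# an external `uvicorn` command; port 7860 is an assumption (the usual Hugging Face
# Spaces default), adjust as needed.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)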