File size: 3,616 Bytes
0df7243
 
 
 
 
 
 
c7ec1c2
0df7243
 
 
 
a8efd9f
0df7243
 
 
 
 
 
c2fcac9
 
 
0df7243
d8d90e3
0df7243
 
74fa4c4
0df7243
74fa4c4
 
6bfc426
0df7243
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2c81753
 
0df7243
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d8d90e3
 
0df7243
 
 
d8d90e3
0df7243
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# main.py

from fastapi import FastAPI, Request, Form
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from dotenv import load_dotenv
import os
# Load environment variables
load_dotenv()
from gliner import GLiNER
from groq import Groq
from vectordb_utils import search_vectordb, init_qdrant_collection,populate_vectordb_from_hf
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
app = FastAPI()
templates = Jinja2Templates(directory="templates")
app.mount("/static", StaticFiles(directory="static"), name="static")

# Load models
cache_dir = os.environ.get("MODEL_CACHE_DIR", "/app/cache")  # Fallback to /app/cache
os.makedirs(cache_dir, exist_ok=True)
gliner_model = GLiNER.from_pretrained("urchade/gliner_medium-v2.1",cache_dir=cache_dir)
groq_client = Groq(api_key=GROQ_API_KEY)
chat_memory=[]

def extract_entities(text):
    # Tokenize the input text first
    labels = ["PRODUCT", "ISSUE", "PROBLEM", "SERVICE"]
    
    # Predict entities
    return gliner_model.predict_entities(text, labels)

def validate_answer(user_query, retrieved_answer):
    prompt = f"""
You are a validator assistant.

Given the user query and the answer retrieved from a knowledge base, decide if the answer is relevant and correctly addresses the query.

Respond ONLY with:
- "YES" if the answer is appropriate.
- "NO" if the answer is unrelated, inaccurate, or insufficient.

User Query:
{user_query}

Retrieved Answer:
{retrieved_answer}

Is the answer appropriate?
"""
    completion = groq_client.chat.completions.create(
        model="meta-llama/llama-4-scout-17b-16e-instruct",
        messages=[{"role": "user", "content": prompt}],
        temperature=0, max_completion_tokens=10, top_p=1
    )
    return completion.choices[0].message.content.strip()

def generate_response(user_query, validated_answer):
    prompt = f"""
You are a customer support agent.

Using the following validated support answer, respond helpfully and politely to the user's query.
warning: there should not be [],<> tags in the response

User Query:
{user_query}

Support Answer:
{validated_answer}

Compose your response:
"""
    completion = groq_client.chat.completions.create(
        model="meta-llama/llama-4-scout-17b-16e-instruct",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.8, max_completion_tokens=1000, top_p=1
    )
    return completion.choices[0].message.content.strip()

@app.get("/", response_class=HTMLResponse)
async def home(request: Request):
    global chat_memory
    chat_memory = []  # Reset chat history on reload
    return templates.TemplateResponse("chat.html", {"request": request, "chat_history": []})

@app.post("/chat", response_class=HTMLResponse)
async def chat(request: Request, message: str = Form(...)):
    entities = extract_entities(message)
    entity_info = [(e['text'], e['label']) for e in entities]

    results = search_vectordb(message)
    if not results:
        bot_reply = "Sorry, I couldn't find anything helpful."
    else:
        answer = results[0].payload["response"]
        if validate_answer(message, answer) == "YES":
            bot_reply = generate_response(message, answer)
        else:
            bot_reply = "Sorry, I couldn't find a suitable answer. Please contact support."
    chat_memory.append({"sender": "User", "message": message})
    chat_memory.append({"sender": "Bot", "message": bot_reply})

    return templates.TemplateResponse("chat.html", {
        "request": request,
        "chat_history": chat_memory,
        "entities": entity_info
    })