Update app.py
app.py
CHANGED
@@ -11,7 +11,7 @@ app = Flask(__name__, template_folder=os.getcwd())
 
 # Default settings
 class ChatConfig:
-    MODEL = "
+    MODEL = "google/gemma-3-27b-it"  # Change back to Gemma
     DEFAULT_SYSTEM_MSG = "You are an AI assistant answering only based on the uploaded PDF."
     DEFAULT_MAX_TOKENS = 512
     DEFAULT_TEMP = 0.3
@@ -21,7 +21,7 @@ class ChatConfig:
 HF_TOKEN = os.getenv('HUGGINGFACE_TOKEN')
 client = InferenceClient(
     ChatConfig.MODEL,
-    token=HF_TOKEN
+    token=HF_TOKEN
 )
 embed_model = SentenceTransformer("all-MiniLM-L6-v2", cache_folder="/tmp")
 vector_dim = 384  # Embedding size
@@ -39,26 +39,9 @@ def extract_text_from_pdf(pdf_stream):
 def create_vector_db(text_chunks):
     """Embeds text chunks and adds them to FAISS index"""
     global documents, index
-
-    # Reinitialize the FAISS index
-    index = faiss.IndexFlatL2(vector_dim)
-
     documents = text_chunks
     embeddings = embed_model.encode(text_chunks)
-
-    # Convert embeddings to np.float32 for FAISS
-    embeddings = np.array(embeddings, dtype=np.float32)
-
-    # Ensure that embeddings have the correct shape (should be 2D, with each vector having the right dimension)
-    if embeddings.ndim == 1:  # If only one embedding, reshape it
-        embeddings = embeddings.reshape(1, -1)
-
-    # Add embeddings to the FAISS index
-    index.add(embeddings)
-
-    # Check if adding was successful (optional)
-    if index.ntotal == 0:
-        print("Error: FAISS index is empty after adding embeddings.")
+    index.add(np.array(embeddings, dtype=np.float32))
 
 def search_relevant_text(query):
     """Finds the most relevant text chunk for the given query"""
@@ -79,34 +62,30 @@ def generate_response(
 
     context = search_relevant_text(message)  # Get relevant content from PDF
 
-
-
+    messages = [{"role": "system", "content": system_message}]
+    for user_msg, bot_msg in history:
+        if user_msg:
+            messages.append({"role": "user", "content": user_msg})
+        if bot_msg:
+            messages.append({"role": "assistant", "content": bot_msg})
 
-
-    {context}
-
-    User Question: {message} [/INST]"""
-
-    # Add conversation history if it exists
-    for prev_msg, prev_response in history:
-        prompt += f" {prev_response} </s>[INST] {prev_msg} [/INST]"
+    messages.append({"role": "user", "content": f"Context: {context}\nQuestion: {message}"})
 
     try:
-        response =
-
-
+        response = ""
+        for chunk in client.chat_completion(
+            messages,
+            max_tokens=max_tokens,
+            stream=True,
             temperature=temperature,
             top_p=top_p,
-
-
-
-
-        for chunk in response:
-            full_response += chunk
-        return full_response
+        ):
+            token = chunk.choices[0].delta.content or ""
+            response += token
+            yield response
     except Exception as e:
         print(f"Error generating response: {str(e)}")
-
+        yield "I apologize, but I encountered an error while generating the response. Please try again."
 
 @app.route('/')
 def index():
@@ -149,7 +128,7 @@ def ask_question():
     message = request.json.get('message')
     history = request.json.get('history', [])
     response = generate_response(message, history)
-    return jsonify({"response": response})
+    return jsonify({"response": "".join(response)})  # Join all streamed responses
 
 if __name__ == '__main__':
    app.run(debug=True)
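
Note on the new streaming flow (a sketch, not part of the commit): generate_response is now a generator that yields the cumulative reply after each streamed token. A caller that only needs the final text can therefore keep the last yielded value, whereas joining every yield concatenates overlapping partial strings. A minimal, self-contained illustration, using a hypothetical stand-in generator:

    # Stand-in for generate_response(message, history): each yield is the full text so far.
    def stream_of_partials():
        text = ""
        for token in ["Hel", "lo", "!"]:
            text += token
            yield text

    partials = list(stream_of_partials())    # ["Hel", "Hello", "Hello!"]
    print("".join(partials))                 # "HelHelloHello!" -- prefixes repeated
    print(partials[-1] if partials else "")  # "Hello!"         -- final reply only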
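
For context, search_relevant_text and the module-level FAISS setup sit outside the hunks above. A minimal, self-contained sketch of the embed-and-search flow that the simplified create_vector_db relies on; the sample chunks, query, and k=1 are illustrative assumptions, not code from app.py:

    import faiss
    import numpy as np
    from sentence_transformers import SentenceTransformer

    embed_model = SentenceTransformer("all-MiniLM-L6-v2", cache_folder="/tmp")
    vector_dim = 384  # embedding size of all-MiniLM-L6-v2
    index = faiss.IndexFlatL2(vector_dim)

    # Sample chunks standing in for the extracted PDF text
    documents = ["Refunds are issued within 30 days.", "Shipping takes 5-7 business days."]
    embeddings = embed_model.encode(documents)
    index.add(np.array(embeddings, dtype=np.float32))  # same call create_vector_db now makes

    # Nearest chunk by L2 distance over the embeddings
    query_vec = embed_model.encode(["How long do refunds take?"])
    _, ids = index.search(np.array(query_vec, dtype=np.float32), 1)
    print(documents[ids[0][0]])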
|