Pamudu13 committed
Commit 91d834f · verified · 1 parent: acda467

Update app.py

Files changed (1):
  1. app.py +21 -42
app.py CHANGED
@@ -11,7 +11,7 @@ app = Flask(__name__, template_folder=os.getcwd())
 
 # Default settings
 class ChatConfig:
-    MODEL = "mistralai/Mistral-7B-Instruct-v0.2"
+    MODEL = "google/gemma-3-27b-it"  # Change back to Gemma
     DEFAULT_SYSTEM_MSG = "You are an AI assistant answering only based on the uploaded PDF."
     DEFAULT_MAX_TOKENS = 512
     DEFAULT_TEMP = 0.3
@@ -21,7 +21,7 @@ class ChatConfig:
 HF_TOKEN = os.getenv('HUGGINGFACE_TOKEN')
 client = InferenceClient(
     ChatConfig.MODEL,
-    token=HF_TOKEN  # Add your Hugging Face token here
+    token=HF_TOKEN
 )
 embed_model = SentenceTransformer("all-MiniLM-L6-v2", cache_folder="/tmp")
 vector_dim = 384  # Embedding size
@@ -39,26 +39,9 @@ def extract_text_from_pdf(pdf_stream):
 def create_vector_db(text_chunks):
     """Embeds text chunks and adds them to FAISS index"""
     global documents, index
-
-    # Reinitialize the FAISS index
-    index = faiss.IndexFlatL2(vector_dim)
-
     documents = text_chunks
     embeddings = embed_model.encode(text_chunks)
-
-    # Convert embeddings to np.float32 for FAISS
-    embeddings = np.array(embeddings, dtype=np.float32)
-
-    # Ensure that embeddings have the correct shape (should be 2D, with each vector having the right dimension)
-    if embeddings.ndim == 1:  # If only one embedding, reshape it
-        embeddings = embeddings.reshape(1, -1)
-
-    # Add embeddings to the FAISS index
-    index.add(embeddings)
-
-    # Check if adding was successful (optional)
-    if index.ntotal == 0:
-        print("Error: FAISS index is empty after adding embeddings.")
+    index.add(np.array(embeddings, dtype=np.float32))
 
 def search_relevant_text(query):
     """Finds the most relevant text chunk for the given query"""
@@ -79,34 +62,30 @@ def generate_response(
 
     context = search_relevant_text(message)  # Get relevant content from PDF
 
-    # Format the prompt for Mistral
-    prompt = f"""<s>[INST] {system_message}
-
-    Context from the PDF:
-    {context}
-
-    User Question: {message} [/INST]"""
-
-    # Add conversation history if it exists
-    for prev_msg, prev_response in history:
-        prompt += f" {prev_response} </s>[INST] {prev_msg} [/INST]"
+    messages = [{"role": "system", "content": system_message}]
+    for user_msg, bot_msg in history:
+        if user_msg:
+            messages.append({"role": "user", "content": user_msg})
+        if bot_msg:
+            messages.append({"role": "assistant", "content": bot_msg})
+
+    messages.append({"role": "user", "content": f"Context: {context}\nQuestion: {message}"})
 
     try:
-        response = client.text_generation(
-            prompt,
-            max_new_tokens=max_tokens,
+        response = ""
+        for chunk in client.chat_completion(
+            messages,
+            max_tokens=max_tokens,
+            stream=True,
             temperature=temperature,
             top_p=top_p,
-            stream=True
-        )
-
-        full_response = ""
-        for chunk in response:
-            full_response += chunk
-        return full_response
+        ):
+            token = chunk.choices[0].delta.content or ""
+            response += token
+            yield response
     except Exception as e:
         print(f"Error generating response: {str(e)}")
-        return "I apologize, but I encountered an error while generating the response. Please try again."
+        yield "I apologize, but I encountered an error while generating the response. Please try again."
 
 @app.route('/')
 def index():
@@ -149,7 +128,7 @@ def ask_question():
     message = request.json.get('message')
     history = request.json.get('history', [])
     response = generate_response(message, history)
-    return jsonify({"response": response})
+    return jsonify({"response": "".join(response)})  # Join all streamed responses
 
 if __name__ == '__main__':
     app.run(debug=True)
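For context on the simplified create_vector_db, here is the embed-then-index flow it relies on as a standalone sketch. This is a minimal illustration, not the app's exact code: sample_chunks and the query string are made up, and it assumes faiss-cpu, numpy, and sentence-transformers are installed.

    import faiss
    import numpy as np
    from sentence_transformers import SentenceTransformer

    embed_model = SentenceTransformer("all-MiniLM-L6-v2")
    vector_dim = 384  # output size of all-MiniLM-L6-v2, as in the app
    index = faiss.IndexFlatL2(vector_dim)

    # encode() on a list of strings returns a 2D array of shape (n_chunks, 384);
    # FAISS expects float32, hence the cast before index.add().
    sample_chunks = ["First PDF chunk.", "Second PDF chunk."]  # made-up data
    embeddings = embed_model.encode(sample_chunks)
    index.add(np.array(embeddings, dtype=np.float32))

    # search() returns (distances, indices); map indices back to the stored chunks.
    query_vec = embed_model.encode(["a question about the first chunk"])
    distances, ids = index.search(np.array(query_vec, dtype=np.float32), 1)
    print(sample_chunks[ids[0][0]])

One behavioral consequence of the simplification: the deleted index = faiss.IndexFlatL2(vector_dim) line was what reset the index on each upload, so after this commit the module-level index keeps accumulating embeddings from every uploaded PDF for the lifetime of the process.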
 
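Unlike text_generation, chat_completion sends role-tagged messages and leaves the prompt formatting to the inference backend, which is why the hand-built <s>[INST] prompt could be dropped. And since generate_response is now a generator that yields the accumulated reply after each streamed token, a caller can either render each partial string as it arrives or keep only the final yield. A minimal consumption sketch (consume_stream is a hypothetical helper, not part of the app):

    # Hypothetical helper: drain a generator that yields cumulative text,
    # as generate_response does after this commit.
    def consume_stream(gen):
        final = ""
        for partial in gen:
            # Each yield is the full reply so far, so overwrite rather than append;
            # "".join(gen) would repeat earlier tokens once per yield.
            final = partial
        return final

    # e.g. for a non-streaming caller such as the /ask route:
    # response_text = consume_stream(generate_response(message, history))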