Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -7,6 +7,7 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
7 |
import requests
|
8 |
from twilio.rest import Client
|
9 |
|
|
|
10 |
# Flask app
|
11 |
app = Flask(__name__)
|
12 |
|
@@ -15,6 +16,35 @@ CHROMA_PATH = '/code/chroma_db'
|
|
15 |
if not os.path.exists(CHROMA_PATH):
|
16 |
os.makedirs(CHROMA_PATH)
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
# Initialize ChromaDB
|
19 |
def initialize_chroma():
|
20 |
try:
|
@@ -46,15 +76,67 @@ def download_file(url, ext):
|
|
46 |
return local_filename
|
47 |
|
48 |
# Process PDF and return text
|
|
|
|
|
|
|
|
|
49 |
def extract_text_from_pdf(pdf_filepath):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
try:
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
except Exception as e:
|
56 |
print(f"Error processing PDF: {e}")
|
57 |
-
return "Error extracting text from PDF."
|
58 |
|
59 |
# Flask route to handle WhatsApp webhook
|
60 |
@app.route('/whatsapp', methods=['POST'])
|
@@ -63,19 +145,23 @@ def whatsapp_webhook():
|
|
63 |
sender = request.values.get('From')
|
64 |
num_media = int(request.values.get('NumMedia', 0))
|
65 |
|
|
|
|
|
66 |
if num_media > 0:
|
67 |
media_url = request.values.get('MediaUrl0')
|
68 |
content_type = request.values.get('MediaContentType0')
|
69 |
|
70 |
if content_type == 'application/pdf':
|
71 |
filepath = download_file(media_url, ".pdf")
|
72 |
-
|
73 |
-
response_text =
|
74 |
else:
|
75 |
response_text = "Unsupported file type. Please upload a PDF document."
|
76 |
else:
|
77 |
-
|
|
|
78 |
|
|
|
79 |
send_message(sender, response_text)
|
80 |
return '', 204
|
81 |
|
|
|
7 |
import requests
|
8 |
from twilio.rest import Client
|
9 |
|
10 |
+
|
11 |
# Flask app
|
12 |
app = Flask(__name__)
|
13 |
|
|
|
16 |
if not os.path.exists(CHROMA_PATH):
|
17 |
os.makedirs(CHROMA_PATH)
|
18 |
|
19 |
+
from ai71 import AI71
|
20 |
+
|
21 |
+
def generate_response(query, chat_history):
    """Ask the Falcon chat model (through the AI71 client) to answer a query.

    Args:
        query: Prompt text placed in the user message.
        chat_history: Prior conversation; it is interpolated as-is into the
            user message ahead of the query.

    Returns:
        The streamed reply joined into a single string, with every "###"
        and every newline-prefixed "User:" marker removed. If the request
        or the streaming loop raises, a fixed error message is returned
        instead.
    """
    try:
        ai71_client = AI71(api_key=AI71_API_KEY)
        chat_completion = ai71_client.chat.completions.create(
            model="tiiuae/falcon-180b-chat",
            messages=[
                {"role": "system", "content": "You are the best agricultural assistant. Remember to give a response in not more than 2 sentences."},
                {"role": "user", "content": f"Answer the query based on history {chat_history}: {query}"},
            ],
            stream=True,
        )

        # Collect the streamed fragments and join once instead of growing a
        # string with += on every chunk.
        pieces = []
        for chunk in chat_completion:
            content = chunk.choices[0].delta.content
            if content:
                pieces.append(content)

        # Clean up response text
        response = "".join(pieces).replace("###", '').replace('\nUser:', '')

    except Exception as e:
        # Best-effort boundary: any failure (client setup, network, stream
        # parsing) is reported to the caller as a plain message.
        print(f"Error generating response: {e}")
        response = "An error occurred while generating the response."

    return response
|
46 |
+
|
47 |
+
|
48 |
# Initialize ChromaDB
|
49 |
def initialize_chroma():
|
50 |
try:
|
|
|
76 |
return local_filename
|
77 |
|
78 |
# Process PDF and return text
|
79 |
+
|
80 |
+
|
81 |
+
import fitz # PyMuPDF
|
82 |
+
|
83 |
def extract_text_from_pdf(pdf_filepath):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_filepath: Path of the PDF file to open with PyMuPDF (``fitz``).

    Returns:
        The text of all pages joined in page order (an empty string when no
        page yields text), or ``None`` if opening or reading the file raises.
    """
    try:
        # The context manager closes the document even when a page fails to
        # load or extract, which the explicit close() call did not guarantee.
        with fitz.open(pdf_filepath) as pdf_document:
            return "".join(page.get_text() for page in pdf_document)
    except Exception as e:
        print(f"Error extracting text from PDF: {e}")
        return None
|
95 |
+
|
96 |
+
def query_rag(query_text: str, chat_history):
    """Answer a question from the documents stored in the Chroma database.

    The five closest chunks to ``query_text`` are retrieved, joined into a
    context section, and passed together with the question to
    ``generate_response``.

    Args:
        query_text: The user's question.
        chat_history: Conversation history forwarded to ``generate_response``.

    Returns:
        The generated answer, a fixed "nothing found" message when the search
        returns no results, or a fixed error message if any step raises.
    """
    try:
        embedder = HuggingFaceEmbeddings()
        store = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedder)

        matches = store.similarity_search_with_score(query_text, k=5)
        if matches:
            # Each match is a (document, score) pair; only the text is used.
            passages = [document.page_content for document, _ in matches]
            context_text = "\n\n---\n\n".join(passages)
            return generate_response(
                f"Context:\n{context_text}\n\nQuestion:\n{query_text}",
                chat_history,
            )

        return "Sorry, I couldn't find any relevant information."
    except Exception as e:
        print(f"Error querying RAG system: {e}")
        return "An error occurred while querying the RAG system."
|
115 |
+
|
116 |
+
|
117 |
+
def save_pdf_and_update_database(pdf_filepath):
    """Extract a PDF's text, split it into chunks, and index them in Chroma.

    Args:
        pdf_filepath: Path of the PDF file to ingest.

    Returns:
        None. Progress and failures are reported with ``print``; no exception
        propagates to the caller.
    """
    try:
        text = extract_text_from_pdf(pdf_filepath)
        if not text:
            # Covers both a failed extraction (None) and a PDF with no text.
            print("Error extracting text from PDF.")
            return

        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=800,
            chunk_overlap=80,
            length_function=len,
            is_separator_regex=False,
        )
        chunks = text_splitter.split_text(text)

        embedding_function = HuggingFaceEmbeddings()
        db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embedding_function)

        # split_text() yields plain strings, so they must go through
        # add_texts(); add_documents() expects Document objects and would
        # fail here, leaving the database unchanged.
        db.add_texts(chunks)
        db.persist()
        print("PDF processed and data updated in Chroma.")
    except Exception as e:
        print(f"Error processing PDF: {e}")
|
|
|
140 |
|
141 |
# Flask route to handle WhatsApp webhook
|
142 |
@app.route('/whatsapp', methods=['POST'])
|
|
|
145 |
sender = request.values.get('From')
|
146 |
num_media = int(request.values.get('NumMedia', 0))
|
147 |
|
148 |
+
chat_history = [] # You need to handle chat history appropriately
|
149 |
+
|
150 |
if num_media > 0:
|
151 |
media_url = request.values.get('MediaUrl0')
|
152 |
content_type = request.values.get('MediaContentType0')
|
153 |
|
154 |
if content_type == 'application/pdf':
|
155 |
filepath = download_file(media_url, ".pdf")
|
156 |
+
save_pdf_and_update_database(filepath)
|
157 |
+
response_text = "PDF has been processed. You can now ask questions related to its content."
|
158 |
else:
|
159 |
response_text = "Unsupported file type. Please upload a PDF document."
|
160 |
else:
|
161 |
+
# Use RAG to generate a response based on the query
|
162 |
+
response_text = query_rag(incoming_msg, chat_history)
|
163 |
|
164 |
+
# Send the response back to the sender
|
165 |
send_message(sender, response_text)
|
166 |
return '', 204
|
167 |
|