Update app.py
app.py CHANGED
@@ -2,21 +2,23 @@ import gradio as gr
 import PyPDF2
 from transformers import AutoTokenizer, AutoModel
 import torch
-import
+from weaviate import WeaviateClient
+from weaviate.auth import AuthApiKey
 import cohere
 
-
-client =
-
-
+auth = AuthApiKey(api_key="7VoeYTjkOS4aHINuhllGpH4JPgE2QquFmSMn")
+client = WeaviateClient(
+    url="https://vgwhgmrlqrqqgnlb1avjaa.c0.us-west3.gcp.weaviate.cloud",
+    auth_client=auth
 )
+
 cohere_client = cohere.Client("LEvCVeZkqZMW1aLYjxDqlstCzWi4Cvlt9PiysqT8")
 
 def load_pdf(file):
     reader = PyPDF2.PdfReader(file)
     text = ''
-    for page in
-        text +=
+    for page in reader.pages:
+        text += page.extract_text()
     return text
 
 tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
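Note on the connection block above: with the 4.x Python client, Weaviate Cloud connections are usually opened through a connection helper rather than by constructing WeaviateClient directly. A minimal sketch, assuming weaviate-client >= 4.6 and with the cluster URL and API key read from environment variables instead of hard-coded strings (not part of this commit):

    # Sketch only: v4-style Weaviate Cloud connection (assumes weaviate-client >= 4.6).
    # Reads the cluster URL and API key from the environment rather than hard-coding them.
    import os
    import weaviate
    from weaviate.auth import AuthApiKey

    client = weaviate.connect_to_weaviate_cloud(
        cluster_url=os.environ["WEAVIATE_URL"],
        auth_credentials=AuthApiKey(os.environ["WEAVIATE_API_KEY"]),
    )
    # ... use client.collections exactly as in the hunks below ...
    client.close()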
@@ -29,21 +31,21 @@ def get_embeddings(text):
     return embeddings
 
 def upload_document_chunks(chunks):
-
+    doc_collection = client.collections.get("Document")
+    for chunk in chunks:
         embedding = get_embeddings(chunk)
-
-            {"content": chunk},
-            "Document",
+        doc_collection.data.insert(
+            properties={"content": chunk},
             vector=embedding.tolist()
         )
 
 def query_answer(query):
     query_embedding = get_embeddings(query)
-
-
-
-
-    return
+    response = client.collections.get("Document").query.near_vector(
+        near_vector=query_embedding.tolist(),
+        limit=3
+    )
+    return response.objects
 
 def generate_response(context, query):
     response = cohere_client.generate(
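The new upload_document_chunks and query_answer both fetch a "Document" collection, so the cluster must already contain it. A one-time bootstrap sketch with the v4 collections API (my assumption: weaviate-client 4.x, with vectors supplied client-side by the MiniLM embeddings as in the diff):

    # Hypothetical one-time setup for the "Document" collection used above.
    # Assumes the `client` object from app.py and weaviate-client 4.x.
    from weaviate.classes.config import Configure, DataType, Property

    if not client.collections.exists("Document"):
        client.collections.create(
            name="Document",
            properties=[Property(name="content", data_type=DataType.TEXT)],
            vectorizer_config=Configure.Vectorizer.none(),  # embeddings are provided client-side
        )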
@@ -58,10 +60,9 @@ def qa_pipeline(pdf_file, query):
     document_chunks = [document_text[i:i+500] for i in range(0, len(document_text), 500)]
 
     upload_document_chunks(document_chunks)
+    top_docs = query_answer(query)
 
-
-    context = ' '.join([doc['content'] for doc in response['data']['Get']['Document']])
-
+    context = ' '.join([doc.properties['content'] for doc in top_docs])
     answer = generate_response(context, query)
 
     return context, answer
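For reference, the chunking in qa_pipeline is a fixed 500-character window with no overlap; the same expression run on a short string (window shortened here purely for illustration):

    # Fixed-size character chunking, as used in qa_pipeline (window shortened for illustration).
    document_text = "Weaviate stores one vector per chunk; Cohere generates the final answer."
    chunk_size = 20  # qa_pipeline uses 500

    document_chunks = [document_text[i:i + chunk_size]
                       for i in range(0, len(document_text), chunk_size)]

    print(document_chunks)
    # ['Weaviate stores one ', 'vector per chunk; Co', 'here generates the f', 'inal answer.']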
@@ -81,17 +82,13 @@ with gr.Blocks(theme="compact") as demo:
 
     with gr.Row():
         with gr.Column(scale=1):
-            pdf_input = gr.File(label="📄 Upload PDF", file_types=[".pdf"]
-            query_input = gr.Textbox(
-                label="❓ Ask a Question",
-                placeholder="Enter your question here...",
-                lines=1
-            )
+            pdf_input = gr.File(label="📄 Upload PDF", file_types=[".pdf"])
+            query_input = gr.Textbox(label="❓ Ask a Question", placeholder="Enter your question here...")
             submit_button = gr.Button("🔍 Submit")
 
         with gr.Column(scale=2):
-            doc_segments_output = gr.Textbox(label="📑 Retrieved Document Segments",
-            answer_output = gr.Textbox(label="💬 Answer",
+            doc_segments_output = gr.Textbox(label="📑 Retrieved Document Segments", lines=10)
+            answer_output = gr.Textbox(label="💬 Answer", lines=3)
 
     submit_button.click(
         qa_pipeline,
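The hunk ends just inside submit_button.click(...); the remaining arguments are not shown in this diff. With the components defined above, the wiring would typically look like the sketch below (a hypothetical completion, not taken from the commit):

    # Hypothetical completion of the click wiring; the diff is cut off after `qa_pipeline,`.
    submit_button.click(
        qa_pipeline,
        inputs=[pdf_input, query_input],
        outputs=[doc_segments_output, answer_output],
    )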