Spaces:

gaur3009
/

QA_Bot

Sleeping

App Files Files Community

gaur3009 committed on Sep 18, 2024

Commit

eb15225

verified ·

1 Parent(s): 40781f0

Update app.py

Browse files

Files changed (1) hide show

app.py +0 -13

app.py CHANGED Viewed

@@ -5,7 +5,6 @@ import torch
 import weaviate
 import cohere
-# Initialize Weaviate and Cohere clients
 auth_config = weaviate.AuthApiKey(api_key="16LRz5YwOtnq8ov51Lhg1UuAollpsMgspulV")
 client = weaviate.Client(
     url="https://wkoll9rds3orbu9fhzfr2a.c0.asia-southeast1.gcp.weaviate.cloud",
@@ -13,7 +12,6 @@ client = weaviate.Client(
 )
 cohere_client = cohere.Client("LEvCVeZkqZMW1aLYjxDqlstCzWi4Cvlt9PiysqT8")
-# Function to extract text from uploaded PDF
 def load_pdf(file):
     reader = PyPDF2.PdfReader(file)
     text = ''
@@ -21,18 +19,15 @@ def load_pdf(file):
         text += reader.pages[page].extract_text()
     return text
-# Initialize transformer model and tokenizer
 tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
 model = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
-# Function to get embeddings for text
 def get_embeddings(text):
     inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
     with torch.no_grad():
         embeddings = model(**inputs).last_hidden_state.mean(dim=1).squeeze().cpu().numpy()
     return embeddings
-# Upload document chunks to Weaviate
 def upload_document_chunks(chunks):
     for idx, chunk in enumerate(chunks):
         embedding = get_embeddings(chunk)
@@ -42,7 +37,6 @@ def upload_document_chunks(chunks):
             vector=embedding.tolist()
         )
-# Query Weaviate for relevant document chunks
 def query_answer(query):
     query_embedding = get_embeddings(query)
     result = client.query.get("Document", ["content"])\
@@ -51,7 +45,6 @@ def query_answer(query):
                 .do()
     return result
-# Generate answer using Cohere
 def generate_response(context, query):
     response = cohere_client.generate(
         model='command',
@@ -60,24 +53,19 @@ def generate_response(context, query):
     )
     return response.generations[0].text.strip()
-# Function to handle the full pipeline: uploading PDF, generating embeddings, answering queries
 def qa_pipeline(pdf_file, query):
     document_text = load_pdf(pdf_file)
     document_chunks = [document_text[i:i+500] for i in range(0, len(document_text), 500)]
-    # Upload document chunks to Weaviate
     upload_document_chunks(document_chunks)
-    # Query Weaviate for document segments related to the query
     response = query_answer(query)
     context = ' '.join([doc['content'] for doc in response['data']['Get']['Document']])
-    # Generate response from the retrieved context
     answer = generate_response(context, query)
     return context, answer
-# Define Gradio interface with enhanced UI
 with gr.Blocks(theme="compact") as demo:
     gr.Markdown(
         """
@@ -145,5 +133,4 @@ with gr.Blocks(theme="compact") as demo:
         """
     )
-# Launch the Gradio interface
 demo.launch()

 import weaviate
 import cohere
 auth_config = weaviate.AuthApiKey(api_key="16LRz5YwOtnq8ov51Lhg1UuAollpsMgspulV")
 client = weaviate.Client(
     url="https://wkoll9rds3orbu9fhzfr2a.c0.asia-southeast1.gcp.weaviate.cloud",
 )
 cohere_client = cohere.Client("LEvCVeZkqZMW1aLYjxDqlstCzWi4Cvlt9PiysqT8")
 def load_pdf(file):
     reader = PyPDF2.PdfReader(file)
     text = ''
         text += reader.pages[page].extract_text()
     return text
 # Module-level tokenizer and encoder, both loaded from the same
 # 'sentence-transformers/all-MiniLM-L6-v2' checkpoint; get_embeddings() reads
 # these two globals to turn text into vectors.
 tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
 model = AutoModel.from_pretrained('sentence-transformers/all-MiniLM-L6-v2')
 def get_embeddings(text):
     """Embed *text* with the module-level ``tokenizer`` and ``model``.

     The text is tokenized (padded and truncated), run through the model with
     gradient tracking disabled, and the last hidden state is averaged over
     dim 1. The average is a plain mean: it is not weighted by the attention
     mask. Size-1 dimensions are then squeezed away and the result is
     returned as a NumPy array.

     Args:
         text: Input passed straight to the tokenizer. The callers visible in
             this file pass a single string.

     Returns:
         The pooled embedding as a NumPy array.
     """
     encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
     # Inference only, so skip autograd bookkeeping.
     with torch.no_grad():
         hidden_states = model(**encoded).last_hidden_state
         pooled = hidden_states.mean(dim=1)
         vector = pooled.squeeze().cpu().numpy()
     return vector
 def upload_document_chunks(chunks):
     for idx, chunk in enumerate(chunks):
         embedding = get_embeddings(chunk)
             vector=embedding.tolist()
         )
 def query_answer(query):
     query_embedding = get_embeddings(query)
     result = client.query.get("Document", ["content"])\
                 .do()
     return result
 def generate_response(context, query):
     response = cohere_client.generate(
         model='command',
     )
     return response.generations[0].text.strip()
 def qa_pipeline(pdf_file, query):
     """Run the full question-answering flow for one PDF and one query.

     Steps, in order: extract the PDF text with ``load_pdf``, cut it into
     consecutive 500-character slices, hand the slices to
     ``upload_document_chunks``, fetch matching documents with
     ``query_answer``, join their ``content`` fields with single spaces, and
     pass that context plus the query to ``generate_response``.

     Args:
         pdf_file: Forwarded unchanged to ``load_pdf``.
         query: The user's question; forwarded to ``query_answer`` and
             ``generate_response``.

     Returns:
         A ``(context, answer)`` tuple: the joined retrieved text and the
         generated answer.
     """
     full_text = load_pdf(pdf_file)
     # Fixed-width, non-overlapping slices; the final slice may be shorter.
     pieces = []
     for start in range(0, len(full_text), 500):
         pieces.append(full_text[start:start + 500])
     upload_document_chunks(pieces)
     hits = query_answer(query)['data']['Get']['Document']
     context = ' '.join(hit['content'] for hit in hits)
     answer = generate_response(context, query)
     return context, answer
 with gr.Blocks(theme="compact") as demo:
     gr.Markdown(
         """
         """
     )
 demo.launch()