Update app.py
app.py CHANGED
@@ -1,6 +1,6 @@
 # Web Content Q&A Tool for Hugging Face Spaces
 # Optimized for memory constraints (2GB RAM) and 24-hour timeline
-# Features: Ingest up to 3 URLs, ask questions, get concise one-line answers using
+# Features: Ingest up to 3 URLs, ask questions, get concise one-line answers using DistilBERT with PyTorch
 
 import gradio as gr
 from bs4 import BeautifulSoup
@@ -31,20 +31,20 @@ corpus = [] # List of paragraphs from URLs
 embeddings = None # Precomputed embeddings for retrieval
 sources_list = [] # Source URLs for each paragraph
 
-# Load models at startup (memory: ~
-# Retrieval model:
-retriever = SentenceTransformer('
+# Load models at startup (memory: ~370MB total)
+# Retrieval model: all-mpnet-base-v2 (~110MB, 768-dim embeddings)
+retriever = SentenceTransformer('all-mpnet-base-v2')
 
 # Load PyTorch model for QA
-# Model:
+# Model: distilbert-base-uncased-distilled-squad (~260MB)
 try:
-    model = AutoModelForQuestionAnswering.from_pretrained("
-    tokenizer = AutoTokenizer.from_pretrained("
+    model = AutoModelForQuestionAnswering.from_pretrained("distilbert-base-uncased-distilled-squad")
+    tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-distilled-squad")
 except Exception as e:
     print(f"Error loading model: {str(e)}. Retrying with force_download=True...")
     # Force re-download in case of corrupted cache
-    model = AutoModelForQuestionAnswering.from_pretrained("
-    tokenizer = AutoTokenizer.from_pretrained("
+    model = AutoModelForQuestionAnswering.from_pretrained("distilbert-base-uncased-distilled-squad", force_download=True)
+    tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-distilled-squad", force_download=True)
 
 # Set model to evaluation mode
 model.eval()
@@ -116,10 +116,10 @@ def ingest_urls(urls):
 
 def answer_question(question):
     """
-    Answer a question using retrieved context and
-    Retrieves top
-    If total context exceeds 512 tokens (
-
+    Answer a question using retrieved context and DistilBERT QA (PyTorch).
+    Retrieves top 2 paragraphs to improve answer accuracy.
+    If total context exceeds 512 tokens (DistilBERT's max length), it will be truncated automatically.
+    Ensures answers are one line (max 100 chars). Rejects answers with confidence below 0.3.
     """
     global corpus, embeddings, sources_list
     if not corpus or embeddings is None:
@@ -130,15 +130,15 @@ def answer_question(question):
 
     # Compute cosine similarity with stored embeddings
     cos_scores = util.cos_sim(question_embedding, embeddings)[0]
-    top_k = min(
+    top_k = min(2, len(corpus)) # Get top 2 paragraphs to improve accuracy
     top_indices = np.argsort(-cos_scores)[:top_k]
 
-    # Retrieve context (top
+    # Retrieve context (top 2 paragraphs)
     contexts = [corpus[i] for i in top_indices]
     context = " ".join(contexts) # Concatenate with space
     sources = [sources_list[i] for i in top_indices]
 
-    # Extract answer with
+    # Extract answer with DistilBERT (PyTorch)
     with torch.no_grad(): # Disable gradient computation for faster inference
         result = qa_model(question=question, context=context)
     answer = result['answer']
@@ -146,7 +146,7 @@ def answer_question(question):
 
     # Check confidence threshold
     if confidence < 0.3:
-        return f"
+        return f"Unable to answer (confidence {confidence:.2f} below 0.3)."
 
     # Truncate answer to one line
     answer = truncate_to_one_line(answer)
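
For reference, here is a minimal standalone sketch of the retrieve-then-read flow this commit sets up: embed the question, rank the stored paragraph embeddings by cosine similarity, concatenate the top 2 paragraphs, and run extractive QA with the same confidence cutoff. It is illustrative only: the toy corpus is invented, and it assumes qa_model is a transformers question-answering pipeline built from the model and tokenizer loaded above (the diff never shows how qa_model is actually constructed).

import numpy as np
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline

# Same models the diff loads; building qa_model via pipeline() is an assumption
retriever = SentenceTransformer('all-mpnet-base-v2')
qa_model = pipeline('question-answering', model='distilbert-base-uncased-distilled-squad')

# Toy stand-in for the paragraphs scraped from the ingested URLs
corpus = [
    "The Eiffel Tower is 330 metres tall and stands in Paris.",
    "The Louvre is the most-visited museum in the world.",
    "Paris is the capital of France.",
]
embeddings = retriever.encode(corpus, convert_to_tensor=True)

question = "How tall is the Eiffel Tower?"
question_embedding = retriever.encode(question, convert_to_tensor=True)

# Cosine-similarity retrieval, mirroring top_k = min(2, len(corpus)) in the diff
cos_scores = util.cos_sim(question_embedding, embeddings)[0]
top_k = min(2, len(corpus))
top_indices = np.argsort(-cos_scores.cpu().numpy())[:top_k]
context = " ".join(corpus[i] for i in top_indices)

result = qa_model(question=question, context=context)
if result['score'] >= 0.3: # same confidence threshold the app enforces
    print(result['answer']) # expected: something like "330 metres"
else:
    print("Unable to answer")

Joining the top two paragraphs trades a longer context for better recall, which appears to be the motivation for raising top_k to 2 in this commit.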