Spaces:

Shriharsh
/

Web_Content_QA

Sleeping

App Files Files Community

Shriharsh committed on Mar 20

Commit

aaaa3f2

verified ·

1 Parent(s): a951dd8

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -7

app.py CHANGED Viewed

@@ -1,13 +1,15 @@
 # Web Content Q&A Tool for Hugging Face Spaces
 # Optimized for memory constraints (2GB RAM) and 24-hour timeline
-# Features: Ingest up to 3 URLs, ask questions, get concise answers using DistilBERT
 import gradio as gr
 from bs4 import BeautifulSoup
 import requests
 from sentence_transformers import SentenceTransformer, util
 import numpy as np
-from transformers import pipeline
 # Global variables for in-memory storage (reset on app restart)
 corpus = []  # List of paragraphs from URLs
@@ -17,8 +19,13 @@ sources_list = []  # Source URLs for each paragraph
 # Load models at startup (memory: ~340MB total)
 # Retrieval model: all-MiniLM-L6-v2 (~80MB, 384-dim embeddings)
 retriever = SentenceTransformer('all-MiniLM-L6-v2')
-# QA model: Xenova/distilbert-base-uncased-distilled-squad
-qa_model = pipeline("question-answering", model="Xenova/distilbert-base-uncased-distilled-squad")
 def ingest_urls(urls):
     """
@@ -69,7 +76,7 @@ def ingest_urls(urls):
 def answer_question(question):
     """
-    Answer a question using retrieved context and DistilBERT QA.
     Retrieves top 3 paragraphs to provide broader context for cross-questioning.
     If total context exceeds 512 tokens (DistilBERT's max length), it will be truncated automatically.
     """
@@ -82,7 +89,7 @@ def answer_question(question):
     # Compute cosine similarity with stored embeddings
     cos_scores = util.cos_sim(question_embedding, embeddings)[0]
-    top_k = min(1, len(corpus))  # Get topmost or less if fewer paragraphs
     top_indices = np.argsort(-cos_scores)[:top_k]
     # Retrieve context (top 3 paragraphs)
@@ -90,7 +97,7 @@ def answer_question(question):
     context = " ".join(contexts)  # Concatenate with space
     sources = [sources_list[i] for i in top_indices]
-    # Extract answer with DistilBERT
     # Note: If total tokens exceed 512, it will be truncated automatically
     result = qa_model(question=question, context=context)
     answer = result['answer']

 # Web Content Q&A Tool for Hugging Face Spaces
 # Optimized for memory constraints (2GB RAM) and 24-hour timeline
+# Features: Ingest up to 3 URLs, ask questions, get concise answers using DistilBERT with ONNX
 import gradio as gr
 from bs4 import BeautifulSoup
 import requests
 from sentence_transformers import SentenceTransformer, util
 import numpy as np
+from optimum.onnxruntime import ORTModelForQuestionAnswering
+from transformers import AutoTokenizer
+from optimum.pipelines import pipeline
 # Global variables for in-memory storage (reset on app restart)
 corpus = []  # List of paragraphs from URLs
 # Load models at startup (memory: ~340MB total)
 # Retrieval model: all-MiniLM-L6-v2 (~80MB, 384-dim embeddings)
 retriever = SentenceTransformer('all-MiniLM-L6-v2')
+# Load ONNX model for QA using optimum.onnxruntime
+# Model: Xenova/distilbert-base-uncased-distilled-squad (~260MB)
+# Use ORTModelForQuestionAnswering to load the ONNX model
+model = ORTModelForQuestionAnswering.from_pretrained("Xenova/distilbert-base-uncased-distilled-squad")
+tokenizer = AutoTokenizer.from_pretrained("Xenova/distilbert-base-uncased-distilled-squad")
+qa_model = pipeline("question-answering", model=model, tokenizer=tokenizer, framework="ort")
 def ingest_urls(urls):
     """
 def answer_question(question):
     """
+    Answer a question using retrieved context and DistilBERT QA (ONNX).
     Retrieves top 3 paragraphs to provide broader context for cross-questioning.
     If total context exceeds 512 tokens (DistilBERT's max length), it will be truncated automatically.
     """
     # Compute cosine similarity with stored embeddings
     cos_scores = util.cos_sim(question_embedding, embeddings)[0]
+    top_k = min(2, len(corpus))  # Get top 3 or less if fewer paragraphs
     top_indices = np.argsort(-cos_scores)[:top_k]
     # Retrieve context (top 3 paragraphs)
     context = " ".join(contexts)  # Concatenate with space
     sources = [sources_list[i] for i in top_indices]
+    # Extract answer with DistilBERT (ONNX)
     # Note: If total tokens exceed 512, it will be truncated automatically
     result = qa_model(question=question, context=context)
     answer = result['answer']