Update app.py
app.py CHANGED
@@ -1,6 +1,6 @@
 # Web Content Q&A Tool for Hugging Face Spaces
 # Optimized for memory constraints (2GB RAM) and 24-hour timeline
-# Features: Ingest up to 3 URLs, ask questions, get concise one-line answers using
+# Features: Ingest up to 3 URLs, ask questions, get concise one-line answers using DistilBERT with PyTorch
 
 import gradio as gr
 from bs4 import BeautifulSoup
@@ -31,20 +31,20 @@ corpus = [] # List of paragraphs from URLs
 embeddings = None # Precomputed embeddings for retrieval
 sources_list = [] # Source URLs for each paragraph
 
-# Load models at startup (memory: ~
-# Retrieval model:
-retriever = SentenceTransformer('
+# Load models at startup (memory: ~370MB total)
+# Retrieval model: all-mpnet-base-v2 (~110MB, 768-dim embeddings)
+retriever = SentenceTransformer('all-mpnet-base-v2')
 
 # Load PyTorch model for QA
-# Model:
+# Model: distilbert-base-uncased-distilled-squad (~260MB)
 try:
-    model = AutoModelForQuestionAnswering.from_pretrained("
-    tokenizer = AutoTokenizer.from_pretrained("
+    model = AutoModelForQuestionAnswering.from_pretrained("distilbert-base-uncased-distilled-squad")
+    tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-distilled-squad")
 except Exception as e:
     print(f"Error loading model: {str(e)}. Retrying with force_download=True...")
     # Force re-download in case of corrupted cache
-    model = AutoModelForQuestionAnswering.from_pretrained("
-    tokenizer = AutoTokenizer.from_pretrained("
+    model = AutoModelForQuestionAnswering.from_pretrained("distilbert-base-uncased-distilled-squad", force_download=True)
+    tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-distilled-squad", force_download=True)
 
 # Set model to evaluation mode
 model.eval()
@@ -116,10 +116,10 @@ def ingest_urls(urls):
 
 def answer_question(question):
     """
-    Answer a question using retrieved context and
-    Retrieves top
-    If total context exceeds 512 tokens (
-
+    Answer a question using retrieved context and DistilBERT QA (PyTorch).
+    Retrieves top 2 paragraphs to improve answer accuracy.
+    If total context exceeds 512 tokens (DistilBERT's max length), it will be truncated automatically.
+    Ensures answers are one line (max 100 chars). Rejects answers with confidence below 0.3.
     """
     global corpus, embeddings, sources_list
     if not corpus or embeddings is None:
@@ -130,15 +130,15 @@ def answer_question(question):
 
     # Compute cosine similarity with stored embeddings
     cos_scores = util.cos_sim(question_embedding, embeddings)[0]
-    top_k = min(
+    top_k = min(2, len(corpus)) # Get top 2 paragraphs to improve accuracy
     top_indices = np.argsort(-cos_scores)[:top_k]
 
-    # Retrieve context (top
+    # Retrieve context (top 2 paragraphs)
     contexts = [corpus[i] for i in top_indices]
     context = " ".join(contexts) # Concatenate with space
     sources = [sources_list[i] for i in top_indices]
 
-    # Extract answer with
+    # Extract answer with DistilBERT (PyTorch)
     with torch.no_grad(): # Disable gradient computation for faster inference
         result = qa_model(question=question, context=context)
     answer = result['answer']
@@ -146,7 +146,7 @@ def answer_question(question):
 
     # Check confidence threshold
     if confidence < 0.3:
-        return f"
+        return f"Unable to answer (confidence {confidence:.2f} below 0.3)."
 
     # Truncate answer to one line
     answer = truncate_to_one_line(answer)
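
For reference, here is a minimal standalone sketch of the retrieve-then-read flow this commit sets up: embed the question, rank the stored paragraph embeddings by cosine similarity, concatenate the top 2 paragraphs, and run extractive QA with the same confidence cutoff. It is illustrative only: the toy corpus is invented, and it assumes qa_model is a transformers question-answering pipeline built from the model and tokenizer loaded above (the diff never shows how qa_model is actually constructed).

import numpy as np
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline

# Same models the diff loads; building qa_model via pipeline() is an assumption
retriever = SentenceTransformer('all-mpnet-base-v2')
qa_model = pipeline('question-answering', model='distilbert-base-uncased-distilled-squad')

# Toy stand-in for the paragraphs scraped from the ingested URLs
corpus = [
    "The Eiffel Tower is 330 metres tall and stands in Paris.",
    "The Louvre is the most-visited museum in the world.",
    "Paris is the capital of France.",
]
embeddings = retriever.encode(corpus, convert_to_tensor=True)

question = "How tall is the Eiffel Tower?"
question_embedding = retriever.encode(question, convert_to_tensor=True)

# Cosine-similarity retrieval, mirroring top_k = min(2, len(corpus)) in the diff
cos_scores = util.cos_sim(question_embedding, embeddings)[0]
top_k = min(2, len(corpus))
top_indices = np.argsort(-cos_scores.cpu().numpy())[:top_k]
context = " ".join(corpus[i] for i in top_indices)

result = qa_model(question=question, context=context)
if result['score'] >= 0.3: # same confidence threshold the app enforces
    print(result['answer']) # expected: something like "330 metres"
else:
    print("Unable to answer")

Joining the top two paragraphs trades a longer context for better recall, which appears to be the motivation for raising top_k to 2 in this commit.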