Shriharsh committed on
Commit 1f0a0b4 · verified · 1 Parent(s): 524057e

Update app.py

Files changed (1)
  1. app.py +17 -17
app.py CHANGED
@@ -1,6 +1,6 @@
 # Web Content Q&A Tool for Hugging Face Spaces
 # Optimized for memory constraints (2GB RAM) and 24-hour timeline
-# Features: Ingest up to 3 URLs, ask questions, get concise one-line answers using RoBERTa with PyTorch
+# Features: Ingest up to 3 URLs, ask questions, get concise one-line answers using DistilBERT with PyTorch
 
 import gradio as gr
 from bs4 import BeautifulSoup
@@ -31,20 +31,20 @@ corpus = [] # List of paragraphs from URLs
 embeddings = None  # Precomputed embeddings for retrieval
 sources_list = []  # Source URLs for each paragraph
 
-# Load models at startup (memory: ~410MB total)
-# Retrieval model: multi-qa-mpnet-base-dot-v1 (~110MB, 768-dim embeddings)
-retriever = SentenceTransformer('multi-qa-mpnet-base-dot-v1')
+# Load models at startup (memory: ~370MB total)
+# Retrieval model: all-mpnet-base-v2 (~110MB, 768-dim embeddings)
+retriever = SentenceTransformer('all-mpnet-base-v2')
 
 # Load PyTorch model for QA
-# Model: roberta-base-squad2 (~355MB, quantized to ~200-250MB)
+# Model: distilbert-base-uncased-distilled-squad (~260MB)
 try:
-    model = AutoModelForQuestionAnswering.from_pretrained("deepset/roberta-base-squad2")
-    tokenizer = AutoTokenizer.from_pretrained("deepset/roberta-base-squad2")
+    model = AutoModelForQuestionAnswering.from_pretrained("distilbert-base-uncased-distilled-squad")
+    tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-distilled-squad")
 except Exception as e:
     print(f"Error loading model: {str(e)}. Retrying with force_download=True...")
     # Force re-download in case of corrupted cache
-    model = AutoModelForQuestionAnswering.from_pretrained("deepset/roberta-base-squad2", force_download=True)
-    tokenizer = AutoTokenizer.from_pretrained("deepset/roberta-base-squad2", force_download=True)
+    model = AutoModelForQuestionAnswering.from_pretrained("distilbert-base-uncased-distilled-squad", force_download=True)
+    tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-distilled-squad", force_download=True)
 
 # Set model to evaluation mode
 model.eval()
@@ -116,10 +116,10 @@ def ingest_urls(urls):
 
 def answer_question(question):
     """
-    Answer a question using retrieved context and RoBERTa QA (PyTorch).
-    Retrieves top 3 paragraphs to improve answer accuracy.
-    If total context exceeds 512 tokens (RoBERTa's max length), it will be truncated automatically.
-    Rejects answers with confidence below 0.3. Ensures answers are one line (max 100 chars).
+    Answer a question using retrieved context and DistilBERT QA (PyTorch).
+    Retrieves top 2 paragraphs to improve answer accuracy.
+    If total context exceeds 512 tokens (DistilBERT's max length), it will be truncated automatically.
+    Ensures answers are one line (max 100 chars). Rejects answers with confidence below 0.3.
     """
     global corpus, embeddings, sources_list
     if not corpus or embeddings is None:
@@ -130,15 +130,15 @@ def answer_question(question):
 
     # Compute cosine similarity with stored embeddings
     cos_scores = util.cos_sim(question_embedding, embeddings)[0]
-    top_k = min(3, len(corpus))  # Get top 3 paragraphs as preferred
+    top_k = min(2, len(corpus))  # Get top 2 paragraphs to improve accuracy
     top_indices = np.argsort(-cos_scores)[:top_k]
 
-    # Retrieve context (top 3 paragraphs)
+    # Retrieve context (top 2 paragraphs)
     contexts = [corpus[i] for i in top_indices]
     context = " ".join(contexts)  # Concatenate with space
     sources = [sources_list[i] for i in top_indices]
 
-    # Extract answer with RoBERTa (PyTorch)
+    # Extract answer with DistilBERT (PyTorch)
     with torch.no_grad():  # Disable gradient computation for faster inference
         result = qa_model(question=question, context=context)
         answer = result['answer']
@@ -146,7 +146,7 @@
 
     # Check confidence threshold
     if confidence < 0.3:
-        return f"No confident answer found (confidence {confidence:.2f} below 0.3)."
+        return f"Unable to answer (confidence {confidence:.2f} below 0.3)."
 
     # Truncate answer to one line
     answer = truncate_to_one_line(answer)
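Note: the hunks above swap the checkpoint that model and tokenizer load, but the qa_model callable used later in answer_question is constructed outside these hunks. A minimal sketch of one plausible wiring, assuming the standard transformers question-answering pipeline (the pipeline call and example strings are illustrative, not from this commit):

# Sketch (assumption): qa_model as a transformers QA pipeline over the
# DistilBERT checkpoint loaded above. A QA pipeline returns a dict with
# 'answer', 'score', 'start', and 'end', which matches the
# result['answer'] / confidence usage in answer_question().
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline

model = AutoModelForQuestionAnswering.from_pretrained("distilbert-base-uncased-distilled-squad")
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-distilled-squad")
model.eval()  # inference only, as in the diff

qa_model = pipeline("question-answering", model=model, tokenizer=tokenizer)

result = qa_model(question="What RAM limit does the Space target?",
                  context="The tool is optimized for memory constraints (2GB RAM).")
print(result["answer"], result["score"])  # extracted span plus its confidence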
 
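The top_k change is easier to read with the surrounding retrieval flow in view. A self-contained sketch of that flow, assuming embeddings are precomputed with the same retriever at ingest time; the corpus strings are placeholders, not from the commit:

# Sketch (assumptions noted above): rank paragraphs by cosine similarity
# and keep the top 2, mirroring the modified lines in answer_question().
import numpy as np
from sentence_transformers import SentenceTransformer, util

retriever = SentenceTransformer('all-mpnet-base-v2')

corpus = [
    "Placeholder paragraph scraped from URL one.",
    "Placeholder paragraph scraped from URL two.",
    "Placeholder paragraph with unrelated content.",
]
embeddings = retriever.encode(corpus, convert_to_tensor=True)  # done once at ingest

question_embedding = retriever.encode("What does URL two say?", convert_to_tensor=True)
cos_scores = util.cos_sim(question_embedding, embeddings)[0]  # shape: (len(corpus),)

top_k = min(2, len(corpus))
top_indices = np.argsort(-cos_scores.cpu().numpy())[:top_k]  # highest similarity first
context = " ".join(corpus[i] for i in top_indices)  # concatenated QA context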
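Finally, the last hunk keeps calling truncate_to_one_line, which is defined elsewhere in app.py and not shown in this commit. A hypothetical implementation consistent with the docstring's one-line, 100-character contract; the body and max_chars parameter are assumptions:

# Hypothetical helper (not from the commit): collapse whitespace to a single
# line and cap the length, matching "one line (max 100 chars)" in the docstring.
def truncate_to_one_line(text: str, max_chars: int = 100) -> str:
    one_line = " ".join(text.split())  # fold newlines and runs of spaces
    if len(one_line) <= max_chars:
        return one_line
    return one_line[:max_chars - 3].rstrip() + "..."  # stay within the cap

print(truncate_to_one_line("an answer\nspanning\nseveral lines"))
# -> 'an answer spanning several lines'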