Shriharsh committed on
Commit
a951dd8
·
verified ·
1 Parent(s): d2dba3a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -17,8 +17,8 @@ sources_list = [] # Source URLs for each paragraph
17
  # Load models at startup (memory: ~340MB total)
18
  # Retrieval model: all-MiniLM-L6-v2 (~80MB, 384-dim embeddings)
19
  retriever = SentenceTransformer('all-MiniLM-L6-v2')
20
- # QA model: DistilBERT fine-tuned on SQuAD (~260MB)
21
- qa_model = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")
22
 
23
  def ingest_urls(urls):
24
  """
@@ -82,7 +82,7 @@ def answer_question(question):
82
 
83
  # Compute cosine similarity with stored embeddings
84
  cos_scores = util.cos_sim(question_embedding, embeddings)[0]
85
- top_k = min(3, len(corpus)) # Get top 3 or less if fewer paragraphs
86
  top_indices = np.argsort(-cos_scores)[:top_k]
87
 
88
  # Retrieve context (top 3 paragraphs)
 
17
  # Load models at startup (memory: ~340MB total)
18
  # Retrieval model: all-MiniLM-L6-v2 (~80MB, 384-dim embeddings)
19
  retriever = SentenceTransformer('all-MiniLM-L6-v2')
20
+ # QA model: Xenova/distilbert-base-uncased-distilled-squad
21
+ qa_model = pipeline("question-answering", model="Xenova/distilbert-base-uncased-distilled-squad")
22
 
23
  def ingest_urls(urls):
24
  """
 
82
 
83
  # Compute cosine similarity with stored embeddings
84
  cos_scores = util.cos_sim(question_embedding, embeddings)[0]
85
+ top_k = min(1, len(corpus)) # Get topmost or less if fewer paragraphs
86
  top_indices = np.argsort(-cos_scores)[:top_k]
87
 
88
  # Retrieve context (top 3 paragraphs)