Update app.py
app.py CHANGED
@@ -22,8 +22,11 @@ retriever = SentenceTransformer('all-MiniLM-L6-v2')
 
 # Load ONNX model for QA using optimum.onnxruntime
 # Model: Xenova/distilbert-base-uncased-distilled-squad (~260MB)
-#
-model = ORTModelForQuestionAnswering.from_pretrained(
+# Specify file_name="model.onnx" to select the correct ONNX file
+model = ORTModelForQuestionAnswering.from_pretrained(
+    "Xenova/distilbert-base-uncased-distilled-squad",
+    file_name="model.onnx"
+)
 tokenizer = AutoTokenizer.from_pretrained("Xenova/distilbert-base-uncased-distilled-squad")
 qa_model = pipeline("question-answering", model=model, tokenizer=tokenizer, framework="ort")
 
@@ -89,7 +92,7 @@ def answer_question(question):
 
     # Compute cosine similarity with stored embeddings
     cos_scores = util.cos_sim(question_embedding, embeddings)[0]
-    top_k = min(2, len(corpus)) # Get top
+    top_k = min(2, len(corpus)) # Get top 2 or less if fewer paragraphs
     top_indices = np.argsort(-cos_scores)[:top_k]
 
     # Retrieve context (top 3 paragraphs)
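
The first hunk completes the model-loading call: from_pretrained now gets the repository id plus file_name="model.onnx", which tells optimum.onnxruntime which ONNX file in the repository to load. A minimal sketch of how those lines fit together is below; the imports and the sample question/context are assumptions for illustration rather than lines from app.py, and the pipeline call follows the standard optimum-with-transformers pattern of passing the ORT model instance directly.

# Sketch only: imports and the sample question/context are assumed, not taken from app.py.
from transformers import AutoTokenizer, pipeline
from optimum.onnxruntime import ORTModelForQuestionAnswering

# file_name selects one specific ONNX file from the model repository,
# which is what this commit adds to the call.
model = ORTModelForQuestionAnswering.from_pretrained(
    "Xenova/distilbert-base-uncased-distilled-squad",
    file_name="model.onnx",
)
tokenizer = AutoTokenizer.from_pretrained("Xenova/distilbert-base-uncased-distilled-squad")
qa_model = pipeline("question-answering", model=model, tokenizer=tokenizer)

# The QA pipeline returns a dict with "answer", "score", "start" and "end".
result = qa_model(question="How large is the model?", context="The distilled SQuAD model is about 260MB.")
print(result["answer"])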
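
The second hunk restores the top_k line inside answer_question, capping retrieval at two paragraphs (or fewer when the corpus is smaller). Below is a self-contained sketch of that retrieval step; the corpus is invented, the variable names mirror the diff, and the retriever name comes from the hunk context above (all-MiniLM-L6-v2).

# Sketch only: the corpus is invented; variable names mirror the diff.
import numpy as np
from sentence_transformers import SentenceTransformer, util

retriever = SentenceTransformer('all-MiniLM-L6-v2')
corpus = [
    "The app loads an ONNX question-answering model.",
    "Retrieval uses sentence-transformers embeddings.",
    "Answers are extracted from the top-ranked paragraphs.",
]
embeddings = retriever.encode(corpus)

question_embedding = retriever.encode("How does retrieval work?")
cos_scores = util.cos_sim(question_embedding, embeddings)[0]  # similarity to each paragraph

top_k = min(2, len(corpus))                    # top 2, or fewer if the corpus is smaller
top_indices = np.argsort(-cos_scores)[:top_k]  # highest-scoring paragraphs first
context = " ".join(corpus[i] for i in top_indices)
print(context)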