MahmoudH committed on
Commit
332312f
·
1 Parent(s): 633e625

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -7
app.py CHANGED
@@ -16,8 +16,8 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
16
  retriever = SentenceTransformer("multi-qa-MiniLM-L6-cos-v1", device=device)
17
 
18
  # Load generation model
19
- tokenizer = AutoTokenizer.from_pretrained("yjernite/bart_eli5")
20
- model = AutoModelForSeq2SeqLM.from_pretrained("yjernite/bart_eli5").to(device)
21
 
22
 
23
  def scrape(urls: List[str]) -> Dataset:
@@ -64,7 +64,7 @@ def search_web(query: str) -> List[str]:
64
 
65
  # Extract the title and URL of the top search results
66
  urls = set()
67
- for result in search_results[:10]:
68
  url = result.find("a")["href"]
69
  if url.startswith("http"):
70
  urls.add(url)
@@ -83,12 +83,10 @@ def generate_answer(question_doc: str) -> str:
83
  model_output = model.generate(
84
  input_ids=q_ids,
85
  attention_mask=q_mask,
86
- min_new_tokens=32,
87
  max_new_tokens=256,
88
- no_repeat_ngram_size=3,
89
- num_beams=2,
90
- do_sample=True,
91
  length_penalty=1.5,
 
 
92
  )
93
  answer = tokenizer.batch_decode(model_output, skip_special_tokens=True)[0]
94
  return answer.strip()
 
16
  retriever = SentenceTransformer("multi-qa-MiniLM-L6-cos-v1", device=device)
17
 
18
  # Load generation model
19
+ tokenizer = AutoTokenizer.from_pretrained("MahmoudH/t5-v1_1-base-abs_qa")
20
+ model = AutoModelForSeq2SeqLM.from_pretrained("MahmoudH/t5-v1_1-base-abs_qa", from_tf=True).to(device)
21
 
22
 
23
  def scrape(urls: List[str]) -> Dataset:
 
64
 
65
  # Extract the title and URL of the top search results
66
  urls = set()
67
+ for result in search_results[:5]:
68
  url = result.find("a")["href"]
69
  if url.startswith("http"):
70
  urls.add(url)
 
83
  model_output = model.generate(
84
  input_ids=q_ids,
85
  attention_mask=q_mask,
 
86
  max_new_tokens=256,
 
 
 
87
  length_penalty=1.5,
88
+ do_sample=True,
89
+ num_beams=4
90
  )
91
  answer = tokenizer.batch_decode(model_output, skip_special_tokens=True)[0]
92
  return answer.strip()