Update app.py
app.py CHANGED
@@ -12,12 +12,12 @@ import nltk
 nltk.download('punkt')

 # Load bi encoder
-top_k = 10
+# top_k = 10
 cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

 # Load your fine-tuned model and tokenizer
 model_name = "google/flan-t5-large"
-peft_name = "legacy107/flan-t5-large-ia3-newsqa"
+peft_name = "legacy107/flan-t5-large-ia3-newsqa-100"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 pretrained_model = T5ForConditionalGeneration.from_pretrained(model_name)
 model = T5ForConditionalGeneration.from_pretrained(model_name)
@@ -31,7 +31,7 @@ dataset = dataset.shuffle()
 dataset = dataset.select(range(10))

 # Context chunking
-def chunk_splitter(context, chunk_size=
+def chunk_splitter(context, chunk_size=100, overlap=0.10):
     overlap_size = chunk_size * overlap
     sentences = nltk.sent_tokenize(context)

@@ -75,7 +75,7 @@ def retrieve_context(query, contexts):
     hits = sorted(hits, key=lambda x: x["cross-score"], reverse=True)

     return " ".join(
-        [contexts[hit["corpus_id"]] for hit in hits
+        [contexts[hit["corpus_id"]] for hit in hits]
     ).replace("\n", " ")
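The renamed checkpoint, legacy107/flan-t5-large-ia3-newsqa-100, is an IA3 adapter, but the diff never shows where peft_name is consumed. A minimal sketch of how such an adapter is typically attached with Hugging Face's peft library, assuming app.py layers it onto the second T5 instance:

from transformers import AutoTokenizer, T5ForConditionalGeneration
from peft import PeftModel

model_name = "google/flan-t5-large"
peft_name = "legacy107/flan-t5-large-ia3-newsqa-100"  # adapter repo from the diff

tokenizer = AutoTokenizer.from_pretrained(model_name)
pretrained_model = T5ForConditionalGeneration.from_pretrained(model_name)  # untouched baseline
model = T5ForConditionalGeneration.from_pretrained(model_name)
# Assumption: the IA3 adapter weights are loaded on top of the frozen base model.
model = PeftModel.from_pretrained(model, peft_name)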
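Only the signature and first two lines of chunk_splitter appear in the second hunk. A sketch of how the sentence-level sliding window could continue, assuming chunks are measured in whitespace-split words and roughly overlap_size words are carried between consecutive chunks (everything past the sent_tokenize call is an assumption, not code from app.py):

import nltk

nltk.download('punkt')

def chunk_splitter(context, chunk_size=100, overlap=0.10):
    overlap_size = chunk_size * overlap
    sentences = nltk.sent_tokenize(context)

    chunks = []
    current, length = [], 0
    fresh = False  # does `current` hold anything not yet emitted?
    for sentence in sentences:
        current.append(sentence)
        length += len(sentence.split())
        fresh = True
        if length >= chunk_size:
            chunks.append(" ".join(current))
            # Carry roughly overlap_size trailing words into the next chunk.
            carried, carried_len = [], 0
            for sent in reversed(current):
                carried.insert(0, sent)
                carried_len += len(sent.split())
                if carried_len >= overlap_size:
                    break
            current, length, fresh = carried, carried_len, False
    if current and fresh:
        chunks.append(" ".join(current))
    return chunks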
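The last hunk repairs a missing closing bracket in the list comprehension that rebuilds the reranked context string. Only the sort and the join are visible in the diff; the scoring step below is a sketch of how the ms-marco cross-encoder plausibly fills hits, using the sentence-transformers CrossEncoder.predict API:

from sentence_transformers import CrossEncoder

cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

def retrieve_context(query, contexts):
    # Score every (query, chunk) pair; higher means more relevant.
    scores = cross_encoder.predict([[query, chunk] for chunk in contexts])
    hits = [{"corpus_id": i, "cross-score": s} for i, s in enumerate(scores)]
    # The two lines below mirror app.py; everything above is an assumption.
    hits = sorted(hits, key=lambda x: x["cross-score"], reverse=True)
    return " ".join(
        [contexts[hit["corpus_id"]] for hit in hits]
    ).replace("\n", " ")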