Spaces:

jchen8000
/

RAG_Demo

Sleeping

App Files Community

jchen8000 committed on May 28

Commit

25656b2

verified ·

1 Parent(s): a58f7e6

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -9

app.py CHANGED Viewed

@@ -19,7 +19,10 @@ print(f"Pyton version {sys.version}.")
 vector_store = None
 # Sample PDF file
-sample_filename = "Attention Is All You Need.pdf"
 examples_questions = [["What is Transformer?"],
             ["What is Attention?"],
@@ -61,15 +64,17 @@ def index_pdf(pdf):
     return "PDF indexed successfully!"
-def load_sample_pdf():
     global vector_store
-    # Load the PDF
-    loader = PyPDFLoader(sample_filename)
-    documents = loader.load()
     # Split the documents into chunks
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
     texts = text_splitter.split_documents(documents)
     # Embed the chunks
@@ -78,7 +83,7 @@ def load_sample_pdf():
     # Store the embeddings in the vector store
     vector_store = FAISS.from_documents(texts, embeddings)
-    return "Sample PDF indexed successfully!"
 def format_docs(docs):
@@ -139,7 +144,7 @@ with gr.Blocks(theme="Nymbo/Alyx_Theme") as demo:
         sample_description = gr.Markdown("This sample PDF is a seminal paper in the field of machine learning, titled 'Attention Is All You Need' at https://arxiv.org/abs/1706.03762. It introduces the Transformer model, which has become foundational in natural language processing.")
         index_output = gr.Textbox(label="Indexing Status")
         # index_button.click(index_pdf, inputs=pdf_input, outputs=index_output)
-        load_sample.click(load_sample_pdf, inputs=None, outputs=index_output)
     with gr.Tab("Chatbot"):
         gr.ChatInterface(

 vector_store = None
 # Sample PDF file
+sample_filenames = ["Attention Is All You Need.pdf",
+                   "Generative Adversarial Nets.pdf",
+                   "Parameter-Efficient Transfer Learning for NLP.pdf",
+                  ]
 examples_questions = [["What is Transformer?"],
             ["What is Attention?"],
     return "PDF indexed successfully!"
+def load_sample_pdf(files):
     global vector_store
+    # Load the PDFs
+    loaders = [PyPDFLoader(x) for x in files]
+    documents = []
+    for loader in loaders:
+        documents.extend(loader.load())
     # Split the documents into chunks
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
     texts = text_splitter.split_documents(documents)
     # Embed the chunks
     # Store the embeddings in the vector store
     vector_store = FAISS.from_documents(texts, embeddings)
+    return "Sample PDFs indexed successfully!"
 def format_docs(docs):
         sample_description = gr.Markdown("This sample PDF is a seminal paper in the field of machine learning, titled 'Attention Is All You Need' at https://arxiv.org/abs/1706.03762. It introduces the Transformer model, which has become foundational in natural language processing.")
         index_output = gr.Textbox(label="Indexing Status")
         # index_button.click(index_pdf, inputs=pdf_input, outputs=index_output)
+        load_sample.click(load_sample_pdf, inputs=sample_filenames, outputs=index_output)
     with gr.Tab("Chatbot"):
         gr.ChatInterface(