jchen8000 committed on
Commit
25656b2
·
verified ·
1 Parent(s): a58f7e6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -9
app.py CHANGED
@@ -19,7 +19,10 @@ print(f"Pyton version {sys.version}.")
19
  vector_store = None
20
 
21
  # Sample PDF file
22
- sample_filename = "Attention Is All You Need.pdf"
 
 
 
23
 
24
  examples_questions = [["What is Transformer?"],
25
  ["What is Attention?"],
@@ -61,15 +64,17 @@ def index_pdf(pdf):
61
 
62
  return "PDF indexed successfully!"
63
 
64
- def load_sample_pdf():
65
  global vector_store
66
-
67
- # Load the PDF
68
- loader = PyPDFLoader(sample_filename)
69
- documents = loader.load()
 
 
70
 
71
  # Split the documents into chunks
72
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
73
  texts = text_splitter.split_documents(documents)
74
 
75
  # Embed the chunks
@@ -78,7 +83,7 @@ def load_sample_pdf():
78
  # Store the embeddings in the vector store
79
  vector_store = FAISS.from_documents(texts, embeddings)
80
 
81
- return "Sample PDF indexed successfully!"
82
 
83
 
84
  def format_docs(docs):
@@ -139,7 +144,7 @@ with gr.Blocks(theme="Nymbo/Alyx_Theme") as demo:
139
  sample_description = gr.Markdown("This sample PDF is a seminal paper in the field of machine learning, titled 'Attention Is All You Need' at https://arxiv.org/abs/1706.03762. It introduces the Transformer model, which has become foundational in natural language processing.")
140
  index_output = gr.Textbox(label="Indexing Status")
141
  # index_button.click(index_pdf, inputs=pdf_input, outputs=index_output)
142
- load_sample.click(load_sample_pdf, inputs=None, outputs=index_output)
143
 
144
  with gr.Tab("Chatbot"):
145
  gr.ChatInterface(
 
19
  vector_store = None
20
 
21
  # Sample PDF file
22
+ sample_filenames = ["Attention Is All You Need.pdf",
23
+ "Generative Adversarial Nets.pdf",
24
+ "Parameter-Efficient Transfer Learning for NLP.pdf",
25
+ ]
26
 
27
  examples_questions = [["What is Transformer?"],
28
  ["What is Attention?"],
 
64
 
65
  return "PDF indexed successfully!"
66
 
67
+ def load_sample_pdf(files):
68
  global vector_store
69
+
70
+ # Load the PDFs
71
+ loaders = [PyPDFLoader(x) for x in files]
72
+ documents = []
73
+ for loader in loaders:
74
+ documents.extend(loader.load())
75
 
76
  # Split the documents into chunks
77
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
78
  texts = text_splitter.split_documents(documents)
79
 
80
  # Embed the chunks
 
83
  # Store the embeddings in the vector store
84
  vector_store = FAISS.from_documents(texts, embeddings)
85
 
86
+ return "Sample PDFs indexed successfully!"
87
 
88
 
89
  def format_docs(docs):
 
144
  sample_description = gr.Markdown("This sample PDF is a seminal paper in the field of machine learning, titled 'Attention Is All You Need' at https://arxiv.org/abs/1706.03762. It introduces the Transformer model, which has become foundational in natural language processing.")
145
  index_output = gr.Textbox(label="Indexing Status")
146
  # index_button.click(index_pdf, inputs=pdf_input, outputs=index_output)
147
+ load_sample.click(load_sample_pdf, inputs=sample_filenames, outputs=index_output)
148
 
149
  with gr.Tab("Chatbot"):
150
  gr.ChatInterface(