Update app.py
Browse files
app.py
CHANGED
@@ -19,7 +19,10 @@ print(f"Pyton version {sys.version}.")
|
|
19 |
vector_store = None
|
20 |
|
21 |
# Sample PDF file
|
22 |
-
|
|
|
|
|
|
|
23 |
|
24 |
examples_questions = [["What is Transformer?"],
|
25 |
["What is Attention?"],
|
@@ -61,15 +64,17 @@ def index_pdf(pdf):
|
|
61 |
|
62 |
return "PDF indexed successfully!"
|
63 |
|
64 |
-
def load_sample_pdf():
|
65 |
global vector_store
|
66 |
-
|
67 |
-
# Load the
|
68 |
-
|
69 |
-
documents =
|
|
|
|
|
70 |
|
71 |
# Split the documents into chunks
|
72 |
-
text_splitter = RecursiveCharacterTextSplitter(chunk_size=
|
73 |
texts = text_splitter.split_documents(documents)
|
74 |
|
75 |
# Embed the chunks
|
@@ -78,7 +83,7 @@ def load_sample_pdf():
|
|
78 |
# Store the embeddings in the vector store
|
79 |
vector_store = FAISS.from_documents(texts, embeddings)
|
80 |
|
81 |
-
return "Sample
|
82 |
|
83 |
|
84 |
def format_docs(docs):
|
@@ -139,7 +144,7 @@ with gr.Blocks(theme="Nymbo/Alyx_Theme") as demo:
|
|
139 |
sample_description = gr.Markdown("This sample PDF is a seminal paper in the field of machine learning, titled 'Attention Is All You Need' at https://arxiv.org/abs/1706.03762. It introduces the Transformer model, which has become foundational in natural language processing.")
|
140 |
index_output = gr.Textbox(label="Indexing Status")
|
141 |
# index_button.click(index_pdf, inputs=pdf_input, outputs=index_output)
|
142 |
-
load_sample.click(load_sample_pdf, inputs=
|
143 |
|
144 |
with gr.Tab("Chatbot"):
|
145 |
gr.ChatInterface(
|
|
|
19 |
vector_store = None
|
20 |
|
21 |
# Sample PDF files
|
22 |
+
sample_filenames = ["Attention Is All You Need.pdf",
|
23 |
+
"Generative Adversarial Nets.pdf",
|
24 |
+
"Parameter-Efficient Transfer Learning for NLP.pdf",
|
25 |
+
]
|
26 |
|
27 |
examples_questions = [["What is Transformer?"],
|
28 |
["What is Attention?"],
|
|
|
64 |
|
65 |
return "PDF indexed successfully!"
|
66 |
|
67 |
+
def load_sample_pdf(files):
|
68 |
global vector_store
|
69 |
+
|
70 |
+
# Load the PDFs
|
71 |
+
loaders = [PyPDFLoader(x) for x in files]
|
72 |
+
documents = []
|
73 |
+
for loader in loaders:
|
74 |
+
documents.extend(loader.load())
|
75 |
|
76 |
# Split the documents into chunks
|
77 |
+
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
|
78 |
texts = text_splitter.split_documents(documents)
|
79 |
|
80 |
# Embed the chunks
|
|
|
83 |
# Store the embeddings in the vector store
|
84 |
vector_store = FAISS.from_documents(texts, embeddings)
|
85 |
|
86 |
+
return "Sample PDFs indexed successfully!"
|
87 |
|
88 |
|
89 |
def format_docs(docs):
|
|
|
144 |
sample_description = gr.Markdown("This sample PDF is a seminal paper in the field of machine learning, titled 'Attention Is All You Need' at https://arxiv.org/abs/1706.03762. It introduces the Transformer model, which has become foundational in natural language processing.")
|
145 |
index_output = gr.Textbox(label="Indexing Status")
|
146 |
# index_button.click(index_pdf, inputs=pdf_input, outputs=index_output)
|
147 |
+
# Gradio `inputs` must be components, not plain Python values, so the fixed list of sample filenames is bound in a closure and the handler takes no UI inputs.
load_sample.click(lambda: load_sample_pdf(sample_filenames), inputs=None, outputs=index_output)
|
148 |
|
149 |
with gr.Tab("Chatbot"):
|
150 |
gr.ChatInterface(
|