Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -33,6 +33,8 @@ import torchaudio
|
|
33 |
import numpy as np
|
34 |
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
35 |
from langchain_huggingface import HuggingFaceEmbeddings
|
|
|
|
|
36 |
|
37 |
|
38 |
# Neo4j imports
|
@@ -126,12 +128,12 @@ gpt4o_mini_model = initialize_gpt4o_mini_model()
|
|
126 |
|
127 |
# Existing embeddings and vector store for GPT-4o
|
128 |
gpt_embeddings = OpenAIEmbeddings(api_key=os.environ['OPENAI_API_KEY'])
|
129 |
-
gpt_vectorstore = PineconeVectorStore(index_name="
|
130 |
gpt_retriever = gpt_vectorstore.as_retriever(search_kwargs={'k': 5})
|
131 |
|
132 |
# New vector store setup for Phi-3.5
|
133 |
phi_embeddings = embeddings
|
134 |
-
phi_vectorstore = PineconeVectorStore(index_name="
|
135 |
phi_retriever = phi_vectorstore.as_retriever(search_kwargs={'k': 5})
|
136 |
|
137 |
|
@@ -142,7 +144,8 @@ phi_retriever = phi_vectorstore.as_retriever(search_kwargs={'k': 5})
|
|
142 |
from pinecone import Pinecone
|
143 |
pc = Pinecone(api_key=os.environ['PINECONE_API_KEY'])
|
144 |
|
145 |
-
index_name = "italyopenai"
|
|
|
146 |
vectorstore = PineconeVectorStore(index_name=index_name, embedding=embeddings)
|
147 |
retriever = vectorstore.as_retriever(search_kwargs={'k': 5})
|
148 |
|
@@ -1562,6 +1565,24 @@ def fetch_google_flights(departure_id="JFK", arrival_id="BHM", outbound_date=cur
|
|
1562 |
# def insert_prompt(current_text, prompt):
|
1563 |
# return prompt[0] if prompt else current_text
|
1564 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1565 |
|
1566 |
|
1567 |
|
@@ -1669,7 +1690,16 @@ with gr.Blocks(theme='gradio/soft') as demo:
|
|
1669 |
# refresh_button = gr.Button("Refresh Images")
|
1670 |
# refresh_button.click(fn=update_images, inputs=None, outputs=[image_output_1, image_output_2, image_output_3])
|
1671 |
|
1672 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1673 |
|
1674 |
|
1675 |
|
|
|
33 |
import numpy as np
|
34 |
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
|
35 |
from langchain_huggingface import HuggingFaceEmbeddings
|
36 |
+
from langchain_community.document_loaders import PDFPlumberLoader
|
37 |
+
import pdfplumber
|
38 |
|
39 |
|
40 |
# Neo4j imports
|
|
|
128 |
|
129 |
# Existing embeddings and vector store for GPT-4o
|
130 |
gpt_embeddings = OpenAIEmbeddings(api_key=os.environ['OPENAI_API_KEY'])
|
131 |
+
gpt_vectorstore = PineconeVectorStore(index_name="italy-pdf", embedding=gpt_embeddings)
|
132 |
gpt_retriever = gpt_vectorstore.as_retriever(search_kwargs={'k': 5})
|
133 |
|
134 |
# New vector store setup for Phi-3.5
|
135 |
phi_embeddings = embeddings
|
136 |
+
phi_vectorstore = PineconeVectorStore(index_name="italy-pdf", embedding=embeddings)
|
137 |
phi_retriever = phi_vectorstore.as_retriever(search_kwargs={'k': 5})
|
138 |
|
139 |
|
|
|
144 |
from pinecone import Pinecone
|
145 |
pc = Pinecone(api_key=os.environ['PINECONE_API_KEY'])
|
146 |
|
147 |
+
# index_name = "italyopenai"
|
148 |
+
index_name = "italy-pdf"
|
149 |
vectorstore = PineconeVectorStore(index_name=index_name, embedding=embeddings)
|
150 |
retriever = vectorstore.as_retriever(search_kwargs={'k': 5})
|
151 |
|
|
|
1565 |
# def insert_prompt(current_text, prompt):
|
1566 |
# return prompt[0] if prompt else current_text
|
1567 |
|
1568 |
+
# Function to process PDF, extract text, split it into chunks, and upload to the vector DB
|
1569 |
+
def process_pdf(pdf_file):
    """Extract text from a PDF, split it into chunks, and upload them to the vector DB.

    Args:
        pdf_file: Value passed straight to ``pdfplumber.open``. This function
            is wired to the "Upload PDF" file input, so it is ``None`` when
            the button is clicked before a file has been uploaded.

    Returns:
        A human-readable status string for the result textbox.
    """
    # Nothing was uploaded: report it instead of failing inside pdfplumber.
    if pdf_file is None:
        return "No PDF file provided."

    with pdfplumber.open(pdf_file) as pdf:
        # `or ""` guards against pages for which extract_text() yields None
        # (e.g. pages without a text layer), which would break concatenation.
        all_text = "".join(page.extract_text() or "" for page in pdf.pages)

    # Split the text into chunks
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = text_splitter.split_text(all_text)

    # Embed and upload the chunks into the vector database.
    # The chunks are plain strings, so they go through add_texts();
    # add_documents() expects Document objects. One batched call replaces
    # the per-chunk calls.
    # NOTE(review): `vector_store` is not defined in the visible part of this
    # file, which names its stores `vectorstore`, `gpt_vectorstore` and
    # `phi_vectorstore` -- confirm which store this upload should target.
    if chunks:
        vector_store.add_texts(chunks)

    return f"Uploaded {len(chunks)} chunks to the vector database."
|
1586 |
|
1587 |
|
1588 |
|
|
|
1690 |
# refresh_button = gr.Button("Refresh Images")
|
1691 |
# refresh_button.click(fn=update_images, inputs=None, outputs=[image_output_1, image_output_2, image_output_3])
|
1692 |
|
1693 |
+
# File upload component
|
1694 |
+
with gr.Column():
|
1695 |
+
file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
|
1696 |
+
# Button to trigger processing
|
1697 |
+
process_button = gr.Button("Process PDF and Upload")
|
1698 |
+
# Output textbox for results
|
1699 |
+
output_textbox = gr.Textbox(label="Result")
|
1700 |
+
|
1701 |
+
# Define button click action
|
1702 |
+
process_button.click(fn=process_pdf, inputs=file_input, outputs=output_textbox)
|
1703 |
|
1704 |
|
1705 |
|