Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,26 +1,31 @@
|
|
1 |
import os
|
2 |
import streamlit as st
|
3 |
import fitz # PyMuPDF
|
4 |
-
import
|
|
|
5 |
from dotenv import load_dotenv
|
6 |
-
from pinecone import Pinecone, ServerlessSpec
|
7 |
|
8 |
# Load the environment variables from the .env file
|
9 |
load_dotenv()
|
10 |
-
|
11 |
-
pinecone_api_key = os.getenv('PINECONE_API_KEY')
|
12 |
-
pinecone_environment = os.getenv('PINECONE_ENVIRONMENT')
|
13 |
|
14 |
-
#
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
|
21 |
# Streamlit app
|
22 |
st.title("Chat with Your Document")
|
23 |
-
st.write("Upload a PDF file to chat with its content using
|
24 |
|
25 |
# File upload
|
26 |
uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
|
@@ -33,38 +38,38 @@ if uploaded_file is not None:
|
|
33 |
page = pdf_document.load_page(page_num)
|
34 |
pdf_text += page.get_text()
|
35 |
|
36 |
-
#
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
if index_name not in pc.list_indexes().names():
|
42 |
-
pc.create_index(
|
43 |
-
name=index_name,
|
44 |
-
dimension=512,
|
45 |
-
metric='euclidean',
|
46 |
-
spec=ServerlessSpec(cloud='aws', region='us-east-1')
|
47 |
-
)
|
48 |
-
vector_store = pc.Index(index_name)
|
49 |
|
50 |
-
#
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
|
|
57 |
|
58 |
# Chat with the document
|
59 |
user_input = st.text_input("Ask a question about the document:")
|
60 |
if st.button("Ask"):
|
61 |
if user_input:
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
else:
|
69 |
st.write("Please enter a question to ask.")
|
70 |
|
|
|
1 |
import os
|
2 |
import streamlit as st
|
3 |
import fitz # PyMuPDF
|
4 |
+
from google.cloud import language_v1
|
5 |
+
from google.oauth2 import service_account
|
6 |
from dotenv import load_dotenv
|
|
|
7 |
|
8 |
# Load the environment variables from the .env file
|
9 |
load_dotenv()
|
10 |
+
google_api_key = os.getenv('GOOGLE_API_KEY')
|
|
|
|
|
11 |
|
12 |
+
# Initialize the Google Cloud Natural Language client from service-account info (not an API key)
|
13 |
+
client = language_v1.LanguageServiceClient.from_service_account_info({
|
14 |
+
"type": "service_account",
|
15 |
+
"project_id": "your-project-id",
|
16 |
+
"private_key_id": "your-private-key-id",
|
17 |
+
"private_key": "your-private-key",
|
18 |
+
"client_email": "your-client-email",
|
19 |
+
"client_id": "your-client-id",
|
20 |
+
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
21 |
+
"token_uri": "https://oauth2.googleapis.com/token",
|
22 |
+
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
|
23 |
+
"client_x509_cert_url": "your-cert-url"
|
24 |
+
})
|
25 |
|
26 |
# Streamlit app
|
27 |
st.title("Chat with Your Document")
|
28 |
+
st.write("Upload a PDF file to chat with its content using Google's Gemini API.")
|
29 |
|
30 |
# File upload
|
31 |
uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
|
|
|
38 |
page = pdf_document.load_page(page_num)
|
39 |
pdf_text += page.get_text()
|
40 |
|
41 |
+
# Run Google Cloud Natural Language entity analysis on the extracted PDF text
|
42 |
+
document = language_v1.Document(content=pdf_text, type_=language_v1.Document.Type.PLAIN_TEXT)
|
43 |
+
response = client.analyze_entities(document=document)
|
44 |
+
|
45 |
+
entities = response.entities
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
+
# Collect each entity's name, type, and salience (no embedding vectors are produced here)
|
48 |
+
pdf_embeddings = []
|
49 |
+
for entity in entities:
|
50 |
+
pdf_embeddings.append({
|
51 |
+
'name': entity.name,
|
52 |
+
'type': language_v1.Entity.Type(entity.type_).name,
|
53 |
+
'salience': entity.salience
|
54 |
+
})
|
55 |
|
56 |
# Chat with the document
|
57 |
user_input = st.text_input("Ask a question about the document:")
|
58 |
if st.button("Ask"):
|
59 |
if user_input:
|
60 |
+
# Run entity analysis on the user's question
|
61 |
+
document = language_v1.Document(content=user_input, type_=language_v1.Document.Type.PLAIN_TEXT)
|
62 |
+
response = client.analyze_entities(document=document)
|
63 |
+
user_entities = response.entities
|
64 |
+
|
65 |
+
# List PDF entities whose names exactly match an entity found in the user's question
|
66 |
+
response_text = "Here are some key entities from the document:\n"
|
67 |
+
for entity in user_entities:
|
68 |
+
for pdf_entity in pdf_embeddings:
|
69 |
+
if pdf_entity['name'] == entity.name:
|
70 |
+
response_text += f"Entity: {pdf_entity['name']}, Type: {pdf_entity['type']}, Salience: {pdf_entity['salience']}\n"
|
71 |
+
|
72 |
+
st.write(response_text.strip())
|
73 |
else:
|
74 |
st.write("Please enter a question to ask.")
|
75 |
|