Waseemhassan771 committed
Commit eefd852 · verified · 1 Parent(s): ff60d27

Update app.py

Files changed (1)
  1. app.py +43 -38
app.py CHANGED
@@ -1,26 +1,31 @@
  import os
  import streamlit as st
  import fitz # PyMuPDF
- import openai
+ from google.cloud import language_v1
+ from google.oauth2 import service_account
  from dotenv import load_dotenv
- from pinecone import Pinecone, ServerlessSpec

  # Load the environment variables from the .env file
  load_dotenv()
- openai_api_key = os.getenv('OPENAI_API_KEY')
- pinecone_api_key = os.getenv('PINECONE_API_KEY')
- pinecone_environment = os.getenv('PINECONE_ENVIRONMENT')
+ google_api_key = os.getenv('GOOGLE_API_KEY')

- # Debugging: Print the API keys
- print(f"OpenAI API Key: {openai_api_key}")
- print(f"Pinecone API Key: {pinecone_api_key}")
-
- # Initialize Pinecone
- pc = Pinecone(api_key=pinecone_api_key)
+ # Initialize Google Cloud client using API key
+ client = language_v1.LanguageServiceClient.from_service_account_info({
+     "type": "service_account",
+     "project_id": "your-project-id",
+     "private_key_id": "your-private-key-id",
+     "private_key": "your-private-key",
+     "client_email": "your-client-email",
+     "client_id": "your-client-id",
+     "auth_uri": "https://accounts.google.com/o/oauth2/auth",
+     "token_uri": "https://oauth2.googleapis.com/token",
+     "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
+     "client_x509_cert_url": "your-cert-url"
+ })

  # Streamlit app
  st.title("Chat with Your Document")
- st.write("Upload a PDF file to chat with its content using Pinecone and OpenAI.")
+ st.write("Upload a PDF file to chat with its content using Google's Gemini API.")

  # File upload
  uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
@@ -33,38 +38,38 @@ if uploaded_file is not None:
          page = pdf_document.load_page(page_num)
          pdf_text += page.get_text()

-     # Initialize OpenAI embeddings
-     openai.api_key = openai_api_key
-
-     # Create a Pinecone vector store
-     index_name = "pdf-analysis"
-     if index_name not in pc.list_indexes().names():
-         pc.create_index(
-             name=index_name,
-             dimension=512,
-             metric='euclidean',
-             spec=ServerlessSpec(cloud='aws', region='us-east-1')
-         )
-     vector_store = pc.Index(index_name)
+     # Process the PDF text with Google's embedding model
+     document = language_v1.Document(content=pdf_text, type_=language_v1.Document.Type.PLAIN_TEXT)
+     response = client.analyze_entities(document=document)
+
+     entities = response.entities

-     # Add the PDF text to the vector store
-     vector_store.upsert([
-         (str(i), openai.Embedding.create(
-             input=pdf_text[i],
-             model='text-embedding-ada-002'
-         )["data"][0]["embedding"]) for i in range(len(pdf_text))
-     ])
+     # Extract entities and their embeddings
+     pdf_embeddings = []
+     for entity in entities:
+         pdf_embeddings.append({
+             'name': entity.name,
+             'type': language_v1.Entity.Type(entity.type_).name,
+             'salience': entity.salience
+         })

      # Chat with the document
      user_input = st.text_input("Ask a question about the document:")
      if st.button("Ask"):
          if user_input:
-             response = openai.Completion.create(
-                 engine="davinci",
-                 prompt=f"Analyze the following text and answer the question: {pdf_text}\n\nQuestion: {user_input}",
-                 max_tokens=150
-             )
-             st.write(response.choices[0].text.strip())
+             # Process the user input with Google's embedding model
+             document = language_v1.Document(content=user_input, type_=language_v1.Document.Type.PLAIN_TEXT)
+             response = client.analyze_entities(document=document)
+             user_entities = response.entities
+
+             # Match user question with PDF content
+             response_text = "Here are some key entities from the document:\n"
+             for entity in user_entities:
+                 for pdf_entity in pdf_embeddings:
+                     if pdf_entity['name'] == entity.name:
+                         response_text += f"Entity: {pdf_entity['name']}, Type: {pdf_entity['type']}, Salience: {pdf_entity['salience']}\n"
+
+             st.write(response_text.strip())
          else:
              st.write("Please enter a question to ask.")
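
The new initialization reads GOOGLE_API_KEY from .env but then builds the LanguageServiceClient from hard-coded placeholder service-account fields, and google_api_key is never used afterwards. A minimal sketch of the more common setup, assuming a real service-account JSON key file is available locally and its path is exported as GOOGLE_APPLICATION_CREDENTIALS (the fallback filename below is a placeholder, not part of this commit):

import os

from google.cloud import language_v1

# Path to the downloaded service-account key; "service-account.json" is a placeholder.
credentials_path = os.getenv("GOOGLE_APPLICATION_CREDENTIALS", "service-account.json")

# Build the Natural Language client from the key file instead of
# embedding credential fields directly in app.py.
client = language_v1.LanguageServiceClient.from_service_account_file(credentials_path)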
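
analyze_entities is called on the full pdf_text in a single request. Very large PDFs can run into the Natural Language API's per-request content limits, so splitting the text into chunks and merging the results is a safer pattern. A rough sketch under that assumption (the chunk size and helper name are arbitrary placeholders):

from google.cloud import language_v1

def analyze_entities_in_chunks(client, text, chunk_size=100_000):
    # Split the text into fixed-size chunks and collect entities from each request.
    entities = []
    for start in range(0, len(text), chunk_size):
        chunk = text[start:start + chunk_size]
        document = language_v1.Document(content=chunk, type_=language_v1.Document.Type.PLAIN_TEXT)
        entities.extend(client.analyze_entities(document=document).entities)
    return entities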
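
The answer step compares entity names with exact string equality, so a question mentioning "paris" will not match a document entity named "Paris". A small sketch of a case-insensitive lookup over the pdf_embeddings list built in the commit (the helper name is hypothetical):

def match_document_entities(user_entities, pdf_embeddings):
    # Index document entities by lower-cased name for case-insensitive matching.
    by_name = {e['name'].lower(): e for e in pdf_embeddings}
    matches = []
    for entity in user_entities:
        hit = by_name.get(entity.name.lower())
        if hit is not None:
            matches.append(hit)
    return matches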
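
The upload prompt tells the user the app chats "using Google's Gemini API", while the committed code only lists matching entities from the Cloud Natural Language API. If the intent is to answer free-form questions with Gemini using the GOOGLE_API_KEY already loaded from .env, a rough sketch with the google-generativeai package could look like this (the package choice, model name, and prompt wording are assumptions, not part of this commit):

import os

import google.generativeai as genai

# Configure Gemini with the API key from .env (assumed to be set).
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
model = genai.GenerativeModel("gemini-1.5-flash")  # model name is an assumption

def answer_from_document(pdf_text, question):
    # Ask the model to answer using only the uploaded document's text.
    prompt = (
        "Answer the question using only the document below.\n\n"
        f"Document:\n{pdf_text}\n\n"
        f"Question: {question}"
    )
    return model.generate_content(prompt).text

Inside the Ask handler, st.write(answer_from_document(pdf_text, user_input)) would then take the place of the entity-matching loop.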