Spaces:

Waseemhassan771
/

chat_document

Running

App Files Files Community

Waseemhassan771 committed on Feb 24

Commit

eefd852

verified ·

1 Parent(s): ff60d27

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -38

app.py CHANGED Viewed

@@ -1,26 +1,31 @@
 import os
 import streamlit as st
 import fitz  # PyMuPDF
-import openai
 from dotenv import load_dotenv
-from pinecone import Pinecone, ServerlessSpec
 # Load the environment variables from the .env file
 load_dotenv()
-openai_api_key = os.getenv('OPENAI_API_KEY')
-pinecone_api_key = os.getenv('PINECONE_API_KEY')
-pinecone_environment = os.getenv('PINECONE_ENVIRONMENT')
-# Debugging: Print the API keys
-print(f"OpenAI API Key: {openai_api_key}")
-print(f"Pinecone API Key: {pinecone_api_key}")
-# Initialize Pinecone
-pc = Pinecone(api_key=pinecone_api_key)
 # Streamlit app
 st.title("Chat with Your Document")
-st.write("Upload a PDF file to chat with its content using Pinecone and OpenAI.")
 # File upload
 uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
@@ -33,38 +38,38 @@ if uploaded_file is not None:
         page = pdf_document.load_page(page_num)
         pdf_text += page.get_text()
-    # Initialize OpenAI embeddings
-    openai.api_key = openai_api_key
-    # Create a Pinecone vector store
-    index_name = "pdf-analysis"
-    if index_name not in pc.list_indexes().names():
-        pc.create_index(
-            name=index_name,
-            dimension=512,
-            metric='euclidean',
-            spec=ServerlessSpec(cloud='aws', region='us-east-1')
-        )
-    vector_store = pc.Index(index_name)
-    # Add the PDF text to the vector store
-    vector_store.upsert([
-        (str(i), openai.Embedding.create(
-            input=pdf_text[i],
-            model='text-embedding-ada-002'
-        )["data"][0]["embedding"]) for i in range(len(pdf_text))
-    ])
     # Chat with the document
     user_input = st.text_input("Ask a question about the document:")
     if st.button("Ask"):
         if user_input:
-            response = openai.Completion.create(
-                engine="davinci",
-                prompt=f"Analyze the following text and answer the question: {pdf_text}\n\nQuestion: {user_input}",
-                max_tokens=150
-            )
-            st.write(response.choices[0].text.strip())
         else:
             st.write("Please enter a question to ask.")

 import os
 import streamlit as st
 import fitz  # PyMuPDF
+from google.cloud import language_v1
+from google.oauth2 import service_account
 from dotenv import load_dotenv
 # Load the environment variables from the .env file
 load_dotenv()
+google_api_key = os.getenv('GOOGLE_API_KEY')
+# Initialize the Natural Language client from service-account info (placeholder values below; google_api_key is not used here)
+client = language_v1.LanguageServiceClient.from_service_account_info({
+    "type": "service_account",
+    "project_id": "your-project-id",
+    "private_key_id": "your-private-key-id",
+    "private_key": "your-private-key",
+    "client_email": "your-client-email",
+    "client_id": "your-client-id",
+    "auth_uri": "https://accounts.google.com/o/oauth2/auth",
+    "token_uri": "https://oauth2.googleapis.com/token",
+    "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
+    "client_x509_cert_url": "your-cert-url"
+})
 # Streamlit app
 st.title("Chat with Your Document")
+st.write("Upload a PDF file to chat with its content using Google's Gemini API.")
 # File upload
 uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
         page = pdf_document.load_page(page_num)
         pdf_text += page.get_text()
+    # Run Google Cloud Natural Language entity analysis on the extracted PDF text
+    document = language_v1.Document(content=pdf_text, type_=language_v1.Document.Type.PLAIN_TEXT)
+    response = client.analyze_entities(document=document)
+    entities = response.entities
+    # Collect each entity's name, type, and salience (no embeddings are computed)
+    pdf_embeddings = []
+    for entity in entities:
+        pdf_embeddings.append({
+            'name': entity.name,
+            'type': language_v1.Entity.Type(entity.type_).name,
+            'salience': entity.salience
+        })
     # Chat with the document
     user_input = st.text_input("Ask a question about the document:")
     if st.button("Ask"):
         if user_input:
+            # Run entity analysis on the user's question
+            document = language_v1.Document(content=user_input, type_=language_v1.Document.Type.PLAIN_TEXT)
+            response = client.analyze_entities(document=document)
+            user_entities = response.entities
+            # List PDF entities whose names exactly match an entity found in the question
+            response_text = "Here are some key entities from the document:\n"
+            for entity in user_entities:
+                for pdf_entity in pdf_embeddings:
+                    if pdf_entity['name'] == entity.name:
+                        response_text += f"Entity: {pdf_entity['name']}, Type: {pdf_entity['type']}, Salience: {pdf_entity['salience']}\n"
+            st.write(response_text.strip())
         else:
             st.write("Please enter a question to ask.")