Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -4,45 +4,18 @@ from langchain.prompts import PromptTemplate
|
|
4 |
from langchain_community.llms import HuggingFaceEndpoint
|
5 |
from pdfminer.high_level import extract_text
|
6 |
import docx2txt
|
7 |
-
import
|
8 |
import re
|
9 |
from typing import List
|
10 |
-
from langchain.chains import LLMChain
|
11 |
-
from langchain.prompts import PromptTemplate
|
12 |
-
from langchain_community.llms import HuggingFaceEndpoint
|
13 |
-
from pdfminer.high_level import extract_text
|
14 |
from langchain.vectorstores import Chroma
|
15 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
16 |
from langchain.embeddings import SentenceTransformerEmbeddings
|
17 |
-
import os
|
18 |
-
import re
|
19 |
from sentence_transformers import SentenceTransformer
|
20 |
from sklearn.metrics.pairwise import cosine_similarity
|
21 |
import numpy as np
|
22 |
-
|
23 |
-
from google_auth_oauthlib.flow import InstalledAppFlow
|
24 |
-
from googleapiclient.discovery import build
|
25 |
-
from googleapiclient.http import MediaIoBaseDownload
|
26 |
-
import io
|
27 |
|
28 |
HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
|
29 |
-
GOOGLE_DRIVE_SCOPES = ['https://www.googleapis.com/auth/drive.readonly']
|
30 |
-
CLIENT_SECRET_FILE = 'client_secret_64686904440-0a015tg0h941o993tif67c9mq1jr4mio.apps.googleusercontent.com.json'
|
31 |
-
|
32 |
-
def authenticate_google_drive():
|
33 |
-
flow = InstalledAppFlow.from_client_secrets_file(CLIENT_SECRET_FILE, GOOGLE_DRIVE_SCOPES)
|
34 |
-
creds = flow.run_local_server(port=0)
|
35 |
-
return build('drive', 'v3', credentials=creds)
|
36 |
-
|
37 |
-
def get_file_from_google_drive(drive_service, file_id):
|
38 |
-
request = drive_service.files().get_media(fileId=file_id)
|
39 |
-
file_content = io.BytesIO()
|
40 |
-
downloader = MediaIoBaseDownload(file_content, request, chunksize=1024*1024)
|
41 |
-
done = False
|
42 |
-
while done is False:
|
43 |
-
status, done = downloader.next_chunk()
|
44 |
-
file_content.seek(0)
|
45 |
-
return file_content.read()
|
46 |
|
47 |
def extract_text_from_pdf(pdf_content):
|
48 |
return extract_text(io.BytesIO(pdf_content))
|
@@ -95,7 +68,7 @@ def answer_query_with_similarity(query, file_contents):
|
|
95 |
|
96 |
if not docs:
|
97 |
print("No documents match the query.")
|
98 |
-
return None
|
99 |
|
100 |
docs_content = [doc.page_content for doc in docs]
|
101 |
for i, content in enumerate(docs_content, start=1):
|
@@ -131,32 +104,14 @@ def answer_query_with_similarity(query, file_contents):
|
|
131 |
cleaned_answer = answer.split("Answer:")[-1].strip()
|
132 |
print(f"\n\nAnswer: {cleaned_answer}")
|
133 |
|
134 |
-
return cleaned_answer
|
135 |
except Exception as e:
|
136 |
-
print("An error occurred
|
137 |
-
return None
|
138 |
|
139 |
def main():
|
140 |
st.title("Document Query App")
|
141 |
|
142 |
-
# Get user input for authentication method
|
143 |
-
#auth_method = st.radio("Choose authentication method", ("Google Drive", "Upload Files"))
|
144 |
-
|
145 |
-
#if auth_method == "Google Drive":
|
146 |
-
# Authenticate with Google Drive
|
147 |
-
#drive_service = authenticate_google_drive()
|
148 |
-
|
149 |
-
# Get file IDs from user input
|
150 |
-
#file_ids = st.text_input("Enter the file IDs (comma-separated):")
|
151 |
-
#file_ids = [file_id.strip() for file_id in file_ids.split(",")]
|
152 |
-
|
153 |
-
# Get file contents from Google Drive
|
154 |
-
#file_contents = []
|
155 |
-
#for file_id in file_ids:
|
156 |
-
file_content = get_file_from_google_drive(drive_service, file_id)
|
157 |
-
file_contents.append(file_content)
|
158 |
-
#else:
|
159 |
-
# Allow user to upload files directly
|
160 |
uploaded_files = st.file_uploader("Upload files", accept_multiple_files=True)
|
161 |
file_contents = [file.read() for file in uploaded_files]
|
162 |
|
@@ -166,7 +121,7 @@ def main():
|
|
166 |
if file_contents and query:
|
167 |
response = answer_query_with_similarity(query, file_contents)
|
168 |
if response:
|
169 |
-
st.write("Answer:", response
|
170 |
else:
|
171 |
st.write("No answer found.")
|
172 |
else:
|
|
|
4 |
from langchain_community.llms import HuggingFaceEndpoint
|
5 |
from pdfminer.high_level import extract_text
|
6 |
import docx2txt
|
7 |
+
import io
|
8 |
import re
|
9 |
from typing import List
|
|
|
|
|
|
|
|
|
10 |
from langchain.vectorstores import Chroma
|
11 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
12 |
from langchain.embeddings import SentenceTransformerEmbeddings
|
|
|
|
|
13 |
from sentence_transformers import SentenceTransformer
|
14 |
from sklearn.metrics.pairwise import cosine_similarity
|
15 |
import numpy as np
|
16 |
+
import os
|
|
|
|
|
|
|
|
|
17 |
|
18 |
HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
def extract_text_from_pdf(pdf_content):
|
21 |
return extract_text(io.BytesIO(pdf_content))
|
|
|
68 |
|
69 |
if not docs:
|
70 |
print("No documents match the query.")
|
71 |
+
return None
|
72 |
|
73 |
docs_content = [doc.page_content for doc in docs]
|
74 |
for i, content in enumerate(docs_content, start=1):
|
|
|
104 |
cleaned_answer = answer.split("Answer:")[-1].strip()
|
105 |
print(f"\n\nAnswer: {cleaned_answer}")
|
106 |
|
107 |
+
return cleaned_answer
|
108 |
except Exception as e:
|
109 |
+
print("An error occurred while getting the answer: ", str(e))
|
110 |
+
return None
|
111 |
|
112 |
def main():
|
113 |
st.title("Document Query App")
|
114 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
uploaded_files = st.file_uploader("Upload files", accept_multiple_files=True)
|
116 |
file_contents = [file.read() for file in uploaded_files]
|
117 |
|
|
|
121 |
if file_contents and query:
|
122 |
response = answer_query_with_similarity(query, file_contents)
|
123 |
if response:
|
124 |
+
st.write("Answer:", response)
|
125 |
else:
|
126 |
st.write("No answer found.")
|
127 |
else:
|