Luciferalive committed
Commit 9b00b1d · verified · 1 Parent(s): 3a54452

Update app.py

Files changed (1): app.py (+7, -52)
app.py CHANGED
@@ -4,45 +4,18 @@ from langchain.prompts import PromptTemplate
 from langchain_community.llms import HuggingFaceEndpoint
 from pdfminer.high_level import extract_text
 import docx2txt
-import os
+import io
 import re
 from typing import List
-from langchain.chains import LLMChain
-from langchain.prompts import PromptTemplate
-from langchain_community.llms import HuggingFaceEndpoint
-from pdfminer.high_level import extract_text
 from langchain.vectorstores import Chroma
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.embeddings import SentenceTransformerEmbeddings
-import os
-import re
 from sentence_transformers import SentenceTransformer
 from sklearn.metrics.pairwise import cosine_similarity
 import numpy as np
-from google.oauth2.credentials import Credentials
-from google_auth_oauthlib.flow import InstalledAppFlow
-from googleapiclient.discovery import build
-from googleapiclient.http import MediaIoBaseDownload
-import io
+import os

 HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
-GOOGLE_DRIVE_SCOPES = ['https://www.googleapis.com/auth/drive.readonly']
-CLIENT_SECRET_FILE = 'client_secret_64686904440-0a015tg0h941o993tif67c9mq1jr4mio.apps.googleusercontent.com.json'
-
-def authenticate_google_drive():
-    flow = InstalledAppFlow.from_client_secrets_file(CLIENT_SECRET_FILE, GOOGLE_DRIVE_SCOPES)
-    creds = flow.run_local_server(port=0)
-    return build('drive', 'v3', credentials=creds)
-
-def get_file_from_google_drive(drive_service, file_id):
-    request = drive_service.files().get_media(fileId=file_id)
-    file_content = io.BytesIO()
-    downloader = MediaIoBaseDownload(file_content, request, chunksize=1024*1024)
-    done = False
-    while done is False:
-        status, done = downloader.next_chunk()
-    file_content.seek(0)
-    return file_content.read()

 def extract_text_from_pdf(pdf_content):
     return extract_text(io.BytesIO(pdf_content))
@@ -95,7 +68,7 @@ def answer_query_with_similarity(query, file_contents):

         if not docs:
             print("No documents match the query.")
-            return None, None
+            return None

         docs_content = [doc.page_content for doc in docs]
         for i, content in enumerate(docs_content, start=1):
@@ -131,32 +104,14 @@ def answer_query_with_similarity(query, file_contents):
         cleaned_answer = answer.split("Answer:")[-1].strip()
         print(f"\n\nAnswer: {cleaned_answer}")

-        return cleaned_answer,
+        return cleaned_answer
     except Exception as e:
-        print("An error occurred to get the answer: ", str(e))
-        return None, None
+        print("An error occurred while getting the answer: ", str(e))
+        return None

 def main():
     st.title("Document Query App")

-    # Get user input for authentication method
-    #auth_method = st.radio("Choose authentication method", ("Google Drive", "Upload Files"))
-
-    #if auth_method == "Google Drive":
-        # Authenticate with Google Drive
-        #drive_service = authenticate_google_drive()
-
-        # Get file IDs from user input
-        #file_ids = st.text_input("Enter the file IDs (comma-separated):")
-        #file_ids = [file_id.strip() for file_id in file_ids.split(",")]
-
-        # Get file contents from Google Drive
-        #file_contents = []
-        #for file_id in file_ids:
-            file_content = get_file_from_google_drive(drive_service, file_id)
-            file_contents.append(file_content)
-    #else:
-        # Allow user to upload files directly
     uploaded_files = st.file_uploader("Upload files", accept_multiple_files=True)
     file_contents = [file.read() for file in uploaded_files]

@@ -166,7 +121,7 @@ def main():
     if file_contents and query:
         response = answer_query_with_similarity(query, file_contents)
         if response:
-            st.write("Answer:", response[0])
+            st.write("Answer:", response)
         else:
            st.write("No answer found.")
    else:
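
Note on the return-value change: in the old code, "return cleaned_answer," ends with a trailing comma, so the function returned a one-element tuple and the caller had to index it with response[0]. After this commit the function returns the string itself (or None on failure or when no documents match), so main() passes response to st.write directly. A minimal sketch of the pitfall, using a hypothetical stand-in value rather than the app's real retrieval pipeline:

# Sketch only: illustrates the trailing-comma pitfall this commit removes
# (stand-in value, not the app's actual answering logic).
def old_style():
    cleaned_answer = "example answer"
    return cleaned_answer,    # trailing comma -> returns the 1-tuple ("example answer",)

def new_style():
    cleaned_answer = "example answer"
    return cleaned_answer     # returns the plain string

print(old_style())   # ('example answer',)  -- callers needed response[0]
print(new_style())   # example answer       -- callers can use response directly

This also straightens out the failure path: the old (None, None) return is a non-empty tuple and therefore truthy, so "if response:" passed even on error; returning None makes the "No answer found." branch reachable.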