shukdevdatta123 committed on
Commit
f16e598
·
verified ·
1 Parent(s): 2b23280

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -18
app.py CHANGED
@@ -6,8 +6,6 @@ import pdfplumber
6
  from docx import Document as DocxDocument
7
  from dotenv import load_dotenv
8
  import json
9
- from PIL import Image
10
- import pytesseract
11
 
12
  # Load environment variables from .env file
13
  load_dotenv("")
@@ -50,15 +48,6 @@ def read_docx(file):
50
  text += paragraph.text + '\n'
51
  return text
52
 
53
- # Specify Tesseract path (adjust if necessary)
54
- pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'
55
-
56
- # Function to read image files and extract text
57
- def read_image(file):
58
- image = Image.open(file)
59
- text = pytesseract.image_to_string(image)
60
- return text
61
-
62
  @st.cache_resource(show_spinner=False)
63
  def load_data(uploaded_files):
64
  llm = OpenAI(model="gpt-3.5-turbo", temperature=0.5,
@@ -73,9 +62,6 @@ def load_data(uploaded_files):
73
  elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
74
  text = read_docx(uploaded_file)
75
  docs.append(Document(text=text))
76
- elif uploaded_file.type.startswith("image/"): # Check if it's an image file
77
- text = read_image(uploaded_file)
78
- docs.append(Document(text=text))
79
 
80
  index = VectorStoreIndex.from_documents(docs, settings=llm)
81
  return index
@@ -119,8 +105,8 @@ def delete_selected_conversations(selected_indices):
119
  json.dump(conv, f)
120
  f.write("\n")
121
 
122
- # File uploader for multiple PDF, DOCX, and image files
123
- uploaded_files = st.file_uploader("Upload PDF, DOCX, or image files", type=["pdf", "docx", "jpg", "jpeg", "png"], accept_multiple_files=True)
124
 
125
  if uploaded_files and st.session_state.openai_api_key:
126
  index = load_data(uploaded_files)
@@ -163,7 +149,7 @@ if uploaded_files and st.session_state.openai_api_key:
163
  st.success("Conversation ended. You can start a new one!")
164
 
165
  else:
166
- st.sidebar.warning("Please enter your OpenAI API key and upload PDF, DOCX, or image files to proceed.")
167
 
168
  # Sidebar to toggle visibility of previous conversations
169
  if 'show_conversations' not in st.session_state:
@@ -196,4 +182,4 @@ if st.session_state.show_conversations:
196
  else:
197
  st.sidebar.write("No previous conversations found.")
198
  else:
199
- st.sidebar.write("Previous conversations are hidden. Click 'Toggle Previous Conversations' to show.")
 
6
  from docx import Document as DocxDocument
7
  from dotenv import load_dotenv
8
  import json
 
 
9
 
10
  # Load environment variables from .env file
11
  load_dotenv("")
 
48
  text += paragraph.text + '\n'
49
  return text
50
 
 
 
 
 
 
 
 
 
 
51
  @st.cache_resource(show_spinner=False)
52
  def load_data(uploaded_files):
53
  llm = OpenAI(model="gpt-3.5-turbo", temperature=0.5,
 
62
  elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
63
  text = read_docx(uploaded_file)
64
  docs.append(Document(text=text))
 
 
 
65
 
66
  index = VectorStoreIndex.from_documents(docs, settings=llm)
67
  return index
 
105
  json.dump(conv, f)
106
  f.write("\n")
107
 
108
+ # File uploader for multiple PDF and DOCX files
109
+ uploaded_files = st.file_uploader("Upload PDF or DOCX files", type=["pdf", "docx"], accept_multiple_files=True)
110
 
111
  if uploaded_files and st.session_state.openai_api_key:
112
  index = load_data(uploaded_files)
 
149
  st.success("Conversation ended. You can start a new one!")
150
 
151
  else:
152
+ st.sidebar.warning("Please enter your OpenAI API key and upload PDF or DOCX files to proceed.")
153
 
154
  # Sidebar to toggle visibility of previous conversations
155
  if 'show_conversations' not in st.session_state:
 
182
  else:
183
  st.sidebar.write("No previous conversations found.")
184
  else:
185
+ st.sidebar.write("Previous conversations are hidden. Click 'Toggle Previous Conversations' to show.")