shukdevdatta123 committed on
Commit
9d31ec0
·
verified ·
1 Parent(s): cf32ae6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -3
app.py CHANGED
@@ -6,6 +6,8 @@ import pdfplumber
6
  from docx import Document as DocxDocument
7
  from dotenv import load_dotenv
8
  import json
 
 
9
 
10
  # Load environment variables from .env file
11
  load_dotenv("")
@@ -48,6 +50,12 @@ def read_docx(file):
48
  text += paragraph.text + '\n'
49
  return text
50
 
 
 
 
 
 
 
51
  @st.cache_resource(show_spinner=False)
52
  def load_data(uploaded_files):
53
  llm = OpenAI(model="gpt-3.5-turbo", temperature=0.5,
@@ -62,6 +70,9 @@ def load_data(uploaded_files):
62
  elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
63
  text = read_docx(uploaded_file)
64
  docs.append(Document(text=text))
 
 
 
65
 
66
  index = VectorStoreIndex.from_documents(docs, settings=llm)
67
  return index
@@ -105,8 +116,8 @@ def delete_selected_conversations(selected_indices):
105
  json.dump(conv, f)
106
  f.write("\n")
107
 
108
- # File uploader for multiple PDF and DOCX files
109
- uploaded_files = st.file_uploader("Upload PDF or DOCX files", type=["pdf", "docx"], accept_multiple_files=True)
110
 
111
  if uploaded_files and st.session_state.openai_api_key:
112
  index = load_data(uploaded_files)
@@ -149,7 +160,7 @@ if uploaded_files and st.session_state.openai_api_key:
149
  st.success("Conversation ended. You can start a new one!")
150
 
151
  else:
152
- st.sidebar.warning("Please enter your OpenAI API key and upload PDF or DOCX files to proceed.")
153
 
154
  # Sidebar to toggle visibility of previous conversations
155
  if 'show_conversations' not in st.session_state:
 
6
  from docx import Document as DocxDocument
7
  from dotenv import load_dotenv
8
  import json
9
+ from PIL import Image
10
+ import pytesseract
11
 
12
  # Load environment variables from .env file
13
  load_dotenv("")
 
50
  text += paragraph.text + '\n'
51
  return text
52
 
53
+ # Function to read image files and extract text
54
+ def read_image(file):
55
+ image = Image.open(file)
56
+ text = pytesseract.image_to_string(image)
57
+ return text
58
+
59
  @st.cache_resource(show_spinner=False)
60
  def load_data(uploaded_files):
61
  llm = OpenAI(model="gpt-3.5-turbo", temperature=0.5,
 
70
  elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
71
  text = read_docx(uploaded_file)
72
  docs.append(Document(text=text))
73
+ elif uploaded_file.type.startswith("image/"): # Check if it's an image file
74
+ text = read_image(uploaded_file)
75
+ docs.append(Document(text=text))
76
 
77
  index = VectorStoreIndex.from_documents(docs, settings=llm)
78
  return index
 
116
  json.dump(conv, f)
117
  f.write("\n")
118
 
119
+ # File uploader for multiple PDF, DOCX, and image files
120
+ uploaded_files = st.file_uploader("Upload PDF, DOCX, or image files", type=["pdf", "docx", "jpg", "jpeg", "png"], accept_multiple_files=True)
121
 
122
  if uploaded_files and st.session_state.openai_api_key:
123
  index = load_data(uploaded_files)
 
160
  st.success("Conversation ended. You can start a new one!")
161
 
162
  else:
163
+ st.sidebar.warning("Please enter your OpenAI API key and upload PDF, DOCX, or image files to proceed.")
164
 
165
  # Sidebar to toggle visibility of previous conversations
166
  if 'show_conversations' not in st.session_state: