Spaces:

shukdevdatta123
/

Chatbot

Running

App Files Community

shukdevdatta123 committed on Oct 21, 2024

Commit

9d31ec0

verified ·

1 Parent(s): cf32ae6

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -3

app.py CHANGED Viewed

@@ -6,6 +6,8 @@ import pdfplumber
 from docx import Document as DocxDocument
 from dotenv import load_dotenv
 import json
 # Load environment variables from .env file
 load_dotenv("")
@@ -48,6 +50,12 @@ def read_docx(file):
         text += paragraph.text + '\n'
     return text
 @st.cache_resource(show_spinner=False)
 def load_data(uploaded_files):
     llm = OpenAI(model="gpt-3.5-turbo", temperature=0.5,
@@ -62,6 +70,9 @@ def load_data(uploaded_files):
             elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
                 text = read_docx(uploaded_file)
                 docs.append(Document(text=text))
         index = VectorStoreIndex.from_documents(docs, settings=llm)
         return index
@@ -105,8 +116,8 @@ def delete_selected_conversations(selected_indices):
             json.dump(conv, f)
             f.write("\n")
-# File uploader for multiple PDF and DOCX files
-uploaded_files = st.file_uploader("Upload PDF or DOCX files", type=["pdf", "docx"], accept_multiple_files=True)
 if uploaded_files and st.session_state.openai_api_key:
     index = load_data(uploaded_files)
@@ -149,7 +160,7 @@ if uploaded_files and st.session_state.openai_api_key:
         st.success("Conversation ended. You can start a new one!")
 else:
-    st.sidebar.warning("Please enter your OpenAI API key and upload PDF or DOCX files to proceed.")
 # Sidebar to toggle visibility of previous conversations
 if 'show_conversations' not in st.session_state:

 from docx import Document as DocxDocument
 from dotenv import load_dotenv
 import json
+from PIL import Image
+import pytesseract
 # Load environment variables from .env file
 load_dotenv("")
         text += paragraph.text + '\n'
     return text
+# Function to read image files and extract text
+def read_image(file):
+    image = Image.open(file)
+    text = pytesseract.image_to_string(image)
+    return text
 @st.cache_resource(show_spinner=False)
 def load_data(uploaded_files):
     llm = OpenAI(model="gpt-3.5-turbo", temperature=0.5,
             elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
                 text = read_docx(uploaded_file)
                 docs.append(Document(text=text))
+            elif uploaded_file.type.startswith("image/"):  # Check if it's an image file
+                text = read_image(uploaded_file)
+                docs.append(Document(text=text))
         index = VectorStoreIndex.from_documents(docs, settings=llm)
         return index
             json.dump(conv, f)
             f.write("\n")
+# File uploader for multiple PDF, DOCX, and image files
+uploaded_files = st.file_uploader("Upload PDF, DOCX, or image files", type=["pdf", "docx", "jpg", "jpeg", "png"], accept_multiple_files=True)
 if uploaded_files and st.session_state.openai_api_key:
     index = load_data(uploaded_files)
         st.success("Conversation ended. You can start a new one!")
 else:
+    st.sidebar.warning("Please enter your OpenAI API key and upload PDF, DOCX, or image files to proceed.")
 # Sidebar to toggle visibility of previous conversations
 if 'show_conversations' not in st.session_state: