Spaces:

shukdevdatta123
/

Chatbot

Running

App Files Community

shukdevdatta123 committed on Oct 21, 2024

Commit

f16e598

verified ·

1 Parent(s): 2b23280

Update app.py

Browse files

Files changed (1) hide show

app.py +4 -18

app.py CHANGED Viewed

@@ -6,8 +6,6 @@ import pdfplumber
 from docx import Document as DocxDocument
 from dotenv import load_dotenv
 import json
-from PIL import Image
-import pytesseract
 # Load environment variables from .env file
 load_dotenv("")
@@ -50,15 +48,6 @@ def read_docx(file):
         text += paragraph.text + '\n'
     return text
-# Specify Tesseract path (adjust if necessary)
-pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'
-# Function to read image files and extract text
-def read_image(file):
-    image = Image.open(file)
-    text = pytesseract.image_to_string(image)
-    return text
 @st.cache_resource(show_spinner=False)
 def load_data(uploaded_files):
     llm = OpenAI(model="gpt-3.5-turbo", temperature=0.5,
@@ -73,9 +62,6 @@ def load_data(uploaded_files):
             elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
                 text = read_docx(uploaded_file)
                 docs.append(Document(text=text))
-            elif uploaded_file.type.startswith("image/"):  # Check if it's an image file
-                text = read_image(uploaded_file)
-                docs.append(Document(text=text))
         index = VectorStoreIndex.from_documents(docs, settings=llm)
         return index
@@ -119,8 +105,8 @@ def delete_selected_conversations(selected_indices):
             json.dump(conv, f)
             f.write("\n")
-# File uploader for multiple PDF, DOCX, and image files
-uploaded_files = st.file_uploader("Upload PDF, DOCX, or image files", type=["pdf", "docx", "jpg", "jpeg", "png"], accept_multiple_files=True)
 if uploaded_files and st.session_state.openai_api_key:
     index = load_data(uploaded_files)
@@ -163,7 +149,7 @@ if uploaded_files and st.session_state.openai_api_key:
         st.success("Conversation ended. You can start a new one!")
 else:
-    st.sidebar.warning("Please enter your OpenAI API key and upload PDF, DOCX, or image files to proceed.")
 # Sidebar to toggle visibility of previous conversations
 if 'show_conversations' not in st.session_state:
@@ -196,4 +182,4 @@ if st.session_state.show_conversations:
     else:
         st.sidebar.write("No previous conversations found.")
 else:
-    st.sidebar.write("Previous conversations are hidden. Click 'Toggle Previous Conversations' to show.")

 from docx import Document as DocxDocument
 from dotenv import load_dotenv
 import json
 # Load environment variables from .env file
 load_dotenv("")
         text += paragraph.text + '\n'
     return text
 @st.cache_resource(show_spinner=False)
 def load_data(uploaded_files):
     llm = OpenAI(model="gpt-3.5-turbo", temperature=0.5,
             elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
                 text = read_docx(uploaded_file)
                 docs.append(Document(text=text))
         index = VectorStoreIndex.from_documents(docs, settings=llm)
         return index
             json.dump(conv, f)
             f.write("\n")
+# File uploader for multiple PDF and DOCX files
+uploaded_files = st.file_uploader("Upload PDF or DOCX files", type=["pdf", "docx"], accept_multiple_files=True)
 if uploaded_files and st.session_state.openai_api_key:
     index = load_data(uploaded_files)
         st.success("Conversation ended. You can start a new one!")
 else:
+    st.sidebar.warning("Please enter your OpenAI API key and upload PDF or DOCX files to proceed.")
 # Sidebar to toggle visibility of previous conversations
 if 'show_conversations' not in st.session_state:
     else:
         st.sidebar.write("No previous conversations found.")
 else:
+    st.sidebar.write("Previous conversations are hidden. Click 'Toggle Previous Conversations' to show.")