Spaces:
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -6,8 +6,6 @@ import pdfplumber
|
|
6 |
from docx import Document as DocxDocument
|
7 |
from dotenv import load_dotenv
|
8 |
import json
|
9 |
-
from PIL import Image
|
10 |
-
import pytesseract
|
11 |
|
12 |
# Load environment variables from .env file
|
13 |
load_dotenv("")
|
@@ -50,15 +48,6 @@ def read_docx(file):
|
|
50 |
text += paragraph.text + '\n'
|
51 |
return text
|
52 |
|
53 |
-
# Specify Tesseract path (adjust if necessary)
|
54 |
-
pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'
|
55 |
-
|
56 |
-
# Function to read image files and extract text
|
57 |
-
def read_image(file):
|
58 |
-
image = Image.open(file)
|
59 |
-
text = pytesseract.image_to_string(image)
|
60 |
-
return text
|
61 |
-
|
62 |
@st.cache_resource(show_spinner=False)
|
63 |
def load_data(uploaded_files):
|
64 |
llm = OpenAI(model="gpt-3.5-turbo", temperature=0.5,
|
@@ -73,9 +62,6 @@ def load_data(uploaded_files):
|
|
73 |
elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
|
74 |
text = read_docx(uploaded_file)
|
75 |
docs.append(Document(text=text))
|
76 |
-
elif uploaded_file.type.startswith("image/"): # Check if it's an image file
|
77 |
-
text = read_image(uploaded_file)
|
78 |
-
docs.append(Document(text=text))
|
79 |
|
80 |
index = VectorStoreIndex.from_documents(docs, settings=llm)
|
81 |
return index
|
@@ -119,8 +105,8 @@ def delete_selected_conversations(selected_indices):
|
|
119 |
json.dump(conv, f)
|
120 |
f.write("\n")
|
121 |
|
122 |
-
# File uploader for multiple PDF … [remainder of this removed line was lost in extraction]
|
123 |
-
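uploaded_files = st.file_uploader("Upload PDF … [remainder of this removed line was lost in extraction; given the image-handling branch removed in the same commit, the old call presumably also accepted image types — unverified]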
|
124 |
|
125 |
if uploaded_files and st.session_state.openai_api_key:
|
126 |
index = load_data(uploaded_files)
|
@@ -163,7 +149,7 @@ if uploaded_files and st.session_state.openai_api_key:
|
|
163 |
st.success("Conversation ended. You can start a new one!")
|
164 |
|
165 |
else:
|
166 |
-
st.sidebar.warning("Please enter your OpenAI API key and upload PDF … [remainder of this removed line was lost in extraction]
|
167 |
|
168 |
# Sidebar to toggle visibility of previous conversations
|
169 |
if 'show_conversations' not in st.session_state:
|
@@ -196,4 +182,4 @@ if st.session_state.show_conversations:
|
|
196 |
else:
|
197 |
st.sidebar.write("No previous conversations found.")
|
198 |
else:
|
199 |
-
st.sidebar.write("Previous conversations are hidden. Click 'Toggle Previous Conversations' to show.")
|
|
|
6 |
from docx import Document as DocxDocument
|
7 |
from dotenv import load_dotenv
|
8 |
import json
|
|
|
|
|
9 |
|
10 |
# Load environment variables from .env file
|
11 |
load_dotenv("")
|
|
|
48 |
text += paragraph.text + '\n'
|
49 |
return text
|
50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
@st.cache_resource(show_spinner=False)
|
52 |
def load_data(uploaded_files):
|
53 |
llm = OpenAI(model="gpt-3.5-turbo", temperature=0.5,
|
|
|
62 |
elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
|
63 |
text = read_docx(uploaded_file)
|
64 |
docs.append(Document(text=text))
|
|
|
|
|
|
|
65 |
|
66 |
index = VectorStoreIndex.from_documents(docs, settings=llm)
|
67 |
return index
|
|
|
105 |
json.dump(conv, f)
|
106 |
f.write("\n")
|
107 |
|
108 |
+
# File uploader for multiple PDF and DOCX files
|
109 |
+
uploaded_files = st.file_uploader("Upload PDF or DOCX files", type=["pdf", "docx"], accept_multiple_files=True)
|
110 |
|
111 |
if uploaded_files and st.session_state.openai_api_key:
|
112 |
index = load_data(uploaded_files)
|
|
|
149 |
st.success("Conversation ended. You can start a new one!")
|
150 |
|
151 |
else:
|
152 |
+
st.sidebar.warning("Please enter your OpenAI API key and upload PDF or DOCX files to proceed.")
|
153 |
|
154 |
# Sidebar to toggle visibility of previous conversations
|
155 |
if 'show_conversations' not in st.session_state:
|
|
|
182 |
else:
|
183 |
st.sidebar.write("No previous conversations found.")
|
184 |
else:
|
185 |
+
st.sidebar.write("Previous conversations are hidden. Click 'Toggle Previous Conversations' to show.")
|