Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -6,6 +6,8 @@ import pdfplumber
|
|
6 |
from docx import Document as DocxDocument
|
7 |
from dotenv import load_dotenv
|
8 |
import json
|
|
|
|
|
9 |
|
10 |
# Load environment variables from .env file
|
11 |
load_dotenv("")
|
@@ -48,6 +50,12 @@ def read_docx(file):
|
|
48 |
text += paragraph.text + '\n'
|
49 |
return text
|
50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
@st.cache_resource(show_spinner=False)
|
52 |
def load_data(uploaded_files):
|
53 |
llm = OpenAI(model="gpt-3.5-turbo", temperature=0.5,
|
@@ -62,6 +70,9 @@ def load_data(uploaded_files):
|
|
62 |
elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
|
63 |
text = read_docx(uploaded_file)
|
64 |
docs.append(Document(text=text))
|
|
|
|
|
|
|
65 |
|
66 |
index = VectorStoreIndex.from_documents(docs, settings=llm)
|
67 |
return index
|
@@ -105,8 +116,8 @@ def delete_selected_conversations(selected_indices):
|
|
105 |
json.dump(conv, f)
|
106 |
f.write("\n")
|
107 |
|
108 |
-
# File uploader for multiple PDF and
|
109 |
-
uploaded_files = st.file_uploader("Upload PDF or
|
110 |
|
111 |
if uploaded_files and st.session_state.openai_api_key:
|
112 |
index = load_data(uploaded_files)
|
@@ -149,7 +160,7 @@ if uploaded_files and st.session_state.openai_api_key:
|
|
149 |
st.success("Conversation ended. You can start a new one!")
|
150 |
|
151 |
else:
|
152 |
-
st.sidebar.warning("Please enter your OpenAI API key and upload PDF or
|
153 |
|
154 |
# Sidebar to toggle visibility of previous conversations
|
155 |
if 'show_conversations' not in st.session_state:
|
|
|
6 |
from docx import Document as DocxDocument
|
7 |
from dotenv import load_dotenv
|
8 |
import json
|
9 |
+
from PIL import Image
|
10 |
+
import pytesseract
|
11 |
|
12 |
# Load environment variables from .env file
|
13 |
load_dotenv("")
|
|
|
50 |
text += paragraph.text + '\n'
|
51 |
return text
|
52 |
|
53 |
+
# Function to read image files and extract text
|
54 |
+
def read_image(file):
    """Extract text from an image file using OCR.

    Args:
        file: Anything accepted by ``PIL.Image.open`` -- a filesystem path
            or a binary file-like object (the caller passes the uploaded
            file object).

    Returns:
        The text recognised by ``pytesseract.image_to_string``.
    """
    # Open the image in a context manager so Pillow releases the image
    # (and any file handle it opened itself) once OCR has finished,
    # even if the OCR call raises.
    with Image.open(file) as image:
        return pytesseract.image_to_string(image)
|
58 |
+
|
59 |
@st.cache_resource(show_spinner=False)
|
60 |
def load_data(uploaded_files):
|
61 |
llm = OpenAI(model="gpt-3.5-turbo", temperature=0.5,
|
|
|
70 |
elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
|
71 |
text = read_docx(uploaded_file)
|
72 |
docs.append(Document(text=text))
|
73 |
+
elif uploaded_file.type.startswith("image/"): # Check if it's an image file
|
74 |
+
text = read_image(uploaded_file)
|
75 |
+
docs.append(Document(text=text))
|
76 |
|
77 |
index = VectorStoreIndex.from_documents(docs, settings=llm)
|
78 |
return index
|
|
|
116 |
json.dump(conv, f)
|
117 |
f.write("\n")
|
118 |
|
119 |
+
# File uploader for multiple PDF, DOCX, and image files
|
120 |
+
uploaded_files = st.file_uploader("Upload PDF, DOCX, or image files", type=["pdf", "docx", "jpg", "jpeg", "png"], accept_multiple_files=True)
|
121 |
|
122 |
if uploaded_files and st.session_state.openai_api_key:
|
123 |
index = load_data(uploaded_files)
|
|
|
160 |
st.success("Conversation ended. You can start a new one!")
|
161 |
|
162 |
else:
|
163 |
+
st.sidebar.warning("Please enter your OpenAI API key and upload PDF, DOCX, or image files to proceed.")
|
164 |
|
165 |
# Sidebar to toggle visibility of previous conversations
|
166 |
if 'show_conversations' not in st.session_state:
|