Update app.py
Browse files
app.py
CHANGED
@@ -76,116 +76,14 @@ def chat_with_ai(user_input, chat_history):
|
|
76 |
def clear_history():
|
77 |
return [], ""
|
78 |
|
79 |
-
import os
|
80 |
-
import PyPDF2
|
81 |
-
import docx
|
82 |
-
import pandas as pd
|
83 |
-
|
84 |
-
def extract_text_from_file(file_path):
|
85 |
-
"""
|
86 |
-
Extracts text from the file based on its extension.
|
87 |
-
Supports: PDF, DOC/DOCX, TXT, XLS/XLSX.
|
88 |
-
"""
|
89 |
-
ext = os.path.splitext(file_path)[1].lower()
|
90 |
-
text = ""
|
91 |
-
|
92 |
-
if ext == ".pdf":
|
93 |
-
try:
|
94 |
-
with open(file_path, "rb") as f:
|
95 |
-
pdf_reader = PyPDF2.PdfReader(f)
|
96 |
-
for page in pdf_reader.pages:
|
97 |
-
page_text = page.extract_text()
|
98 |
-
if page_text:
|
99 |
-
text += page_text + "\n"
|
100 |
-
except Exception as e:
|
101 |
-
text = f"Error processing PDF: {e}"
|
102 |
-
|
103 |
-
elif ext in [".doc", ".docx"]:
|
104 |
-
try:
|
105 |
-
doc = docx.Document(file_path)
|
106 |
-
text = "\n".join([para.text for para in doc.paragraphs])
|
107 |
-
except Exception as e:
|
108 |
-
text = f"Error processing Word document: {e}"
|
109 |
-
|
110 |
-
elif ext == ".txt":
|
111 |
-
try:
|
112 |
-
with open(file_path, "r", encoding="utf-8") as f:
|
113 |
-
text = f.read()
|
114 |
-
except Exception as e:
|
115 |
-
text = f"Error processing TXT file: {e}"
|
116 |
-
|
117 |
-
elif ext in [".xls", ".xlsx"]:
|
118 |
-
try:
|
119 |
-
# Read the first sheet of the Excel file
|
120 |
-
df = pd.read_excel(file_path)
|
121 |
-
# Convert the dataframe to CSV format (or any format you prefer)
|
122 |
-
text = df.to_csv(index=False)
|
123 |
-
except Exception as e:
|
124 |
-
text = f"Error processing Excel file: {e}"
|
125 |
-
|
126 |
-
else:
|
127 |
-
text = "Unsupported file type for text extraction."
|
128 |
-
|
129 |
-
return text
|
130 |
-
|
131 |
def upload_file(file):
|
132 |
-
""
|
133 |
-
Handles file upload from Gradio.
|
134 |
-
Saves the file to the "new_file" directory and extracts text content based on file type.
|
135 |
-
Supports file-like objects, dictionaries, or file paths.
|
136 |
-
"""
|
137 |
-
# Check if a file was uploaded
|
138 |
-
if file is None:
|
139 |
-
return "No file uploaded!"
|
140 |
-
|
141 |
-
# If file is a list (multiple files), take the first one
|
142 |
-
if isinstance(file, list):
|
143 |
-
file = file[0]
|
144 |
-
|
145 |
-
# Initialize file_name and file_data based on the type of 'file'
|
146 |
-
if hasattr(file, 'read'):
|
147 |
-
# file is a file-like object
|
148 |
-
file_data = file.read()
|
149 |
-
file_name = getattr(file, 'name', "uploaded_file")
|
150 |
-
elif isinstance(file, dict):
|
151 |
-
# file is a dictionary with "name" and "data" keys
|
152 |
-
file_name = file.get("name", "uploaded_file")
|
153 |
-
file_data = file.get("data")
|
154 |
-
elif isinstance(file, str):
|
155 |
-
# file is a string (e.g., a NamedString representing a file path)
|
156 |
-
file_name = os.path.basename(file)
|
157 |
-
try:
|
158 |
-
with open(file, "rb") as f:
|
159 |
-
file_data = f.read()
|
160 |
-
except Exception as e:
|
161 |
-
return f"Error reading file from path: {e}"
|
162 |
-
else:
|
163 |
-
return "Uploaded file format not recognized."
|
164 |
-
|
165 |
-
# Validate that file_data is available
|
166 |
-
if file_data is None:
|
167 |
-
return "Uploaded file data not found!"
|
168 |
-
|
169 |
-
# Ensure the "new_file" directory exists
|
170 |
if not os.path.exists("new_file"):
|
171 |
os.makedirs("new_file")
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
with open(file_path, "wb") as f:
|
177 |
-
f.write(file_data)
|
178 |
-
except Exception as e:
|
179 |
-
return f"Error saving file: {e}"
|
180 |
-
|
181 |
-
# Extract text from the file for further processing
|
182 |
-
extracted_text = extract_text_from_file(file_path)
|
183 |
-
|
184 |
-
# Create a preview of the extracted text
|
185 |
-
preview = extracted_text[:200] + "..." if len(extracted_text) > 200 else extracted_text
|
186 |
-
return f"File {file_name} uploaded and processed successfully!\nExtracted text preview:\n{preview}"
|
187 |
-
|
188 |
-
|
189 |
|
190 |
def gradio_chatbot():
|
191 |
with gr.Blocks() as demo:
|
|
|
76 |
def clear_history():
    """Return a fresh empty list and an empty string.

    NOTE(review): presumably these are the reset values for the chat
    history and the text input in the Gradio UI -- confirm against the
    event wiring in ``gradio_chatbot``.

    Returns:
        tuple: ``([], "")``. A new list object is created on every call, so
        callers never share state through the returned value.
    """
    return [], ""
|
78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
def upload_file(file):
    """Save an uploaded file into the local ``new_file`` directory.

    Args:
        file: The upload handed over by the UI. Either a file-like object
            exposing ``read()`` (and usually ``name``), or a filesystem path
            (``str`` / ``os.PathLike``). ``None`` means nothing was uploaded.

    Returns:
        str: A human-readable status message.

    Raises:
        TypeError: If ``file`` is neither file-like nor path-like.
        OSError: If the file cannot be read or written.
    """
    # Imported locally so the function does not rely on module-level imports
    # that may live outside the visible part of the file.
    import os
    import shutil

    if file is None:
        return "No file uploaded!"

    is_stream = hasattr(file, "read")
    raw_name = getattr(file, "name", None) if is_stream else os.fspath(file)

    # Keep only the final path component. Upload names can be absolute
    # temp-file paths: os.path.join() would then discard "new_file" and the
    # write below would truncate the source file itself. Stripping directory
    # parts also blocks "../" traversal out of the target directory.
    file_name = os.path.basename(str(raw_name or ""))
    if file_name in ("", ".", ".."):
        file_name = "uploaded_file"

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs("new_file", exist_ok=True)
    file_path = os.path.join("new_file", file_name)

    if is_stream:
        data = file.read()
        # A handle opened in text mode yields str; the target is binary.
        if isinstance(data, str):
            data = data.encode("utf-8")
        with open(file_path, "wb") as f:
            f.write(data)
    else:
        shutil.copyfile(os.fspath(file), file_path)

    return f"File {file_name} uploaded successfully!"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
|
88 |
def gradio_chatbot():
|
89 |
with gr.Blocks() as demo:
|