Update app.py
Browse files
app.py
CHANGED
@@ -76,25 +76,98 @@ def chat_with_ai(user_input, chat_history):
|
|
76 |
def clear_history():
    """Return a fresh empty list and an empty string.

    NOTE(review): presumably used to reset the chat history and the text
    input in the UI -- confirm against the caller that wires this up.
    """
    return [], ""
|
78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
def upload_file(file):
|
80 |
if file is None:
|
81 |
return "No file uploaded!"
|
82 |
|
83 |
if isinstance(file, list):
|
84 |
file = file[0]
|
85 |
-
|
86 |
if hasattr(file, 'name'):
|
87 |
file_name = file.name
|
|
|
88 |
elif isinstance(file, dict):
|
89 |
file_name = file.get("name", "uploaded_file")
|
|
|
90 |
else:
|
91 |
-
|
92 |
|
93 |
-
|
|
|
|
|
|
|
94 |
if not os.path.exists("new_file"):
|
95 |
os.makedirs("new_file")
|
96 |
|
97 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
file_path = os.path.join("new_file", file_name)
|
99 |
if hasattr(file, "read"):
|
100 |
content = file.read()
|
|
|
76 |
def clear_history():
    """Return a fresh empty list and an empty string.

    NOTE(review): presumably used to reset the chat history and the text
    input in the UI -- confirm against the caller that wires this up.
    """
    return [], ""
|
78 |
|
79 |
+
import os
|
80 |
+
import PyPDF2
|
81 |
+
import docx
|
82 |
+
import pandas as pd
|
83 |
+
|
84 |
+
def extract_text_from_file(file_path):
    """Extract the text of a file, choosing a parser from its extension.

    The extension is compared case-insensitively. Recognized extensions:
    ``.pdf`` (read with ``PyPDF2``), ``.doc``/``.docx`` (read with ``docx``),
    ``.txt`` and ``.xls``/``.xlsx`` (read with ``pandas``).

    Args:
        file_path: Path of the file to read.

    Returns:
        The extracted text as a string. Failures never propagate: any
        exception raised while reading or parsing is turned into an
        ``"Error processing ...: <exception>"`` message, and an unrecognized
        extension yields ``"Unsupported file type for text extraction."``.
    """
    ext = os.path.splitext(file_path)[1].lower()

    if ext == ".pdf":
        try:
            with open(file_path, "rb") as f:
                pdf_reader = PyPDF2.PdfReader(f)
                # Extraction must happen while the file is still open.
                page_texts = [page.extract_text() for page in pdf_reader.pages]
            # Pages that yield no text are skipped; one join instead of
            # repeated string concatenation.
            return "".join(f"{page_text}\n" for page_text in page_texts if page_text)
        except Exception as e:
            return f"Error processing PDF: {e}"

    if ext in (".doc", ".docx"):
        # NOTE(review): legacy binary .doc files are probably not readable by
        # docx.Document and would land in the error branch -- confirm.
        try:
            doc = docx.Document(file_path)
            return "\n".join(para.text for para in doc.paragraphs)
        except Exception as e:
            return f"Error processing Word document: {e}"

    if ext == ".txt":
        try:
            # "utf-8-sig" strips a leading BOM instead of leaking "\ufeff"
            # into the text; errors="replace" keeps a file with a few
            # non-UTF-8 bytes readable rather than failing the whole upload.
            with open(
                file_path, "r", encoding="utf-8-sig", errors="replace"
            ) as f:
                return f.read()
        except Exception as e:
            return f"Error processing TXT file: {e}"

    if ext in (".xls", ".xlsx"):
        try:
            # Only the first sheet of the workbook is read.
            df = pd.read_excel(file_path)
            # The sheet is rendered as CSV text without the index column.
            return df.to_csv(index=False)
        except Exception as e:
            return f"Error processing Excel file: {e}"

    return "Unsupported file type for text extraction."
|
130 |
+
|
131 |
def upload_file(file):
|
132 |
if file is None:
|
133 |
return "No file uploaded!"
|
134 |
|
135 |
if isinstance(file, list):
|
136 |
file = file[0]
|
137 |
+
|
138 |
if hasattr(file, 'name'):
|
139 |
file_name = file.name
|
140 |
+
file_data = file.read()
|
141 |
elif isinstance(file, dict):
|
142 |
file_name = file.get("name", "uploaded_file")
|
143 |
+
file_data = file.get("data")
|
144 |
else:
|
145 |
+
return "Uploaded file format not recognized."
|
146 |
|
147 |
+
if file_data is None:
|
148 |
+
return "Uploaded file data not found!"
|
149 |
+
|
150 |
+
|
151 |
if not os.path.exists("new_file"):
|
152 |
os.makedirs("new_file")
|
153 |
|
154 |
|
155 |
+
file_path = os.path.join("new_file", file_name)
|
156 |
+
try:
|
157 |
+
with open(file_path, "wb") as f:
|
158 |
+
f.write(file_data)
|
159 |
+
except Exception as e:
|
160 |
+
return f"Error saving file: {e}"
|
161 |
+
|
162 |
+
|
163 |
+
extracted_text = extract_text_from_file(file_path)
|
164 |
+
|
165 |
+
|
166 |
+
preview = extracted_text[:200] + "..." if len(extracted_text) > 200 else extracted_text
|
167 |
+
return f"File {file_name} uploaded and processed successfully!\nExtracted text preview:\n{preview}"
|
168 |
+
|
169 |
+
|
170 |
+
|
171 |
file_path = os.path.join("new_file", file_name)
|
172 |
if hasattr(file, "read"):
|
173 |
content = file.read()
|