anasmkh committed on
Commit
769d8f2
·
verified ·
1 Parent(s): 5f94cf5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -3
app.py CHANGED
@@ -76,25 +76,98 @@ def chat_with_ai(user_input, chat_history):
76
  def clear_history():
77
  return [], ""
78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  def upload_file(file):
80
  if file is None:
81
  return "No file uploaded!"
82
 
83
  if isinstance(file, list):
84
  file = file[0]
85
-
86
  if hasattr(file, 'name'):
87
  file_name = file.name
 
88
  elif isinstance(file, dict):
89
  file_name = file.get("name", "uploaded_file")
 
90
  else:
91
- file_name = "uploaded_file"
92
 
93
-
 
 
 
94
  if not os.path.exists("new_file"):
95
  os.makedirs("new_file")
96
 
97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  file_path = os.path.join("new_file", file_name)
99
  if hasattr(file, "read"):
100
  content = file.read()
 
76
  def clear_history():
77
  return [], ""
78
 
79
+ import os
80
+ import PyPDF2
81
+ import docx
82
+ import pandas as pd
83
+
84
def extract_text_from_file(file_path):
    """Extract text from a file, choosing the parser by file extension.

    Supports PDF, DOC/DOCX, TXT and XLS/XLSX. The extension comparison is
    case-insensitive.

    Args:
        file_path: Path of the file to read.

    Returns:
        The extracted text as a string. Failures are never raised to the
        caller: a parsing/IO error yields an ``"Error processing ...: <exc>"``
        message, and an unrecognised extension yields
        ``"Unsupported file type for text extraction."``.
    """
    ext = os.path.splitext(file_path)[1].lower()

    if ext == ".pdf":
        try:
            with open(file_path, "rb") as f:
                pdf_reader = PyPDF2.PdfReader(f)
                page_texts = (page.extract_text() for page in pdf_reader.pages)
                # Pages that yield no text are skipped; each kept page is
                # terminated with a newline.
                return "".join(f"{page_text}\n" for page_text in page_texts if page_text)
        except Exception as e:
            return f"Error processing PDF: {e}"

    if ext in (".doc", ".docx"):
        # NOTE(review): python-docx is documented for .docx files; a legacy
        # binary .doc will presumably end up in the except branch -- confirm.
        try:
            doc = docx.Document(file_path)
            return "\n".join(para.text for para in doc.paragraphs)
        except Exception as e:
            return f"Error processing Word document: {e}"

    if ext == ".txt":
        try:
            # "utf-8-sig" decodes plain UTF-8 identically but also strips a
            # leading byte-order mark, so the text does not start with "\ufeff".
            with open(file_path, "r", encoding="utf-8-sig") as f:
                return f.read()
        except Exception as e:
            return f"Error processing TXT file: {e}"

    if ext in (".xls", ".xlsx"):
        try:
            # read_excel is called with its defaults (first sheet only); the
            # resulting frame is rendered as CSV text without the index column.
            df = pd.read_excel(file_path)
            return df.to_csv(index=False)
        except Exception as e:
            return f"Error processing Excel file: {e}"

    return "Unsupported file type for text extraction."
130
+
131
  def upload_file(file):
132
  if file is None:
133
  return "No file uploaded!"
134
 
135
  if isinstance(file, list):
136
  file = file[0]
137
+
138
  if hasattr(file, 'name'):
139
  file_name = file.name
140
+ file_data = file.read()
141
  elif isinstance(file, dict):
142
  file_name = file.get("name", "uploaded_file")
143
+ file_data = file.get("data")
144
  else:
145
+ return "Uploaded file format not recognized."
146
 
147
+ if file_data is None:
148
+ return "Uploaded file data not found!"
149
+
150
+
151
  if not os.path.exists("new_file"):
152
  os.makedirs("new_file")
153
 
154
 
155
+ file_path = os.path.join("new_file", file_name)
156
+ try:
157
+ with open(file_path, "wb") as f:
158
+ f.write(file_data)
159
+ except Exception as e:
160
+ return f"Error saving file: {e}"
161
+
162
+
163
+ extracted_text = extract_text_from_file(file_path)
164
+
165
+
166
+ preview = extracted_text[:200] + "..." if len(extracted_text) > 200 else extracted_text
167
+ return f"File {file_name} uploaded and processed successfully!\nExtracted text preview:\n{preview}"
168
+
169
+
170
+
171
  file_path = os.path.join("new_file", file_name)
172
  if hasattr(file, "read"):
173
  content = file.read()