bupa1018 committed on
Commit
f3a8770
·
1 Parent(s): c923abc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -35
app.py CHANGED
@@ -91,7 +91,7 @@ def download_gitlab_repo():
91
  print("Upload complete")
92
 
93
 
94
- def extract_single_zip_file(directory):
95
  zip_files = [file for file in os.listdir(directory) if file.endswith('.zip')]
96
 
97
  if not zip_files:
@@ -117,40 +117,49 @@ def process_directory(directory):
117
  all_texts = []
118
  file_references = []
119
 
120
- directory_path = extract_single_zip_file(directory)
121
- has_files = any(os.path.isfile(os.path.join(directory_path, file)) for file in os.listdir(directory_path))
122
- os.listdir(directory_path))
123
- if not has_files:
124
- print(f"No files found in the directory: {directory_path}")
125
- return all_texts,
126
- for root, _, files in os.walk(directory_path):
127
- for file in files:
128
- print(f"Any files??: {file}")
129
- file_path = os.path.join(root, file)
130
- file_ext = os.path.splitext(file_path)[1]
131
-
132
- if os.path.getsize(file_path) == 0:
133
- print(f"Skipping an empty file: {file_path}")
134
- continue
135
-
136
- with open(file_path, 'rb') as f:
137
- if file_ext in ['.rst', '.md', '.txt', '.html', '.json', '.yaml', '.py']:
138
- text = f.read().decode('utf-8')
139
- print(f"Extracted text from {file_path}:\n{text[:200]}...\n")
140
- elif file_ext == '.pdf':
141
- reader = PdfReader(f)
142
- text = ""
143
- for page in reader.pages:
144
- text += page.extract_text()
145
- elif file_ext in ['.svg']:
146
- text = f"SVG file content from {file_path}"
147
- elif file_ext in ['.png', '.ico']:
148
- text = f"Image metadata from {file_path}"
149
- else:
150
- continue
151
-
152
- all_texts.append(text)
153
- file_references.append(file_path)
 
 
 
 
 
 
 
 
 
154
 
155
  print(f"All extracted texts:\n{all_texts}")
156
  return all_texts, file_references
 
91
  print("Upload complete")
92
 
93
 
94
+ #def extract_single_zip_file(directory):
95
  zip_files = [file for file in os.listdir(directory) if file.endswith('.zip')]
96
 
97
  if not zip_files:
 
117
  all_texts = []
118
  file_references = []
119
 
120
+ zip_files = [file for file in os.listdir(directory) if file.endswith('.zip')]
121
+
122
+ if not zip_files:
123
+ print("No zip file found in the directory.")
124
+ return all_texts, file_references
125
+
126
+ if len(zip_files) > 1:
127
+ print("More than one zip file found.")
128
+ return all_texts, file_references
129
+ else:
130
+ zip_file_path = os.path.join(directory, zip_files[0])
131
+
132
+ # Create a temporary directory for the zip file
133
+ with tempfile.TemporaryDirectory() as tmpdirname:
134
+ # Unzip the file into the temporary directory
135
+ with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
136
+ zip_ref.extractall(tmpdirname)
137
+ print(f"Extracted {zip_file_path} to {tmpdirname}")
138
+
139
+ # Process the files in the temporary directory
140
+ for root, _, files in os.walk(tmpdirname):
141
+ for file in files:
142
+ print(f"Any files??: {file}")
143
+ file_path = os.path.join(root, file)
144
+ file_ext = os.path.splitext(file_path)[1]
145
+
146
+ if os.path.getsize(file_path) == 0:
147
+ print(f"Skipping an empty file: {file_path}")
148
+ continue
149
+
150
+ with open(file_path, 'rb') as f:
151
+ if file_ext in ['.rst', '.md', '.txt', '.html', '.json', '.yaml', '.py']:
152
+ text = f.read().decode('utf-8')
153
+ print(f"Extracted text from {file_path}:\n{text[:200]}...\n")
154
+ elif file_ext in ['.svg']:
155
+ text = f"SVG file content from {file_path}"
156
+ elif file_ext in ['.png', '.ico']:
157
+ text = f"Image metadata from {file_path}"
158
+ else:
159
+ continue
160
+
161
+ all_texts.append(text)
162
+ file_references.append(file_path)
163
 
164
  print(f"All extracted texts:\n{all_texts}")
165
  return all_texts, file_references