bupa1018 committed on
Commit
e2a26e1
·
1 Parent(s): 128f251

Delete process_repo.py

Browse files
Files changed (1) hide show
  1. process_repo.py +0 -72
process_repo.py DELETED
@@ -1,72 +0,0 @@
1
- import os
2
- import tempfile
3
- import zipfile
4
-
5
def extract_repo_files(directory, folder_paths, file_paths):
    """Read selected ``.rst``/``.py`` files out of a zipped repository.

    The first ``*.zip`` entry reported by ``os.listdir(directory)`` is
    unpacked into a temporary directory.  The first top-level entry of the
    unpacked archive is used as the repository root, and every path in
    *folder_paths* and *file_paths* is resolved relative to that root.

    Args:
        directory: Directory that contains the repository ``.zip`` archive.
        folder_paths: Folders (relative to the repository root) whose files
            are collected recursively; may be empty or ``None``.
        file_paths: Individual files (relative to the repository root) to
            collect; may be empty or ``None``.

    Returns:
        A tuple ``(all_texts, file_references)`` of two parallel lists: the
        UTF-8 decoded content of every non-empty ``.rst``/``.py`` file that
        was selected, and that file's path relative to the repository root.

    Raises:
        FileNotFoundError: If *directory* contains no ``.zip`` file.
        ValueError: If the archive unpacks to nothing.
        OSError: If a path listed in *file_paths* does not exist in the
            unpacked archive (raised by ``os.path.getsize``).
        UnicodeDecodeError: If a selected file is not valid UTF-8.
    """
    text_extensions = ('.rst', '.py')
    all_texts = []
    file_references = []

    zip_filename = next(
        (entry for entry in os.listdir(directory) if entry.endswith('.zip')),
        None,
    )
    if zip_filename is None:
        # Without this guard os.path.join(directory, None) fails with an
        # opaque TypeError.
        raise FileNotFoundError(f"No .zip archive found in {directory!r}")
    zip_file_path = os.path.join(directory, zip_filename)
    print(zip_file_path)

    with tempfile.TemporaryDirectory() as tmpdirname:
        # Unzip the file into the temporary directory
        with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
            zip_ref.extractall(tmpdirname)

        print("tmpdirname: ", tmpdirname)
        unzipped_root = os.listdir(tmpdirname)
        print("unzipped_root ", unzipped_root)
        if not unzipped_root:
            raise ValueError(f"Archive {zip_file_path!r} contains no entries")

        # NOTE(review): assumes the archive has a single top-level folder
        # holding the repository; only the first listed entry is used.
        tmpsubdirpath = os.path.join(tmpdirname, unzipped_root[0])
        print("tempsubdirpath: ", tmpsubdirpath)

        files = []
        if folder_paths:
            for folder_path in folder_paths:
                files += _get_all_files_in_folder(tmpsubdirpath, folder_path)
        if file_paths:
            files += [_get_file(tmpsubdirpath, file_path) for file_path in file_paths]

        print(f"Total number of files: {len(files)}")

        for file_path in files:
            if os.path.getsize(file_path) == 0:
                print(f"Skipping an empty file: {file_path}")
                continue

            # Only .rst/.py files are read.  Skipping everything else here
            # guarantees that a text is never recorded for a file that was
            # not actually read (no unbound or stale ``text``).
            if os.path.splitext(file_path)[1] not in text_extensions:
                continue

            with open(file_path, 'rb') as f:
                text = f.read().decode('utf-8')

            all_texts.append(text)
            print("Filepaths brother:", file_path)
            relative_path = os.path.relpath(file_path, tmpsubdirpath)
            print("Relative Filepaths brother:", relative_path)
            file_references.append(relative_path)

    return all_texts, file_references
54
-
55
-
56
-
57
def _get_all_files_in_folder(temp_dir, folder_path):
    """Recursively list every file below ``temp_dir/folder_path``.

    Args:
        temp_dir: Base directory the folder path is joined onto.
        folder_path: Folder to walk, relative to *temp_dir*.

    Returns:
        A list with the joined path of every file found under the folder,
        in a deterministic order: within each directory the files come
        first, sorted by name, followed by the sub-directories in sorted
        order.  The list is empty when the folder does not exist, because
        ``os.walk`` ignores listing errors by default.
    """
    all_files = []
    target_dir = os.path.join(temp_dir, folder_path)

    for root, dirs, files in os.walk(target_dir):
        # Sorting ``dirs`` in place fixes the order in which os.walk descends;
        # without it the result order depends on the filesystem.
        dirs.sort()
        print(f"Files in current directory ({root}): {files}")
        for file in sorted(files):
            print(f"Processing file: {file}")
            all_files.append(os.path.join(root, file))

    return all_files
69
-
70
def _get_file(temp_dir, file_path):
    """Return *file_path* joined onto *temp_dir* with ``os.path.join``.

    The path is only built, not checked: nothing here verifies that the
    file exists.
    """
    return os.path.join(temp_dir, file_path)