File size: 2,640 Bytes
fc98e50
d37481c
 
fc98e50
bdd0183
781ec79
 
 
 
 
776b230
781ec79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import os 
import tempfile  
import zipfile  

def extract_repo_files(directory, folder_paths, file_paths):
    """Extract the text of selected files from a zipped repository.

    The first ``.zip`` file listed in *directory* is unpacked into a
    temporary directory. The first entry of the unpacked archive is used as
    the repository root, and *folder_paths* / *file_paths* are resolved
    relative to it. Only non-empty files with a ``.rst`` or ``.py``
    extension are read; everything else is ignored.

    Args:
        directory: Directory searched (non-recursively) for a ``.zip`` file.
        folder_paths: Folder paths relative to the repository root whose
            files are collected recursively. May be ``None`` or empty.
        file_paths: Individual file paths relative to the repository root.
            May be ``None`` or empty.

    Returns:
        A tuple ``(all_texts, file_references)`` of two parallel lists: the
        UTF-8 decoded contents of each file that was read, and that file's
        path relative to the repository root.

    Raises:
        FileNotFoundError: If *directory* contains no ``.zip`` file.
        ValueError: If the archive unpacks to nothing.
    """
    all_texts = []
    file_references = []

    zip_filename = next(
        (name for name in os.listdir(directory) if name.endswith('.zip')),
        None,
    )
    if zip_filename is None:
        # Joining ``None`` onto the path would fail with an opaque TypeError,
        # so report the real problem instead.
        raise FileNotFoundError(
            f"No .zip archive found in directory: {directory}"
        )
    zip_file_path = os.path.join(directory, zip_filename)
    print(zip_file_path)

    with tempfile.TemporaryDirectory() as tmpdirname:
        # Unzip the file into the temporary directory; the archive handle is
        # only needed for the extraction itself.
        with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
            zip_ref.extractall(tmpdirname)

        print("tmpdirname: ", tmpdirname)
        unzipped_root = os.listdir(tmpdirname)
        print("unzipped_root ", unzipped_root)
        if not unzipped_root:
            raise ValueError(f"Archive is empty: {zip_file_path}")

        # NOTE(review): the first listed entry is taken as the repository
        # root, which presumably relies on the archive having a single
        # top-level folder -- confirm against the archives callers supply.
        tmpsubdirpath = os.path.join(tmpdirname, unzipped_root[0])
        print("tempsubdirpath: ", tmpsubdirpath)

        files = []
        for folder_path in folder_paths or ():
            files += _get_all_files_in_folder(tmpsubdirpath, folder_path)
        files += [
            _get_file(tmpsubdirpath, file_path)
            for file_path in file_paths or ()
        ]

        print(f"Total number of files: {len(files)}")

        for file_path in files:
            if os.path.getsize(file_path) == 0:
                print(f"Skipping an empty file: {file_path}")
                continue

            # Check the extension before opening so unwanted files are never
            # opened at all.
            if os.path.splitext(file_path)[1] not in ('.rst', '.py'):
                continue

            # Read bytes and decode explicitly so line endings are kept
            # exactly as stored in the archive.
            with open(file_path, 'rb') as f:
                text = f.read().decode('utf-8')

            all_texts.append(text)
            print("Filepaths brother:", file_path)
            relative_path = os.path.relpath(file_path, tmpsubdirpath)
            print("Relative Filepaths brother:", relative_path)
            file_references.append(relative_path)

    return all_texts, file_references



def _get_all_files_in_folder(temp_dir, folder_path):
    
    all_files = [] 
    target_dir = os.path.join(temp_dir, folder_path)

    for root, dirs, files in os.walk(target_dir):
        print(f"Files in current directory ({root}): {files}")
        for file in files:
            print(f"Processing file: {file}")
            all_files.append(os.path.join(root, file))

    return all_files

def _get_file(temp_dir, file_path):
    full_path = os.path.join(temp_dir, file_path)
    return full_path