Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import os
|
2 |
import subprocess
|
3 |
import gradio as gr
|
|
|
4 |
|
5 |
def clone_repo(url, repo_dir):
|
6 |
env = os.environ.copy()
|
@@ -23,6 +24,23 @@ def read_file_content(file_path):
|
|
23 |
with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
|
24 |
return file.read()
|
25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
def extract_repo_content(url):
|
27 |
repo_dir = "./temp_repo"
|
28 |
if os.path.exists(repo_dir):
|
@@ -32,24 +50,21 @@ def extract_repo_content(url):
|
|
32 |
if not success:
|
33 |
return [{"header": {"name": "Error", "type": "error", "size": 0}, "content": error}]
|
34 |
|
|
|
35 |
extracted_content = []
|
36 |
-
for
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
else:
|
50 |
-
content["content"] = "File too large or binary, content not captured."
|
51 |
-
|
52 |
-
extracted_content.append(content)
|
53 |
|
54 |
return extracted_content
|
55 |
|
|
|
1 |
import os
|
2 |
import subprocess
|
3 |
import gradio as gr
|
4 |
+
from magika import Magika
|
5 |
|
6 |
def clone_repo(url, repo_dir):
|
7 |
env = os.environ.copy()
|
|
|
24 |
with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
|
25 |
return file.read()
|
26 |
|
27 |
+
def validate_file_types(directory):
    """Identify the content type of every file under *directory* with Magika.

    The tree is walked recursively. The ``.git`` metadata directory is pruned
    from the walk, so repository internals are neither read nor classified.

    Args:
        directory: Root directory to scan.

    Returns:
        A dict mapping each file path (walk root joined with the file name)
        to the label Magika reports for its bytes, or to the string
        ``"Error: <message>"`` when the file could not be read or identified.

    NOTE(review): Magika's ``ct_label`` values appear to be short content-type
    names (e.g. ``"python"``, ``"markdown"``) rather than MIME types -- confirm
    that callers doing prefix checks such as ``startswith("text")`` get the
    matches they expect.
    """
    m = Magika()
    file_types = {}
    for root, dirs, files in os.walk(directory):
        # Prune by exact directory name. A substring test on the whole path
        # would also discard unrelated directories such as ``.github``, and
        # would still descend into every subdirectory of ``.git``.
        dirs[:] = [d for d in dirs if d != '.git']
        for file_name in files:
            file_path = os.path.join(root, file_name)
            try:
                with open(file_path, 'rb') as file:
                    file_bytes = file.read()
                result = m.identify_bytes(file_bytes)
                file_types[file_path] = result.output.ct_label
            except Exception as e:
                # Best effort: one unreadable file (permissions, broken
                # symlink, ...) is recorded as an error entry instead of
                # aborting the whole scan.
                file_types[file_path] = f"Error: {str(e)}"
    return file_types
|
43 |
+
|
44 |
def extract_repo_content(url):
|
45 |
repo_dir = "./temp_repo"
|
46 |
if os.path.exists(repo_dir):
|
|
|
50 |
if not success:
|
51 |
return [{"header": {"name": "Error", "type": "error", "size": 0}, "content": error}]
|
52 |
|
53 |
+
file_types = validate_file_types(repo_dir)
|
54 |
extracted_content = []
|
55 |
+
for file_path, file_type in file_types.items():
|
56 |
+
file_summary = get_file_summary(file_path)
|
57 |
+
content = {"header": file_summary}
|
58 |
+
|
59 |
+
if file_type.startswith("text") and file_summary["size"] <= 1024 * 1024:
|
60 |
+
try:
|
61 |
+
content["content"] = read_file_content(file_path)
|
62 |
+
except Exception as e:
|
63 |
+
content["content"] = f"Failed to read file content: {str(e)}"
|
64 |
+
else:
|
65 |
+
content["content"] = "File too large or binary, content not captured."
|
66 |
+
|
67 |
+
extracted_content.append(content)
|
|
|
|
|
|
|
|
|
68 |
|
69 |
return extracted_content
|
70 |
|