Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -13,10 +13,7 @@ if not hf_token:
|
|
13 |
if not hf_user:
|
14 |
raise ValueError("SPACE_AUTHOR_NAME environment variable is not set")
|
15 |
|
16 |
-
|
17 |
-
# login(token=hf_token, add_to_git_credential=True)
|
18 |
-
|
19 |
-
SUPPORTED_FILE_TYPES = ["txt", "shell", "python", "markdown", "yaml", "json", "csv", "tsv", "xml", "html", "ini"]
|
20 |
|
21 |
def validate_url(url):
    """Return True when ``url`` is a string that starts with ``https://``.

    Only the scheme prefix is checked; the rest of the URL is not parsed.
    Non-string input (for example ``None``) is reported as invalid instead
    of raising ``AttributeError``.
    """
    return isinstance(url, str) and url.startswith('https://')
|
@@ -24,7 +21,6 @@ def validate_url(url):
|
|
24 |
def clone_repo(url, repo_dir, hf_token, hf_user):
|
25 |
env = os.environ.copy()
|
26 |
env['GIT_LFS_SKIP_SMUDGE'] = '1'
|
27 |
-
# Construct the Git URL with the token and author name for authentication
|
28 |
token_url = url.replace('https://', f'https://{hf_user}:{hf_token}@')
|
29 |
result = subprocess.run(["git", "clone", token_url, repo_dir], env=env, capture_output=True, text=True)
|
30 |
if result.returncode != 0:
|
@@ -37,11 +33,16 @@ def get_file_summary(file_path, file_type):
|
|
37 |
"name": os.path.relpath(file_path),
|
38 |
"type": file_type,
|
39 |
"size": size,
|
|
|
|
|
40 |
}
|
41 |
|
42 |
-
def read_file_content(file_path):
|
43 |
with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
|
44 |
-
|
|
|
|
|
|
|
45 |
|
46 |
def validate_file_types(directory):
|
47 |
m = Magika()
|
@@ -88,7 +89,6 @@ def extract_repo_content(url, hf_token, hf_user):
|
|
88 |
|
89 |
extracted_content.append(content)
|
90 |
|
91 |
-
# Cleanup temporary directory
|
92 |
subprocess.run(["rm", "-rf", repo_dir])
|
93 |
|
94 |
return extracted_content
|
@@ -100,6 +100,8 @@ def format_output(extracted_content, repo_url):
|
|
100 |
formatted_output += f"### File: {file_data['header']['name']}\n"
|
101 |
formatted_output += f"**Type:** {file_data['header']['type']}\n"
|
102 |
formatted_output += f"**Size:** {file_data['header']['size']} bytes\n"
|
|
|
|
|
103 |
formatted_output += "#### Content:\n"
|
104 |
formatted_output += f"```\n{file_data['content']}\n```\n\n"
|
105 |
else:
|
@@ -130,4 +132,4 @@ with app:
|
|
130 |
|
131 |
extract_button.click(fn=extract_and_display, inputs=url_input, outputs=output_display)
|
132 |
|
133 |
-
app.launch()
|
|
|
13 |
if not hf_user:
|
14 |
raise ValueError("SPACE_AUTHOR_NAME environment variable is not set")
|
15 |
|
16 |
+
SUPPORTED_FILE_TYPES = ["txt", "shell", "python", "markdown", "yaml", "json", "csv", "tsv", "xml", "html", "ini", "jsonl", "ipynb"]
|
|
|
|
|
|
|
17 |
|
18 |
def validate_url(url):
    """Return True when ``url`` is a string that starts with ``https://``.

    Only the scheme prefix is checked; the rest of the URL is not parsed.
    Non-string input (for example ``None``) is reported as invalid instead
    of raising ``AttributeError``.
    """
    return isinstance(url, str) and url.startswith('https://')
|
|
|
21 |
def clone_repo(url, repo_dir, hf_token, hf_user):
|
22 |
env = os.environ.copy()
|
23 |
env['GIT_LFS_SKIP_SMUDGE'] = '1'
|
|
|
24 |
token_url = url.replace('https://', f'https://{hf_user}:{hf_token}@')
|
25 |
result = subprocess.run(["git", "clone", token_url, repo_dir], env=env, capture_output=True, text=True)
|
26 |
if result.returncode != 0:
|
|
|
33 |
"name": os.path.relpath(file_path),
|
34 |
"type": file_type,
|
35 |
"size": size,
|
36 |
+
"creation_date": os.path.getctime(file_path),
|
37 |
+
"modification_date": os.path.getmtime(file_path)
|
38 |
}
|
39 |
|
40 |
+
def read_file_content(file_path, max_size=32*1024):
    """Read a text file, returning at most ``max_size`` characters.

    The file is decoded as UTF-8 and undecodable bytes are dropped
    (``errors="ignore"``).

    Args:
        file_path: Path of the file to read.
        max_size: Maximum number of decoded characters to return before the
            truncation marker is appended.

    Returns:
        The full decoded content when it fits within ``max_size`` characters;
        otherwise the first ``max_size`` characters followed by
        ``"\\n... [Content Truncated] ..."``.
    """
    with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
        # Read one character past the limit so truncation is decided from the
        # decoded text itself. Comparing the on-disk byte size to a character
        # limit wrongly flags multi-byte files that were in fact read in full.
        content = file.read(max_size + 1)
    if len(content) > max_size:
        return content[:max_size] + "\n... [Content Truncated] ..."
    return content
|
46 |
|
47 |
def validate_file_types(directory):
|
48 |
m = Magika()
|
|
|
89 |
|
90 |
extracted_content.append(content)
|
91 |
|
|
|
92 |
subprocess.run(["rm", "-rf", repo_dir])
|
93 |
|
94 |
return extracted_content
|
|
|
100 |
formatted_output += f"### File: {file_data['header']['name']}\n"
|
101 |
formatted_output += f"**Type:** {file_data['header']['type']}\n"
|
102 |
formatted_output += f"**Size:** {file_data['header']['size']} bytes\n"
|
103 |
+
formatted_output += f"**Created:** {file_data['header']['creation_date']}\n"
|
104 |
+
formatted_output += f"**Modified:** {file_data['header']['modification_date']}\n"
|
105 |
formatted_output += "#### Content:\n"
|
106 |
formatted_output += f"```\n{file_data['content']}\n```\n\n"
|
107 |
else:
|
|
|
132 |
|
133 |
extract_button.click(fn=extract_and_display, inputs=url_input, outputs=output_display)
|
134 |
|
135 |
+
app.launch()
|