Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -4,22 +4,28 @@ import gradio as gr
|
|
4 |
from magika import Magika
|
5 |
from huggingface_hub import login
|
6 |
|
7 |
-
# Get the HF token from environment variables
|
8 |
hf_token = os.getenv("HF_TOKEN")
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
raise ValueError("HF_TOKEN environment variable is not set")
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
SUPPORTED_FILE_TYPES = ["txt", "python", "markdown", "yaml", "json", "csv", "tsv", "xml", "html"]
|
15 |
|
16 |
def validate_url(url):
|
17 |
return url.startswith('https://')
|
18 |
|
19 |
-
def clone_repo(url, repo_dir, token):
|
20 |
env = os.environ.copy()
|
21 |
env['GIT_LFS_SKIP_SMUDGE'] = '1'
|
22 |
-
|
|
|
23 |
result = subprocess.run(["git", "clone", token_url, repo_dir], env=env, capture_output=True, text=True)
|
24 |
if result.returncode != 0:
|
25 |
return False, result.stderr
|
@@ -54,7 +60,7 @@ def validate_file_types(directory):
|
|
54 |
file_types[file_path] = f"Error: {str(e)}"
|
55 |
return file_types
|
56 |
|
57 |
-
def extract_repo_content(url, token):
|
58 |
if not validate_url(url):
|
59 |
return [{"header": {"name": "Error", "type": "error", "size": 0}, "content": "Invalid URL"}]
|
60 |
|
@@ -62,7 +68,7 @@ def extract_repo_content(url, token):
|
|
62 |
if os.path.exists(repo_dir):
|
63 |
subprocess.run(["rm", "-rf", repo_dir])
|
64 |
|
65 |
-
success, error = clone_repo(url, repo_dir, token)
|
66 |
if not success:
|
67 |
return [{"header": {"name": "Error", "type": "error", "size": 0}, "content": f"Failed to clone repository: {error}"}]
|
68 |
|
@@ -101,7 +107,7 @@ def format_output(extracted_content, repo_url):
|
|
101 |
return formatted_output
|
102 |
|
103 |
def extract_and_display(url):
|
104 |
-
extracted_content = extract_repo_content(url, hf_token)
|
105 |
formatted_output = format_output(extracted_content, url)
|
106 |
return formatted_output
|
107 |
|
|
|
4 |
from magika import Magika
|
5 |
from huggingface_hub import login
|
6 |
|
7 |
+
# Get the HF token and space author name from environment variables
|
8 |
hf_token = os.getenv("HF_TOKEN")
|
9 |
+
hf_user = os.getenv("SPACE_AUTHOR_NAME")
|
10 |
+
|
11 |
+
if not hf_token:
|
12 |
raise ValueError("HF_TOKEN environment variable is not set")
|
13 |
+
if not hf_user:
|
14 |
+
raise ValueError("SPACE_AUTHOR_NAME environment variable is not set")
|
15 |
+
|
16 |
+
# Perform login using the token
|
17 |
+
login(token=hf_token, add_to_git_credential=True)
|
18 |
|
19 |
SUPPORTED_FILE_TYPES = ["txt", "python", "markdown", "yaml", "json", "csv", "tsv", "xml", "html"]
|
20 |
|
21 |
def validate_url(url):
|
22 |
return url.startswith('https://')
|
23 |
|
24 |
+
def clone_repo(url, repo_dir, hf_token, hf_user):
|
25 |
env = os.environ.copy()
|
26 |
env['GIT_LFS_SKIP_SMUDGE'] = '1'
|
27 |
+
# Construct the Git URL with the token and author name for authentication
|
28 |
+
token_url = url.replace('https://', f'https://{hf_user}:{hf_token}@')
|
29 |
result = subprocess.run(["git", "clone", token_url, repo_dir], env=env, capture_output=True, text=True)
|
30 |
if result.returncode != 0:
|
31 |
return False, result.stderr
|
|
|
60 |
file_types[file_path] = f"Error: {str(e)}"
|
61 |
return file_types
|
62 |
|
63 |
+
def extract_repo_content(url, hf_token, hf_user):
|
64 |
if not validate_url(url):
|
65 |
return [{"header": {"name": "Error", "type": "error", "size": 0}, "content": "Invalid URL"}]
|
66 |
|
|
|
68 |
if os.path.exists(repo_dir):
|
69 |
subprocess.run(["rm", "-rf", repo_dir])
|
70 |
|
71 |
+
success, error = clone_repo(url, repo_dir, hf_token, hf_user)
|
72 |
if not success:
|
73 |
return [{"header": {"name": "Error", "type": "error", "size": 0}, "content": f"Failed to clone repository: {error}"}]
|
74 |
|
|
|
107 |
return formatted_output
|
108 |
|
109 |
def extract_and_display(url):
|
110 |
+
extracted_content = extract_repo_content(url, hf_token, hf_user)
|
111 |
formatted_output = format_output(extracted_content, url)
|
112 |
return formatted_output
|
113 |
|