dwb2023 committed on
Commit
98d1d12
1 Parent(s): bbd42f8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -31
app.py CHANGED
@@ -1,44 +1,53 @@
1
- import requests
 
2
  import gradio as gr
3
- import json
4
 
5
- def get_file_summary(file_info):
 
 
 
 
 
 
 
 
 
 
6
  return {
7
- "name": file_info['path'],
8
- "type": "binary" if file_info['size'] > 1024 * 1024 else "text",
9
- "size": file_info['size'],
10
  }
11
 
 
 
 
 
12
  def extract_repo_content(url):
13
- if "huggingface.co" not in url:
14
- return [{"header": {"name": "Error", "type": "error", "size": 0}, "content": "Invalid URL. Please provide a valid Hugging Face URL."}]
15
-
16
- repo_name = url.split('/')[-2]
17
- repo_type = url.split('/')[-3]
18
- api_url = f"https://huggingface.co/api/{repo_type}/{repo_name}/tree/main"
19
 
20
- response = requests.get(api_url)
21
- if response.status_code != 200:
22
- return [{"header": {"name": "Error", "type": "error", "size": 0}, "content": f"Failed to fetch repository content. Status code: {response.status_code}"}]
23
 
24
- repo_content = response.json()
25
  extracted_content = []
26
-
27
- for file_info in repo_content:
28
- file_summary = get_file_summary(file_info)
29
- content = {"header": file_summary}
30
-
31
- if file_summary["type"] == "text" and file_summary["size"] <= 1024 * 1024:
32
- file_url = f"https://huggingface.co/{repo_type}/{repo_name}/resolve/main/{file_info['path']}"
33
- file_response = requests.get(file_url)
34
- if file_response.status_code == 200:
35
- content["content"] = file_response.text
 
36
  else:
37
- content["content"] = "Failed to fetch file content."
38
- else:
39
- content["content"] = "File too large or binary, content not captured."
40
-
41
- extracted_content.append(content)
42
 
43
  return extracted_content
44
 
 
1
+ import os
2
+ import subprocess
3
  import gradio as gr
 
4
 
5
def clone_repo(url, repo_dir):
    """Clone the Git repository at *url* into *repo_dir*.

    The clone runs with ``GIT_LFS_SKIP_SMUDGE=1`` in its environment, so
    Git LFS content is not downloaded during checkout.

    Args:
        url: Location of the repository to clone.
        repo_dir: Destination directory for the clone.

    Returns:
        ``(True, None)`` on success, or ``(False, message)`` where *message*
        describes why the clone failed.
    """
    env = os.environ.copy()
    env["GIT_LFS_SKIP_SMUDGE"] = "1"
    try:
        # "--" ends option parsing: a caller-supplied URL that starts with
        # "-" must be treated as a repository, never as a git option.
        result = subprocess.run(
            ["git", "clone", "--", url, repo_dir],
            env=env,
            capture_output=True,
            text=True,
        )
    except OSError as exc:
        # git is missing or not executable; report it through the same
        # (success, error) channel instead of raising.
        return False, f"Failed to run git: {exc}"
    if result.returncode != 0:
        message = result.stderr or f"git clone exited with status {result.returncode}"
        return False, message
    return True, None
12
+
13
def get_file_summary(file_path):
    """Return a small metadata record for the file at *file_path*.

    A file is reported as ``"binary"`` when it is larger than 1 MiB or when
    a NUL byte appears in its first 8 KiB; otherwise it is ``"text"``.

    Args:
        file_path: Path of the file to describe.

    Returns:
        A dict with ``"name"`` (path relative to the current working
        directory), ``"type"`` (``"text"`` or ``"binary"``) and ``"size"``
        (bytes).

    Raises:
        OSError: If the file size cannot be determined.
    """
    size = os.path.getsize(file_path)
    if size > 1024 * 1024:
        file_type = "binary"
    else:
        # Size alone mislabels small binaries (images, archives) as text;
        # a NUL byte in the leading chunk is a cheap, reliable tell.
        try:
            with open(file_path, "rb") as handle:
                head = handle.read(8192)
        except OSError:
            # Unreadable: keep the size-based answer and let the caller's
            # own read attempt surface the error.
            head = b""
        file_type = "binary" if b"\0" in head else "text"
    return {
        "name": os.path.relpath(file_path),
        "type": file_type,
        "size": size,
    }
21
 
22
def read_file_content(file_path):
    """Return the full text of the file at *file_path*, decoded as UTF-8.

    The encoding is pinned so the result does not depend on the host's
    locale settings.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file's contents as a string.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        return file.read()
25
+
26
def extract_repo_content(url):
    """Clone the repository at *url* and return a summary of each file.

    The repository is cloned into ``./temp_repo``, which is removed both
    before the clone and once extraction finishes. Git's own ``.git``
    metadata directory is not included in the result.

    Args:
        url: Location of the repository to clone.

    Returns:
        A list of dicts, one per file, each with a ``"header"`` (the file
        summary) and a ``"content"`` string. If the clone fails, a
        single-element list whose header has type ``"error"`` and whose
        content is the clone error message.
    """
    # Imported locally so this function does not depend on the module's
    # import block being changed.
    import shutil

    max_text_size = 1024 * 1024
    repo_dir = "./temp_repo"

    # Portable replacement for shelling out to `rm -rf`.
    shutil.rmtree(repo_dir, ignore_errors=True)

    try:
        success, error = clone_repo(url, repo_dir)
        if not success:
            return [{"header": {"name": "Error", "type": "error", "size": 0}, "content": error}]

        extracted_content = []
        for root, dirs, files in os.walk(repo_dir):
            # Prune in place so os.walk never descends into git internals;
            # sorting keeps the output order stable across runs.
            dirs[:] = sorted(d for d in dirs if d != ".git")
            for file in sorted(files):
                file_path = os.path.join(root, file)
                try:
                    file_summary = get_file_summary(file_path)
                except OSError as e:
                    # e.g. a broken symlink: record it, keep going.
                    extracted_content.append({
                        "header": {"name": os.path.relpath(file_path), "type": "error", "size": 0},
                        "content": f"Failed to inspect file: {str(e)}",
                    })
                    continue

                content = {"header": file_summary}
                if file_summary["type"] == "text" and file_summary["size"] <= max_text_size:
                    try:
                        content["content"] = read_file_content(file_path)
                    except (OSError, UnicodeError) as e:
                        content["content"] = f"Failed to read file content: {str(e)}"
                else:
                    content["content"] = "File too large or binary, content not captured."

                extracted_content.append(content)

        return extracted_content
    finally:
        # Do not leave the clone on disk after the request completes.
        shutil.rmtree(repo_dir, ignore_errors=True)
53