dwb2023 committed on
Commit
2f3bed4
1 Parent(s): 4b78f6f

Update file_utils.py

Browse files
Files changed (1) hide show
  1. file_utils.py +26 -47
file_utils.py CHANGED
@@ -1,51 +1,30 @@
1
- import os
2
- import subprocess
3
- from file_utils import validate_file_types, get_file_summary, read_file_content, summarize_content
4
 
5
- def validate_url(url):
6
- return url.startswith('https://')
 
 
 
7
 
8
- def clone_repo(url, repo_dir, hf_token, hf_user):
9
- env = os.environ.copy()
10
- env['GIT_LFS_SKIP_SMUDGE'] = '1'
11
- token_url = url.replace('https://', f'https://{hf_user}:{hf_token}@')
12
- result = subprocess.run(["git", "clone", token_url, repo_dir], env=env, capture_output=True, text=True)
13
- if result.returncode != 0:
14
- return False, result.stderr
15
- return True, None
16
 
17
- def extract_repo_content(url, hf_token, hf_user):
18
- if not validate_url(url):
19
- return [{"header": {"name": "Error", "type": "error", "size": 0}, "content": "Invalid URL"}]
 
 
 
 
 
 
 
 
 
 
 
20
 
21
- repo_dir = "./temp_repo"
22
- if os.path.exists(repo_dir):
23
- subprocess.run(["rm", "-rf", repo_dir])
24
-
25
- success, error = clone_repo(url, repo_dir, hf_token, hf_user)
26
- if not success:
27
- return [{"header": {"name": "Error", "type": "error", "size": 0}, "content": f"Failed to clone repository: {error}"}]
28
-
29
- file_types = validate_file_types(repo_dir)
30
- extracted_content = []
31
- for file_path, file_type in file_types.items():
32
- file_summary = get_file_summary(file_path, file_type)
33
- content = {"header": file_summary}
34
-
35
- if file_type in SUPPORTED_FILE_TYPES and file_summary["size"] <= 32 * 1024:
36
- try:
37
- file_content = read_file_content(file_path)
38
- content["content"] = file_content
39
- content["summary"] = summarize_content(file_content)
40
- except Exception as e:
41
- content["content"] = f"Failed to read file content: {str(e)}"
42
- content["summary"] = ""
43
- else:
44
- content["content"] = "File too large or binary, content not captured."
45
- content["summary"] = ""
46
-
47
- extracted_content.append(content)
48
-
49
- subprocess.run(["rm", "-rf", repo_dir])
50
-
51
- return extracted_content
 
1
+ import gradio as gr
2
+ from repo_utils import extract_repo_content
3
+ from display_utils import format_output
4
 
5
+ # Extract and display function
6
+ def extract_and_display(url):
7
+ extracted_content = extract_repo_content(url, hf_token, hf_user)
8
+ formatted_output = format_output(extracted_content, url)
9
+ return formatted_output
10
 
11
+ app = gr.Blocks(theme="sudeepshouche/minimalist")
 
 
 
 
 
 
 
12
 
13
+ with app:
14
+ gr.Markdown("# Hugging Face Space / Model Repository Content Extractor")
15
+ url_input = gr.Textbox(label="https:// URL of Repository", placeholder="Enter the repository URL here OR select an example below...")
16
+ url_examples = gr.Examples(
17
+ examples=[
18
+ ["https://huggingface.co/spaces/big-vision/paligemma-hf"],
19
+ ["https://huggingface.co/google/paligemma-3b-mix-224"],
20
+ ["https://huggingface.co/microsoft/Phi-3-vision-128k-instruct"],
21
+ ["https://huggingface.co/llava-hf/llava-v1.6-mistral-7b-hf"]
22
+ ],
23
+ inputs=url_input
24
+ )
25
+ output_display = gr.Textbox(label="Extracted Repository Content", show_copy_button=True, lines=20, placeholder="Repository content will be extracted here...\n\nMetadata is captured for all files, but text content provided only for files less than 32 kb\n\n\n\nReview and search through the content here OR simply copy it for offline analysis!!. 🤖")
26
+ extract_button = gr.Button("Extract Content")
27
 
28
+ extract_button.click(fn=extract_and_display, inputs=url_input, outputs=output_display)
29
+
30
+ app.launch()