Spaces:
Sleeping
Sleeping
File size: 4,996 Bytes
dbc6e65 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
import gradio as gr
import sys
import requests
import zipfile
import io
import ast
def is_file_type(file_path, file_extension):
    """Tell whether ``file_path`` ends with ``file_extension``.

    The comparison is a plain, case-sensitive suffix match performed by
    ``str.endswith``; no path normalisation is applied.
    """
    has_extension = file_path.endswith(file_extension)
    return has_extension
def is_likely_useful_file(file_path, lang="python"):
    """Heuristically decide whether ``file_path`` is worth keeping.

    A path is rejected when any of the following holds:

    * one of its ``/``-separated components starts with a dot,
    * it contains ``test`` anywhere (case-insensitive),
    * it lies inside one of the skipped directories,
    * it contains one of the skipped name fragments (config, packaging,
      licence and workflow files).

    ``lang`` ("python" or "go") only adds language-specific entries to the
    skip lists; any other value uses the common lists alone.
    """
    skipped_dirs = ["docs", "examples", "tests", "test", "scripts", "utils", "benchmarks"]
    # Both the config/utility names and the workflow/doc names are matched as
    # plain substrings of the path, so a single list is enough.
    skipped_fragments = [".github", ".gitignore", "LICENSE"]

    if lang == "python":
        skipped_dirs.append("__pycache__")
        skipped_fragments += ["hubconf.py", "setup.py", "stale.py", "gen-card-", "write_model_card"]
    elif lang == "go":
        skipped_dirs.append("vendor")
        skipped_fragments += ["go.mod", "go.sum", "Makefile"]

    # Hidden files and directories.
    for component in file_path.split("/"):
        if component.startswith("."):
            return False

    # Anything that mentions "test" anywhere in its path.
    if file_path.lower().find("test") != -1:
        return False

    # Skipped directories, either at the root of the path or nested inside it.
    in_skipped_dir = any(
        file_path.startswith(f"{directory}/") or f"/{directory}/" in file_path
        for directory in skipped_dirs
    )
    if in_skipped_dir:
        return False

    return not any(fragment in file_path for fragment in skipped_fragments)
def is_test_file(file_content, lang):
    """Report whether ``file_content`` imports a known test framework.

    Only Python sources are inspected: the source is parsed and every
    ``import`` / ``from ... import`` statement (at any nesting level) is
    checked against the test-framework packages for ``lang``.  Matching is
    done on the top-level package, so ``import unittest.mock`` and
    ``from pytest.mark import x`` count, while relative imports such as
    ``from .unittest import x`` (a project-local module) do not.

    Args:
        file_content: Source text of the file.
        lang: Language identifier, e.g. ``"python"`` or ``"go"``.

    Returns:
        True when a test-framework import is found.  False otherwise,
        including when the source cannot be parsed and for every language
        other than Python (indicators for Go are listed but no Go parsing
        is implemented).
    """
    test_indicators = {"python": ["unittest", "pytest"], "go": ["testing"]}.get(lang, [])
    if lang != "python":
        return False

    try:
        module = ast.parse(file_content)
    except (SyntaxError, ValueError):
        # ValueError covers sources containing null bytes on interpreters
        # that do not report them as SyntaxError.  Unparseable files are
        # simply not classified as tests.
        return False

    for node in ast.walk(module):
        if isinstance(node, ast.Import):
            imported = [alias.name for alias in node.names]
        elif isinstance(node, ast.ImportFrom) and node.level == 0 and node.module:
            # level > 0 is a relative import of a project-local module.
            imported = [node.module]
        else:
            continue
        # Compare only the top-level package so submodule imports match too.
        if any(name.split(".", 1)[0] in test_indicators for name in imported):
            return True
    return False
def has_sufficient_content(file_content, min_line_count=10):
    """Tell whether the file holds at least ``min_line_count`` substantive lines.

    A line is substantive when, after stripping surrounding whitespace, it is
    non-empty and does not begin with ``#`` or ``//``.
    """
    substantive = 0
    for raw_line in file_content.split("\n"):
        stripped = raw_line.strip()
        if not stripped:
            continue  # blank or whitespace-only line
        if stripped.startswith(("#", "//")):
            continue  # line comment
        substantive += 1
    return substantive >= min_line_count
class _StringStatementStripper(ast.NodeTransformer):
    """Drop every statement that consists solely of a string literal."""

    # Statement-list fields that must not be left empty once they were
    # populated, otherwise the unparsed code would be syntactically invalid.
    _BLOCK_FIELDS = ("body", "orelse", "finalbody")

    def generic_visit(self, node):
        populated = [
            name
            for name in self._BLOCK_FIELDS
            if isinstance(getattr(node, name, None), list) and getattr(node, name)
        ]
        super().generic_visit(node)
        # An empty module is valid; an empty def/class/if/... block is not.
        if not isinstance(node, ast.Module):
            for name in populated:
                if not getattr(node, name):
                    setattr(node, name, [ast.Pass()])
        return node

    def visit_Expr(self, node):
        value = node.value
        if isinstance(value, ast.Constant) and isinstance(value.value, str):
            return None  # returning None removes the statement from its block
        return node


def remove_comments_and_docstrings(source):
    """Return ``source`` with comments, docstrings and bare strings removed.

    The source is parsed and unparsed again, which discards ``#`` comments
    and normalises formatting.  Every expression statement that is just a
    string literal (module, class and function docstrings as well as
    free-standing strings used as comments) is dropped.  A block that would
    become empty receives a ``pass`` so the result stays valid Python.

    Args:
        source: Python source code.

    Returns:
        The regenerated source code as produced by ``ast.unparse``.

    Raises:
        SyntaxError: If ``source`` is not valid Python.
    """
    tree = ast.parse(source)
    tree = _StringStatementStripper().visit(tree)
    return ast.unparse(tree)
def download_repo(repo_url, branch_or_tag="master"):
    """Download a GitHub repository archive and concatenate its Python files.

    The zip archive for ``branch_or_tag`` is fetched (first as a branch, then
    as a tag), and every ``.py`` entry that passes ``is_likely_useful_file``
    and is not recognised by ``is_test_file`` is appended to the result,
    preceded by a ``# File: <path>`` header line and followed by a blank line.

    Args:
        repo_url: Base URL of the repository, e.g.
            ``https://github.com/owner/name``.  Surrounding whitespace and
            trailing slashes are ignored.
        branch_or_tag: Name of the branch or tag to download.

    Returns:
        A single string with the contents of all selected files.

    Raises:
        RuntimeError: If neither the branch nor the tag archive could be
            downloaded.  An exception is raised instead of exiting the
            process so that a bad URL entered in the web UI surfaces as an
            ordinary error rather than a SystemExit.
    """
    lang = "python"
    base_url = repo_url.strip().rstrip("/")
    # Branch archives live under refs/heads, tag archives under refs/tags.
    candidate_urls = [
        f"{base_url}/archive/refs/heads/{branch_or_tag}.zip",
        f"{base_url}/archive/refs/tags/{branch_or_tag}.zip",
    ]

    response = None
    for download_url in candidate_urls:
        print(download_url)
        # A timeout keeps a stalled connection from blocking the handler forever.
        response = requests.get(download_url, timeout=60)
        if response.status_code == 200:
            break
    else:
        message = f"Failed to download the repository. Status code: {response.status_code}"
        print(message)
        raise RuntimeError(message)

    comment_prefix = "//" if lang == "go" else "#"
    sections = []
    with zipfile.ZipFile(io.BytesIO(response.content)) as zip_file:
        for file_path in zip_file.namelist():
            # Skip directories, non-language files, less likely useful files,
            # hidden directories, and test files
            if (
                file_path.endswith("/")
                or not is_file_type(file_path, ".py")
                or not is_likely_useful_file(file_path, lang)
            ):
                print("Dir or non-lang or useless:", file_path)
                continue

            # Undecodable bytes are replaced rather than aborting the whole
            # run because of a single badly encoded file.
            file_content = zip_file.read(file_path).decode("utf-8", errors="replace")

            # Skip test files based on content
            if is_test_file(file_content, lang):
                print("Test file:", file_path)
                continue

            print("Appending", file_path)
            sections.append(f"{comment_prefix} File: {file_path}\n{file_content}\n\n")

    return "".join(sections)
def download_and_process(repo_url, branch_or_tag="master"):
    """Callback used by the web interface.

    Forwards both arguments to ``download_repo`` and returns its result
    unchanged.
    """
    return download_repo(repo_url, branch_or_tag)
# Form inputs: the repository URL and the branch/tag to fetch, each with a
# pre-filled default value.
_repo_url_input = gr.components.Textbox(
    label="GitHub Repository URL",
    value="https://github.com/cognitivecomputations/github2file",
)
_branch_or_tag_input = gr.components.Textbox(label="Branch or Tag", value="master")

# Output: an interactive code component configured with language="python".
_output_view = gr.components.Code(
    label="Output File",
    language="python",
    interactive=True,
)

# Wire the inputs and output to the download callback.
iface = gr.Interface(
    fn=download_and_process,
    inputs=[_repo_url_input, _branch_or_tag_input],
    outputs=_output_view,
)

# Start the web app only when the file is executed as a script.
if __name__ == "__main__":
    iface.launch()
|