# Hugging Face Space (status: Running)
import gradio as gr | |
import sys | |
import requests | |
import zipfile | |
import io | |
import ast | |
def is_file_type(file_path, file_extension):
    """Tell whether ``file_path`` ends with ``file_extension``.

    The comparison is a plain, case-sensitive suffix match performed by
    ``str.endswith``.
    """
    suffix_matches = file_path.endswith(file_extension)
    return suffix_matches
def is_likely_useful_file(file_path, lang="python"):
    """Heuristically decide whether an archive member is worth keeping.

    A path is rejected when any of the following holds:

    * one of its ``/``-separated segments starts with a dot (hidden entry);
    * the lower-cased path contains the substring ``"test"``;
    * it lies inside an excluded directory (docs, examples, tests, ...);
    * it contains the name of a known utility/config/housekeeping file.

    ``lang`` selects extra language-specific exclusions ("python" or "go");
    any other value only applies the common rules.
    """
    # (extra excluded dirs, utility/config file names, extra doc/workflow fragments)
    language_rules = {
        "python": (
            ["__pycache__"],
            ["hubconf.py", "setup.py"],
            ["stale.py", "gen-card-", "write_model_card"],
        ),
        "go": (
            ["vendor"],
            ["go.mod", "go.sum", "Makefile"],
            [],
        ),
    }
    extra_dirs, config_names, extra_fragments = language_rules.get(lang, ([], [], []))

    skipped_dirs = ["docs", "examples", "tests", "test", "scripts", "utils", "benchmarks"] + extra_dirs
    # Every entry here is matched as a plain substring of the path.
    skipped_fragments = config_names + [".github", ".gitignore", "LICENSE"] + extra_fragments

    has_hidden_segment = any(segment.startswith(".") for segment in file_path.split("/"))
    mentions_test = "test" in file_path.lower()
    inside_skipped_dir = any(
        f"/{directory}/" in file_path or file_path.startswith(directory + "/")
        for directory in skipped_dirs
    )
    has_skipped_fragment = any(fragment in file_path for fragment in skipped_fragments)

    return not (has_hidden_segment or mentions_test or inside_skipped_dir or has_skipped_fragment)
def is_test_file(file_content, lang):
    """Determine if the file content suggests it is a test file.

    Args:
        file_content: Source text of the file.
        lang: Language key; "python" and "go" have known test indicators,
            any other value always yields ``False``.

    Returns:
        ``True`` when the file imports one of the language's test packages
        (``unittest``/``pytest`` for Python, ``testing`` for Go), else ``False``.
        Python source that cannot be parsed is treated as "not a test file".
    """
    test_indicators = {"python": ["unittest", "pytest"], "go": ["testing"]}.get(lang, [])
    if not test_indicators:
        return False

    if lang == "python":
        try:
            module = ast.parse(file_content)
        except (SyntaxError, ValueError):
            # ValueError: source containing null bytes on older Python versions.
            return False
        for node in ast.walk(module):
            if isinstance(node, ast.Import):
                imported = [alias.name for alias in node.names]
            elif isinstance(node, ast.ImportFrom):
                # node.module is None for "from . import x".
                imported = [node.module] if node.module else []
            else:
                continue
            # Compare the top-level package so that "unittest.mock" counts too.
            if any(name.split(".")[0] in test_indicators for name in imported):
                return True
        return False

    # Non-Python languages: look for an import line that names an indicator
    # package, either standalone (`import "testing"`) or inside a grouped
    # import block (a line consisting only of `"testing"`).
    quoted_indicators = {f'"{name}"' for name in test_indicators}
    for line in file_content.splitlines():
        candidate = line.strip()
        if candidate.startswith("import "):
            candidate = candidate[len("import "):].strip()
        if candidate in quoted_indicators:
            return True
    return False
def has_sufficient_content(file_content, min_line_count=10):
    """Report whether the text holds at least ``min_line_count`` substantive lines.

    A line is substantive when, after stripping surrounding whitespace, it is
    non-empty and does not begin with ``#`` or ``//``.
    """
    substantive = 0
    for raw_line in file_content.split("\n"):
        text = raw_line.strip()
        if not text:
            continue
        if text.startswith("#") or text.startswith("//"):
            continue
        substantive += 1
    return substantive >= min_line_count
def remove_comments_and_docstrings(source):
    """Remove comments and docstrings from the Python source code.

    The source is parsed to an AST and unparsed again, which drops ``#``
    comments (and normalizes formatting). Docstrings of the module, classes
    and (async) functions are removed; a definition whose body consisted only
    of a docstring gets a ``pass`` so the result stays valid Python. Any other
    bare string-expression statement is blanked to an empty string.

    Args:
        source: Python source text.

    Returns:
        The regenerated source text.

    Raises:
        SyntaxError: If ``source`` is not valid Python.
    """
    tree = ast.parse(source)
    docstring_owners = (ast.Module, ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)
    for node in ast.walk(tree):
        if isinstance(node, docstring_owners):
            # clean=False + "is not None" also catches empty-string docstrings.
            if ast.get_docstring(node, clean=False) is not None:
                node.body = node.body[1:]
                if not node.body and not isinstance(node, ast.Module):
                    # A def/class needs at least one statement to be valid.
                    node.body = [ast.Pass()]
        elif (
            isinstance(node, ast.Expr)
            and isinstance(node.value, ast.Constant)
            and isinstance(node.value.value, str)
        ):
            # Bare string statements used as comments: blank their text.
            node.value.value = ""
    return ast.unparse(tree)
def download_repo(repo_url, branch_or_tag="master"):
    """Download and process files from a GitHub repository.

    The repository archive is fetched as a zip, first treating
    ``branch_or_tag`` as a branch (``refs/heads``) and, if that download does
    not succeed, as a tag (``refs/tags``). Every ``.py`` member that passes
    ``is_likely_useful_file`` and is not detected as a test file by
    ``is_test_file`` is appended to the result, preceded by a
    ``# File: <path>`` header line.

    Args:
        repo_url: Base URL of the repository, e.g.
            ``https://github.com/owner/repo`` (a trailing slash is tolerated).
        branch_or_tag: Branch or tag name to download.

    Returns:
        One string containing the concatenated contents of the kept files.

    Raises:
        SystemExit: With status 1 when no archive could be downloaded.
    """
    lang = "python"
    base_url = repo_url.rstrip("/")
    candidate_urls = [
        f"{base_url}/archive/refs/heads/{branch_or_tag}.zip",
        f"{base_url}/archive/refs/tags/{branch_or_tag}.zip",
    ]

    response = None
    for download_url in candidate_urls:
        print(download_url)
        # Timeout so a stalled connection cannot hang the caller forever.
        response = requests.get(download_url, timeout=60)
        if response.status_code == 200:
            break

    if response.status_code != 200:
        print(f"Failed to download the repository. Status code: {response.status_code}")
        sys.exit(1)

    sections = []
    with zipfile.ZipFile(io.BytesIO(response.content)) as zip_file:
        member_names = zip_file.namelist()
        print(member_names)
        for file_path in member_names:
            # Skip directories, non-language files, less likely useful files, hidden directories, and test files
            if file_path.endswith("/") or not is_file_type(file_path, ".py") or not is_likely_useful_file(file_path, lang):
                print("Dir or non-lang or useless:", file_path)
                continue
            try:
                file_content = zip_file.read(file_path).decode("utf-8")
            except UnicodeDecodeError:
                # One badly-encoded file should not abort the whole export.
                print("Skipping non-UTF-8 file:", file_path)
                continue
            # Skip test files based on content
            if is_test_file(file_content, lang):
                print("Test file:", file_path)
                continue
            print("Appending", file_path)
            header = f"// File: {file_path}\n" if lang == "go" else f"# File: {file_path}\n"
            sections.append(f"{header}{file_content}\n\n")
    return "".join(sections)
def download_and_process(repo_url, branch_or_tag="master"):
    """Gradio callback: fetch a repository and return its combined sources.

    Surrounding whitespace in the text-box values is stripped before they are
    passed to ``download_repo``. Failures are re-raised as ``gr.Error`` so the
    UI shows a message: ``download_repo`` signals a failed download with
    ``sys.exit(1)``, and letting that ``SystemExit`` escape from a request
    handler is not appropriate for a running web app.

    Args:
        repo_url: Base URL of the GitHub repository.
        branch_or_tag: Branch or tag name to download.

    Returns:
        The concatenated file contents produced by ``download_repo``.

    Raises:
        gr.Error: If the archive could not be downloaded.
    """
    try:
        return download_repo(repo_url.strip(), branch_or_tag.strip())
    except SystemExit as exc:
        raise gr.Error(
            "Failed to download the repository. Check the URL and the branch or tag name."
        ) from exc
    except requests.RequestException as exc:
        raise gr.Error(f"Failed to download the repository: {exc}") from exc
# Gradio UI: two text inputs (repository URL, branch/tag) feeding
# download_and_process, whose result is shown in an editable code viewer.
iface = gr.Interface(
    fn=download_and_process,
    inputs=[
        gr.components.Textbox(
            label="GitHub Repository URL",
            value="https://github.com/cognitivecomputations/github2file",
        ),
        gr.components.Textbox(
            label="Branch or Tag",
            value="master",
        ),
    ],
    outputs=gr.components.Code(label="Output File", language="python", interactive=True),
)

if __name__ == "__main__":
    # Launch the interface only when this file is run as a script.
    iface.launch()