"""Gradio app that flattens the Python sources of a GitHub repository into one text blob."""

import ast
import io
import sys
import zipfile

import gradio as gr
import requests

# Seconds to wait for GitHub before giving up; without a timeout a stalled
# connection would block the request handler indefinitely.
REQUEST_TIMEOUT_SECONDS = 30


def is_file_type(file_path: str, file_extension: str) -> bool:
    """Check if the file has the specified file extension."""
    return file_path.endswith(file_extension)


def is_likely_useful_file(file_path: str, lang: str = "python") -> bool:
    """Decide from the path alone whether a file is worth including.

    A file is rejected when any path component starts with a dot, when the
    lower-cased path contains "test", when it lives in one of the excluded
    directories, or when the path contains one of the utility/config/docs
    markers for the given language.

    Args:
        file_path: Slash-separated path of the file inside the archive.
        lang: Language whose extra exclusion rules apply ("python" or "go").

    Returns:
        True if none of the exclusion rules match, False otherwise.
    """
    excluded_dirs = ["docs", "examples", "tests", "test", "scripts", "utils", "benchmarks"]
    utility_or_config_files = []
    github_workflow_or_docs = [".github", ".gitignore", "LICENSE"]

    if lang == "python":
        excluded_dirs.append("__pycache__")
        utility_or_config_files.extend(["hubconf.py", "setup.py"])
        github_workflow_or_docs.extend(["stale.py", "gen-card-", "write_model_card"])
    elif lang == "go":
        excluded_dirs.append("vendor")
        utility_or_config_files.extend(["go.mod", "go.sum", "Makefile"])

    # Hidden files and directories (".git", ".github", ...) are never useful.
    if any(part.startswith(".") for part in file_path.split("/")):
        return False

    # Deliberately broad substring match: anything with "test" in its path.
    if "test" in file_path.lower():
        return False

    for excluded_dir in excluded_dirs:
        if f"/{excluded_dir}/" in file_path or file_path.startswith(excluded_dir + "/"):
            return False

    for file_name in utility_or_config_files:
        if file_name in file_path:
            return False

    for doc_file in github_workflow_or_docs:
        if doc_file in file_path:
            return False

    return True


def is_test_file(file_content: str, lang: str) -> bool:
    """Determine if the file content suggests it is a test file.

    For Python the source is parsed and every import is inspected; the file is
    a test file when the top-level package of any import is ``unittest`` or
    ``pytest`` (so ``import unittest.mock`` counts too). Source that cannot be
    parsed is treated as "not a test file". For any other language no
    content-based detection is performed and False is returned.

    Args:
        file_content: Full text of the file.
        lang: Language of the file.

    Returns:
        True if a test-framework import was found, False otherwise.
    """
    test_indicators = {"python": ["unittest", "pytest"], "go": ["testing"]}.get(lang, [])

    if lang == "python":
        try:
            module = ast.parse(file_content)
        except (SyntaxError, ValueError):
            # Best effort: unparsable source (syntax errors, or null bytes on
            # older interpreters) simply cannot be classified.
            return False

        for node in ast.walk(module):
            if isinstance(node, ast.Import):
                # Compare the top-level package so dotted imports such as
                # "unittest.mock" are recognised as well.
                if any(alias.name.split(".")[0] in test_indicators for alias in node.names):
                    return True
            elif isinstance(node, ast.ImportFrom):
                # node.module is None for "from . import x".
                if node.module and node.module.split(".")[0] in test_indicators:
                    return True

    return False


def has_sufficient_content(file_content: str, min_line_count: int = 10) -> bool:
    """Check if the file has a minimum number of substantive lines.

    Blank lines and lines whose first non-space characters are ``#`` or ``//``
    are not counted.
    """
    lines = [
        line
        for line in file_content.split("\n")
        if line.strip() and not line.strip().startswith(("#", "//"))
    ]
    return len(lines) >= min_line_count


def remove_comments_and_docstrings(source: str) -> str:
    """Remove comments and docstrings from the Python source code.

    Comments disappear as a side effect of the parse/unparse round trip.
    Docstrings of functions and classes are dropped from the tree; any other
    standalone string expression (including a module docstring) is blanked to
    an empty string.

    Args:
        source: Python source code.

    Returns:
        The regenerated source code.

    Raises:
        SyntaxError: If ``source`` is not valid Python.
    """
    tree = ast.parse(source)
    for node in ast.walk(tree):
        if isinstance(node, (ast.FunctionDef, ast.ClassDef, ast.AsyncFunctionDef)) and ast.get_docstring(node):
            # Drop the docstring; keep the body non-empty so that the unparsed
            # result is still valid Python for docstring-only definitions.
            node.body = node.body[1:] or [ast.Pass()]
        elif (
            isinstance(node, ast.Expr)
            and isinstance(node.value, ast.Constant)
            and isinstance(node.value.value, str)
        ):
            # Blank out standalone string expressions.
            node.value.value = ""
    return ast.unparse(tree)


def download_repo(repo_url: str, branch_or_tag: str = "master") -> str:
    """Download and process files from a GitHub repository.

    The archive for ``branch_or_tag`` is requested first as a branch and, if
    that does not return HTTP 200, as a tag. Every ``.py`` file that passes
    ``is_likely_useful_file`` and is not detected as a test file is appended
    to the result, preceded by a ``# File: <path>`` header line.

    Args:
        repo_url: Base URL of the repository (a trailing ``/`` or ``.git`` is
            tolerated).
        branch_or_tag: Name of the branch or tag to download.

    Returns:
        The concatenated contents of all selected files.

    Raises:
        gr.Error: If the archive cannot be downloaded. Raising instead of
            exiting the process keeps the web server alive and shows the
            message in the UI.
    """
    lang = "python"
    base_url = repo_url.strip().rstrip("/").removesuffix(".git")

    for ref_kind in ("heads", "tags"):
        download_url = f"{base_url}/archive/refs/{ref_kind}/{branch_or_tag}.zip"
        print(download_url)
        try:
            response = requests.get(download_url, timeout=REQUEST_TIMEOUT_SECONDS)
        except requests.RequestException as exc:
            raise gr.Error(f"Failed to download the repository: {exc}") from exc
        if response.status_code == 200:
            break
    else:
        # Neither the branch nor the tag URL succeeded.
        message = f"Failed to download the repository. Status code: {response.status_code}"
        print(message)
        raise gr.Error(message)

    comment_prefix = "//" if lang == "go" else "#"
    sections = []
    with zipfile.ZipFile(io.BytesIO(response.content)) as zip_file:
        archive_names = zip_file.namelist()
        print(archive_names)
        for file_path in archive_names:
            # Skip directories, non-language files, less likely useful files,
            # hidden directories, and test files.
            if (
                file_path.endswith("/")
                or not is_file_type(file_path, ".py")
                or not is_likely_useful_file(file_path, lang)
            ):
                print("Dir or non-lang or useless:", file_path)
                continue

            # A single badly encoded file must not abort the whole run.
            file_content = zip_file.read(file_path).decode("utf-8", errors="replace")

            # Skip test files based on content.
            if is_test_file(file_content, lang):
                print("Test file:", file_path)
                continue

            print("Appending", file_path)
            sections.append(f"{comment_prefix} File: {file_path}\n{file_content}\n\n")

    return "".join(sections)


def download_and_process(repo_url: str, branch_or_tag: str = "master") -> str:
    """Gradio callback: return the flattened sources of the given repository."""
    file_contents = download_repo(repo_url, branch_or_tag)
    return file_contents


iface = gr.Interface(
    fn=download_and_process,
    inputs=[
        gr.components.Textbox(
            label="GitHub Repository URL",
            value="https://github.com/cognitivecomputations/github2file",
        ),
        gr.components.Textbox(label="Branch or Tag", value="master"),
    ],
    outputs=gr.components.Code(
        label="Output File",
        language="python",
        interactive=True,
    ),
)

if __name__ == "__main__":
    iface.launch()