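# Spaces: Sleeping / Sleeping
# (The line above appears to be page-status text captured together with this
# file; it is not part of the program.)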
import json | |
import logging | |
import os | |
import subprocess | |
import sys | |
import tempfile | |
import time | |
from pathlib import Path | |
from typing import Dict | |
import requests | |
# Configuration, read once from the environment when the module is imported.
# Identity and credentials used for git commits and GitHub API calls.
GITHUB_USERNAME = os.getenv("GITHUB_USERNAME")
GITHUB_ACCESS_TOKEN = os.getenv("GITHUB_TOKEN")
GITHUB_EMAIL = os.getenv("GITHUB_EMAIL")
# Organization under which repositories are created.
GITHUB_ORG = os.getenv("MAI_GITHUB_ORG")
# Required setting: a missing variable raises KeyError at import time.
MAI_TM_PUBLISH_TODO_REPO = os.environ["MAI_TMS_PUBLISH_TODO_REPO"]
GITHUB_API_ENDPOINT = f"https://api.github.com/orgs/{GITHUB_ORG}/repos"
# os.getenv returns a string, so values such as "0" or "false" used to count
# as enabled. Only the usual "off" spellings (and unset/empty) disable DEBUG;
# any other value keeps enabling it, as before.
DEBUG = os.getenv("DEBUG", "").strip().lower() not in {"", "0", "false", "no", "off"}
# Flag appended to git commands: silence git unless debugging. (The original
# expression was inverted and passed "-q" only when DEBUG was set.)
quiet = "" if DEBUG else "-q"
def create_github_repo(repo_path: Path, repo_name: str):
    """Commit everything in ``repo_path`` and push it to a new private GitHub repo.

    The directory is initialised as a git repository, all files are committed
    as "Initial commit", a private repository named ``repo_name`` is created
    through ``GITHUB_API_ENDPOINT``, and the commit is pushed to its ``main``
    branch.

    Args:
        repo_path: Directory whose contents become the repository.
        repo_name: Name of the repository to create on GitHub.

    Returns:
        The ``html_url`` field of GitHub's repository-creation response.

    Raises:
        requests.HTTPError: If GitHub rejects the repository creation.
        subprocess.CalledProcessError: If a git command exits with an error.
    """
    logging.info("[INFO] Creating GitHub repo...")
    cwd = str(repo_path)
    # `quiet` is "-q" or ""; an empty string must not become an argv entry.
    quiet_flag = [quiet] if quiet else []

    # Configure the git identity. Arguments are passed as a list so that a
    # user name or e-mail containing spaces stays a single argument.
    subprocess.run(
        ["git", "config", "--global", "user.name", f"{GITHUB_USERNAME}"],
        check=True,
    )
    subprocess.run(
        ["git", "config", "--global", "user.email", f"{GITHUB_EMAIL}"],
        check=True,
    )

    # Initialize a Git repository and commit the changes.
    subprocess.run(["git", "init", *quiet_flag], cwd=cwd, check=True)
    subprocess.run(["git", "add", "."], cwd=cwd, check=True)
    subprocess.run(
        ["git", "commit", *quiet_flag, "-m", "Initial commit"],
        cwd=cwd,
        check=True,
    )

    # Create a new repository on GitHub.
    response = requests.post(
        GITHUB_API_ENDPOINT,
        json={
            "name": repo_name,
            "private": True,
        },
        auth=(GITHUB_USERNAME, GITHUB_ACCESS_TOKEN),
        timeout=30,  # never hang forever on an unresponsive API
    )
    response.raise_for_status()
    # Pause before pushing; presumably gives GitHub time to finish
    # provisioning the new repository (TODO confirm).
    time.sleep(3)

    # Add the GitHub remote to the local Git repository and push the changes.
    remote_url = f"https://{GITHUB_ORG}:{GITHUB_ACCESS_TOKEN}@github.com/{GITHUB_ORG}/{repo_name}.git"
    # No shell is involved, so the URL cannot be reinterpreted by it. This
    # call is deliberately not run with check=True: its argv contains the
    # access token, which would end up in the CalledProcessError message.
    # A failure here surfaces at the checked push below.
    subprocess.run(["git", "remote", "add", "origin", remote_url], cwd=cwd)
    # Rename default branch to main.
    subprocess.run(["git", "branch", "-M", "main"], cwd=cwd, check=True)
    subprocess.run(
        ["git", "push", *quiet_flag, "-u", "origin", "main"],
        cwd=cwd,
        check=True,
    )
    return response.json()["html_url"]
def _iter_alignment_pairs(fn: Path):
    """Yield ``(bo_seg, en_seg)`` for every non-empty line of ``fn``.

    Each line is split at its first tab. A line without a tab yields an empty
    English segment, so both output files receive exactly one line per pair.
    """
    # Read as UTF-8 explicitly: the output files are written as UTF-8, and the
    # locale default encoding is not guaranteed to match.
    for seg_pair in fn.read_text(encoding="utf-8").splitlines():
        if not seg_pair:
            continue
        bo_seg, _, en_seg = seg_pair.partition("\t")
        yield bo_seg, en_seg


def convert_raw_align_to_tm(align_fn: Path, tm_path: Path):
    """Split a tab-separated alignment file into parallel BO and EN text files.

    Writes ``<tm_path.name>-bo.txt`` and ``<tm_path.name>-en.txt`` inside
    ``tm_path``. Line *i* of one file corresponds to line *i* of the other.
    Empty input lines are skipped; a line without a tab contributes its text
    to the BO file and an empty line to the EN file.

    Args:
        align_fn: Alignment file with one ``bo<TAB>en`` pair per line.
        tm_path: Existing directory that receives the two text files.

    Returns:
        ``tm_path``, unchanged.
    """
    if DEBUG:
        logging.debug("[INFO] Conerting raw alignment to TM repo...")

    text_bo_fn = tm_path / f"{tm_path.name}-bo.txt"
    text_en_fn = tm_path / f"{tm_path.name}-en.txt"

    with open(text_bo_fn, "w", encoding="utf-8") as bo_file, open(
        text_en_fn, "w", encoding="utf-8"
    ) as en_file:
        for bo_seg, en_seg in _iter_alignment_pairs(align_fn):
            bo_file.write(bo_seg + "\n")
            en_file.write(en_seg + "\n")

    return tm_path
def get_github_dev_url(raw_github_url: str) -> str:
    """Convert a GitHub file URL on the ``main`` branch to a github.dev URL.

    Everything after the first ``.com`` is treated as the path
    ``/<owner>/<repo>/<branch>/...``. A ``blob`` segment is inserted in front
    of the first path segment that is exactly ``main`` (looking only after
    owner and repo), and the result is appended to ``https://github.dev``.
    Paths without such a segment are returned unchanged apart from the host.

    Args:
        raw_github_url: URL such as
            ``https://raw.githubusercontent.com/<owner>/<repo>/main/<file>``.

    Returns:
        The corresponding ``https://github.dev/...`` URL.

    Raises:
        ValueError: If the URL does not contain ``.com``.
    """
    base_url = "https://github.dev"
    # Split at the first ".com" only, so a later ".com" in the path
    # (e.g. in a file name) no longer breaks the unpacking.
    _, separator, file_path = raw_github_url.partition(".com")
    if not separator:
        raise ValueError(f"URL has no '.com' host part: {raw_github_url!r}")

    # segments == ["", owner, repo, branch, ...]; match whole segments so that
    # names merely containing "main" (domain-texts, main.txt) are untouched.
    segments = file_path.split("/")
    for index in range(3, len(segments)):
        if segments[index] == "main":
            segments.insert(index, "blob")
            break
    return base_url + "/".join(segments)
def add_input_in_readme(input_dict: Dict[str, str], path: Path) -> Path:
    """Write a ``README.md`` in ``path`` linking to the BO and EN input files.

    Args:
        input_dict: Must provide ``"text_id"``, ``"bo_file_url"`` and
            ``"en_file_url"``; both URLs are converted with
            ``get_github_dev_url`` before being written.
        path: Existing directory that receives ``README.md`` (an existing
            file is overwritten).

    Returns:
        ``path``, unchanged.

    Raises:
        KeyError: If one of the required keys is missing from ``input_dict``.
    """
    text_id = input_dict["text_id"]
    bo_file_url = get_github_dev_url(input_dict["bo_file_url"])
    en_file_url = get_github_dev_url(input_dict["en_file_url"])

    input_string = (
        "## Input\n"
        f"- [BO{text_id}]({bo_file_url})\n"
        f"- [EN{text_id}]({en_file_url})"
    )
    # Explicit UTF-8, matching the other files this module writes, so the
    # result does not depend on the locale's default encoding.
    (path / "README.md").write_text(input_string, encoding="utf-8")
    return path
def add_to_publish_todo_repo(org, repo_name, file_path, access_token):
    """Create an empty file ``file_path`` in ``org/repo_name`` unless it exists.

    The GitHub contents API is queried first. A 200 response means the file
    is already present and nothing is written. Otherwise the file is created
    with empty content. The outcome is reported on stdout; failures are
    printed, not raised.

    Args:
        org: Owner of the repository.
        repo_name: Repository that holds the publish-todo entries.
        file_path: Path of the file to create inside the repository.
        access_token: Token sent as a ``Bearer`` authorization header.
    """
    # Bound every request so an unresponsive API cannot hang the caller.
    request_timeout = 30  # seconds
    base_url = f"https://api.github.com/repos/{org}/{repo_name}/contents/"
    headers = {
        "Authorization": f"Bearer {access_token}",
        "Accept": "application/vnd.github.v3+json",
    }
    url = base_url + file_path

    response = requests.get(url, headers=headers, timeout=request_timeout)
    if response.status_code == 200:
        print(f"[INFO] '{file_path}' already added.")
        return

    payload = {"message": f"Add {file_path}", "content": ""}
    response = requests.put(
        url, headers=headers, json=payload, timeout=request_timeout
    )
    if response.status_code == 201:
        print(f"[INFO] '{file_path}' added to publish todo")
    else:
        print(f"[ERROR] Failed to add '{file_path}'.")
        print(f"[ERROR] Response: {response.text}")
def create_tm(align_fn: Path, text_pair: Dict[str, str]):
    """Build a TM repository from an alignment file and publish it on GitHub.

    Inside a temporary directory this creates ``TM<text_id>/``, fills it with
    the BO/EN text files and a README, pushes it to a new GitHub repository
    and registers the repository name in the publish-todo repository.

    Args:
        align_fn: Path to the raw alignment file.
        text_pair: Mapping with at least ``"text_id"``; it is also passed on
            to ``add_input_in_readme``.

    Returns:
        The URL returned by ``create_github_repo``.
    """
    source_fn = Path(align_fn)
    repo_name = f"TM{text_pair['text_id']}"

    with tempfile.TemporaryDirectory() as scratch:
        tm_dir = Path(scratch) / repo_name
        tm_dir.mkdir(exist_ok=True, parents=True)

        # Populate the working directory: text files first, then the README.
        convert_raw_align_to_tm(source_fn, tm_dir)
        repo_dir = add_input_in_readme(text_pair, tm_dir)

        url = create_github_repo(repo_dir, repo_name)
        logging.info(f"TM repo created: {url}")
        add_to_publish_todo_repo(
            GITHUB_ORG,
            MAI_TM_PUBLISH_TODO_REPO,
            repo_name,
            GITHUB_ACCESS_TOKEN,
        )

    return url
if __name__ == "__main__":
    # Usage: python <this file> <alignment-file> <text-pair-json>
    #
    # create_tm() requires the text-pair mapping ("text_id", "bo_file_url",
    # "en_file_url") in addition to the alignment file; the original call
    # omitted it and always failed with a TypeError. The mapping is read from
    # a JSON file given as the second argument.
    # NOTE(review): the JSON-file convention is an assumption; adjust it if
    # the mapping is meant to come from somewhere else.
    if len(sys.argv) != 3:
        sys.exit(f"usage: {sys.argv[0]} <alignment-file> <text-pair-json>")
    # Without a configured handler the module's logging.info messages are
    # dropped by the default WARNING level.
    logging.basicConfig(level=logging.DEBUG if DEBUG else logging.INFO)
    align_fn = Path(sys.argv[1])
    text_pair = json.loads(Path(sys.argv[2]).read_text(encoding="utf-8"))
    create_tm(align_fn, text_pair)