|
import os |
|
from pathlib import Path |
|
|
|
import requests |
|
|
|
from .model import Languages, Summary, TranslationDoc |
|
|
|
URL = "https://api.github.com/repos/huggingface/transformers/git/trees/main?recursive=1" |
|
|
|
|
|
def get_github_repo_files(): |
|
""" |
|
Get github repo files |
|
""" |
|
response = requests.get(URL) |
|
|
|
data = response.json() |
|
all_items = data.get("tree", []) |
|
|
|
file_paths = [ |
|
item["path"] |
|
for item in all_items |
|
if item["type"] == "blob" and (item["path"].startswith("docs")) |
|
] |
|
return file_paths |
|
|
|
|
|
def retrieve(summary: Summary, table_size: int = 10) -> tuple[str, list[str]]: |
|
""" |
|
Retrieve missing docs |
|
""" |
|
|
|
report = f""" |
|
| Item | Count | Percentage | |
|
|------|-------|------------| |
|
| 📂 HuggingFaces docs | {summary.files_analyzed} | - | |
|
| 🪹 Missing translations | {summary.files_missing_translation} | {summary.percentage_missing_translation:.2f}% | |
|
""" |
|
print(report) |
|
first_missing_docs = list() |
|
for file in summary.first_missing_translation_files(table_size): |
|
first_missing_docs.append(file.original_file) |
|
|
|
print(first_missing_docs) |
|
return report, first_missing_docs |
|
|
|
|
|
def report(target_lang: str, top_k: int = 1) -> tuple[str, list[str]]: |
|
""" |
|
Generate a report for the translated docs |
|
""" |
|
docs_file = get_github_repo_files() |
|
|
|
base_docs_path = Path("docs/source") |
|
en_docs_path = Path("docs/source/en") |
|
|
|
lang = Languages[target_lang] |
|
summary = Summary(lang=lang.value) |
|
|
|
for file in docs_file: |
|
if file.endswith(".md"): |
|
try: |
|
file_relative_path = Path(file).relative_to(en_docs_path) |
|
except ValueError: |
|
continue |
|
|
|
translated_path = os.path.join( |
|
base_docs_path, lang.value, file_relative_path |
|
) |
|
translation_exists = translated_path in docs_file |
|
|
|
doc = TranslationDoc( |
|
translation_lang=lang.value, |
|
original_file=file, |
|
translation_file=translated_path, |
|
translation_exists=translation_exists, |
|
) |
|
summary.append_file(doc) |
|
return retrieve(summary, top_k) |
|
|