# Hugging Face Hub file-view header captured with this file (not part of the module):
# harheem · "Upload project files" · commit 3bfe3dd (verified) · 2.19 kB
import os
from pathlib import Path
import requests
from .model import Languages, Summary, TranslationDoc
# GitHub Git Trees API endpoint: recursive listing (`recursive=1`) of the
# `main` tree of the huggingface/transformers repository.
URL = "https://api.github.com/repos/huggingface/transformers/git/trees/main?recursive=1"
def get_github_repo_files() -> list[str]:
    """
    List the documentation file paths found in the repository tree at ``URL``.

    Sends a GET request to ``URL`` and reads the ``tree`` array of the JSON
    response (an absent ``tree`` key is treated as an empty list).

    Returns:
        The ``path`` of every entry whose ``type`` is ``"blob"`` and whose
        path starts with ``"docs"``, in response order.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within the timeout.
    """
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection.
    response = requests.get(URL, timeout=30)
    # An error payload has no "tree" key and would otherwise be
    # indistinguishable from a repository that simply has no docs.
    response.raise_for_status()
    # NOTE(review): the Git Trees API can mark very large listings as
    # truncated; that flag is not inspected here — confirm it is not needed.
    tree = response.json().get("tree", [])
    return [
        entry["path"]
        for entry in tree
        if entry["type"] == "blob" and entry["path"].startswith("docs")
    ]
def retrieve(summary: Summary, table_size: int = 10) -> tuple[str, list[str]]:
    """
    Build the translation-status table and list the first missing docs.

    Args:
        summary: Object providing ``files_analyzed``,
            ``files_missing_translation``,
            ``percentage_missing_translation`` and
            ``first_missing_translation_files(n)``.
        table_size: Value passed to ``first_missing_translation_files``.

    Returns:
        A ``(report, missing)`` tuple: the Markdown table text, and the
        ``original_file`` of each entry yielded by
        ``summary.first_missing_translation_files(table_size)``.

    Both values are also printed to stdout.
    """
    # Named `report_text` so the local does not shadow the module-level
    # `report()` function.
    report_text = f"""
| Item | Count | Percentage |
|------|-------|------------|
| 📂 HuggingFaces docs | {summary.files_analyzed} | - |
| 🪹 Missing translations | {summary.files_missing_translation} | {summary.percentage_missing_translation:.2f}% |
"""
    print(report_text)
    first_missing_docs = [
        doc.original_file
        for doc in summary.first_missing_translation_files(table_size)
    ]
    print(first_missing_docs)
    return report_text, first_missing_docs
def report(target_lang: str, top_k: int = 1) -> tuple[str, list[str]]:
    """
    Generate a report for the translated docs.

    Every ``.md`` file under ``docs/source/en`` in the repository listing is
    paired with the same relative path under ``docs/source/<lang>``; the
    translation counts as existing when that path is also in the listing.

    Args:
        target_lang: Key looked up in ``Languages``.
        top_k: Forwarded to ``retrieve`` as its ``table_size``.

    Returns:
        The ``(report, missing_files)`` tuple produced by ``retrieve``.

    Raises:
        KeyError: Presumably, when ``target_lang`` is not a member name of
            ``Languages`` (assumes ``Languages`` is an ``Enum`` — confirm).
    """
    repo_files = get_github_repo_files()
    # Build the lookup once: testing membership against the list would rescan
    # the whole listing for every English doc.
    known_paths = set(repo_files)
    base_docs_path = Path("docs/source")
    en_docs_path = Path("docs/source/en")
    lang = Languages[target_lang]
    summary = Summary(lang=lang.value)
    for file in repo_files:
        if not file.endswith(".md"):
            continue
        try:
            file_relative_path = Path(file).relative_to(en_docs_path)
        except ValueError:
            # Not located under the English docs directory.
            continue
        # Repository paths always use "/", so the candidate must be a POSIX
        # string; joining with the OS separator would never match on Windows.
        translated_path = (
            base_docs_path / lang.value / file_relative_path
        ).as_posix()
        doc = TranslationDoc(
            translation_lang=lang.value,
            original_file=file,
            translation_file=translated_path,
            translation_exists=translated_path in known_paths,
        )
        summary.append_file(doc)
    return retrieve(summary, top_k)