|
import os |
|
from huggingface_hub import HfApi |
|
import time |
|
from concurrent.futures import ThreadPoolExecutor |
|
from tqdm import tqdm |
|
|
|
def upload_single_file(args):
    """Upload one local file to a Hugging Face model repo, retrying on failure.

    The parameters are packed into a single tuple so the function can be
    handed directly to a map-style executor call.

    Args:
        args: A 4-tuple ``(file_path, path_in_repo, repo_id, max_retries)``.
            ``max_retries`` is the total number of attempts made, not the
            number of extra attempts after the first one.

    Returns:
        A tuple ``(success, path_in_repo, error)``. ``error`` is ``None`` on
        success; on failure it is the string form of the last exception, or
        a message explaining that ``max_retries`` was invalid.
    """
    file_path, path_in_repo, repo_id, max_retries = args

    # With fewer than one attempt the loop below would never execute and the
    # function would fall through returning None, which callers that unpack
    # the result as a 3-tuple cannot handle. Report it as a normal failure.
    if max_retries < 1:
        return (
            False,
            path_in_repo,
            f"max_retries must be at least 1, got {max_retries}",
        )

    api = HfApi()

    for attempt in range(max_retries):
        try:
            api.upload_file(
                path_or_fileobj=file_path,
                path_in_repo=path_in_repo,
                repo_id=repo_id,
                repo_type="model",
            )
        except Exception as e:
            # Deliberately broad: this is the worker boundary, and any
            # failure is either retried or reported back to the caller.
            if attempt == max_retries - 1:
                return False, path_in_repo, str(e)
            # Linear backoff: wait 10s, 20s, 30s, ... between attempts.
            time.sleep(10 * (attempt + 1))
        else:
            return True, path_in_repo, None
|
|
|
def _collect_upload_files(folder_path, excluded_patterns=(".git",)):
    """Return ``(full_path, path_in_repo)`` pairs for files under *folder_path*."""
    collected = []
    for root, dirnames, filenames in os.walk(folder_path):
        # Prune git metadata directories in place so os.walk never descends
        # into them; a filename-only check lets the contents of .git/ through
        # because those file names do not themselves contain ".git".
        dirnames[:] = [name for name in dirnames if name != ".git"]

        for filename in filenames:
            if any(pattern in filename for pattern in excluded_patterns):
                continue
            full_path = os.path.join(root, filename)
            relative_path = os.path.relpath(full_path, folder_path)
            # Repo paths are forward-slash separated regardless of the local
            # OS; this is a no-op wherever os.sep is already "/".
            collected.append((full_path, relative_path.replace(os.sep, "/")))
    return collected


def upload_with_retry(folder_path, repo_id, max_retries=10, max_workers=4):
    """Upload every file under *folder_path* to *repo_id* using a thread pool.

    Files whose name contains ``.git`` are skipped, as is anything located
    inside a ``.git`` directory. Each remaining file is uploaded through
    ``upload_single_file``; a progress bar is shown and a summary, including
    any files that ultimately failed, is printed at the end.

    Args:
        folder_path: Local directory to walk recursively.
        repo_id: Target repository identifier passed through to each upload.
        max_retries: Total number of attempts allowed per file.
        max_workers: Number of worker threads uploading concurrently.

    Returns:
        None. Results are reported on standard output.
    """
    files = _collect_upload_files(folder_path)
    print(f"Found {len(files)} files to upload")

    upload_args = [
        (file_path, path_in_repo, repo_id, max_retries)
        for file_path, path_in_repo in files
    ]

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # executor.map yields results in submission order; wrapping it in
        # list() drains it so every upload has finished before the summary.
        results = list(tqdm(
            executor.map(upload_single_file, upload_args),
            total=len(files),
            desc="Uploading files",
        ))

    failed_files = [
        (file_name, error)
        for success, file_name, error in results
        if not success
    ]

    print("\nUpload completed!")
    print(f"Successfully uploaded: {len(files) - len(failed_files)} files")
    if failed_files:
        print("\nFailed uploads:")
        for file_name, error in failed_files:
            print(f"- {file_name}: {error}")
|
|
|
|
|
# Script entry: upload the contents of the current working directory to the
# model repo, with ten worker threads and up to ten attempts per file.
upload_with_retry(
    folder_path=".",
    repo_id="Neph0s/CoSER-Llama-3.1-70B",
    max_retries=10,
    max_workers=10,
)