Spaces:
Sleeping
Sleeping
# msdl/utils.py | |
import os | |
import re | |
import shutil | |
import sys | |
import yaml | |
from functools import lru_cache | |
from pathlib import Path | |
from msdl.config import ( | |
BACKEND_DOCKERFILE_DIR, | |
CLOUD_LLM_DOCKERFILE, | |
FRONTEND_DOCKERFILE_DIR, | |
LOCAL_LLM_DOCKERFILE, | |
PACKAGE_DIR, | |
REACT_DOCKERFILE, | |
TEMP_DIR, | |
ENV_FILE_PATH, | |
) | |
from msdl.i18n import t | |
def get_env_variable(var_name, default=None):
    """Look up a variable in the .env file, then in the process environment.

    The first line of ``ENV_FILE_PATH`` that assigns ``var_name`` wins. If the
    file does not exist or holds no such assignment, the value comes from
    ``os.getenv``.

    Args:
        var_name: Name of the variable to look up.
        default: Value returned when the variable is found in neither place.

    Returns:
        The variable's value with surrounding quotes removed, or ``default``.
    """
    prefix = f"{var_name}="
    if ENV_FILE_PATH.exists():
        with ENV_FILE_PATH.open("r") as env_file:
            for line in env_file:
                # Strip first so an indented assignment is still recognised,
                # the same way read_env_file() treats each line.
                entry = line.strip()
                if entry.startswith(prefix):
                    value = entry[len(prefix):]
                    # Drop surrounding quotes so KEY="value" yields value,
                    # matching what read_env_file() returns for that line.
                    return value.strip('"').strip("'")
    return os.getenv(var_name, default)
def get_existing_api_key(env_var_name):
    """Return the value stored for ``env_var_name`` in the .env file.

    Args:
        env_var_name: Key to look up among the entries parsed by
            ``read_env_file``.

    Returns:
        The stored value, or ``None`` when the key is not present.
    """
    return read_env_file().get(env_var_name)
def read_env_file():
    """Parse the .env file at ``ENV_FILE_PATH`` into a dictionary.

    Lines without ``=`` and lines whose first non-blank character is ``#``
    are skipped. Each remaining line is split at its first ``=``; double and
    then single quote characters are stripped from both ends of the value.

    Returns:
        A dict mapping variable names to values; empty when the file does
        not exist.
    """
    parsed = {}
    if not ENV_FILE_PATH.exists():
        return parsed

    with ENV_FILE_PATH.open("r") as handle:
        for raw_line in handle:
            entry = raw_line.strip()
            if "=" not in entry or entry.startswith("#"):
                continue
            name, _, raw_value = entry.partition("=")
            parsed[name] = raw_value.strip('"').strip("'")
    return parsed
def clean_api_key(api_key):
    """Return ``api_key`` with all whitespace removed.

    Leading and trailing whitespace is trimmed first, then every remaining
    run of whitespace inside the key is deleted.

    Args:
        api_key: Raw key text, e.g. as pasted by a user.

    Returns:
        The key without any whitespace characters.
    """
    whitespace_run = re.compile(r"\s+")
    trimmed = api_key.strip()
    return whitespace_run.sub("", trimmed)
def validate_api_key(api_key, key_type, t):
    """Check that an API key has the format expected for its key type.

    Args:
        api_key: Key text to validate.
        key_type: Environment variable name identifying which rule applies
            (e.g. ``"OPENAI_API_KEY"``).
        t: Translation function used to build the error message.

    Returns:
        ``True`` when the whole key matches the rule for ``key_type``,
        otherwise ``False``.

    Raises:
        ValueError: If ``key_type`` has no validation rule.
    """
    basic_pattern = r"^sk-[A-Za-z0-9]+$"
    web_search_pattern = r"^[A-Za-z0-9_\-\.]+$"
    tencent_pattern = r"^[A-Za-z0-9]+$"

    validation_rules = {
        # Model API Keys
        "SILICON_API_KEY": basic_pattern,
        "OPENAI_API_KEY": basic_pattern,
        "QWEN_API_KEY": basic_pattern,
        # Search Engine API Keys
        "BING_SEARCH_API_KEY": web_search_pattern,
        "BRAVE_SEARCH_API_KEY": web_search_pattern,
        "GOOGLE_SERPER_API_KEY": web_search_pattern,
        "TENCENT_SEARCH_SECRET_ID": tencent_pattern,
        "TENCENT_SEARCH_SECRET_KEY": tencent_pattern,
        # Legacy support
        "WEB_SEARCH_API_KEY": web_search_pattern,
    }

    if key_type not in validation_rules:
        raise ValueError(t("UNKNOWN_API_KEY_TYPE", KEY_TYPE=key_type))

    pattern = validation_rules[key_type]
    # fullmatch, not match: "$" also matches just before a trailing newline,
    # so re.match would accept "sk-abc\n" as a valid key.
    return re.fullmatch(pattern, api_key) is not None
def save_api_key_to_env(key_type, api_key, t):
    """Validate an API key and store it in the .env file.

    The file is rewritten from the entries returned by ``read_env_file`` plus
    the new key, one ``NAME=value`` line per entry.

    Args:
        key_type: Environment variable name or model format
        api_key: API key value
        t: Translation function

    Raises:
        ValueError: If the key fails validation for the resolved variable
            name, or the variable name has no validation rule.
    """
    # Model formats map onto env var names; anything else is taken to be an
    # env var name already.
    model_format_to_env_var = {
        "internlm_silicon": "SILICON_API_KEY",
        "gpt4": "OPENAI_API_KEY",
        "qwen": "QWEN_API_KEY",
    }
    env_var_name = model_format_to_env_var.get(key_type, key_type)

    key_is_valid = validate_api_key(api_key, env_var_name, t)
    if not key_is_valid:
        raise ValueError(t("INVALID_API_KEY", KEY_TYPE=env_var_name))

    entries = read_env_file()
    entries[env_var_name] = api_key

    with ENV_FILE_PATH.open("w") as env_file:
        env_file.writelines(
            f"{name}={stored}\n" for name, stored in entries.items()
        )

    print(t("API_KEY_SAVED", ENV_VAR_NAME=env_var_name))
def ensure_directory(path):
    """Create ``path`` and any missing parents, reporting when it is created.

    Nothing happens, and nothing is printed, when the path already exists.

    Args:
        path: Directory location as a string or path object.
    """
    target = Path(path)
    if target.exists():
        return
    target.mkdir(parents=True, exist_ok=True)
    print(t("DIR_CREATED", dir=target))
def copy_templates_to_temp(template_files):
    """Copy the named template files from the package into ``TEMP_DIR``.

    Files are looked up under ``PACKAGE_DIR / "templates"``. The process
    exits with status 1 at the first file that is missing.

    Args:
        template_files: Iterable of file names relative to the templates
            directory.
    """
    templates_root = PACKAGE_DIR / "templates"
    ensure_directory(TEMP_DIR)

    for filename in template_files:
        origin = templates_root / filename
        if not origin.exists():
            print(t("FILE_NOT_FOUND", file=filename))
            sys.exit(1)
        shutil.copy2(origin, TEMP_DIR / filename)
        print(t("FILE_COPIED", file=filename))
def modify_docker_compose(model_type, backend_language, model_format, search_engine):
    """Modify docker-compose.yaml based on user choices.

    Loads ``docker-compose.yaml`` from ``TEMP_DIR``, updates the ``backend``
    service and writes the file back. The service's ``environment`` list is
    replaced with LANG, MODEL_FORMAT and SEARCH_ENGINE; entries already in
    the file are not kept.

    Args:
        model_type: ``CLOUD_LLM_DOCKERFILE`` or ``LOCAL_LLM_DOCKERFILE``.
        backend_language: Value for ``--lang`` and the LANG variable.
        model_format: Value for ``--model_format`` and MODEL_FORMAT.
        search_engine: Value for ``--search_engine`` and SEARCH_ENGINE.

    Raises:
        ValueError: If ``model_type`` is neither of the two known values.
            The file is left untouched in that case.
    """
    docker_compose_path = os.path.join(TEMP_DIR, "docker-compose.yaml")
    with open(docker_compose_path, "r", encoding="utf-8") as file:
        compose_data = yaml.safe_load(file)

    # Set the name of the project
    compose_data["name"] = "mindsearch"

    # Configure backend service
    backend_service = compose_data["services"]["backend"]

    # Ensure .env file is included. Compose accepts env_file as a single
    # string as well as a list, so normalise to a list first: a substring
    # test on a string would wrongly treat "prod.env" as containing ".env".
    env_files = backend_service.get("env_file")
    if not env_files:
        env_files = []
    elif isinstance(env_files, str):
        env_files = [env_files]
    if ".env" not in env_files:
        env_files.append(".env")
    backend_service["env_file"] = env_files

    # Set command with all parameters
    command = f"python -m mindsearch.app --lang {backend_language} --model_format {model_format} --search_engine {search_engine}"
    backend_service["command"] = command

    # Environment variables in docker-compose "KEY=value" list format
    env_vars = {
        "LANG": backend_language,
        "MODEL_FORMAT": model_format,
        "SEARCH_ENGINE": search_engine,
    }
    backend_service["environment"] = [
        f"{key}={value}" for key, value in env_vars.items()
    ]

    # Configure based on model type
    if model_type == CLOUD_LLM_DOCKERFILE:
        # Cloud deployment: drop the deploy section and the volumes
        backend_service.pop("deploy", None)
        backend_service.pop("volumes", None)
    elif model_type == LOCAL_LLM_DOCKERFILE:
        # Add GPU configuration for local deployment unless one is present
        if "deploy" not in backend_service:
            backend_service["deploy"] = {
                "resources": {
                    "reservations": {
                        "devices": [
                            {"driver": "nvidia", "count": 1, "capabilities": ["gpu"]}
                        ]
                    }
                }
            }
        # Add volume for cache in local deployment
        backend_service["volumes"] = ["/root/.cache:/root/.cache"]
    else:
        raise ValueError(t("UNKNOWN_DOCKERFILE", dockerfile=model_type))

    # Save the modified docker-compose.yaml
    with open(docker_compose_path, "w", encoding="utf-8") as file:
        yaml.dump(compose_data, file)

    print(
        t(
            "docker_compose_updated",
            mode=(t("CLOUD") if model_type == CLOUD_LLM_DOCKERFILE else t("LOCAL")),
            format=model_format,
        )
    )
def get_model_formats(model_type):
    """Return the model format names available for a deployment type.

    Args:
        model_type: ``CLOUD_LLM_DOCKERFILE`` or ``LOCAL_LLM_DOCKERFILE``.

    Returns:
        A new list of model format names.

    Raises:
        ValueError: If ``model_type`` matches neither constant.
    """
    if model_type == CLOUD_LLM_DOCKERFILE:
        return ["internlm_silicon", "qwen", "gpt4"]
    if model_type == LOCAL_LLM_DOCKERFILE:
        return ["internlm_server", "internlm_client", "internlm_hf"]
    raise ValueError(t("UNKNOWN_MODEL_TYPE", model_type=model_type))
def copy_backend_dockerfile(choice):
    """Copy backend Dockerfile to temp directory based on user choice.

    The source is ``PACKAGE_DIR / "templates" / BACKEND_DOCKERFILE_DIR /
    choice``; the copy is written to ``TEMP_DIR / "backend.dockerfile"``.

    Args:
        choice: File name of the backend Dockerfile to copy.

    Raises:
        FileNotFoundError: If the selected Dockerfile does not exist.
    """
    source_file = Path(BACKEND_DOCKERFILE_DIR) / choice
    dest_file = "backend.dockerfile"
    source_path = PACKAGE_DIR / "templates" / source_file
    dest_path = TEMP_DIR / dest_file

    if not source_path.exists():
        raise FileNotFoundError(t("FILE_NOT_FOUND", file=source_file))

    dest_path.parent.mkdir(parents=True, exist_ok=True)
    # Copy bytes rather than text: a text round-trip decodes with the locale
    # encoding (which can fail on non-ASCII content) and rewrites newlines.
    dest_path.write_bytes(source_path.read_bytes())
    print(
        t(
            "BACKEND_DOCKERFILE_COPIED",
            source_path=str(source_path),
            dest_path=str(dest_path),
        )
    )
def copy_frontend_dockerfile():
    """Copy frontend Dockerfile to temp directory.

    The source is ``PACKAGE_DIR / "templates" / FRONTEND_DOCKERFILE_DIR /
    REACT_DOCKERFILE``; the copy is written to
    ``TEMP_DIR / "frontend.dockerfile"``.

    Raises:
        FileNotFoundError: If the frontend Dockerfile does not exist.
    """
    source_file = Path(FRONTEND_DOCKERFILE_DIR) / REACT_DOCKERFILE
    dest_file = "frontend.dockerfile"
    source_path = PACKAGE_DIR / "templates" / source_file
    dest_path = TEMP_DIR / dest_file

    if not source_path.exists():
        raise FileNotFoundError(t("FILE_NOT_FOUND", file=source_file))

    dest_path.parent.mkdir(parents=True, exist_ok=True)
    # Copy bytes rather than text: a text round-trip decodes with the locale
    # encoding (which can fail on non-ASCII content) and rewrites newlines.
    dest_path.write_bytes(source_path.read_bytes())
    print(
        t(
            "FRONTEND_DOCKERFILE_COPIED",
            source_path=str(source_path),
            dest_path=str(dest_path),
        )
    )