import os
import tempfile
from typing import Dict, Optional

# Maps the short ``path_type`` names accepted by ``get_path`` to the keys of
# the dictionary returned by ``get_data_paths``.
_PATH_TYPE_TO_KEY = {
    'pdf': 'DATA_PATH',
    'txt': 'TXT_PATH',
    'tsv': 'TSV_PATH',
    'md': 'MD_PATH',
    'info': 'INFO_PATH',
    'img': 'IMG_PATH',
    'results': 'RESULTS_PATH',
}


def _running_in_hf_space() -> bool:
    """Return True when SPACE_ID or HF_SPACE_ID is set to a non-empty value."""
    return bool(os.environ.get('SPACE_ID') or os.environ.get('HF_SPACE_ID'))


# Configure the Hugging Face cache directory.
def setup_hf_cache() -> Optional[str]:
    """Point the Hugging Face cache at a fresh temporary directory.

    When SPACE_ID or HF_SPACE_ID is set, a new temporary directory is created
    and the environment variables ``HF_HOME`` and ``HF_HUB_CACHE`` are set to
    it (and to its ``hub`` sub-directory, respectively).

    Returns:
        The path of the created cache directory, or ``None`` when neither
        variable is set (the environment is left untouched in that case).
    """
    if not _running_in_hf_space():
        # Local environment: keep the default cache location.
        return None

    # NOTE(review): presumably this has to run before any Hugging Face
    # library reads these variables -- confirm against the callers.
    cache_dir = tempfile.mkdtemp()
    os.environ['HF_HOME'] = cache_dir
    os.environ['HF_HUB_CACHE'] = os.path.join(cache_dir, 'hub')
    print(f"Using Hugging Face cache directory: {cache_dir}")
    return cache_dir


# Module-level cache so that every caller shares the same set of paths.
_DATA_PATHS = None


def get_data_paths() -> Dict[str, str]:
    """Return the dictionary of data directories used by the application.

    The dictionary is built on the first call and cached in ``_DATA_PATHS``;
    later calls return the very same object. When SPACE_ID or HF_SPACE_ID is
    set, a single temporary directory is created once and all paths live
    under it, so repeated calls cannot produce inconsistent locations.
    Otherwise fixed paths relative to ``./src/static`` are returned.

    Only the root temporary directory is created here; the sub-directories
    named in the dictionary are not.

    Returns:
        A dict with the keys ``DATA_PATH``, ``TXT_PATH``, ``TSV_PATH``,
        ``MD_PATH``, ``INFO_PATH``, ``IMG_PATH`` and ``RESULTS_PATH``.
    """
    global _DATA_PATHS
    if _DATA_PATHS is not None:
        return _DATA_PATHS

    if _running_in_hf_space():
        # Create the temporary root exactly once and cache the result.
        temp_dir = tempfile.mkdtemp()
        print(f"Using shared temp data directory: {temp_dir}")
        _DATA_PATHS = {
            'DATA_PATH': os.path.join(temp_dir, 'pdf/'),
            'TXT_PATH': os.path.join(temp_dir, 'txt/'),
            'TSV_PATH': os.path.join(temp_dir, 'tsv/'),
            'MD_PATH': os.path.join(temp_dir, 'md/'),
            'INFO_PATH': os.path.join(temp_dir, 'info/'),
            'IMG_PATH': os.path.join(temp_dir, 'img/'),
            'RESULTS_PATH': os.path.join(temp_dir, 'results/'),
        }
    else:
        _DATA_PATHS = {
            'DATA_PATH': './src/static/data/pdf/',
            'TXT_PATH': './src/static/data/txt/',
            'TSV_PATH': './src/static/data/tsv/',
            'MD_PATH': './src/static/data/md/',
            'INFO_PATH': './src/static/data/info/',
            'IMG_PATH': './src/static/img/',
            'RESULTS_PATH': './src/static/data/results/',
        }
    return _DATA_PATHS


# Global path helper.
def get_path(path_type, survey_id=None, filename=None):
    """Build a path inside one of the configured data directories.

    Args:
        path_type: One of 'pdf', 'txt', 'tsv', 'md', 'info', 'img',
            'results'.
        survey_id: Optional survey identifier; when given it is converted
            with ``str`` and appended as a sub-directory. ``None`` and the
            empty string mean "no survey sub-directory".
        filename: Optional file name appended last; ignored when falsy.

    Returns:
        The base directory for ``path_type``, extended by ``survey_id`` and
        ``filename`` when they are provided.

    Raises:
        ValueError: If ``path_type`` is not one of the supported names.
    """
    paths_config = get_data_paths()

    try:
        config_key = _PATH_TYPE_TO_KEY[path_type]
    except (KeyError, TypeError):
        # TypeError covers unhashable values, which are equally "unknown".
        raise ValueError(f"Unknown path type: {path_type}") from None
    base_path = paths_config[config_key]

    # Compare against None/"" explicitly so that a survey id of 0 is kept
    # instead of being dropped by a plain truthiness test.
    if survey_id is not None and survey_id != "":
        base_path = os.path.join(base_path, str(survey_id))

    if filename:
        return os.path.join(base_path, filename)
    return base_path