import os
import random
import base64
import requests
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import WebDriverException, TimeoutException
from PIL import Image
from io import BytesIO
from datetime import datetime
import gradio as gr
from typing import Tuple
import time
from pathlib import Path # 추가
from datetime import datetime, timedelta
from huggingface_hub import InferenceClient
from dotenv import load_dotenv
# Load environment variables from a .env file (expects HF_TOKEN).
load_dotenv()

# HuggingFace inference client used by the space-analysis tab.
hf_client = InferenceClient(
    "CohereForAI/c4ai-command-r-plus-08-2024",
    token=os.getenv("HF_TOKEN")
)

# Directory where website screenshots are cached as JPEG files.
CACHE_DIR = Path("screenshot_cache")
CACHE_DIR.mkdir(exist_ok=True)

# In-memory screenshot cache.
# NOTE(review): declared but never read or written anywhere below — confirm
# whether it can be removed.
SCREENSHOT_CACHE = {}
def get_cached_screenshot(url: str) -> str:
    """Return a base64-encoded JPEG screenshot for *url*, using the disk cache.

    On a cache hit the stored JPEG bytes are returned as-is; on a miss (or an
    unreadable cache file) a fresh screenshot is taken via take_screenshot().
    Returns an empty string on any unrecoverable error.
    """
    try:
        # Derive a filesystem-safe cache key from the URL (truncated so the
        # filename stays within common filesystem limits).
        safe_filename = base64.urlsafe_b64encode(url.encode()).decode()
        cache_file = CACHE_DIR / f"{safe_filename[:200]}.jpg"

        if cache_file.exists():
            try:
                # Cheap integrity check (no full decode) so corrupt cache
                # files are still detected and retaken, as before.
                with Image.open(cache_file) as img:
                    img.verify()
                # The cache already holds an optimized JPEG: encode the raw
                # bytes directly instead of decoding and re-compressing them,
                # which wasted CPU and degraded quality on every hit.
                return base64.b64encode(cache_file.read_bytes()).decode()
            except Exception as e:
                print(f"Cache read error for {url}: {e}")
                # Drop the corrupt entry and fall through to a fresh capture.
                if cache_file.exists():
                    cache_file.unlink()

        return take_screenshot(url)

    except Exception as e:
        print(f"Screenshot cache error for {url}: {e}")
        return ""
def take_screenshot(url: str) -> str:
    """Capture a screenshot of *url* with headless Chrome.

    The image is downscaled, converted to RGB, written to the disk cache and
    returned as a base64-encoded JPEG string ('' on failure).
    """
    # Assume https when the scheme is missing.
    if not url.startswith('http'):
        url = f"https://{url}"

    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    options.add_argument('--window-size=1080,720')

    driver = None
    try:
        driver = webdriver.Chrome(options=options)
        driver.get(url)

        # Wait for the document body, then a fixed grace period for async
        # content to render.
        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.TAG_NAME, "body"))
        )
        time.sleep(3)

        screenshot = driver.get_screenshot_as_png()
        img = Image.open(BytesIO(screenshot))

        # Downscale to keep the base64 payload small.
        max_size = (800, 600)
        img.thumbnail(max_size, Image.Resampling.LANCZOS)

        # JPEG cannot store alpha: composite transparent images onto white.
        # Also convert any other non-RGB mode (e.g. palette 'P') — previously
        # such modes made img.save(..., format="JPEG") raise.
        if img.mode in ('RGBA', 'LA'):
            background = Image.new('RGB', img.size, (255, 255, 255))
            background.paste(img, mask=img.split()[-1])
            img = background
        elif img.mode != 'RGB':
            img = img.convert('RGB')

        # Persist to the disk cache (same key scheme as get_cached_screenshot).
        safe_filename = base64.urlsafe_b64encode(url.encode()).decode()
        cache_file = CACHE_DIR / f"{safe_filename[:200]}.jpg"
        img.save(cache_file, format="JPEG", quality=85, optimize=True)

        # Encode the same optimized JPEG for the caller.
        buffered = BytesIO()
        img.save(buffered, format="JPEG", quality=85, optimize=True)
        return base64.b64encode(buffered.getvalue()).decode()

    except Exception as e:
        print(f"Screenshot error for {url}: {e}")
        return ""
    finally:
        # Always release the browser, even on failure.
        if driver:
            driver.quit()
def cleanup_cache():
    """Delete cached screenshots older than 24 hours, plus zero-byte files."""
    try:
        current_time = time.time()
        for cache_file in CACHE_DIR.glob("*.jpg"):
            try:
                # Stat each file once (the original called .stat() twice,
                # racing against concurrent deletion between the two calls).
                st = cache_file.stat()
                if (current_time - st.st_mtime > 86400) or st.st_size == 0:
                    cache_file.unlink()
            except Exception as e:
                print(f"Error cleaning cache file {cache_file}: {e}")
    except Exception as e:
        print(f"Cache cleanup error: {e}")
# Purge stale cache entries once at import/startup time.
cleanup_cache()
def calculate_rising_rate(created_date: str, rank: int) -> int:
    """Compute the AI Rising Rate as a star count in the range 1-5.

    Args:
        created_date: ISO timestamp (e.g. '2024-01-01T12:00:00.000Z'); an
            empty or unparsable value now contributes 0 points instead of
            raising (the previous behavior aborted whole sort operations
            when an item had no 'createdAt').
        rank: trending rank; lower values score higher.

    Returns:
        Number of stars, 1 (lowest) to 5 (highest).
    """
    # Recency score: up to 300 points, minus one per day of age.
    try:
        created = datetime.strptime(created_date.split('T')[0], '%Y-%m-%d')
        days_diff = (datetime.now() - created).days
        date_score = max(0, 300 - days_diff)
    except (ValueError, AttributeError):
        # Missing/malformed creation date: treat as "old".
        date_score = 0

    # Rank score: up to 600 points (rank 0 -> 600, rank >= 600 -> 0).
    rank_score = max(0, 600 - rank)

    total_score = date_score + rank_score

    # Map the 0-900 total onto 1-5 stars (never 0).
    if total_score <= 200:
        stars = 1
    elif total_score <= 400:
        stars = 2
    elif total_score <= 600:
        stars = 3
    elif total_score <= 800:
        stars = 4
    else:
        stars = 5
    return stars
def get_popularity_grade(likes: int, stars: int) -> tuple:
    """Compute the AI Popularity Score grade.

    Combines a likes-based base score (capped at 10,000) with a star bonus
    of 1,000 points per star, then maps the total onto an 18-step letter
    grade from AAA+ down to B-.

    Returns:
        (grade, total_score), e.g. ("AA+", 13200).
    """
    total_score = min(likes, 10000) + stars * 1000

    # Grade thresholds, highest first (18 steps).
    grade_table = (
        (14500, "AAA+"), (14000, "AAA"), (13500, "AAA-"),
        (13000, "AA+"), (12500, "AA"), (12000, "AA-"),
        (11500, "A+"), (11000, "A"), (10000, "A-"),
        (9000, "BBB+"), (8000, "BBB"), (7000, "BBB-"),
        (6000, "BB+"), (5000, "BB"), (4000, "BB-"),
        (3000, "B+"), (2000, "B"), (1000, "B-"),
    )

    # First threshold the total clears wins; below 1000 falls back to B-.
    grade = next(
        (label for threshold, label in grade_table if total_score >= threshold),
        "B-",
    )
    return grade, total_score
# Replace the hardware_info section inside get_card with the following:
def get_rating_info(item: dict, index: int) -> str:
    """Build the HTML fragment showing AI Rising Rate / AI Popularity Score.

    Args:
        item: raw API item; reads 'createdAt' and 'likes'.
        index: 0-based list position; index + 1 is used as the rank.
    """
    created = item.get('createdAt', '').split('T')[0]
    # 'likes' may arrive as an int or a comma-formatted string.
    likes = int(str(item.get('likes', '0')).replace(',', ''))

    # AI Rising Rate: filled stars followed by empty stars (always 5 glyphs).
    stars = calculate_rising_rate(created, index + 1)
    star_html = "★" * stars + "☆" * (5 - stars)

    # AI Popularity Score: letter grade plus numeric total.
    grade, score = get_popularity_grade(likes, stars)

    # Color per grade family ('+'/'-' suffixes stripped before lookup).
    grade_colors = {
        'AAA': '#FFD700', 'AA': '#FFA500', 'A': '#FF4500',
        'BBB': '#4169E1', 'BB': '#1E90FF', 'B': '#00BFFF'
    }
    grade_base = grade.rstrip('+-')
    grade_color = grade_colors.get(grade_base, '#666666')

    # NOTE(review): grade_color is computed but not referenced below — the
    # returned template looks stripped of its original HTML tags; verify
    # against the full source.
    return f"""
    AI Rising Rate:
    {star_html}
    AI Popularity Score:
    {grade} ({score:,})
    """
def get_hardware_info(item: dict) -> tuple:
    """Extract (cpu, gpu, sdk) display strings from an API item.

    Falls back to ('Standard', 'None', 'N/A') when anything goes wrong.
    """
    try:
        runtime = item.get('runtime', {})

        # CPU description (API-provided, defaults to 'Standard').
        cpu_info = runtime.get('cpu', 'Standard')

        # GPU description: "<name> (<memory>GB)" when both are available.
        gpu_info = "None"
        if runtime.get('accelerator') == "gpu":
            gpu = runtime.get('gpu', {})  # look up the dict once, not per field
            gpu_type = gpu.get('name', '')
            gpu_memory = gpu.get('memory', '')
            if gpu_type:
                gpu_info = f"{gpu_type}"
                if gpu_memory:
                    gpu_info += f" ({gpu_memory}GB)"

        # A '@spaces.GPU' decorator mentioned in sdk_version also implies GPU.
        if '@spaces.GPU' in str(item.get('sdk_version', '')):
            if gpu_info == "None":
                gpu_info = "GPU Enabled"

        sdk = item.get('sdk', 'N/A')

        return cpu_info, gpu_info, sdk
    except Exception as e:
        print(f"Error parsing hardware info: {str(e)}")
        return 'Standard', 'None', 'N/A'
def get_card(item: dict, index: int, card_type: str = "space") -> str:
    """Build one trending-board card as an HTML string.

    Args:
        item: raw API item dict for a space, model or dataset.
        index: 0-based rank position (rendered as index + 1).
        card_type: "space", "model" or "dataset"; selects the URL scheme,
            color theme and (for spaces) a screenshot background.
    """
    item_id = item.get('id', '')
    # ids are normally "author/title"; tolerate ids without a slash instead
    # of raising ValueError like the previous split('/', 1) unpacking did.
    author, sep, title = item_id.partition('/')
    if not sep:
        author, title = '', item_id
    likes = format(item.get('likes', 0), ',')
    created = item.get('createdAt', '').split('T')[0]

    # Optional one-line description (only present for spaces).
    short_description = item.get('cardData', {}).get('short_description', '')

    # Target URL per item type.
    if card_type == "space":
        url = f"https://huggingface.co/spaces/{item_id}"
    elif card_type == "model":
        url = f"https://huggingface.co/{item_id}"
    else:  # dataset
        url = f"https://huggingface.co/datasets/{item_id}"

    # Metadata. NOTE(review): pipeline_tag/license/sdk are not referenced in
    # the markup below — the template looks stripped; kept for parity with
    # the full original.
    tags = item.get('tags', [])
    pipeline_tag = item.get('pipeline_tag', '')
    license = item.get('license', '')
    sdk = item.get('sdk', 'N/A')

    # AI rating HTML fragment.
    rating_info = get_rating_info(item, index)

    # Per-type gradient / background / icon / label.
    if card_type == "space":
        gradient_colors = """
        rgba(255, 182, 193, 0.7), /* 파스텔 핑크 */
        rgba(173, 216, 230, 0.7), /* 파스텔 블루 */
        rgba(255, 218, 185, 0.7) /* 파스텔 피치 */
        """
        # Fetch the screenshot exactly ONCE — the original evaluated
        # get_cached_screenshot(url) twice in a conditional expression,
        # doubling the expensive Selenium capture on every cache miss.
        # (The function already returns '' on failure, so the fallback
        # branch was redundant anyway.)
        screenshot = get_cached_screenshot(url)
        bg_content = f"""
        background-image: url(data:image/png;base64,{screenshot});
        background-size: cover;
        background-position: center;
        """
        type_icon = "🎯"
        type_label = "SPACE"
    elif card_type == "model":
        gradient_colors = """
        rgba(110, 142, 251, 0.7), /* 모델 블루 */
        rgba(130, 158, 251, 0.7),
        rgba(150, 174, 251, 0.7)
        """
        bg_content = f"""
        background: linear-gradient(135deg, #6e8efb, #4a6cf7);
        padding: 15px;
        """
        type_icon = "🤖"
        type_label = "MODEL"
    else:  # dataset
        gradient_colors = """
        rgba(255, 107, 107, 0.7), /* 데이터셋 레드 */
        rgba(255, 127, 127, 0.7),
        rgba(255, 147, 147, 0.7)
        """
        bg_content = f"""
        background: linear-gradient(135deg, #ff6b6b, #ff8787);
        padding: 15px;
        """
        type_icon = "📊"
        type_label = "DATASET"

    content_bg = f"""
    background: linear-gradient(135deg, {gradient_colors});
    backdrop-filter: blur(10px);
    """

    # Tag chips (models and datasets only, first five tags).
    tags_html = ""
    if card_type != "space":
        tags_html = f"""
        {' '.join([f'''
        #{tag}
        ''' for tag in tags[:5]])}
        """

    # Assembled card. NOTE(review): markup tags appear stripped here too —
    # content_bg/bg_content are built but not referenced in this template.
    return f"""
    #{index + 1}
    {type_icon} {type_label}
    {tags_html}
    {title}
    {f'''
    {short_description}
    ''' if card_type == "space" and short_description else ''}
    👤 {author}
    ❤️ {likes}
    📅 {created}
    {rating_info}
    """
def get_trending_spaces(search_query="", sort_by="rank", progress=gr.Progress()) -> Tuple[str, str]:
    """Fetch trending spaces and render them as a card gallery.

    Args:
        search_query: case-insensitive substring filter on id + title.
        sort_by: 'rank' (API order), 'rising_rate' or 'popularity'.
        progress: gradio progress reporter.

    Returns:
        (html_content, status_message); on failure both describe the error.
    """
    url = "https://huggingface.co/api/spaces"
    try:
        progress(0, desc="Fetching spaces data...")
        params = {
            'full': 'true',
            'limit': 24
        }
        response = requests.get(url, params=params)
        response.raise_for_status()
        spaces = response.json()

        # Filter by search keyword.
        if search_query:
            spaces = [space for space in spaces if search_query.lower() in
                      (space.get('id', '') + ' ' + space.get('title', '')).lower()]

        # Sort. NOTE(review): rank 0 is passed to calculate_rising_rate for
        # every item, so 'rising_rate' effectively orders by creation date only.
        sort_by = sort_by.lower()
        if sort_by == "rising_rate":
            spaces.sort(key=lambda x: calculate_rising_rate(x.get('createdAt', ''), 0), reverse=True)
        elif sort_by == "popularity":
            spaces.sort(key=lambda x: get_popularity_grade(
                int(str(x.get('likes', '0')).replace(',', '')),
                calculate_rising_rate(x.get('createdAt', ''), 0))[1],
                reverse=True)

        progress(0.1, desc="Creating gallery...")
        # NOTE(review): the gallery wrapper markup appears stripped from the
        # string literals below — verify against the original template.
        html_content = """
        """
        for idx, space in enumerate(spaces):
            html_content += get_card(space, idx, "space")
            progress((0.1 + 0.9 * idx/len(spaces)), desc=f"Loading space {idx+1}/{len(spaces)}...")
        html_content += "
        "
        progress(1.0, desc="Complete!")
        return html_content, f"Found {len(spaces)} spaces"
    except Exception as e:
        error_html = f'Error: {str(e)}
        '
        return error_html, f"Error: {str(e)}"
def get_models(search_query="", sort_by="rank", progress=gr.Progress()) -> Tuple[str, str]:
    """Fetch trending models and render them as a card gallery.

    Mirrors get_trending_spaces but against the /api/models endpoint.

    Args:
        search_query: case-insensitive substring filter on id + title.
        sort_by: 'rank' (API order), 'rising_rate' or 'popularity'.
        progress: gradio progress reporter.

    Returns:
        (html_content, status_message); on failure both describe the error.
    """
    url = "https://huggingface.co/api/models"
    try:
        progress(0, desc="Fetching models data...")
        params = {
            'full': 'true',
            'limit': 24
        }
        response = requests.get(url, params=params)
        response.raise_for_status()
        models = response.json()

        # Filter by search keyword.
        if search_query:
            models = [model for model in models if search_query.lower() in
                      (model.get('id', '') + ' ' + model.get('title', '')).lower()]

        # Sort (same caveat as get_trending_spaces: rank 0 for all items).
        sort_by = sort_by.lower()
        if sort_by == "rising_rate":
            models.sort(key=lambda x: calculate_rising_rate(x.get('createdAt', ''), 0), reverse=True)
        elif sort_by == "popularity":
            models.sort(key=lambda x: get_popularity_grade(
                int(str(x.get('likes', '0')).replace(',', '')),
                calculate_rising_rate(x.get('createdAt', ''), 0))[1],
                reverse=True)

        progress(0.1, desc="Creating gallery...")
        # NOTE(review): gallery wrapper markup appears stripped — verify.
        html_content = """
        """
        for idx, model in enumerate(models):
            html_content += get_card(model, idx, "model")
            progress((0.1 + 0.9 * idx/len(models)), desc=f"Loading model {idx+1}/{len(models)}...")
        html_content += "
        "
        progress(1.0, desc="Complete!")
        return html_content, f"Found {len(models)} models"
    except Exception as e:
        error_html = f'Error: {str(e)}
        '
        return error_html, f"Error: {str(e)}"
def get_datasets(search_query="", sort_by="rank", progress=gr.Progress()) -> Tuple[str, str]:
    """Fetch trending datasets and render them as a card gallery.

    Mirrors get_trending_spaces but against the /api/datasets endpoint.

    Args:
        search_query: case-insensitive substring filter on id + title.
        sort_by: 'rank' (API order), 'rising_rate' or 'popularity'.
        progress: gradio progress reporter.

    Returns:
        (html_content, status_message); on failure both describe the error.
    """
    url = "https://huggingface.co/api/datasets"
    try:
        progress(0, desc="Fetching datasets data...")
        params = {
            'full': 'true',
            'limit': 24
        }
        response = requests.get(url, params=params)
        response.raise_for_status()
        datasets = response.json()

        # Filter by search keyword.
        if search_query:
            datasets = [dataset for dataset in datasets if search_query.lower() in
                        (dataset.get('id', '') + ' ' + dataset.get('title', '')).lower()]

        # Sort (same caveat as get_trending_spaces: rank 0 for all items).
        sort_by = sort_by.lower()
        if sort_by == "rising_rate":
            datasets.sort(key=lambda x: calculate_rising_rate(x.get('createdAt', ''), 0), reverse=True)
        elif sort_by == "popularity":
            datasets.sort(key=lambda x: get_popularity_grade(
                int(str(x.get('likes', '0')).replace(',', '')),
                calculate_rising_rate(x.get('createdAt', ''), 0))[1],
                reverse=True)

        progress(0.1, desc="Creating gallery...")
        # NOTE(review): gallery wrapper markup appears stripped — verify.
        html_content = """
        """
        for idx, dataset in enumerate(datasets):
            html_content += get_card(dataset, idx, "dataset")
            progress((0.1 + 0.9 * idx/len(datasets)), desc=f"Loading dataset {idx+1}/{len(datasets)}...")
        html_content += "
        "
        progress(1.0, desc="Complete!")
        return html_content, f"Found {len(datasets)} datasets"
    except Exception as e:
        error_html = f'Error: {str(e)}
        '
        return error_html, f"Error: {str(e)}"
# Sorting helper shared by the fetchers
def sort_items(items, sort_by):
    """Return *items* ordered according to *sort_by*.

    'rank' (or any unrecognized value) keeps the incoming API order, which
    is already rank order; 'rising_rate' sorts by star rating and
    'popularity' by the numeric popularity score, both descending.
    """
    if sort_by == "rising_rate":
        return sorted(
            items,
            key=lambda x: calculate_rising_rate(x.get('createdAt', ''), 0),
            reverse=True,
        )
    if sort_by == "popularity":
        def popularity_key(x):
            likes = int(str(x.get('likes', '0')).replace(',', ''))
            stars = calculate_rising_rate(x.get('createdAt', ''), 0)
            return get_popularity_grade(likes, stars)[1]
        return sorted(items, key=popularity_key, reverse=True)
    # "rank" and anything else: pass through unchanged.
    return items
# Generic API fetch helper
def fetch_items(item_type, search_query="", sort_by="rank", limit=1000):
    """Fetch items of *item_type* ('spaces'/'models'/'datasets') from the HF API.

    Applies an additional client-side keyword filter and the requested sort,
    then returns at most the top 300 items. Returns [] on any error.
    """
    base_url = f"https://huggingface.co/api/{item_type}"
    params = {
        'full': 'true',
        'limit': limit,
        'search': search_query
    }
    try:
        response = requests.get(base_url, params=params)
        response.raise_for_status()
        items = response.json()

        # Client-side keyword filter on id + title (the API 'search' param
        # is also sent above; this narrows the result further).
        if search_query:
            needle = search_query.lower()
            items = [
                item for item in items
                if needle in (item.get('id', '') + item.get('title', '')).lower()
            ]

        # Apply the requested ordering, then cap at the top 300.
        return sort_items(items, sort_by)[:300]
    except Exception as e:
        print(f"Error fetching items: {e}")
        return []
# Space analysis helpers
def analyze_space(space_info: dict) -> str:
    """Analyze one space with the LLM and return an HTML summary fragment.

    Sends a Korean-language prompt asking for six one-line summary items
    (overview, features, strengths, audience, usage, differentiation);
    returns a Korean error string if inference fails.
    """
    try:
        space_id = space_info.get('id', '')
        url = f"https://huggingface.co/spaces/{space_id}"

        # Prompt for the LLM (runtime string — kept verbatim in Korean).
        prompt = f"""
        다음 HuggingFace 스페이스({url})를 분석하여 각 항목을 한 줄로 요약해주세요:
        1. 개요:
        2. 주요 기능 요약:
        3. 특징 및 장점:
        4. 주요 사용 대상:
        5. 사용 방법:
        6. 유사 서비스와의 차별점:
        각 항목은 명확하고 간단하게 한 줄로 작성해주세요.
        """
        # Run chat-completion inference against the module-level hf_client.
        messages = [
            {"role": "system", "content": "당신은 HuggingFace 스페이스 분석 전문가입니다."},
            {"role": "user", "content": prompt}
        ]
        response = hf_client.chat_completion(
            messages,
            max_tokens=1024,
            temperature=0.7,
            top_p=0.9
        )
        analysis = response.choices[0].message.content

        # NOTE(review): surrounding markup appears stripped from this
        # template — verify against the original.
        return f"""
        #{space_info.get('rank', '0')} {space_id}
        {analysis}
        """
    except Exception as e:
        return f"분석 오류: {str(e)}
        "
def analyze_top_spaces(progress=gr.Progress()) -> Tuple[str, str]:
    """Fetch the top 24 spaces and run the LLM analysis on each.

    Returns:
        (html_content, status_message); on failure both describe the error.
    """
    try:
        progress(0, desc="Fetching spaces data...")
        spaces = fetch_items("spaces", limit=24)

        # NOTE(review): wrapper markup appears stripped from this template.
        html_content = """
        🔍 Top 24 Spaces Analysis
        """
        for idx, space in enumerate(spaces):
            progress((idx + 1) / len(spaces), desc=f"Analyzing space {idx+1}/{len(spaces)}...")
            space['rank'] = idx + 1  # 1-based rank shown in the report
            html_content += analyze_space(space)
        html_content += ""
        return html_content, f"Analyzed {len(spaces)} spaces"
    except Exception as e:
        error_html = f'Error: {str(e)}
        '
        return error_html, f"Error: {str(e)}"
def create_interface():
    """Build the Gradio Blocks UI: three trending tabs plus an analysis tab.

    Each tab has a search box, a sort radio and a refresh button; search and
    sort changes auto-refresh their tab. Returns the (not yet launched)
    Blocks instance.
    """
    with gr.Blocks(title="HuggingFace Trending Board", css="""
        .search-sort-container {
            background: linear-gradient(135deg, rgba(255,255,255,0.95), rgba(240,240,255,0.95));
            border-radius: 15px;
            padding: 20px;
            margin: 10px 0;
            box-shadow: 0 4px 6px rgba(0,0,0,0.1);
            overflow: visible;
        }
        .search-box {
            border: 2px solid #e1e1e1;
            border-radius: 10px;
            padding: 12px;
            transition: all 0.3s ease;
            background: linear-gradient(135deg, #ffffff, #f8f9ff);
            width: 100%;
        }
        .search-box:focus {
            border-color: #7b61ff;
            box-shadow: 0 0 0 2px rgba(123,97,255,0.2);
            background: linear-gradient(135deg, #ffffff, #f0f3ff);
        }
        .refresh-btn {
            background: linear-gradient(135deg, #7b61ff, #6366f1);
            color: white;
            border: none;
            padding: 10px 20px;
            border-radius: 10px;
            cursor: pointer;
            transition: all 0.3s ease;
            width: 120px;
            height: 80px !important;
            display: flex;
            align-items: center;
            justify-content: center;
            margin-left: auto;
            font-size: 1.2em !important;
            box-shadow: 0 4px 6px rgba(0,0,0,0.1);
        }
        .refresh-btn:hover {
            transform: translateY(-2px);
            box-shadow: 0 6px 12px rgba(0,0,0,0.2);
            background: linear-gradient(135deg, #8b71ff, #7376f1);
        }
    """) as interface:
        gr.Markdown("""
        # 🤗 HuggingFace Trending TOP 24 Board
        Explore, search, and sort through the Shows Top 24 Trending spaces with AI Ratings
        """)
        with gr.Tabs() as tabs:
            # Spaces tab
            with gr.Tab("🎯 Trending Spaces"):
                with gr.Row(elem_classes="search-sort-container"):
                    with gr.Column(scale=2):
                        spaces_search = gr.Textbox(
                            label="🔍 Search Spaces",
                            placeholder="Enter keywords to search...",
                            elem_classes="search-box"
                        )
                    with gr.Column(scale=2):
                        spaces_sort = gr.Radio(
                            choices=["rank", "rising_rate", "popularity"],
                            value="rank",
                            label="Sort by",
                            interactive=True
                        )
                    with gr.Column(scale=1):
                        spaces_refresh_btn = gr.Button(
                            "🔄 Refresh",
                            variant="primary",
                            elem_classes="refresh-btn"
                        )
                spaces_gallery = gr.HTML()
                spaces_status = gr.Markdown("Loading...")
            # Models tab
            with gr.Tab("🤖 Trending Models"):
                with gr.Row(elem_classes="search-sort-container"):
                    with gr.Column(scale=2):
                        models_search = gr.Textbox(
                            label="🔍 Search Models",
                            placeholder="Enter keywords to search...",
                            elem_classes="search-box"
                        )
                    with gr.Column(scale=2):
                        models_sort = gr.Radio(
                            choices=["rank", "rising_rate", "popularity"],
                            value="rank",
                            label="Sort by",
                            interactive=True
                        )
                    with gr.Column(scale=1):
                        models_refresh_btn = gr.Button(
                            "🔄 Refresh",
                            variant="primary",
                            elem_classes="refresh-btn"
                        )
                models_gallery = gr.HTML()
                models_status = gr.Markdown("Loading...")
            # Datasets tab
            with gr.Tab("📊 Trending Datasets"):
                with gr.Row(elem_classes="search-sort-container"):
                    with gr.Column(scale=2):
                        datasets_search = gr.Textbox(
                            label="🔍 Search Datasets",
                            placeholder="Enter keywords to search...",
                            elem_classes="search-box"
                        )
                    with gr.Column(scale=2):
                        datasets_sort = gr.Radio(
                            choices=["rank", "rising_rate", "popularity"],
                            value="rank",
                            label="Sort by",
                            interactive=True
                        )
                    with gr.Column(scale=1):
                        datasets_refresh_btn = gr.Button(
                            "🔄 Refresh",
                            variant="primary",
                            elem_classes="refresh-btn"
                        )
                datasets_gallery = gr.HTML()
                datasets_status = gr.Markdown("Loading...")
            # Space analysis tab
            with gr.Tab("🔍 Space Analysis"):
                with gr.Row(elem_classes="search-sort-container"):
                    analysis_refresh_btn = gr.Button(
                        "🔄 Analyze Spaces",
                        variant="primary",
                        elem_classes="refresh-btn"
                    )
                analysis_gallery = gr.HTML()
                analysis_status = gr.Markdown("Click 'Analyze Spaces' to start analysis...")
                # Event handler for the analysis tab
                analysis_refresh_btn.click(
                    fn=analyze_top_spaces,
                    outputs=[analysis_gallery, analysis_status]
                )
        # Run the analysis once at page load.
        # NOTE(review): this triggers 24 LLM calls on EVERY page load —
        # confirm this is intentional (the status text above suggests
        # analysis was meant to be on-demand).
        interface.load(
            fn=analyze_top_spaces,
            outputs=[analysis_gallery, analysis_status]
        )
        # Refresh-button event handlers
        spaces_refresh_btn.click(
            fn=get_trending_spaces,
            inputs=[spaces_search, spaces_sort],
            outputs=[spaces_gallery, spaces_status]
        )
        models_refresh_btn.click(
            fn=get_models,
            inputs=[models_search, models_sort],
            outputs=[models_gallery, models_status]
        )
        datasets_refresh_btn.click(
            fn=get_datasets,
            inputs=[datasets_search, datasets_sort],
            outputs=[datasets_gallery, datasets_status]
        )
        # Auto-refresh when the search text changes
        spaces_search.change(
            fn=get_trending_spaces,
            inputs=[spaces_search, spaces_sort],
            outputs=[spaces_gallery, spaces_status]
        )
        models_search.change(
            fn=get_models,
            inputs=[models_search, models_sort],
            outputs=[models_gallery, models_status]
        )
        datasets_search.change(
            fn=get_datasets,
            inputs=[datasets_search, datasets_sort],
            outputs=[datasets_gallery, datasets_status]
        )
        # Auto-refresh when the sort mode changes
        spaces_sort.change(
            fn=get_trending_spaces,
            inputs=[spaces_search, spaces_sort],
            outputs=[spaces_gallery, spaces_status]
        )
        models_sort.change(
            fn=get_models,
            inputs=[models_search, models_sort],
            outputs=[models_gallery, models_status]
        )
        datasets_sort.change(
            fn=get_datasets,
            inputs=[datasets_search, datasets_sort],
            outputs=[datasets_gallery, datasets_status]
        )
        # Initial data load for the three trending tabs
        interface.load(
            fn=get_trending_spaces,
            inputs=[spaces_search, spaces_sort],
            outputs=[spaces_gallery, spaces_status]
        )
        interface.load(
            fn=get_models,
            inputs=[models_search, models_sort],
            outputs=[models_gallery, models_status]
        )
        interface.load(
            fn=get_datasets,
            inputs=[datasets_search, datasets_sort],
            outputs=[datasets_gallery, datasets_status]
        )
    return interface
if __name__ == "__main__":
    try:
        # Ensure the screenshot cache directory exists, then purge stale files.
        CACHE_DIR.mkdir(exist_ok=True)
        cleanup_cache()
        # Build and launch the Gradio interface.
        demo = create_interface()
        demo.launch(
            share=True,
            inbrowser=True,
            show_api=False,
            max_threads=4  # limit worker threads
        )
    except Exception as e:
        print(f"Application error: {e}")