import functools
import logging
import re
import time
from typing import Any, Dict, List, Optional

import gradio as gr
import requests
from bs4 import BeautifulSoup

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


# A short class name keeps derived tool names concise.
class HF_API:
    def __init__(self):
        self.base_url = "https://huggingface.co"
        self.docs_url = "https://huggingface.co/docs"
        self.api_url = "https://huggingface.co/api"
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'HF-Info-Server/1.0 (Educational Purpose)',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Accept-Encoding': 'gzip, deflate',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1'
        })
        self.cache = {}
        self.cache_ttl = 3600  # 1 hour cache TTL

    def _is_cache_valid(self, cache_key: str) -> bool:
        if cache_key not in self.cache:
            return False
        cache_time = self.cache[cache_key].get('timestamp', 0)
        return time.time() - cache_time < self.cache_ttl

    def _get_from_cache(self, cache_key: str) -> Optional[str]:
        if self._is_cache_valid(cache_key):
            return self.cache[cache_key]['content']
        return None

    def _store_in_cache(self, cache_key: str, content: str):
        self.cache[cache_key] = {
            'content': content,
            'timestamp': time.time()
        }

    def _fetch_with_retry(self, url: str, max_retries: int = 3) -> Optional[str]:
        cache_key = f"url_{hash(url)}"
        cached_content = self._get_from_cache(cache_key)
        if cached_content:
            logger.info(f"Cache hit for {url}")
            return cached_content
        for attempt in range(max_retries):
            try:
                logger.info(f"Fetching {url} (attempt {attempt + 1})")
                response = self.session.get(url, timeout=20)
                response.raise_for_status()
                content = response.text
                self._store_in_cache(cache_key, content)
                return content
            except requests.exceptions.RequestException as e:
                logger.warning(f"Attempt {attempt + 1} failed for {url}: {e}")
                if attempt < max_retries - 1:
                    time.sleep(2 ** attempt)  # exponential backoff: 1s, 2s, 4s
                else:
                    logger.error(f"All attempts failed for {url}")
                    return None
        return None
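
    # --- HTML scraping helpers ---
    # The two methods below pull code snippets and prose out of rendered doc
    # pages. They are heuristic by design: Hugging Face can change its markup
    # at any time, so the selectors favor broad patterns (regex class matches,
    # generic <main>/<article> containers) over exact class names.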
    def _extract_code_examples(self, soup: BeautifulSoup) -> List[Dict[str, str]]:
        code_blocks = []
        code_elements = soup.find_all(['code', 'pre'])
        for code_elem in code_elements:
            lang_class = code_elem.get('class', [])
            language = 'python'
            for cls in lang_class:
                if 'language-' in str(cls):
                    language = str(cls).replace('language-', '')
                    break
                elif any(lang in str(cls).lower() for lang in ['python', 'bash', 'javascript', 'json']):
                    language = str(cls).lower()
                    break
            code_text = code_elem.get_text(strip=True)
            if len(code_text) > 20 and any(keyword in code_text.lower() for keyword in ['import', 'from', 'def', 'class', 'pip install', 'transformers']):
                code_blocks.append({
                    'code': code_text,
                    'language': language,
                    'type': 'usage' if any(word in code_text.lower() for word in ['import', 'load', 'pipeline']) else 'example'
                })
        highlight_blocks = soup.find_all('div', class_=re.compile(r'highlight|code-block|language'))
        for block in highlight_blocks:
            code_text = block.get_text(strip=True)
            if len(code_text) > 20:
                code_blocks.append({'code': code_text, 'language': 'python', 'type': 'example'})
        # De-duplicate on a prefix hash and keep at most five blocks
        seen = set()
        unique_blocks = []
        for block in code_blocks:
            code_hash = hash(block['code'][:100])
            if code_hash not in seen:
                seen.add(code_hash)
                unique_blocks.append(block)
                if len(unique_blocks) >= 5:
                    break
        return unique_blocks

    def _extract_practical_content(self, soup: BeautifulSoup, topic: str) -> Dict[str, Any]:
        content = {
            'overview': '',
            'code_examples': [],
            'usage_instructions': [],
            'parameters': [],
            'methods': [],
            'installation': '',
            'quickstart': ''
        }
        main_content = soup.find('main') or soup.find('article') or soup.find('div', class_=re.compile(r'content|docs|prose'))
        if not main_content:
            return content
        # Overview: the first substantial paragraphs on the page
        overview_sections = main_content.find_all('p', limit=5)
        overview_texts = []
        for p in overview_sections:
            text = p.get_text(strip=True)
            if len(text) > 30 and not text.startswith('Table of contents'):
                overview_texts.append(text)
        if overview_texts:
            overview = ' '.join(overview_texts)
            content['overview'] = overview[:1000] + "..." if len(overview) > 1000 else overview
        content['code_examples'] = self._extract_code_examples(main_content)
        # Installation: text under an install/setup heading
        install_headings = main_content.find_all(['h1', 'h2', 'h3', 'h4'], string=re.compile(r'install|setup|getting started', re.IGNORECASE))
        for heading in install_headings:
            next_elem = heading.find_next_sibling()
            install_text = []
            while next_elem and next_elem.name not in ['h1', 'h2', 'h3', 'h4'] and len(install_text) < 3:
                if next_elem.name in ['p', 'pre', 'code']:
                    text = next_elem.get_text(strip=True)
                    if text and len(text) > 10:
                        install_text.append(text)
                next_elem = next_elem.find_next_sibling()
            if install_text:
                content['installation'] = ' '.join(install_text)
                break
        # Usage instructions: content under usage/example/quickstart headings
        usage_headings = main_content.find_all(['h1', 'h2', 'h3', 'h4'])
        for heading in usage_headings:
            heading_text = heading.get_text(strip=True).lower()
            if any(keyword in heading_text for keyword in ['usage', 'example', 'how to', 'quickstart', 'getting started']):
                next_elem = heading.find_next_sibling()
                instruction_parts = []
                while next_elem and next_elem.name not in ['h1', 'h2', 'h3', 'h4']:
                    if next_elem.name in ['p', 'li', 'div', 'ol', 'ul']:
                        text = next_elem.get_text(strip=True)
                        if text and len(text) > 15:
                            instruction_parts.append(text)
                    next_elem = next_elem.find_next_sibling()
                    if len(instruction_parts) >= 5:
                        break
                if instruction_parts:
                    content['usage_instructions'].extend(instruction_parts)
        # Parameters: rows from tables whose headers look like API references
        tables = main_content.find_all('table')
        for table in tables:
            headers = [th.get_text(strip=True).lower() for th in table.find_all('th')]
            if any(keyword in ' '.join(headers) for keyword in ['parameter', 'argument', 'option', 'attribute', 'name', 'type']):
                rows = table.find_all('tr')[1:]  # skip the header row
                for row in rows[:8]:
                    cells = [td.get_text(strip=True) for td in row.find_all('td')]
                    if len(cells) >= 2:
                        param_info = {
                            'name': cells[0],
                            'description': cells[1] if len(cells) > 1 else '',
                            'type': cells[2] if len(cells) > 2 else '',
                            'default': cells[3] if len(cells) > 3 else ''
                        }
                        content['parameters'].append(param_info)
        return content
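
    # --- Public tool methods ---
    # Each method below backs one tab in the UI: it validates its input,
    # fetches data (an HTML scrape or a Hub REST API call), and returns a
    # Markdown-formatted string. The docstrings double as user-facing tool
    # descriptions when the app is exposed as a tool server.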

    def search_documentation(self, query: str, max_results: int = 3) -> str:
        """
        Searches the official Hugging Face documentation for a specific topic and returns a summary.
        This tool is useful for finding how-to guides, explanations of concepts like 'pipeline' or 'tokenizer', and usage examples.

        Args:
            query (str): The topic or keyword to search for in the documentation (e.g., 'fine-tuning', 'peft', 'datasets').
            max_results (int): The maximum number of documentation pages to retrieve and summarize. Defaults to 3.
        """
        try:
            # Gradio may hand us a string or a float; normalize to a small int
            try:
                max_results = int(float(max_results))
            except (TypeError, ValueError):
                max_results = 3
            max_results = min(max_results, 5)
            query_lower = query.lower().strip()
            if not query_lower:
                return "Please provide a search query."
            doc_sections = {
                'transformers': {
                    'base_url': 'https://huggingface.co/docs/transformers',
                    'topics': {
                        'pipeline': '/main_classes/pipelines',
                        'tokenizer': '/main_classes/tokenizer',
                        'trainer': '/main_classes/trainer',
                        'model': '/main_classes/model',
                        'quicktour': '/quicktour',
                        'installation': '/installation',
                        'fine-tuning': '/training',
                        'training': '/training',
                        'inference': '/main_classes/pipelines',
                        'preprocessing': '/preprocessing',
                        'tutorial': '/tutorials',
                        'configuration': '/main_classes/configuration',
                        'peft': '/peft',
                        'lora': '/peft',
                        'quantization': '/main_classes/quantization',
                        'generation': '/main_classes/text_generation',
                        'optimization': '/perf_train_gpu_one',
                        'deployment': '/deployment',
                        'custom': '/custom_models'
                    }
                },
                'datasets': {
                    'base_url': 'https://huggingface.co/docs/datasets',
                    'topics': {
                        'loading': '/load_hub',
                        'load': '/load_hub',
                        'processing': '/process',
                        'streaming': '/stream',
                        'audio': '/audio_process',
                        'image': '/image_process',
                        'text': '/nlp_process',
                        'arrow': '/about_arrow',
                        'cache': '/cache',
                        'upload': '/upload_dataset',
                        'custom': '/dataset_script'
                    }
                },
                'diffusers': {
                    'base_url': 'https://huggingface.co/docs/diffusers',
                    'topics': {
                        'pipeline': '/using-diffusers/loading',
                        'stable diffusion': '/using-diffusers/stable_diffusion',
                        'controlnet': '/using-diffusers/controlnet',
                        'inpainting': '/using-diffusers/inpaint',
                        'training': '/training/overview',
                        'optimization': '/optimization/fp16',
                        'schedulers': '/using-diffusers/schedulers'
                    }
                },
                'hub': {
                    'base_url': 'https://huggingface.co/docs/hub',
                    'topics': {
                        'repositories': '/repositories',
                        'git': '/repositories-getting-started',
                        'spaces': '/spaces',
                        'models': '/models',
                        'datasets': '/datasets'
                    }
                }
            }
            relevant_urls = []
            for section_name, section_data in doc_sections.items():
                base_url = section_data['base_url']
                topics = section_data['topics']
                for topic, path in topics.items():
                    relevance = 0
                    if query_lower == topic.lower():
                        relevance = 1.0
                    elif query_lower in topic.lower():
                        relevance = 0.9
                    elif any(word in topic.lower() for word in query_lower.split()):
                        relevance = 0.7
                    elif any(word in query_lower for word in topic.lower().split()):
                        relevance = 0.6
                    if relevance > 0:
                        full_url = base_url + path
                        relevant_urls.append({
                            'url': full_url,
                            'topic': topic,
                            'section': section_name,
                            'relevance': relevance
                        })
            relevant_urls.sort(key=lambda x: x['relevance'], reverse=True)
            relevant_urls = relevant_urls[:max_results]
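
            # Illustrative scoring walk-through (not executed): for
            # query="lora", the 'lora' topic scores 1.0 (exact match) while
            # 'peft' scores 0 because no query word appears in it; for
            # query="peft lora", both 'peft' and 'lora' contain a whole query
            # word and score 0.7. The highest-relevance URLs are kept, capped
            # at max_results.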
            if not relevant_urls:
                return f"āŒ No documentation found for '{query}'. Try: pipeline, tokenizer, trainer, model, fine-tuning, datasets, diffusers, or peft."
            result = f"# šŸ“š Hugging Face Documentation: {query}\n\n"
            for i, url_info in enumerate(relevant_urls, 1):
                section_emoji = {'transformers': 'šŸ¤–', 'datasets': 'šŸ“Š', 'diffusers': 'šŸŽØ', 'hub': '🌐'}.get(url_info['section'], 'šŸ“„')
                result += f"## {i}. {section_emoji} {url_info['topic'].title()} ({url_info['section'].title()})\n\n"
                content = self._fetch_with_retry(url_info['url'])
                if content:
                    soup = BeautifulSoup(content, 'html.parser')
                    practical_content = self._extract_practical_content(soup, url_info['topic'])
                    if practical_content['overview']:
                        result += f"**šŸ“– Overview:**\n{practical_content['overview']}\n\n"
                    if practical_content['installation']:
                        result += f"**āš™ļø Installation:**\n{practical_content['installation']}\n\n"
                    if practical_content['code_examples']:
                        result += "**šŸ’» Code Examples:**\n\n"
                        for j, code_block in enumerate(practical_content['code_examples'][:3], 1):
                            lang = code_block.get('language', 'python')
                            code_type = code_block.get('type', 'example')
                            result += f"*{code_type.title()} {j}:*\n```{lang}\n{code_block['code']}\n```\n\n"
                    if practical_content['usage_instructions']:
                        result += "**šŸ› ļø Usage Instructions:**\n"
                        for idx, instruction in enumerate(practical_content['usage_instructions'][:4], 1):
                            result += f"{idx}. {instruction}\n"
                        result += "\n"
                    if practical_content['parameters']:
                        result += "**āš™ļø Parameters:**\n"
                        for param in practical_content['parameters'][:6]:
                            param_type = f" (`{param['type']}`)" if param.get('type') else ""
                            default_val = f" *Default: {param['default']}*" if param.get('default') else ""
                            result += f"• **{param['name']}**{param_type}: {param['description']}{default_val}\n"
                        result += "\n"
                    result += f"**šŸ”— Full Documentation:** {url_info['url']}\n\n"
                else:
                    result += f"āš ļø Could not fetch content. Visit directly: {url_info['url']}\n\n"
                result += "---\n\n"
            return result
        except Exception as e:
            logger.error(f"Error in search_documentation: {e}")
            return f"āŒ Error searching documentation: {str(e)}\n\nTry a simpler search term or check your internet connection."

    def get_model_info(self, model_name: str) -> str:
        """
        Fetches comprehensive information about a specific model from the Hugging Face Hub.
        Provides statistics like downloads and likes, a description, usage examples, and a quick-start code snippet.

        Args:
            model_name (str): The full identifier of the model on the Hub, such as 'bert-base-uncased' or 'meta-llama/Llama-2-7b-hf'.
        """
        try:
            model_name = model_name.strip()
            if not model_name:
                return "Please provide a model name."
            api_url = f"{self.api_url}/models/{model_name}"
            response = self.session.get(api_url, timeout=15)
            if response.status_code == 404:
                return f"āŒ Model '{model_name}' not found. Please check the model name."
            elif response.status_code != 200:
                return f"āŒ Error fetching model info (Status: {response.status_code})"
            model_data = response.json()
            result = f"# šŸ¤– Model: {model_name}\n\n"
            downloads = model_data.get('downloads', 0)
            likes = model_data.get('likes', 0)
            task = model_data.get('pipeline_tag', 'N/A')
            library = model_data.get('library_name', 'N/A')
            # `or 'N/A'` guards against explicit null timestamps in the API response
            created = (model_data.get('createdAt') or 'N/A')[:10]
            updated = (model_data.get('lastModified') or 'N/A')[:10]
            result += (
                f"**šŸ“Š Statistics:**\n"
                f"• **Downloads:** {downloads:,}\n"
                f"• **Likes:** {likes:,}\n"
                f"• **Task:** {task}\n"
                f"• **Library:** {library}\n"
                f"• **Created:** {created}\n"
                f"• **Updated:** {updated}\n\n"
            )
            if 'tags' in model_data and model_data['tags']:
                result += f"**šŸ·ļø Tags:** {', '.join(model_data['tags'][:10])}\n\n"
            model_url = f"{self.base_url}/{model_name}"
            page_content = self._fetch_with_retry(model_url)
            if page_content:
                soup = BeautifulSoup(page_content, 'html.parser')
                readme_content = soup.find('div', class_=re.compile(r'prose|readme|model-card'))
                if readme_content:
                    paragraphs = readme_content.find_all('p')[:3]
                    description_parts = []
                    for p in paragraphs:
                        text = p.get_text(strip=True)
                        if len(text) > 30 and not any(skip in text.lower() for skip in ['table of contents', 'toc']):
                            description_parts.append(text)
                    if description_parts:
                        description = ' '.join(description_parts)
                        result += f"**šŸ“ Description:**\n{description[:800]}{'...' if len(description) > 800 else ''}\n\n"
                code_examples = self._extract_code_examples(soup)
                if code_examples:
                    result += "**šŸ’» Usage Examples:**\n\n"
                    for i, code_block in enumerate(code_examples[:3], 1):
                        lang = code_block.get('language', 'python')
                        result += f"*Example {i}:*\n```{lang}\n{code_block['code']}\n```\n\n"
            if task and task != 'N/A':
                result += "**šŸš€ Quick Start Template:**\n"
                if library == 'transformers':
                    result += f"```python\nfrom transformers import pipeline\n\n# Load the model\nmodel = pipeline('{task}', model='{model_name}')\n\n# Use the model\n# result = model(your_input_here)\n# print(result)\n```\n\n"
                else:
                    result += f"```python\n# Load and use {model_name}\n# Refer to the documentation for specific usage\n```\n\n"
            if 'siblings' in model_data:
                files = [f['rfilename'] for f in model_data['siblings'][:10]]
                if files:
                    result += f"**šŸ“ Model Files:** {', '.join(files)}\n\n"
            result += f"**šŸ”— Model Page:** {model_url}\n"
            return result
        except requests.exceptions.RequestException as e:
            return f"āŒ Network error: {str(e)}"
        except Exception as e:
            logger.error(f"Error in get_model_info: {e}")
            return f"āŒ Error fetching model info: {str(e)}"

    def get_dataset_info(self, dataset_name: str) -> str:
        """
        Retrieves detailed information about a specific dataset from the Hugging Face Hub.
        Includes statistics, a description, and a quick-start code snippet showing how to load the dataset.

        Args:
            dataset_name (str): The full identifier of the dataset on the Hub, for example 'squad' or 'imdb'.
        """
        try:
            dataset_name = dataset_name.strip()
            if not dataset_name:
                return "Please provide a dataset name."
            api_url = f"{self.api_url}/datasets/{dataset_name}"
            response = self.session.get(api_url, timeout=15)
            if response.status_code == 404:
                return f"āŒ Dataset '{dataset_name}' not found. Please check the dataset name."
            elif response.status_code != 200:
                return f"āŒ Error fetching dataset info (Status: {response.status_code})"
            dataset_data = response.json()
            result = f"# šŸ“Š Dataset: {dataset_name}\n\n"
            downloads = dataset_data.get('downloads', 0)
            likes = dataset_data.get('likes', 0)
            created = (dataset_data.get('createdAt') or 'N/A')[:10]
            updated = (dataset_data.get('lastModified') or 'N/A')[:10]
            result += (
                f"**šŸ“ˆ Statistics:**\n"
                f"• **Downloads:** {downloads:,}\n"
                f"• **Likes:** {likes:,}\n"
                f"• **Created:** {created}\n"
                f"• **Updated:** {updated}\n\n"
            )
            if 'tags' in dataset_data and dataset_data['tags']:
                result += f"**šŸ·ļø Tags:** {', '.join(dataset_data['tags'][:10])}\n\n"
            dataset_url = f"{self.base_url}/datasets/{dataset_name}"
            page_content = self._fetch_with_retry(dataset_url)
            if page_content:
                soup = BeautifulSoup(page_content, 'html.parser')
                readme_content = soup.find('div', class_=re.compile(r'prose|readme|dataset-card'))
                if readme_content:
                    paragraphs = readme_content.find_all('p')[:3]
                    description_parts = []
                    for p in paragraphs:
                        text = p.get_text(strip=True)
                        if len(text) > 30:
                            description_parts.append(text)
                    if description_parts:
                        description = ' '.join(description_parts)
                        result += f"**šŸ“ Description:**\n{description[:800]}{'...' if len(description) > 800 else ''}\n\n"
                code_examples = self._extract_code_examples(soup)
                if code_examples:
                    result += "**šŸ’» Usage Examples:**\n\n"
                    for i, code_block in enumerate(code_examples[:3], 1):
                        lang = code_block.get('language', 'python')
                        result += f"*Example {i}:*\n```{lang}\n{code_block['code']}\n```\n\n"
            result += "**šŸš€ Quick Start Template:**\n"
            result += f"```python\nfrom datasets import load_dataset\n\n# Load the dataset\ndataset = load_dataset('{dataset_name}')\n\n# Explore the dataset\n# print(dataset)\n# print(f\"Dataset keys: {{list(dataset.keys())}}\")\n\n# Access first example\n# if 'train' in dataset:\n#     print(\"First example:\")\n#     print(dataset['train'][0])\n```\n\n"
            result += f"**šŸ”— Dataset Page:** {dataset_url}\n"
            return result
        except requests.exceptions.RequestException as e:
            return f"āŒ Network error: {str(e)}"
        except Exception as e:
            logger.error(f"Error in get_dataset_info: {e}")
            return f"āŒ Error fetching dataset info: {str(e)}"
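
    # Note: the info lookups above and the searches below go through the
    # public, unauthenticated Hub REST API (self.api_url). Private or gated
    # repositories will therefore not be found; if you need them, add an
    # `Authorization: Bearer <hf_token>` header to self.session.headers.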

    def search_models(self, task: str, limit: str = "5") -> str:
        """
        Searches the Hugging Face Hub for models based on a specified task or keyword and returns a list of top models.
        Each result includes statistics and a quick usage example.

        Args:
            task (str): The task to search for, such as 'text-classification', 'image-generation', or 'question-answering'.
            limit (str): The maximum number of models to return. Defaults to '5'.
        """
        try:
            task = task.strip()
            if not task:
                return "Please provide a search task or keyword."
            # Accept ints, floats, or numeric strings; fall back to 5
            try:
                limit = int(float(limit))
            except (TypeError, ValueError):
                limit = 5
            limit = min(max(limit, 1), 10)
            # Over-fetch so the popularity filter below still has enough candidates
            params = {'search': task, 'limit': limit * 3, 'sort': 'downloads', 'direction': -1}
            response = self.session.get(f"{self.api_url}/models", params=params, timeout=20)
            response.raise_for_status()
            models = response.json()
            if not models:
                return f"āŒ No models found for task: '{task}'. Try different keywords."
            filtered_models = []
            for model in models:
                if (model.get('downloads', 0) > 0 or model.get('likes', 0) > 0 or 'pipeline_tag' in model):
                    filtered_models.append(model)
                    if len(filtered_models) >= limit:
                        break
            if not filtered_models:
                filtered_models = models[:limit]
            result = f"# šŸ” Top {len(filtered_models)} Models for '{task}'\n\n"
            for i, model in enumerate(filtered_models, 1):
                model_id = model.get('id', 'Unknown')
                downloads = model.get('downloads', 0)
                likes = model.get('likes', 0)
                task_type = model.get('pipeline_tag', 'N/A')
                library = model.get('library_name', 'N/A')
                quality_score = ""
                if downloads > 10000:
                    quality_score = "⭐ Popular"
                elif downloads > 1000:
                    quality_score = "šŸ”„ Active"
                elif likes > 10:
                    quality_score = "šŸ‘ Liked"
                result += f"## {i}. {model_id} {quality_score}\n\n"
                result += f"**šŸ“Š Stats:**\n• **Downloads:** {downloads:,}\n• **Likes:** {likes}\n• **Task:** {task_type}\n• **Library:** {library}\n\n"
                if task_type and task_type != 'N/A':
                    result += "**šŸš€ Quick Usage:**\n"
                    if library == 'transformers':
                        result += f"```python\nfrom transformers import pipeline\n\n# Load model\nmodel = pipeline('{task_type}', model='{model_id}')\n\n# Use model\n# result = model(\"Your input here\")\n# print(result)\n```\n\n"
                    else:
                        result += f"```python\n# Load and use {model_id}\n# Check model page for specific usage instructions\n```\n\n"
                result += f"**šŸ”— Model Page:** {self.base_url}/{model_id}\n\n---\n\n"
            return result
        except requests.exceptions.RequestException as e:
            return f"āŒ Network error: {str(e)}"
        except Exception as e:
            logger.error(f"Error in search_models: {e}")
            return f"āŒ Error searching models: {str(e)}"

    def get_transformers_docs(self, topic: str) -> str:
        """
        Fetches detailed documentation specifically for the Hugging Face Transformers library on a given topic.
        This provides in-depth explanations, code examples, and parameter descriptions for core library components.

        Args:
            topic (str): The Transformers library topic to look up, such as 'pipeline', 'tokenizer', 'trainer', or 'generation'.
        """
        try:
            topic = topic.strip().lower()
            if not topic:
                return "Please provide a topic to search for."
            docs_url = "https://huggingface.co/docs/transformers"
            topic_map = {
                'pipeline': f"{docs_url}/main_classes/pipelines",
                'pipelines': f"{docs_url}/main_classes/pipelines",
                'tokenizer': f"{docs_url}/main_classes/tokenizer",
                'tokenizers': f"{docs_url}/main_classes/tokenizer",
                'trainer': f"{docs_url}/main_classes/trainer",
                'training': f"{docs_url}/training",
                'model': f"{docs_url}/main_classes/model",
                'models': f"{docs_url}/main_classes/model",
                'configuration': f"{docs_url}/main_classes/configuration",
                'config': f"{docs_url}/main_classes/configuration",
                'quicktour': f"{docs_url}/quicktour",
                'quick': f"{docs_url}/quicktour",
                'installation': f"{docs_url}/installation",
                'install': f"{docs_url}/installation",
                'tutorial': f"{docs_url}/tutorials",
                'tutorials': f"{docs_url}/tutorials",
                'generation': f"{docs_url}/main_classes/text_generation",
                'text_generation': f"{docs_url}/main_classes/text_generation",
                'preprocessing': f"{docs_url}/preprocessing",
                'preprocess': f"{docs_url}/preprocessing",
                'peft': f"{docs_url}/peft",
                'lora': f"{docs_url}/peft",
                'quantization': f"{docs_url}/main_classes/quantization",
                'optimization': f"{docs_url}/perf_train_gpu_one",
                'performance': f"{docs_url}/perf_train_gpu_one",
                'deployment': f"{docs_url}/deployment",
                'custom': f"{docs_url}/custom_models",
                'fine-tuning': f"{docs_url}/training",
                'finetuning': f"{docs_url}/training"
            }
            url = topic_map.get(topic)
            if not url:
                # Fuzzy fallback: accept partial matches like 'token' -> 'tokenizer'
                for key, value in topic_map.items():
                    if topic in key or key in topic:
                        url = value
                        topic = key
                        break
            if not url:
                url = f"{docs_url}/quicktour"
                topic = "quicktour"
            content = self._fetch_with_retry(url)
            if not content:
                return f"āŒ Could not fetch documentation for '{topic}'. Please try again or visit: {url}"
            soup = BeautifulSoup(content, 'html.parser')
            practical_content = self._extract_practical_content(soup, topic)
            result = f"# šŸ“š Transformers Documentation: {topic.replace('_', ' ').title()}\n\n"
            if practical_content['overview']:
                result += f"**šŸ“– Overview:**\n{practical_content['overview']}\n\n"
            if practical_content['installation']:
                result += f"**āš™ļø Installation:**\n{practical_content['installation']}\n\n"
            if practical_content['code_examples']:
                result += "**šŸ’» Code Examples:**\n\n"
                for i, code_block in enumerate(practical_content['code_examples'][:4], 1):
                    lang = code_block.get('language', 'python')
                    code_type = code_block.get('type', 'example')
                    result += f"### {code_type.title()} {i}:\n```{lang}\n{code_block['code']}\n```\n\n"
            if practical_content['usage_instructions']:
                result += "**šŸ› ļø Step-by-Step Usage:**\n"
                for i, instruction in enumerate(practical_content['usage_instructions'][:6], 1):
                    result += f"{i}. {instruction}\n"
                result += "\n"
            if practical_content['parameters']:
                result += "**āš™ļø Key Parameters:**\n"
                for param in practical_content['parameters'][:10]:
                    param_type = f" (`{param['type']}`)" if param.get('type') else ""
                    default_val = f" *Default: `{param['default']}`*" if param.get('default') else ""
                    result += f"• **`{param['name']}`**{param_type}: {param['description']}{default_val}\n"
                result += "\n"
            related_topics = [k for k in topic_map.keys() if k != topic][:5]
            if related_topics:
                result += f"**šŸ”— Related Topics:** {', '.join(related_topics)}\n\n"
            result += f"**šŸ“„ Full Documentation:** {url}\n"
            return result
        except Exception as e:
            logger.error(f"Error in get_transformers_docs: {e}")
            return f"āŒ Error fetching Transformers documentation: {str(e)}"

    def get_trending_models(self, limit: str = "10") -> str:
        """
        Fetches a list of the most downloaded models currently trending on the Hugging Face Hub.
        This is useful for discovering popular and widely-used models.

        Args:
            limit (str): The number of trending models to return. Defaults to '10'.
        """
        try:
            # Accept ints, floats, or numeric strings; fall back to 10
            try:
                limit = int(float(limit))
            except (TypeError, ValueError):
                limit = 10
            limit = min(max(limit, 1), 20)
            params = {'sort': 'downloads', 'direction': -1, 'limit': limit}
            response = self.session.get(f"{self.api_url}/models", params=params, timeout=20)
            response.raise_for_status()
            models = response.json()
            if not models:
                return "āŒ Could not fetch trending models."
            result = f"# šŸ”„ Trending Models (Top {len(models)})\n\n"
            for i, model in enumerate(models, 1):
                model_id = model.get('id', 'Unknown')
                downloads = model.get('downloads', 0)
                likes = model.get('likes', 0)
                task = model.get('pipeline_tag', 'N/A')
                if downloads > 1000000:
                    trend = "šŸš€ Mega Popular"
                elif downloads > 100000:
                    trend = "šŸ”„ Very Popular"
                elif downloads > 10000:
                    trend = "⭐ Popular"
                else:
                    trend = "šŸ“ˆ Trending"
                result += f"## {i}. {model_id} {trend}\n"
                result += f"• **Downloads:** {downloads:,} | **Likes:** {likes} | **Task:** {task}\n"
                result += f"• **Link:** {self.base_url}/{model_id}\n\n"
            return result
        except Exception as e:
            logger.error(f"Error in get_trending_models: {e}")
            return f"āŒ Error fetching trending models: {str(e)}"


# Initialize the API server
hf_api = HF_API()
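
# Optional smoke test for the tool methods (requires network access). These
# model/dataset names are just well-known examples, not requirements:
#
#   print(hf_api.get_model_info("bert-base-uncased")[:500])
#   print(hf_api.get_dataset_info("squad")[:500])
#   print(hf_api.search_documentation("pipeline", max_results=1)[:500])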

# --- Named Functions for Gradio UI ---
def clear_output():
    """Clears a Gradio output component."""
    return ""


def set_textbox_value(text):
    """Sets a Gradio Textbox to a specific value."""
    return text


# --- Doc Search Tab Functions ---
def run_doc_search(query, max_results):
    # gr.Number emits floats (e.g. 2.0), so coerce via float first
    try:
        max_results = int(float(max_results))
    except (TypeError, ValueError):
        max_results = 2
    return hf_api.search_documentation(query, max_results)


# --- Model Info Tab Functions ---
def run_model_info(model_name):
    return hf_api.get_model_info(model_name)


# --- Dataset Info Tab Functions ---
def run_dataset_info(dataset_name):
    return hf_api.get_dataset_info(dataset_name)


# --- Model Search Tab Functions ---
def run_model_search(task, limit):
    try:
        limit = int(float(limit))
    except (TypeError, ValueError):
        limit = 5
    return hf_api.search_models(task, limit)


# --- Transformers Docs Tab Functions ---
def run_transformers_docs(topic):
    return hf_api.get_transformers_docs(topic)


# --- Trending Models Tab Functions ---
def run_trending_models(limit):
    try:
        limit = int(float(limit))
    except (TypeError, ValueError):
        limit = 10
    return hf_api.get_trending_models(limit)
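
# The tabs below wire events to the named wrapper functions above rather than
# lambdas, so each endpoint keeps a stable, readable name; with
# mcp_server=True these endpoints are what tool clients discover. A minimal
# sketch of pointing an MCP client at this app once it is running -- the
# /gradio_api/mcp/sse path and port are assumptions based on recent Gradio
# defaults, so confirm them against your installed version:
#
#   {
#     "mcpServers": {
#       "hf-info-server": {"url": "http://127.0.0.1:7860/gradio_api/mcp/sse"}
#     }
#   }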

# --- Create Gradio Interface ---
with gr.Blocks(
    title="šŸ¤— Hugging Face Information Server",
    theme=gr.themes.Soft(),
    css="""
    .gradio-container { font-family: 'Inter', sans-serif; }
    .main-header {
        text-align: center;
        padding: 20px;
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        border-radius: 10px;
        margin-bottom: 20px;
    }
    """
) as demo:
    # Header
    with gr.Row():
        gr.HTML("""
            <div class="main-header">
                <h1>šŸ¤— Hugging Face Information Server</h1>
                <p>Get comprehensive documentation with real code examples, usage instructions, and practical content</p>
            </div>
        """)

    with gr.Tab("šŸ“š Documentation Search", elem_id="docs"):
        gr.Markdown("### Search for documentation with **comprehensive code examples** and **step-by-step instructions**")
        with gr.Row():
            with gr.Column(scale=3):
                doc_query = gr.Textbox(label="šŸ” Search Query", placeholder="e.g., tokenizer, pipeline, fine-tuning, peft, trainer, quantization")
            with gr.Column(scale=1):
                doc_max_results = gr.Number(label="Max Results", value=2, minimum=1, maximum=5)
        doc_output = gr.Textbox(label="šŸ“– Documentation with Examples", lines=25, max_lines=30)
        with gr.Row():
            doc_btn = gr.Button("šŸ” Search Documentation", variant="primary", size="lg")
            doc_clear = gr.Button("šŸ—‘ļø Clear", variant="secondary")
        gr.Markdown("**Quick Examples:**")
        with gr.Row():
            gr.Button("Pipeline", size="sm").click(functools.partial(set_textbox_value, "pipeline"), outputs=doc_query)
            gr.Button("Tokenizer", size="sm").click(functools.partial(set_textbox_value, "tokenizer"), outputs=doc_query)
            gr.Button("Fine-tuning", size="sm").click(functools.partial(set_textbox_value, "fine-tuning"), outputs=doc_query)
            gr.Button("PEFT", size="sm").click(functools.partial(set_textbox_value, "peft"), outputs=doc_query)
        doc_btn.click(run_doc_search, inputs=[doc_query, doc_max_results], outputs=doc_output)
        doc_clear.click(clear_output, outputs=doc_output)

    with gr.Tab("šŸ¤– Model Information", elem_id="models"):
        gr.Markdown("### Get detailed model information with **usage examples** and **code snippets**")
        model_name = gr.Textbox(label="šŸ¤– Model Name", placeholder="e.g., bert-base-uncased, gpt2, microsoft/DialoGPT-medium, meta-llama/Llama-2-7b-hf")
        model_output = gr.Textbox(label="šŸ“Š Model Information + Usage Examples", lines=25, max_lines=30)
        with gr.Row():
            model_btn = gr.Button("šŸ“Š Get Model Info", variant="primary", size="lg")
            model_clear = gr.Button("šŸ—‘ļø Clear", variant="secondary")
        gr.Markdown("**Popular Models:**")
        with gr.Row():
            gr.Button("BERT", size="sm").click(functools.partial(set_textbox_value, "bert-base-uncased"), outputs=model_name)
            gr.Button("GPT-2", size="sm").click(functools.partial(set_textbox_value, "gpt2"), outputs=model_name)
            gr.Button("T5", size="sm").click(functools.partial(set_textbox_value, "t5-small"), outputs=model_name)
            gr.Button("DistilBERT", size="sm").click(functools.partial(set_textbox_value, "distilbert-base-uncased"), outputs=model_name)
        model_btn.click(run_model_info, inputs=model_name, outputs=model_output)
        model_clear.click(clear_output, outputs=model_output)

    with gr.Tab("šŸ“Š Dataset Information", elem_id="datasets"):
        gr.Markdown("### Get dataset information with **loading examples** and **usage code**")
        dataset_name = gr.Textbox(label="šŸ“Š Dataset Name", placeholder="e.g., squad, imdb, glue, common_voice, wikitext")
        dataset_output = gr.Textbox(label="šŸ“ˆ Dataset Information + Usage Examples", lines=25, max_lines=30)
        with gr.Row():
            dataset_btn = gr.Button("šŸ“ˆ Get Dataset Info", variant="primary", size="lg")
            dataset_clear = gr.Button("šŸ—‘ļø Clear", variant="secondary")
        gr.Markdown("**Popular Datasets:**")
        with gr.Row():
            gr.Button("SQuAD", size="sm").click(functools.partial(set_textbox_value, "squad"), outputs=dataset_name)
            gr.Button("IMDB", size="sm").click(functools.partial(set_textbox_value, "imdb"), outputs=dataset_name)
            gr.Button("GLUE", size="sm").click(functools.partial(set_textbox_value, "glue"), outputs=dataset_name)
            gr.Button("Common Voice", size="sm").click(functools.partial(set_textbox_value, "common_voice"), outputs=dataset_name)
        dataset_btn.click(run_dataset_info, inputs=dataset_name, outputs=dataset_output)
        dataset_clear.click(clear_output, outputs=dataset_output)

    with gr.Tab("šŸ” Model Search", elem_id="search"):
        gr.Markdown("### Search models with **quick usage examples** and **quality indicators**")
        with gr.Row():
            with gr.Column(scale=3):
                search_task = gr.Textbox(label="šŸ” Task or Keyword", placeholder="e.g., text-classification, image-generation, question-answering, sentiment-analysis")
            with gr.Column(scale=1):
                search_limit = gr.Number(label="Max Results", value=5, minimum=1, maximum=10)
        search_output = gr.Textbox(label="šŸš€ Models with Usage Examples", lines=25, max_lines=30)
        with gr.Row():
            search_btn = gr.Button("šŸš€ Search Models", variant="primary", size="lg")
            search_clear = gr.Button("šŸ—‘ļø Clear", variant="secondary")
        gr.Markdown("**Popular Tasks:**")
        with gr.Row():
            gr.Button("Text Classification", size="sm").click(functools.partial(set_textbox_value, "text-classification"), outputs=search_task)
            gr.Button("Question Answering", size="sm").click(functools.partial(set_textbox_value, "question-answering"), outputs=search_task)
            gr.Button("Text Generation", size="sm").click(functools.partial(set_textbox_value, "text-generation"), outputs=search_task)
            gr.Button("Image Classification", size="sm").click(functools.partial(set_textbox_value, "image-classification"), outputs=search_task)
        search_btn.click(run_model_search, inputs=[search_task, search_limit], outputs=search_output)
        search_clear.click(clear_output, outputs=search_output)

    with gr.Tab("⚔ Transformers Docs", elem_id="transformers"):
        gr.Markdown("### Get comprehensive Transformers documentation with **detailed examples** and **parameters**")
        transformers_topic = gr.Textbox(label="šŸ“š Topic", placeholder="e.g., pipeline, tokenizer, trainer, model, peft, generation, quantization")
        transformers_output = gr.Textbox(label="šŸ“– Comprehensive Documentation", lines=25, max_lines=30)
        with gr.Row():
            transformers_btn = gr.Button("šŸ“– Get Documentation", variant="primary", size="lg")
            transformers_clear = gr.Button("šŸ—‘ļø Clear", variant="secondary")
        gr.Markdown("**Core Topics:**")
        with gr.Row():
            gr.Button("Pipeline", size="sm").click(functools.partial(set_textbox_value, "pipeline"), outputs=transformers_topic)
            gr.Button("Tokenizer", size="sm").click(functools.partial(set_textbox_value, "tokenizer"), outputs=transformers_topic)
            gr.Button("Trainer", size="sm").click(functools.partial(set_textbox_value, "trainer"), outputs=transformers_topic)
            gr.Button("Generation", size="sm").click(functools.partial(set_textbox_value, "generation"), outputs=transformers_topic)
        transformers_btn.click(run_transformers_docs, inputs=transformers_topic, outputs=transformers_output)
        transformers_clear.click(clear_output, outputs=transformers_output)

    with gr.Tab("šŸ”„ Trending Models", elem_id="trending"):
        gr.Markdown("### Discover the most popular and trending models")
        trending_limit = gr.Number(label="Number of Models", value=10, minimum=1, maximum=20)
        trending_output = gr.Textbox(label="šŸ”„ Trending Models", lines=20, max_lines=25)
        with gr.Row():
            trending_btn = gr.Button("šŸ”„ Get Trending Models", variant="primary", size="lg")
            trending_clear = gr.Button("šŸ—‘ļø Clear", variant="secondary")
        trending_btn.click(run_trending_models, inputs=trending_limit, outputs=trending_output)
        trending_clear.click(clear_output, outputs=trending_output)

    # Footer
    with gr.Row():
        gr.HTML("""
            <div style="text-align: center; padding: 16px;">
                <h3>šŸ’” Features</h3>
                <p>āœ… Real code examples • āœ… Step-by-step instructions • āœ… Parameter documentation • āœ… Quality indicators</p>
                <p>Get practical, actionable information directly from the source.</p>
                <p>šŸ“– Read the Guide on Hugging Face Spaces</p>
            </div>
        """)

if __name__ == "__main__":
    print("šŸš€ Starting Hugging Face Information Server...")
    print("šŸ“Š Features: Code examples, usage instructions, comprehensive documentation")
    demo.launch(mcp_server=True)