from __future__ import annotations

import re

import numpy as np
import pandas as pd
import requests
from huggingface_hub.hf_api import SpaceInfo

url = 'https://docs.google.com/spreadsheets/d/1XH7Jo3LXXfbSJ14z-QrSIQs21ArJMiV6_hMSAwY85PU/edit#gid=0'
# Rewrite the sheet's "edit" link into its CSV export endpoint.
csv_url = url.replace('/edit#gid=', '/export?format=csv&gid=')


class ModelList:
    """Searchable, filterable table of models loaded from ``csv_url``.

    The table is read once at construction time.  Each row gets a
    pre-rendered text fragment (column ``html_table_content``) so that
    ``render`` only has to filter rows and join the fragments.
    """

    def __init__(self):
        # Reads the CSV export over the network on construction.
        self.table = pd.read_csv(csv_url)
        self._preprocess_table()

        self.table_header = ''' Model Name Type Year Paper Code on Github Weights on 🤗 Other Weights '''

    def _preprocess_table(self) -> None:
        """Add the ``name_lowercase`` and ``html_table_content`` columns."""
        self.table['name_lowercase'] = self.table.name.str.lower()

        def label_if_text(cell: object, label: str) -> str:
            # Empty CSV cells are read as NaN (a float), so "is a str"
            # doubles as the "cell is filled in" check.
            return label if isinstance(cell, str) else ''

        def text_or_blank(cell: object) -> str:
            return cell if isinstance(cell, str) else ''

        rendered_rows = []
        for row in self.table.itertuples():
            paper = label_if_text(row.paper, 'Paper')
            github = label_if_text(row.github, 'GitHub')
            hf_model = label_if_text(row.hub, 'Hub Model')
            other_model = label_if_text(row.other, 'Other Weights')
            data_type = text_or_blank(row.data_type)
            # NOTE(review): a year is shown only when the cell is a string;
            # if the sheet's year column is parsed as a number it renders
            # blank -- confirm the column type against the sheet.
            year = text_or_blank(row.year)
            rendered_rows.append(
                f''' {row.name} {data_type} {year} {paper} {github} {hf_model} {other_model} ''')

        self.table['html_table_content'] = rendered_rows

    @staticmethod
    def _name_mask(names: pd.Series, query: str) -> pd.Series:
        """Return a boolean mask of the entries in *names* matching *query*.

        *query* is tried as a regular expression first (the historical
        behaviour).  If it is not a valid pattern -- e.g. the user typed
        ``(`` or ``c++`` -- it is matched as a literal substring instead of
        raising.  Missing names never match.
        """
        try:
            return names.str.contains(query, na=False)
        except re.error:
            return names.str.contains(query, regex=False, na=False)

    def render(
        self,
        search_query: str,
        case_sensitive: bool,
        filter_names: list[str],
        data_types: list[str],
        years: list[str],
    ) -> tuple[int, str]:
        """Filter the table and render the surviving rows.

        Args:
            search_query: Text to look for in the model name; an empty
                string disables the name search.
            case_sensitive: Match against ``name`` when true, otherwise
                against ``name_lowercase`` with the query lower-cased.
            filter_names: Any of ``'Paper'``, ``'Code'``,
                ``'Model Weights'``; each one present requires the
                corresponding link(s) to be filled in.
            data_types: Accepted values of the ``data_type`` column.
            years: Accepted values of the ``year`` column.

        Returns:
            ``(number of matching rows, rendered table text)``.
        """
        df = self.table
        if search_query:
            if case_sensitive:
                mask = self._name_mask(df.name, search_query)
            else:
                mask = self._name_mask(df.name_lowercase,
                                       search_query.lower())
            df = df[mask]

        has_paper = 'Paper' in filter_names
        has_github = 'Code' in filter_names
        has_model = 'Model Weights' in filter_names
        df = self.filter_table(df, has_paper, has_github, has_model,
                               data_types, years)
        return len(df), self.to_html(df, self.table_header)

    @staticmethod
    def filter_table(
        df: pd.DataFrame,
        has_paper: bool,
        has_github: bool,
        has_model: bool,
        data_types: list[str],
        years: list[str],
    ) -> pd.DataFrame:
        """Return the rows of *df* that satisfy every requested filter.

        Args:
            df: Table with ``paper``, ``github``, ``hub``, ``other``,
                ``data_type`` and ``year`` columns.
            has_paper: Keep only rows whose ``paper`` cell is filled in.
            has_github: Keep only rows whose ``github`` cell is filled in.
            has_model: Keep only rows with ``hub`` or ``other`` filled in.
            data_types: Accepted ``data_type`` values (always applied).
            years: Accepted ``year`` values (always applied).
        """
        if has_paper:
            df = df[df.paper.notna()]
        if has_github:
            df = df[df.github.notna()]
        if has_model:
            # Either weight source is enough.
            df = df[df.hub.notna() | df.other.notna()]
        # The membership filters are unconditional: an empty selection
        # therefore yields an empty table.
        df = df[df.data_type.isin(set(data_types))]
        df = df[df.year.isin(set(years))]
        return df

    @staticmethod
    def to_html(df: pd.DataFrame, table_header: str) -> str:
        """Join *table_header* with the pre-rendered rows of *df*."""
        table_data = ''.join(df.html_table_content)
        return f' {table_header} {table_data}\n'