import math

import pandas as pd
import streamlit as st


class ModelFinder:
    """Streamlit page that filters a table of models and shows their readmes.

    The wrapped DataFrame must provide the columns ``readme``, ``modelId``,
    ``author``, ``model_name`` and ``pipeline_tag``.
    """

    def __init__(self, models_df: pd.DataFrame) -> None:
        """Store the model table and create the query widgets.

        Args:
            models_df: table of models to search.
        """
        self.models_df = models_df
        self.n_per_page = 10
        # NOTE: the widgets are created at construction time, i.e. before
        # run() calls setup_page(), so they are emitted ahead of the title.
        self.setup_inputs()

    def setup_page(self) -> None:
        """Render the page title and the short usage notes."""
        st.title("Huggingface model explorer")
        # Use the instance's table rather than a module-level global so the
        # class also works when the DataFrame is not named ``models_df``.
        st.text(f"search {len(self.models_df)} models by name or readme")
        st.text(
            "note that there are many more models but here we only show those with readme"
        )

    def setup_inputs(self) -> None:
        """Create the four text query inputs and reserve a column for paging."""
        col1, col2, col3, col4, col5 = st.columns(5)
        self.query_input = col1.text_input("model name query", value="")
        self.author_query_input = col2.text_input("author query", value="")
        self.id_query_input = col3.text_input("modelId query", value="")
        self.readme_query_input = col4.text_input("readme query", value="")
        # The page selector is created later, once the result count is known.
        self.page = col5

    def get_selected_models_df(
        self, query: str, readme_query: str, id_query: str, author_query: str
    ) -> pd.DataFrame:
        """Return the rows whose columns contain the given queries.

        Matching is a case-insensitive literal substring test; all non-empty
        queries must match. An empty query places no constraint on its column.

        Args:
            query: text to look for in ``model_name``.
            readme_query: text to look for in ``readme``.
            id_query: text to look for in ``modelId``.
            author_query: text to look for in ``author``.

        Returns:
            The matching subset of ``self.models_df``.
        """
        column_queries = {
            "readme": readme_query,
            "modelId": id_query,
            "author": author_query,
            "model_name": query,
        }
        mask = pd.Series(True, index=self.models_df.index, dtype=bool)
        for column, text in column_queries.items():
            if not text:
                continue
            # Lowercase both sides so uppercase input still matches, and use
            # regex=False so characters such as "(" or "+" typed by the user
            # cannot raise a regex compilation error.
            mask &= (
                self.models_df[column]
                .str.lower()
                .str.contains(text.lower(), regex=False, na=False)
            )
        return self.models_df[mask]

    def show_paged_selected_model_info(self, selected_models_df: pd.DataFrame) -> None:
        """Render one page of results, one expander per model.

        Args:
            selected_models_df: the filtered models to display.
        """
        n_pages = math.ceil(len(selected_models_df) / self.n_per_page)
        # Pages are 0-indexed, so the last valid page is n_pages - 1; clamp to
        # 0 so an empty result still yields a valid selector range.
        last_page = max(n_pages - 1, 0)
        page = self.page.number_input("page", min_value=0, max_value=last_page)
        start = page * self.n_per_page
        selected_models_df_subset = selected_models_df.iloc[
            start : start + self.n_per_page
        ]
        st.write(f"found {len(selected_models_df)} models")
        for model_name, tag, readme in selected_models_df_subset[
            ["modelId", "pipeline_tag", "readme"]
        ].itertuples(index=False):
            model_url = f"http://huggingface.co/{model_name}"
            with st.expander(f"[{model_name}]({model_url}) ({tag})"):
                st.write(readme)

    def run(self) -> None:
        """Render the page header, filter the models and show the results."""
        self.setup_page()
        selected_models_df = self.get_selected_models_df(
            self.query_input,
            self.readme_query_input,
            self.id_query_input,
            self.author_query_input,
        )
        self.show_paged_selected_model_info(selected_models_df)


def prepare_models_df(path: str) -> pd.DataFrame:
    """Load the models table and derive ``author`` and ``model_name`` columns.

    Rows without a readme are dropped. ``modelId`` is split on ``/``: an id
    with exactly two parts becomes ``(author, model_name)``; any other id gets
    an empty author and its first part as the model name.

    Args:
        path: location of the parquet file to read.

    Returns:
        The loaded DataFrame with the two derived columns added.
    """
    df = pd.read_parquet(path).dropna(subset=["readme"])
    # Build the columns as two plain lists instead of unpacking zip(*...),
    # which raises ValueError when no rows are left.
    authors = []
    model_names = []
    for parts in df["modelId"].str.split("/").to_list():
        if len(parts) == 2:
            author, model_name = parts
        else:
            author, model_name = "", parts[0]
        authors.append(author)
        model_names.append(model_name)
    df["author"] = authors
    df["model_name"] = model_names
    return df


model_path = "models_with_readmes.parquet"
models_df = prepare_models_df(model_path)
app = ModelFinder(models_df)
app.run()