Spaces:
Sleeping
Sleeping
import gradio as gr | |
import pandas as pd | |
from model.search.vector import RuriDuckDBSearchClient | |
from model.data.notion_db import fetch_sakurap_corpus | |
def search(search_client: RuriDuckDBSearchClient): | |
def _search(query: str) -> pd.DataFrame: | |
results = search_client.search_top_n(query) | |
result = results[0] | |
result["rank"] = result["rank"] + 1 | |
result = result[["rank", "title", "content", "score"]] | |
result.columns = ["rank", "title", "rap lyric", "distance"] | |
return result | |
return _search | |
if __name__ == "__main__": | |
# Load dataset | |
sakurap_df = fetch_sakurap_corpus("./data/sakurap_corpus.csv") | |
# Initialize search client | |
search_client = RuriDuckDBSearchClient.from_dataframe(sakurap_df, "content") | |
with gr.Blocks() as search_interface: | |
gr.Markdown(""" | |
# π Cobalt DuckDB π¦ | |
Demo app for vector search using [Ruri](https://huggingface.co/cl-nagoya/ruri-large) and DuckDB. | |
You can search ARASHI's songs with rap lyrics by Sho Sakurai. | |
""") | |
# Input query | |
search_query = gr.Textbox(label="Sakurap Words", submit_btn=True) | |
gr.Markdown(""" | |
## Search Results | |
""") | |
# Search result | |
result_table = gr.DataFrame(label="Result", | |
column_widths=["5%", "20%", "65%", "5%", "5%"], | |
wrap=True, | |
datatype=["str", "str", "markdown", "str", "str"], | |
interactive=False) | |
# Event handler | |
search_query.submit(fn=search(search_client), inputs=search_query, outputs=result_table) | |
# App launch | |
search_interface.queue() | |
search_interface.launch(server_name="0.0.0.0") | |