"""Streamlit viewer for the YCSEP audio dataset stored in a DuckDB file.

When the script runs it:

1. checks whether the DuckDB file exists at ``LOCAL_PATH`` and, if not,
   downloads it from the Hugging Face Hub dataset repo ``HF_REPO_ID``;
2. opens the file read-only with DuckDB;
3. fetches the first ten rows of the ``data`` table and renders the speaker,
   the text and, when the ``audio`` value is a string starting with
   ``"http"``, an audio player for each row.

Any failure while downloading or connecting is reported in the page and the
script run is halted with ``st.stop()``.
"""

import os

import duckdb
import streamlit as st
from huggingface_hub import hf_hub_download

HF_REPO_ID = "stcoats/temp-duckdb-upload"
HF_FILENAME = "ycsep.duckdb"
# The download below targets local_dir="." with filename HF_FILENAME, so this
# path is expected to be where the downloaded file ends up.
LOCAL_PATH = "./ycsep.duckdb"

# Columns are unpacked positionally in the rendering loop; keep the order.
PREVIEW_QUERY = "SELECT speaker, text, audio FROM data LIMIT 10"

st.title("YCSEP Audio Dataset Viewer")

# --- Step 1: make sure the database file is available locally ---------------
if os.path.exists(LOCAL_PATH):
    st.write("Found local DuckDB file.")
else:
    st.write("Database not found locally. Downloading from HF Hub...")
    try:
        downloaded_path = hf_hub_download(
            repo_id=HF_REPO_ID,
            repo_type="dataset",
            filename=HF_FILENAME,
            local_dir=".",
        )
    except Exception as e:
        # Top-level boundary: the hub client can fail in many ways (network,
        # missing repo/file, auth), so report whatever happened and halt.
        st.error(f"Download failed: {e}")
        st.stop()
    else:
        st.success(f"Downloaded: {downloaded_path}")

# --- Step 2: open the database ----------------------------------------------
try:
    # Read-only: the app never writes to the dataset.
    con = duckdb.connect(LOCAL_PATH, read_only=True)
except Exception as e:
    st.error(f"Failed to connect to DuckDB: {e}")
    st.stop()
else:
    st.success("Connected to DuckDB.")

# --- Step 3: fetch the preview rows -----------------------------------------
st.write("Querying first 10 rows...")

try:
    # Only the database call is guarded, so the error message below cannot be
    # triggered by a rendering problem.
    rows = con.execute(PREVIEW_QUERY).fetchall()
except Exception as e:
    st.error(f"DuckDB query failed: {e}")
    rows = []
finally:
    # fetchall() has already materialised the rows (or the query failed), so
    # the connection is no longer needed; close it instead of leaking it.
    con.close()

# --- Step 4: render each row ------------------------------------------------
for speaker, text, audio in rows:
    st.markdown(f"**Speaker:** {speaker}")
    st.markdown(f"**Text:** {text}")
    # Only string values that start with "http" are handed to the player;
    # anything else (None, bytes, other strings) is reported as unavailable.
    if isinstance(audio, str) and audio.startswith("http"):
        st.audio(audio, format="audio/mp3")
    else:
        st.warning("Audio not available")
    st.markdown("---")