# tspace / app.py
# Author: stcoats
# Commit af343b5: "Add application file" (1.64 kB)
import os
import duckdb
import streamlit as st
from huggingface_hub import hf_hub_download
# Hugging Face Hub location of the database. The repo is fetched with
# repo_type="dataset" further down, so this must name a dataset repository.
HF_REPO_ID = "stcoats/temp-duckdb-upload" # Replace with your actual dataset repo if needed
# Name of the DuckDB file inside the Hub repository.
HF_FILENAME = "ycsep.duckdb"
# Local path that is checked for an existing copy and later opened by DuckDB.
LOCAL_PATH = "./ycsep.duckdb"
# Page heading shown at the top of the Streamlit app.
st.title("YCSEP Audio Dataset Viewer")
# Fetch the database from the Hugging Face Hub only when no local copy exists;
# later runs reuse the file already on disk.
if not os.path.exists(LOCAL_PATH):
    st.write("Database not found locally. Downloading from HF Hub...")
    try:
        downloaded_path = hf_hub_download(
            repo_id=HF_REPO_ID,
            repo_type="dataset",
            filename=HF_FILENAME,
            # Download into the directory LOCAL_PATH points at (instead of a
            # hard-coded ".") so the download directory follows LOCAL_PATH.
            local_dir=os.path.dirname(LOCAL_PATH) or ".",
        )
    except Exception as e:
        # Nothing below can work without the database file, so report and halt.
        st.error(f"Download failed: {e}")
        st.stop()
    else:
        st.success(f"Downloaded: {downloaded_path}")
else:
    st.write("Found local DuckDB file.")
# Open the database read-only: the app only runs SELECT queries against it.
try:
    con = duckdb.connect(LOCAL_PATH, read_only=True)
except Exception as e:
    # Without a connection the query below cannot run, so report and halt.
    st.error(f"Failed to connect to DuckDB: {e}")
    st.stop()
else:
    # Kept outside the try so only the connect call itself is guarded.
    st.success("Connected to DuckDB.")
# Show a small preview. The LIMIT clause caps the result at 10 rows, so
# fetching them all at once does not pull the whole table into memory.
st.write("Querying first 10 rows...")
try:
    rows = con.execute("SELECT speaker, text, audio FROM data LIMIT 10").fetchall()
    if not rows:
        # Make an empty table visible instead of silently rendering nothing.
        st.info("The query returned no rows.")
    for speaker, text, audio in rows:
        st.markdown(f"**Speaker:** {speaker}")
        st.markdown(f"**Text:** {text}")
        # Only string values that look like URLs are handed to the audio
        # player; any other value is reported as unavailable.
        if isinstance(audio, str) and audio.startswith("http"):
            st.audio(audio, format="audio/mp3")
        else:
            st.warning("Audio not available")
        st.markdown("---")
except Exception as e:
    st.error(f"DuckDB query failed: {e}")
finally:
    # The script opens a new connection on every run, so release this one
    # once the preview has been rendered (or has failed) to avoid leaking
    # database handles.
    con.close()