Spaces:

stcoats
/

tspace

Sleeping

tspace / app.py

stcoats

Add application file

af343b5 about 2 months ago

1.64 kB

	import os
	import duckdb
	import streamlit as st
	from huggingface_hub import hf_hub_download

	# Hugging Face Hub repo that hosts the DuckDB file. It is passed as
	# `repo_id` (together with repo_type="dataset") when the file has to be
	# downloaded.
	HF_REPO_ID = "stcoats/temp-duckdb-upload" # Replace with your actual dataset repo if needed
	# Name of the database file inside that repo (the `filename` argument of
	# the download call).
	HF_FILENAME = "ycsep.duckdb"
	# Path that is checked for an existing copy and later opened by DuckDB.
	# The download is made with local_dir=".", so this is expected to equal
	# "./" + HF_FILENAME; keep the two in sync if either one is renamed.
	LOCAL_PATH = "./ycsep.duckdb"

	st.title("YCSEP Audio Dataset Viewer")

	# Make sure the DuckDB file is available locally. It is fetched from the
	# Hugging Face Hub only when LOCAL_PATH does not exist yet; an existing
	# copy is used as-is.
	if not os.path.exists(LOCAL_PATH):
	    st.write("Database not found locally. Downloading from HF Hub...")
	    try:
	        # Only the download itself is guarded, so the error message below
	        # can never be triggered by an unrelated failure.
	        downloaded_path = hf_hub_download(
	            repo_id=HF_REPO_ID,
	            repo_type="dataset",
	            filename=HF_FILENAME,
	            local_dir=".",  # Saves as ./ycsep.duckdb automatically
	        )
	    except Exception as e:
	        # Nothing further down can work without the database file:
	        # report the problem in the UI and halt this script run.
	        st.error(f"Download failed: {e}")
	        st.stop()
	    else:
	        st.success(f"Downloaded: {downloaded_path}")
	else:
	    st.write("Found local DuckDB file.")

	# Open the database read-only; this script only runs a SELECT against it.
	try:
	    con = duckdb.connect(LOCAL_PATH, read_only=True)
	except Exception as e:
	    # Without a connection there is nothing to display: report the
	    # problem in the UI and halt this script run.
	    st.error(f"Failed to connect to DuckDB: {e}")
	    st.stop()
	st.success("Connected to DuckDB.")

	# LIMIT keeps the fetch to one small page instead of the whole table.
	st.write("Querying first 10 rows...")

	try:
	    # Only the database work is guarded, so "DuckDB query failed" is
	    # reported for query errors and not for rendering problems.
	    rows = con.execute("SELECT speaker, text, audio FROM data LIMIT 10").fetchall()
	except Exception as e:
	    st.error(f"DuckDB query failed: {e}")
	    rows = []  # nothing to render
	finally:
	    # fetchall() has already materialised the rows, so the connection
	    # can be released before rendering instead of being left open.
	    con.close()

	# Render each row: speaker, transcript text, then an audio player.
	for speaker, text, audio in rows:
	    st.markdown(f"Speaker: {speaker}")
	    st.markdown(f"Text: {text}")
	    # Only string values starting with "http" are handed to the player;
	    # anything else (NULL, other types, other strings) gets a warning.
	    if isinstance(audio, str) and audio.startswith("http"):
	        st.audio(audio, format="audio/mp3")
	    else:
	        st.warning("Audio not available")
	    st.markdown("---")