stcoats committed on
Commit
0659f43
·
1 Parent(s): 65c9ca0

Add application file

Browse files
Files changed (1) hide show
  1. app.py +20 -60
app.py CHANGED
@@ -2,77 +2,37 @@ import os
2
  import duckdb
3
  import streamlit as st
4
  from huggingface_hub import hf_hub_download
5
- import pandas as pd
6
- import tempfile
7
 
8
  HF_REPO_ID = "stcoats/temp-duckdb-upload"
9
  HF_FILENAME = "ycsep.duckdb"
10
  LOCAL_PATH = "./ycsep.duckdb"
11
 
12
- st.set_page_config(layout="wide")
13
- st.title("YCSEP Audio Dataset Viewer")
14
 
15
- # Download database if missing
16
  if not os.path.exists(LOCAL_PATH):
17
- st.write("Downloading from HF Hub...")
18
  hf_hub_download(
19
  repo_id=HF_REPO_ID,
20
  repo_type="dataset",
21
  filename=HF_FILENAME,
22
  local_dir="."
23
  )
24
- st.success("Download complete.")
25
-
26
- # Connect
27
- try:
28
- con = duckdb.connect(LOCAL_PATH, read_only=True)
29
- st.success("Connected to DuckDB.")
30
- except Exception as e:
31
- st.error(f"DuckDB connection failed: {e}")
32
- st.stop()
33
-
34
- # Search
35
- query = st.text_input("Search text or speaker", "")
36
- query = query.strip().lower()
37
-
38
- if query:
39
- sql = """
40
- SELECT speaker, text, audio
41
- FROM data
42
- WHERE LOWER(CAST(speaker AS VARCHAR)) LIKE ? OR LOWER(CAST(text AS VARCHAR)) LIKE ?
43
- LIMIT 100
44
- """
45
- df = con.execute(sql, [f"%{query}%", f"%{query}%"]).df()
46
- else:
47
- df = con.execute("SELECT speaker, text, audio FROM data LIMIT 100").df()
48
-
49
- st.markdown(f"### Showing {len(df)} results")
50
-
51
- if len(df) == 0:
52
- st.warning("No matches found.")
53
-
54
- # Show table with inline audio players
55
- for i, row in df.iterrows():
56
- col1, col2, col3 = st.columns([2, 5, 3])
57
- col1.markdown(f"**{row['speaker']}**")
58
- col2.markdown(row['text'])
59
-
60
- audio_data = row["audio"]
61
- try:
62
- if isinstance(audio_data, (bytes, bytearray, memoryview)):
63
- audio_bytes = bytes(audio_data)
64
- elif isinstance(audio_data, list): # DuckDB sometimes gives list[int]
65
- audio_bytes = bytes(audio_data)
66
- else:
67
- audio_bytes = None
68
-
69
- if audio_bytes:
70
- with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmpfile:
71
- tmpfile.write(audio_bytes)
72
- tmpfile.flush()
73
- col3.audio(tmpfile.name, format="audio/mp3")
74
- else:
75
- col3.warning("Audio missing or invalid format.")
76
- except Exception as e:
77
- col3.error(f"Audio error: {e}")
78
 
 
2
  import duckdb
3
  import streamlit as st
4
  from huggingface_hub import hf_hub_download
 
 
5
 
6
  HF_REPO_ID = "stcoats/temp-duckdb-upload"
7
  HF_FILENAME = "ycsep.duckdb"
8
  LOCAL_PATH = "./ycsep.duckdb"
9
 
10
+ st.title("YCSEP Audio Debug")
 
11
 
 
12
  if not os.path.exists(LOCAL_PATH):
13
+ st.write("Downloading DB...")
14
  hf_hub_download(
15
  repo_id=HF_REPO_ID,
16
  repo_type="dataset",
17
  filename=HF_FILENAME,
18
  local_dir="."
19
  )
20
+ st.success("Downloaded.")
21
+
22
+ con = duckdb.connect(LOCAL_PATH, read_only=True)
23
+
24
+ st.markdown("### Raw Preview")
25
+ df = con.execute("SELECT speaker, text, audio FROM data LIMIT 5").fetchall()
26
+
27
+ for row in df:
28
+ speaker, text, audio = row
29
+ st.write(f"Speaker: {speaker}")
30
+ st.write(f"Text: {text}")
31
+ st.write(f"AUDIO TYPE: {type(audio)}")
32
+ st.write(f"AUDIO LENGTH: {len(audio) if hasattr(audio, '__len__') else 'n/a'}")
33
+ if isinstance(audio, (bytes, bytearray, memoryview)):
34
+ st.audio(audio, format="audio/mp3")
35
+ else:
36
+ st.warning("Audio not valid binary")
37
+ st.markdown("---")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38