stcoats committed on
Commit
65c9ca0
·
1 Parent(s): 416c906

Add application file

Browse files
Files changed (1) hide show
  1. app.py +41 -24
app.py CHANGED
@@ -5,17 +5,16 @@ from huggingface_hub import hf_hub_download
5
  import pandas as pd
6
  import tempfile
7
 
8
- # --- Config ---
9
- HF_REPO_ID = "stcoats/temp-duckdb-upload" # Change if needed
10
  HF_FILENAME = "ycsep.duckdb"
11
  LOCAL_PATH = "./ycsep.duckdb"
12
 
13
  st.set_page_config(layout="wide")
14
  st.title("YCSEP Audio Dataset Viewer")
15
 
16
- # --- Download database if not present ---
17
  if not os.path.exists(LOCAL_PATH):
18
- st.write("Downloading ycsep.duckdb from HF Hub...")
19
  hf_hub_download(
20
  repo_id=HF_REPO_ID,
21
  repo_type="dataset",
@@ -24,38 +23,56 @@ if not os.path.exists(LOCAL_PATH):
24
  )
25
  st.success("Download complete.")
26
 
27
- # --- Connect to DuckDB ---
28
- con = duckdb.connect(LOCAL_PATH, read_only=True)
 
 
 
 
 
29
 
30
- # --- Search input ---
31
- query = st.text_input("Search text or speaker (case-insensitive)", "")
 
32
 
33
- # --- Execute query ---
34
- if query.strip():
35
- search_term = f"%{query.strip().lower()}%"
36
  sql = """
37
  SELECT speaker, text, audio
38
  FROM data
39
- WHERE LOWER(speaker) LIKE ? OR LOWER(text) LIKE ?
40
  LIMIT 100
41
  """
42
- df = con.execute(sql, [search_term, search_term]).df()
43
  else:
44
  df = con.execute("SELECT speaker, text, audio FROM data LIMIT 100").df()
45
 
46
- # --- Show results ---
47
- st.markdown("### Search Results")
48
- for idx, row in df.iterrows():
 
 
 
 
49
  col1, col2, col3 = st.columns([2, 5, 3])
50
  col1.markdown(f"**{row['speaker']}**")
51
  col2.markdown(row['text'])
52
 
53
- audio_data = row['audio']
54
- if isinstance(audio_data, (bytes, bytearray)):
55
- with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
56
- tmp.write(audio_data)
57
- tmp.flush()
58
- col3.audio(tmp.name, format="audio/mp3")
59
- else:
60
- col3.warning("Audio not available or invalid format.")
 
 
 
 
 
 
 
 
 
 
61
 
 
5
  import pandas as pd
6
  import tempfile
7
 
8
+ HF_REPO_ID = "stcoats/temp-duckdb-upload"
 
9
  HF_FILENAME = "ycsep.duckdb"
10
  LOCAL_PATH = "./ycsep.duckdb"
11
 
12
  st.set_page_config(layout="wide")
13
  st.title("YCSEP Audio Dataset Viewer")
14
 
15
+ # Download database if missing
16
  if not os.path.exists(LOCAL_PATH):
17
+ st.write("Downloading from HF Hub...")
18
  hf_hub_download(
19
  repo_id=HF_REPO_ID,
20
  repo_type="dataset",
 
23
  )
24
  st.success("Download complete.")
25
 
26
+ # Connect
27
+ try:
28
+ con = duckdb.connect(LOCAL_PATH, read_only=True)
29
+ st.success("Connected to DuckDB.")
30
+ except Exception as e:
31
+ st.error(f"DuckDB connection failed: {e}")
32
+ st.stop()
33
 
34
+ # Search
35
+ query = st.text_input("Search text or speaker", "")
36
+ query = query.strip().lower()
37
 
38
+ if query:
 
 
39
  sql = """
40
  SELECT speaker, text, audio
41
  FROM data
42
+ WHERE LOWER(CAST(speaker AS VARCHAR)) LIKE ? OR LOWER(CAST(text AS VARCHAR)) LIKE ?
43
  LIMIT 100
44
  """
45
+ df = con.execute(sql, [f"%{query}%", f"%{query}%"]).df()
46
  else:
47
  df = con.execute("SELECT speaker, text, audio FROM data LIMIT 100").df()
48
 
49
+ st.markdown(f"### Showing {len(df)} results")
50
+
51
+ if len(df) == 0:
52
+ st.warning("No matches found.")
53
+
54
+ # Show table with inline audio players
55
+ for i, row in df.iterrows():
56
  col1, col2, col3 = st.columns([2, 5, 3])
57
  col1.markdown(f"**{row['speaker']}**")
58
  col2.markdown(row['text'])
59
 
60
+ audio_data = row["audio"]
61
+ try:
62
+ if isinstance(audio_data, (bytes, bytearray, memoryview)):
63
+ audio_bytes = bytes(audio_data)
64
+ elif isinstance(audio_data, list): # DuckDB sometimes gives list[int]
65
+ audio_bytes = bytes(audio_data)
66
+ else:
67
+ audio_bytes = None
68
+
69
+ if audio_bytes:
70
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmpfile:
71
+ tmpfile.write(audio_bytes)
72
+ tmpfile.flush()
73
+ col3.audio(tmpfile.name, format="audio/mp3")
74
+ else:
75
+ col3.warning("Audio missing or invalid format.")
76
+ except Exception as e:
77
+ col3.error(f"Audio error: {e}")
78