stcoats committed on
Commit
d5e4e4a
·
1 Parent(s): 9302019

Add application file

Browse files
Files changed (1) hide show
  1. app.py +28 -34
app.py CHANGED
@@ -4,7 +4,6 @@ import streamlit as st
4
  from huggingface_hub import hf_hub_download
5
  import pandas as pd
6
  import tempfile
7
- import re
8
 
9
  HF_REPO_ID = "stcoats/temp-duckdb-upload"
10
  HF_FILENAME = "ycsep.duckdb"
@@ -37,20 +36,37 @@ except Exception as e:
37
  st.error(f"DuckDB connection failed: {e}")
38
  st.stop()
39
 
40
- # Search
41
  query = st.text_input("Search text (case-insensitive)", "").strip()
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  if query:
44
  sql = """
45
- SELECT id, channel, video_id, video_title, speaker, start_time, end_time, text, pos_tags, upload_date, audio
46
  FROM data
47
- WHERE LOWER(text) LIKE LOWER(?)
48
  LIMIT 100
49
  """
50
- df = con.execute(sql, [f"%{query}%"]).df()
51
  else:
52
  df = con.execute("""
53
- SELECT id, channel, video_id, video_title, speaker, start_time, end_time, text, pos_tags, upload_date, audio
54
  FROM data
55
  LIMIT 100
56
  """).df()
@@ -60,33 +76,11 @@ st.markdown(f"### Showing {len(df)} results")
60
  if len(df) == 0:
61
  st.warning("No matches found.")
62
  else:
63
- def render_audio_cell(audio_bytes):
64
- try:
65
- if isinstance(audio_bytes, (bytes, bytearray, memoryview)):
66
- data = bytes(audio_bytes)
67
- elif isinstance(audio_bytes, list):
68
- data = bytes(audio_bytes)
69
- else:
70
- return None
71
- with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
72
- tmp.write(data)
73
- tmp.flush()
74
- return tmp.name
75
- except Exception:
76
- return None
77
-
78
- df["audio_file"] = df["audio"].apply(render_audio_cell)
79
 
80
- # Display table (sortable)
81
- df_display = df[["id", "channel", "video_id", "video_title", "speaker", "start_time", "end_time", "upload_date", "text", "pos_tags"]].copy()
82
- st.dataframe(df_display, use_container_width=True)
83
-
84
- # Audio previews column (aligned separately)
85
- st.markdown("### Audio Previews")
86
- for i, row in df.iterrows():
87
- audio_path = row["audio_file"]
88
- if audio_path:
89
- st.audio(audio_path, format="audio/mp3")
90
- else:
91
- st.warning("Missing or unreadable audio.")
92
 
 
4
  from huggingface_hub import hf_hub_download
5
  import pandas as pd
6
  import tempfile
 
7
 
8
  HF_REPO_ID = "stcoats/temp-duckdb-upload"
9
  HF_FILENAME = "ycsep.duckdb"
 
36
  st.error(f"DuckDB connection failed: {e}")
37
  st.stop()
38
 
39
+ # Search input
40
  query = st.text_input("Search text (case-insensitive)", "").strip()
41
 
42
+ # Render audio inline in a column
43
+ def render_audio_cell(audio_bytes):
44
+ try:
45
+ if isinstance(audio_bytes, (bytes, bytearray, memoryview)):
46
+ data = bytes(audio_bytes)
47
+ elif isinstance(audio_bytes, list):
48
+ data = bytes(audio_bytes)
49
+ else:
50
+ return ""
51
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
52
+ tmp.write(data)
53
+ tmp.flush()
54
+ return f'<audio controls style="height:20px; width:100%"> <source src="file://{tmp.name}" type="audio/mpeg"></audio>'
55
+ except Exception:
56
+ return ""
57
+
58
+ # Fetch data from DuckDB
59
  if query:
60
  sql = """
61
+ SELECT id, channel, video_id, video_title, speaker, start_time, end_time, upload_date, text, pos_tags, audio
62
  FROM data
63
+ WHERE LOWER(text) LIKE ?
64
  LIMIT 100
65
  """
66
+ df = con.execute(sql, [f"%{query.lower()}%"]).df()
67
  else:
68
  df = con.execute("""
69
+ SELECT id, channel, video_id, video_title, speaker, start_time, end_time, upload_date, text, pos_tags, audio
70
  FROM data
71
  LIMIT 100
72
  """).df()
 
76
  if len(df) == 0:
77
  st.warning("No matches found.")
78
  else:
79
+ df["Audio"] = df["audio"].apply(render_audio_cell)
80
+ df_display = df[["id", "channel", "video_id", "video_title", "speaker", "start_time", "end_time", "upload_date", "text", "pos_tags", "Audio"]].copy()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
+ # Render table with inline audio column (HTML support)
83
+ st.markdown("### Results Table (Sortable with Audio Column)")
84
+ st.write("(Scroll right to view audio controls)")
85
+ st.write(df_display.to_html(escape=False, index=False), unsafe_allow_html=True)
 
 
 
 
 
 
 
 
86