stcoats committed on
Commit
a671301
·
1 Parent(s): d5e4e4a

Add application file

Browse files
Files changed (1) hide show
  1. app.py +26 -26
app.py CHANGED
@@ -4,6 +4,7 @@ import streamlit as st
4
  from huggingface_hub import hf_hub_download
5
  import pandas as pd
6
  import tempfile
 
7
 
8
  HF_REPO_ID = "stcoats/temp-duckdb-upload"
9
  HF_FILENAME = "ycsep.duckdb"
@@ -36,37 +37,21 @@ except Exception as e:
36
  st.error(f"DuckDB connection failed: {e}")
37
  st.stop()
38
 
39
- # Search input
40
- query = st.text_input("Search text (case-insensitive)", "").strip()
41
 
42
- # Render audio inline in a column
43
- def render_audio_cell(audio_bytes):
44
- try:
45
- if isinstance(audio_bytes, (bytes, bytearray, memoryview)):
46
- data = bytes(audio_bytes)
47
- elif isinstance(audio_bytes, list):
48
- data = bytes(audio_bytes)
49
- else:
50
- return ""
51
- with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
52
- tmp.write(data)
53
- tmp.flush()
54
- return f'<audio controls style="height:20px; width:100%"> <source src="file://{tmp.name}" type="audio/mpeg"></audio>'
55
- except Exception:
56
- return ""
57
-
58
- # Fetch data from DuckDB
59
  if query:
60
  sql = """
61
- SELECT id, channel, video_id, video_title, speaker, start_time, end_time, upload_date, text, pos_tags, audio
62
  FROM data
63
  WHERE LOWER(text) LIKE ?
64
  LIMIT 100
65
  """
66
- df = con.execute(sql, [f"%{query.lower()}%"]).df()
67
  else:
68
  df = con.execute("""
69
- SELECT id, channel, video_id, video_title, speaker, start_time, end_time, upload_date, text, pos_tags, audio
70
  FROM data
71
  LIMIT 100
72
  """).df()
@@ -76,11 +61,26 @@ st.markdown(f"### Showing {len(df)} results")
76
  if len(df) == 0:
77
  st.warning("No matches found.")
78
  else:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  df["Audio"] = df["audio"].apply(render_audio_cell)
80
- df_display = df[["id", "channel", "video_id", "video_title", "speaker", "start_time", "end_time", "upload_date", "text", "pos_tags", "Audio"]].copy()
81
 
82
- # Render table with inline audio column (HTML support)
83
  st.markdown("### Results Table (Sortable with Audio Column)")
84
- st.write("(Scroll right to view audio controls)")
85
- st.write(df_display.to_html(escape=False, index=False), unsafe_allow_html=True)
86
 
 
4
  from huggingface_hub import hf_hub_download
5
  import pandas as pd
6
  import tempfile
7
+ import re
8
 
9
  HF_REPO_ID = "stcoats/temp-duckdb-upload"
10
  HF_FILENAME = "ycsep.duckdb"
 
37
  st.error(f"DuckDB connection failed: {e}")
38
  st.stop()
39
 
40
+ # Search
41
+ query = st.text_input("Search text (case-insensitive)", "").strip().lower()
42
 
43
+ # Perform search
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  if query:
45
  sql = """
46
+ SELECT id, channel, video_id, speaker, start_time, end_time, upload_date, text, pos_tags, audio
47
  FROM data
48
  WHERE LOWER(text) LIKE ?
49
  LIMIT 100
50
  """
51
+ df = con.execute(sql, [f"%{query}%"]).df()
52
  else:
53
  df = con.execute("""
54
+ SELECT id, channel, video_id, speaker, start_time, end_time, upload_date, text, pos_tags, audio
55
  FROM data
56
  LIMIT 100
57
  """).df()
 
61
  if len(df) == 0:
62
  st.warning("No matches found.")
63
  else:
64
def render_audio_cell(audio_bytes):
    """Return an inline HTML ``<audio>`` player for one row's audio blob.

    The clip is embedded directly in the markup as a base64 ``data:`` URI.
    The previous implementation wrote every clip to a
    ``NamedTemporaryFile(delete=False)`` and pointed the player at a
    ``file://`` path: those files were never removed, and a browser cannot
    load a path that only exists on the server's filesystem.

    Args:
        audio_bytes: The raw audio value for a row. Accepted types are
            ``bytes``, ``bytearray``, ``memoryview`` and ``list`` (of ints
            in ``range(256)``).

    Returns:
        An ``<audio controls>`` HTML snippet, or ``""`` when the value has
        an unsupported type, cannot be converted to bytes, or is empty.
    """
    # Function-scope import keeps this block self-contained.
    import base64

    if not isinstance(audio_bytes, (bytes, bytearray, memoryview, list)):
        return ""

    try:
        data = bytes(audio_bytes)
    except (TypeError, ValueError):
        # A list holding non-integers or values outside 0..255 is not audio.
        return ""

    if not data:
        # Nothing to play; an empty player would only be visual noise.
        return ""

    encoded = base64.b64encode(data).decode("ascii")
    return (
        '<audio controls style="height:20px;"> '
        f'<source src="data:audio/mpeg;base64,{encoded}" type="audio/mpeg"></audio>'
    )
78
+
79
  df["Audio"] = df["audio"].apply(render_audio_cell)
80
# Keep only the columns shown to the user. The generated "Audio" HTML column
# stands in for the raw "audio" blob column, which is left out of the list.
df_display = df[["id", "channel", "video_id", "speaker", "start_time", "end_time", "upload_date", "text", "pos_tags", "Audio"]].copy()

st.markdown("### Results Table (Sortable with Audio Column)")
st.markdown("(Scroll right to view audio controls)")
# The column list above never contains "audio", so the earlier
# df_display.drop(columns=["audio"]) raised KeyError on every non-empty
# result set. The frame is already free of the raw blob; show it as is.
st.dataframe(df_display)
86