stcoats committed on
Commit
a47efdc
·
1 Parent(s): 5fde344

Add application file

Browse files
Files changed (1) hide show
  1. app.py +43 -42
app.py CHANGED
@@ -44,10 +44,10 @@ if query:
44
  sql = """
45
  SELECT id, channel, video_id, video_title, speaker, start_time, end_time, text, pos_tags, upload_date, audio
46
  FROM data
47
- WHERE LOWER(text) LIKE LOWER(?)
48
  LIMIT 100
49
  """
50
- df = con.execute(sql, [f"%{query}%"]).df()
51
  else:
52
  df = con.execute("""
53
  SELECT id, channel, video_id, video_title, speaker, start_time, end_time, text, pos_tags, upload_date, audio
@@ -59,43 +59,44 @@ st.markdown(f"### Showing {len(df)} results")
59
 
60
  if len(df) == 0:
61
  st.warning("No matches found.")
62
-
63
- # Show table with inline audio players
64
- for i, row in df.iterrows():
65
- col1, col2, col3 = st.columns([3, 5, 2])
66
-
67
- col1.markdown(f"**ID:** {row['id']}")
68
- col1.markdown(f"**Channel:** {row['channel']}")
69
- col1.markdown(f"**Video ID:** {row['video_id']}")
70
- col1.markdown(f"**Video Title:** {row['video_title']}")
71
- col1.markdown(f"**Speaker:** {row['speaker']}")
72
- col1.markdown(f"**Start Time:** {row['start_time']}")
73
- col1.markdown(f"**End Time:** {row['end_time']}")
74
- col1.markdown(f"**Upload Date:** {row['upload_date']}")
75
-
76
- highlighted_text = row['text']
77
- if query:
78
- highlighted_text = re.sub(f'({re.escape(query)})', r'<mark>\1</mark>', highlighted_text, flags=re.IGNORECASE)
79
-
80
- col2.markdown(f"**Text:** {highlighted_text}", unsafe_allow_html=True)
81
- col2.markdown(f"**POS tags:** {row['pos_tags']}")
82
-
83
- audio_data = row["audio"]
84
- try:
85
- if isinstance(audio_data, (bytes, bytearray, memoryview)):
86
- audio_bytes = bytes(audio_data)
87
- elif isinstance(audio_data, list): # DuckDB sometimes gives list[int]
88
- audio_bytes = bytes(audio_data)
89
- else:
90
- audio_bytes = None
91
-
92
- if audio_bytes:
93
- with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmpfile:
94
- tmpfile.write(audio_bytes)
95
- tmpfile.flush()
96
- col3.audio(tmpfile.name, format="audio/mp3")
97
- else:
98
- col3.warning("Audio missing or invalid format.")
99
- except Exception as e:
100
- col3.error(f"Audio error: {e}")
101
-
 
 
44
  sql = """
45
  SELECT id, channel, video_id, video_title, speaker, start_time, end_time, text, pos_tags, upload_date, audio
46
  FROM data
47
+ WHERE LOWER(text) LIKE '%' || LOWER(?) || '%'
48
  LIMIT 100
49
  """
50
+ df = con.execute(sql, [query]).df()
51
  else:
52
  df = con.execute("""
53
  SELECT id, channel, video_id, video_title, speaker, start_time, end_time, text, pos_tags, upload_date, audio
 
59
 
60
  if len(df) == 0:
61
  st.warning("No matches found.")
62
+ else:
63
+ def render_audio_cell(audio_bytes):
64
+ try:
65
+ if isinstance(audio_bytes, (bytes, bytearray, memoryview)):
66
+ data = bytes(audio_bytes)
67
+ elif isinstance(audio_bytes, list):
68
+ data = bytes(audio_bytes)
69
+ else:
70
+ return None
71
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
72
+ tmp.write(data)
73
+ tmp.flush()
74
+ return tmp.name
75
+ except Exception:
76
+ return None
77
+
78
+ df["audio_file"] = df["audio"].apply(render_audio_cell)
79
+
80
+ # Build an interactive sortable table
81
+ st.markdown("### Results Table (Sortable)")
82
+ for i, row in df.iterrows():
83
+ with st.expander(f"? {row['speaker']} | {row['text'][:60]}..."):
84
+ col1, col2 = st.columns([2, 3])
85
+
86
+ with col1:
87
+ st.write(f"**ID:** {row['id']}")
88
+ st.write(f"**Channel:** {row['channel']}")
89
+ st.write(f"**Video ID:** {row['video_id']}")
90
+ st.write(f"**Video Title:** {row['video_title']}")
91
+ st.write(f"**Speaker:** {row['speaker']}")
92
+ st.write(f"**Start Time:** {row['start_time']}")
93
+ st.write(f"**End Time:** {row['end_time']}")
94
+ st.write(f"**Upload Date:** {row['upload_date']}")
95
+ st.write(f"**POS Tags:** {row['pos_tags']}")
96
+
97
+ with col2:
98
+ st.markdown(f"**Text:** {row['text']}")
99
+ if row['audio_file']:
100
+ st.audio(row['audio_file'], format="audio/mp3")
101
+ else:
102
+ st.warning("Audio not available or invalid format.")