stcoats committed on
Commit
a0a9509
·
1 Parent(s): a47efdc

Add application file

Browse files
Files changed (1) hide show
  1. app.py +26 -24
app.py CHANGED
@@ -37,6 +37,9 @@ except Exception as e:
37
  st.error(f"DuckDB connection failed: {e}")
38
  st.stop()
39
 
 
 
 
40
  # Search
41
  query = st.text_input("Search text (case-insensitive)", "").strip()
42
 
@@ -44,7 +47,7 @@ if query:
44
  sql = """
45
  SELECT id, channel, video_id, video_title, speaker, start_time, end_time, text, pos_tags, upload_date, audio
46
  FROM data
47
- WHERE LOWER(text) LIKE '%' || LOWER(?) || '%'
48
  LIMIT 100
49
  """
50
  df = con.execute(sql, [query]).df()
@@ -76,27 +79,26 @@ else:
76
  return None
77
 
78
  df["audio_file"] = df["audio"].apply(render_audio_cell)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
- # Build an interactive sortable table
81
- st.markdown("### Results Table (Sortable)")
82
- for i, row in df.iterrows():
83
- with st.expander(f"? {row['speaker']} | {row['text'][:60]}..."):
84
- col1, col2 = st.columns([2, 3])
85
-
86
- with col1:
87
- st.write(f"**ID:** {row['id']}")
88
- st.write(f"**Channel:** {row['channel']}")
89
- st.write(f"**Video ID:** {row['video_id']}")
90
- st.write(f"**Video Title:** {row['video_title']}")
91
- st.write(f"**Speaker:** {row['speaker']}")
92
- st.write(f"**Start Time:** {row['start_time']}")
93
- st.write(f"**End Time:** {row['end_time']}")
94
- st.write(f"**Upload Date:** {row['upload_date']}")
95
- st.write(f"**POS Tags:** {row['pos_tags']}")
96
-
97
- with col2:
98
- st.markdown(f"**Text:** {row['text']}")
99
- if row['audio_file']:
100
- st.audio(row['audio_file'], format="audio/mp3")
101
- else:
102
- st.warning("Audio not available or invalid format.")
 
37
  st.error(f"DuckDB connection failed: {e}")
38
  st.stop()
39
 
40
+ # Enable full-text search index on first run (one-time setup if not exists)
41
+ con.execute("PRAGMA create_fts_index('data', 'text')")
42
+
43
  # Search
44
  query = st.text_input("Search text (case-insensitive)", "").strip()
45
 
 
47
  sql = """
48
  SELECT id, channel, video_id, video_title, speaker, start_time, end_time, text, pos_tags, upload_date, audio
49
  FROM data
50
+ WHERE text % ?
51
  LIMIT 100
52
  """
53
  df = con.execute(sql, [query]).df()
 
79
  return None
80
 
81
  df["audio_file"] = df["audio"].apply(render_audio_cell)
82
+ df_display = df.drop(columns=["audio"]).copy()
83
+
84
+ # Add HTML audio tag column
85
+ def audio_html(path):
86
+ if path:
87
+ return f'<audio controls preload="none" style="height:20px;"> <source src="file://{path}" type="audio/mpeg"> </audio>'
88
+ return ""
89
+
90
+ df_display["Audio"] = df["audio_file"].apply(audio_html)
91
+
92
+ # Reorder columns
93
+ column_order = ["id", "channel", "video_id", "video_title", "speaker", "start_time", "end_time", "upload_date", "text", "pos_tags", "Audio"]
94
+ df_display = df_display[column_order]
95
+
96
+ st.markdown("### Full Table View (Sortable)")
97
+ st.write("Note: Audio is embedded using HTML tags; not all browsers allow playback from local temp paths.")
98
+ st.dataframe(df_display.drop(columns=["Audio"]))
99
 
100
+ st.markdown("### Audio Previews")
101
+ for i, row in df_display.iterrows():
102
+ if row["Audio"]:
103
+ st.markdown(f"**{row['speaker']} | {row['text'][:80]}**", unsafe_allow_html=True)
104
+ st.markdown(row["Audio"], unsafe_allow_html=True)