stcoats committed on
Commit
a5de086
·
1 Parent(s): c173a94

Add application file

Browse files
Files changed (1) hide show
  1. app.py +27 -31
app.py CHANGED
@@ -4,7 +4,7 @@ import streamlit as st
4
  from huggingface_hub import hf_hub_download
5
  import pandas as pd
6
  import tempfile
7
- import re
8
 
9
  HF_REPO_ID = "stcoats/temp-duckdb-upload"
10
  HF_FILENAME = "ycsep.duckdb"
@@ -15,14 +15,14 @@ st.title("YCSEP Audio Dataset Viewer")
15
 
16
  # Download database if missing
17
  if not os.path.exists(LOCAL_PATH):
18
- st.write("Downloading from HF Hub...")
19
- hf_hub_download(
20
- repo_id=HF_REPO_ID,
21
- repo_type="dataset",
22
- filename=HF_FILENAME,
23
- local_dir=".",
24
- local_dir_use_symlinks=False
25
- )
26
  st.success("Download complete.")
27
 
28
  # Connect (only once)
@@ -61,7 +61,7 @@ st.markdown(f"### Showing {len(df)} results")
61
  if len(df) == 0:
62
  st.warning("No matches found.")
63
  else:
64
- def render_audio_tag(audio_bytes, index):
65
  try:
66
  if isinstance(audio_bytes, (bytes, bytearray, memoryview)):
67
  data = bytes(audio_bytes)
@@ -69,46 +69,42 @@ else:
69
  data = bytes(audio_bytes)
70
  else:
71
  return ""
72
- with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
73
- tmp.write(data)
74
- tmp.flush()
75
- return f'<audio controls preload="metadata" style="height:20px;width:100px;"><source src="file://{tmp.name}" type="audio/mpeg"></audio>'
76
  except Exception:
77
  return ""
78
 
79
- df["audio_player"] = [render_audio_tag(b, i) for i, b in enumerate(df["audio"])]
80
- df_display = df.drop(columns=["audio"])
81
 
82
  from streamlit.components.v1 import html
83
 
84
- def display_html_table(df):
85
  table_html = """
86
  <table border='1' style='border-collapse:collapse;width:100%;font-size:13px;'>
87
  <thead>
88
  <tr>
89
- <th style='width:8em;'>id</th>
90
- <th>channel</th>
91
- <th>video_id</th>
92
- <th>speaker</th>
93
- <th>start_time</th>
94
- <th>end_time</th>
95
- <th>upload_date</th>
96
- <th>text</th>
97
- <th>pos_tags</th>
98
- <th>audio</th>
99
  </tr>
100
  </thead>
101
  <tbody>
102
  """
103
  for _, row in df.iterrows():
104
  table_html += "<tr>"
105
- for col in ["id", "channel", "video_id", "speaker", "start_time", "end_time", "upload_date", "text", "pos_tags"]:
106
  table_html += f"<td>{row[col]}</td>"
107
- table_html += f"<td>{row['audio_player']}</td>"
108
  table_html += "</tr>"
109
  table_html += "</tbody></table>"
110
  return table_html
111
 
112
  st.markdown("### Results Table (Sortable with Audio Column)")
113
- html(display_html_table(df_display), height=800, scrolling=True)
114
-
 
4
  from huggingface_hub import hf_hub_download
5
  import pandas as pd
6
  import tempfile
7
+ import base64
8
 
9
  HF_REPO_ID = "stcoats/temp-duckdb-upload"
10
  HF_FILENAME = "ycsep.duckdb"
 
15
 
16
  # Download database if missing
17
  if not os.path.exists(LOCAL_PATH):
18
+ with st.spinner("Downloading from HF Hub..."):
19
+ hf_hub_download(
20
+ repo_id=HF_REPO_ID,
21
+ repo_type="dataset",
22
+ filename=HF_FILENAME,
23
+ local_dir=".",
24
+ local_dir_use_symlinks=False
25
+ )
26
  st.success("Download complete.")
27
 
28
  # Connect (only once)
 
61
  if len(df) == 0:
62
  st.warning("No matches found.")
63
  else:
64
+ def get_audio_html(audio_bytes):
65
  try:
66
  if isinstance(audio_bytes, (bytes, bytearray, memoryview)):
67
  data = bytes(audio_bytes)
 
69
  data = bytes(audio_bytes)
70
  else:
71
  return ""
72
+ b64 = base64.b64encode(data).decode("utf-8")
73
+ return f'<audio controls preload="metadata" style="height:20px;width:120px;"><source src="data:audio/mp3;base64,{b64}" type="audio/mpeg"></audio>'
 
 
74
  except Exception:
75
  return ""
76
 
77
+ df["Audio"] = df["audio"].apply(get_audio_html)
78
+ df.drop(columns=["audio"], inplace=True)
79
 
80
  from streamlit.components.v1 import html
81
 
82
+ def display_table_with_audio(df):
83
  table_html = """
84
  <table border='1' style='border-collapse:collapse;width:100%;font-size:13px;'>
85
  <thead>
86
  <tr>
87
+ <th style='width:6em;'>id</th>
88
+ <th style='width:6em;'>channel</th>
89
+ <th style='width:6em;'>video_id</th>
90
+ <th style='width:6em;'>speaker</th>
91
+ <th style='width:6em;'>start_time</th>
92
+ <th style='width:6em;'>end_time</th>
93
+ <th style='width:6em;'>upload_date</th>
94
+ <th style='width:25em;'>text</th>
95
+ <th style='width:10em;'>pos_tags</th>
96
+ <th style='width:10em;'>Audio</th>
97
  </tr>
98
  </thead>
99
  <tbody>
100
  """
101
  for _, row in df.iterrows():
102
  table_html += "<tr>"
103
+ for col in ["id", "channel", "video_id", "speaker", "start_time", "end_time", "upload_date", "text", "pos_tags", "Audio"]:
104
  table_html += f"<td>{row[col]}</td>"
 
105
  table_html += "</tr>"
106
  table_html += "</tbody></table>"
107
  return table_html
108
 
109
  st.markdown("### Results Table (Sortable with Audio Column)")
110
+ html(display_table_with_audio(df), height=900, scrolling=True)