stcoats committed on
Commit
756e034
·
1 Parent(s): e798575

Initial FastAPI + DuckDB frontend app

Browse files
.huggingface.yaml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ sdk: fastapi
2
+ app_file: app/main.py
3
+
app/duckdb_utils.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from huggingface_hub import hf_hub_download
2
+ import duckdb
3
+ import os
4
+
5
+ HF_REPO_ID = "stcoats/temp-duckdb-upload"
6
+ HF_FILENAME = "ycsep.duckdb"
7
+ LOCAL_PATH = "./ycsep.duckdb"
8
+
9
def get_connection():
    """Return a read-only DuckDB connection to the local database file.

    If nothing exists at ``LOCAL_PATH`` yet, the file ``HF_FILENAME`` is
    first downloaded from the Hugging Face dataset repository
    ``HF_REPO_ID`` into the current working directory.

    Returns:
        The connection object produced by ``duckdb.connect`` opened with
        ``read_only=True``.
    """
    database_missing = not os.path.exists(LOCAL_PATH)
    if database_missing:
        # Same download arguments as before, gathered in one mapping.
        download_options = {
            "repo_id": HF_REPO_ID,
            "repo_type": "dataset",
            "filename": HF_FILENAME,
            "local_dir": ".",
            "local_dir_use_symlinks": False,
        }
        hf_hub_download(**download_options)

    connection = duckdb.connect(LOCAL_PATH, read_only=True)
    return connection
19
+
app/main.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, Query, HTTPException
2
+ from fastapi.responses import StreamingResponse
3
+ from fastapi.middleware.cors import CORSMiddleware
4
+ from app.duckdb_utils import get_connection
5
+ import pandas as pd
6
+ import io
7
+
8
+ app = FastAPI()
9
+
10
+ app.add_middleware(
11
+ CORSMiddleware,
12
+ allow_origins=["*"],
13
+ allow_methods=["*"],
14
+ allow_headers=["*"],
15
+ )
16
+
17
+ con = get_connection()
18
+
19
@app.get("/search")
def search(text: str = Query("")):
    """Return up to 100 rows of ``data`` whose ``text`` column contains *text*.

    Matching uses ``ILIKE``, so it is case-insensitive. The search term is
    supplied to DuckDB as a bound parameter instead of being spliced into
    the SQL string, so quotes or other SQL syntax in the term cannot change
    the statement. ``%`` and ``_`` inside the term are still interpreted as
    ILIKE wildcards, exactly as they were when the term was interpolated.

    Args:
        text: Substring to look for; the empty default matches every
            non-NULL ``text`` value.

    Returns:
        A list of dicts, one per matching row, keyed by column name.
    """
    # Wrap the term in wildcards here; the SQL only sees a single placeholder.
    pattern = f"%{text}%"
    df = con.execute(
        """
        SELECT id, channel, video_id, speaker, start_time, end_time, upload_date, text, pos_tags
        FROM data
        WHERE text ILIKE ?
        LIMIT 100
        """,
        [pattern],
    ).df()
    return df.to_dict(orient="records")
31
+
32
@app.get("/audio/{id}")
def get_audio(id: int):
    """Stream the ``audio`` blob of the ``data`` row with the given id.

    Args:
        id: Value of the ``id`` column to look up. The name shadows the
            builtin, but it is kept because it is the route's path
            parameter name.

    Returns:
        A ``StreamingResponse`` over the stored bytes with media type
        ``audio/mpeg``.

    Raises:
        HTTPException: 404 when no row has this id, or when the row exists
            but its ``audio`` column is NULL.
    """
    row = con.execute("SELECT audio FROM data WHERE id = ?", [id]).fetchone()
    # fetchone() yields None when nothing matched. A matched row with a NULL
    # blob has nothing to stream either, and io.BytesIO(None) would raise a
    # TypeError, so both cases are reported as 404.
    if not row or row[0] is None:
        raise HTTPException(status_code=404, detail="Audio not found")
    audio_bytes = row[0]
    return StreamingResponse(io.BytesIO(audio_bytes), media_type="audio/mpeg")
39
+
frontend/index.html ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <title>YCSEP Viewer</title>
6
+ <script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
7
+ <link rel="stylesheet" href="https://cdn.datatables.net/1.13.5/css/jquery.dataTables.min.css">
8
+ <script src="https://cdn.datatables.net/1.13.5/js/jquery.dataTables.min.js"></script>
9
+ <script src="js/app.js"></script>
10
+ </head>
11
+ <body>
12
+ <h2>YCSEP Audio Dataset Viewer</h2>
13
+ <input type="text" id="searchBox" placeholder="Search text..." style="width:300px; margin-bottom:10px;">
14
+ <table id="resultsTable" class="display" style="width:100%">
15
+ <thead>
16
+ <tr>
17
+ <th>ID</th><th>Channel</th><th>Video ID</th><th>Speaker</th><th>Start</th><th>End</th><th>Upload Date</th><th>Text</th><th>POS</th><th>Audio</th>
18
+ </tr>
19
+ </thead>
20
+ <tbody></tbody>
21
+ </table>
22
+ </body>
23
+ </html>
24
+
frontend/js/app.js ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
$(document).ready(function () {
  // DataTables places cell content into the page as HTML (the audio column
  // below relies on that), so plain-text values coming from the database
  // must have these characters entity-encoded first.
  const HTML_ENTITIES = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;'
  };

  // Escape string values for safe display; non-strings (numbers, null)
  // are passed through untouched.
  function escapeCell(value) {
    if (typeof value !== 'string') {
      return value;
    }
    return value.replace(/[&<>"']/g, ch => HTML_ENTITIES[ch]);
  }

  // Initialise the table once and reuse the same API instance.
  const table = $('#resultsTable').DataTable();

  // Sequence number of the most recent request. A request is sent on every
  // keystroke, so responses can arrive out of order; only the newest one
  // is allowed to repaint the table.
  let latestRequest = 0;

  // Query /search for `query` and replace the table contents with the result.
  function fetchResults(query) {
    const requestId = ++latestRequest;
    $.get(`/search?text=${encodeURIComponent(query)}`, function (data) {
      if (requestId !== latestRequest) {
        return; // A newer search has been issued since; drop this response.
      }
      const rows = data.map(row => {
        const audioHtml = `<audio controls preload="metadata" style="height:20px; width:120px;">
          <source src="/audio/${encodeURIComponent(row.id)}" type="audio/mpeg"></audio>`;
        return [
          escapeCell(row.id), escapeCell(row.channel), escapeCell(row.video_id),
          escapeCell(row.speaker), escapeCell(row.start_time), escapeCell(row.end_time),
          escapeCell(row.upload_date), escapeCell(row.text), escapeCell(row.pos_tags),
          audioHtml
        ];
      });
      table.clear();
      table.rows.add(rows);
      table.draw();
    });
  }

  $('#searchBox').on('input', function () {
    fetchResults($(this).val());
  });

  // Initial empty fetch
  fetchResults('');
});
28
+
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ pandas
4
+ duckdb
5
+ huggingface_hub
6
+