Spaces:
Runtime error
Runtime error
stcoats
committed on
Commit
·
756e034
1
Parent(s):
e798575
Initial FastAPI + DuckDB frontend app
Browse files- .huggingface.yaml +3 -0
- app/duckdb_utils.py +19 -0
- app/main.py +39 -0
- frontend/index.html +24 -0
- frontend/js/app.js +28 -0
- requirements.txt +6 -0
.huggingface.yaml
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
sdk: fastapi
|
2 |
+
app_file: app/main.py
|
3 |
+
|
app/duckdb_utils.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from huggingface_hub import hf_hub_download
|
2 |
+
import duckdb
|
3 |
+
import os
|
4 |
+
|
5 |
+
HF_REPO_ID = "stcoats/temp-duckdb-upload"
|
6 |
+
HF_FILENAME = "ycsep.duckdb"
|
7 |
+
LOCAL_PATH = "./ycsep.duckdb"
|
8 |
+
|
9 |
+
def get_connection():
    """Open the local DuckDB file read-only, downloading it first if absent.

    When nothing exists at ``LOCAL_PATH``, ``HF_FILENAME`` is fetched from the
    ``HF_REPO_ID`` dataset repository into the current directory before the
    database is opened.

    Returns:
        The connection object returned by ``duckdb.connect`` for
        ``LOCAL_PATH``, opened with ``read_only=True``.
    """
    already_present = os.path.exists(LOCAL_PATH)
    if not already_present:
        download_kwargs = {
            "repo_id": HF_REPO_ID,
            "repo_type": "dataset",
            "filename": HF_FILENAME,
            "local_dir": ".",
            "local_dir_use_symlinks": False,
        }
        hf_hub_download(**download_kwargs)
    return duckdb.connect(LOCAL_PATH, read_only=True)
|
19 |
+
|
app/main.py
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import FastAPI, Query, HTTPException
|
2 |
+
from fastapi.responses import StreamingResponse
|
3 |
+
from fastapi.middleware.cors import CORSMiddleware
|
4 |
+
from app.duckdb_utils import get_connection
|
5 |
+
import pandas as pd
|
6 |
+
import io
|
7 |
+
|
8 |
+
app = FastAPI()
|
9 |
+
|
10 |
+
app.add_middleware(
|
11 |
+
CORSMiddleware,
|
12 |
+
allow_origins=["*"],
|
13 |
+
allow_methods=["*"],
|
14 |
+
allow_headers=["*"],
|
15 |
+
)
|
16 |
+
|
17 |
+
con = get_connection()
|
18 |
+
|
19 |
+
@app.get("/search")
def search(text: str = Query("")):
    """Return up to 100 rows of ``data`` whose ``text`` column contains *text*.

    The match is a case-insensitive substring search (``ILIKE``). ``%`` and
    ``_`` inside the search term are still interpreted as LIKE wildcards, as
    they were before.

    Args:
        text: Search term taken from the ``text`` query parameter; defaults
            to the empty string.

    Returns:
        A list of dicts, one per matching row, keyed by column name. Missing
        values are returned as ``None`` so the response is valid JSON.
    """
    pattern = f"%{text}%"
    # Bind the pattern as a query parameter instead of splicing user input
    # into the SQL text; the driver handles quoting.
    # A fresh cursor is taken per request because sync endpoints run in a
    # worker thread pool and a DuckDB connection object should not be used
    # from several threads at once.
    df = con.cursor().execute(
        """
        SELECT id, channel, video_id, speaker, start_time, end_time, upload_date, text, pos_tags
        FROM data
        WHERE text ILIKE ?
        LIMIT 100
        """,
        [pattern],
    ).df()
    # NaN/NaT cannot be encoded as strict JSON; send them as null instead.
    df = df.astype(object).where(df.notna(), None)
    return df.to_dict(orient="records")
|
31 |
+
|
32 |
+
@app.get("/audio/{id}")
def get_audio(id: int):
    """Stream the ``audio`` value of the ``data`` row with the given id.

    Args:
        id: Row identifier taken from the URL path. The name shadows the
            builtin but is kept because it is the route's path-parameter name.

    Returns:
        A ``StreamingResponse`` with media type ``audio/mpeg`` wrapping the
        stored value (assumed to be raw bytes — TODO confirm column type).

    Raises:
        HTTPException: 404 when no row has this id or its ``audio`` is NULL.
    """
    # Parameterized for consistency with the other query; a per-request
    # cursor avoids sharing one connection object across worker threads.
    row = con.cursor().execute(
        "SELECT audio FROM data WHERE id = ?", [id]
    ).fetchone()
    # A row with NULL audio has nothing to stream either.
    if row is None or row[0] is None:
        raise HTTPException(status_code=404, detail="Audio not found")
    return StreamingResponse(io.BytesIO(row[0]), media_type="audio/mpeg")
|
39 |
+
|
frontend/index.html
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!DOCTYPE html>
|
2 |
+
<html>
|
3 |
+
<head>
|
4 |
+
<meta charset="UTF-8">
|
5 |
+
<title>YCSEP Viewer</title>
|
6 |
+
<script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
|
7 |
+
<link rel="stylesheet" href="https://cdn.datatables.net/1.13.5/css/jquery.dataTables.min.css">
|
8 |
+
<script src="https://cdn.datatables.net/1.13.5/js/jquery.dataTables.min.js"></script>
|
9 |
+
<script src="js/app.js"></script>
|
10 |
+
</head>
|
11 |
+
<body>
|
12 |
+
<h2>YCSEP Audio Dataset Viewer</h2>
|
13 |
+
<input type="text" id="searchBox" placeholder="Search text..." style="width:300px; margin-bottom:10px;">
|
14 |
+
<table id="resultsTable" class="display" style="width:100%">
|
15 |
+
<thead>
|
16 |
+
<tr>
|
17 |
+
<th>ID</th><th>Channel</th><th>Video ID</th><th>Speaker</th><th>Start</th><th>End</th><th>Upload Date</th><th>Text</th><th>POS</th><th>Audio</th>
|
18 |
+
</tr>
|
19 |
+
</thead>
|
20 |
+
<tbody></tbody>
|
21 |
+
</table>
|
22 |
+
</body>
|
23 |
+
</html>
|
24 |
+
|
frontend/js/app.js
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
$(document).ready(function () {
    // Delay (ms) between the last keystroke and the search request.
    const DEBOUNCE_MS = 250;

    // Create the table once and reuse the instance. Columns 0-8 carry dataset
    // values and are rendered as plain text so markup inside the data is shown
    // literally; column 9 holds the <audio> player HTML built below.
    const table = $('#resultsTable').DataTable({
        columnDefs: [
            { targets: [0, 1, 2, 3, 4, 5, 6, 7, 8], render: $.fn.dataTable.render.text() }
        ]
    });

    let latestRequest = 0;   // sequence number of the most recent request
    let debounceTimer = null;

    // Query /search and replace the table contents with the returned rows.
    function fetchResults(query) {
        const requestId = ++latestRequest;
        $.get(`/search?text=${encodeURIComponent(query)}`, function (data) {
            // Ignore responses that arrive after a newer request was issued,
            // otherwise a slow old response could overwrite fresher results.
            if (requestId !== latestRequest) {
                return;
            }
            table.clear();
            data.forEach(row => {
                const audioHtml = `<audio controls preload="metadata" style="height:20px; width:120px;">
<source src="/audio/${row.id}" type="audio/mpeg"></audio>`;
                table.row.add([
                    row.id, row.channel, row.video_id, row.speaker,
                    row.start_time, row.end_time, row.upload_date,
                    row.text, row.pos_tags, audioHtml
                ]);
            });
            table.draw();
        });
    }

    // Debounce typing so a request is sent only once the user pauses.
    $('#searchBox').on('input', function () {
        const query = $(this).val();
        clearTimeout(debounceTimer);
        debounceTimer = setTimeout(function () {
            fetchResults(query);
        }, DEBOUNCE_MS);
    });

    // Initial empty fetch
    fetchResults('');
});
|
28 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
fastapi
|
2 |
+
uvicorn
|
3 |
+
pandas
|
4 |
+
duckdb
|
5 |
+
huggingface_hub
|
6 |
+
|