File size: 7,015 Bytes
591d823 252d749 ad7e461 e666955 07fd3f6 af5c58a 7fc9e46 385a3d3 252d749 4fceeff 61061b5 591d823 61061b5 4fceeff dd2695b b7c9b59 5781d73 252d749 62b5ac4 252d749 7fc5d88 7fc9e46 252d749 7fc5d88 385a3d3 fec71d6 c51d144 591d823 fec71d6 7fc9e46 ad7e461 7fc9e46 ad7e461 fec71d6 af5c58a fbe8b8e dd2695b af5c58a 7fc9e46 af5c58a 7fc9e46 af5c58a 4fceeff af5c58a 385a3d3 af5c58a 385a3d3 af5c58a 89d221a 7fc5d88 fec71d6 b8952b7 fec71d6 65f65a4 385a3d3 e666955 5425bdf 9ec30bc e666955 b560dd6 65f65a4 d7d0d8e 5781d73 d7d0d8e b560dd6 6dc1517 8fa2b64 6dc1517 59681d8 6dc1517 4fceeff 6dc1517 e981d74 6dc1517 252d749 78f1df3 4ac9f00 78f1df3 f61cd1c 4be78c4 f61cd1c 78f1df3 591d823 78f1df3 7fc5d88 252d749 5600a35 70889e0 7fc9e46 39a0fff 70889e0 7b68e58 8240ca0 70889e0 252d749 7fc9e46 252d749 61061b5 ee482de 7fc9e46 ee482de 7fc5d88 07fd3f6 4ada698 07fd3f6 252d749 a068f90 65f65a4 4766734 f200120 7fc9e46 44d30a3 252d749 b0b473d 09ded1f c1f12e1 681194b 7fc9e46 55d7fe6 c1f12e1 65f65a4 7fc9e46 24031eb 65f65a4 55d7fe6 b8952b7 65f65a4 7fc9e46 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 |
from fastapi import APIRouter, status, Depends, UploadFile, File, Query, BackgroundTasks
from typing_extensions import Annotated
from urllib.parse import urlparse, parse_qs
from .Schemas import (
UserDetails,
TranscriptionMetadata,
TranscriptionResult,
BaseTranscription,
)
from App import bot
import aiofiles, os, re
import uuid
import tempfile
from celery.result import AsyncResult
from App.Worker import transcription_task, downloadfile,downloadUrl
from App.Users.Model import User
from App.Users.UserRoutes import get_token_owner
from App.Users.Schemas import UserSchema
from .Model import Transcriptions
from .Utils.fastapi_tasks import perform_background_task
import yt_dlp
from fastapi_jwt_auth import AuthJWT
from App.Embedding.utils.Initialize import delete_documents
# from .Model import User
# from sqlalchemy import and_
# Router on which the endpoint decorators in this module register their handlers.
transcription_router = APIRouter(tags=["Transcription"])
def genUUID():
    """Return a short random identifier: the first six hex digits of a new UUID4."""
    # The first eight characters of str(UUID) are the same hex digits as
    # UUID.hex, so slicing either representation yields the same prefix.
    return uuid.uuid4().hex[:6]
@transcription_router.get("/download-audio")
async def download_audio(
    url: str,
    model: str = Query(
        "tiny",
        enum=["tiny", "small", "medium", "base", "large-v2"],
        description="Whisper model Sizes",
    ),
    user: UserSchema = Depends(get_token_owner),
):
    """Queue an audio download for a YouTube video and record the transcription.

    Args:
        url: YouTube link; either a ``watch?v=<id>`` URL or a ``youtu.be/<id>``
            short link.
        model: Whisper model size forwarded to the worker task.
        user: Owner of the request, resolved by ``get_token_owner``.

    Returns:
        A ``BaseTranscription`` built from the newly created row, or a
        ``{"code": 400, ...}`` payload when the user is missing or no video id
        can be extracted from ``url``.
    """
    if user is None:
        return {"code": 400, "message": "doesn't exist", "payload": None}

    # Normalise the link to a canonical watch URL so extra query parameters
    # (playlist, timestamp, ...) are not forwarded to the downloader.
    parsed_url = urlparse(url)
    video_ids = parse_qs(parsed_url.query).get("v")
    if video_ids:
        video_id = video_ids[0]
    elif parsed_url.hostname in ("youtu.be", "www.youtu.be"):
        # Short links carry the id as the first path segment.
        video_id = parsed_url.path.strip("/").split("/")[0]
    else:
        video_id = ""
    if not video_id:
        # Without this guard the URL would become ".../watch?v=None".
        return {"code": 400, "message": "invalid youtube url", "payload": None}
    url = f"https://www.youtube.com/watch?v={video_id}"

    # NOTE(review): extract_info is called synchronously inside an async
    # handler; consider offloading it if metadata lookups prove slow.
    with yt_dlp.YoutubeDL({"quiet": True}) as ydl:
        info_dict = ydl.extract_info(url, download=False)
    video_title = info_dict.get("title", None)

    # The file on disk is named after a short random id, not the video title,
    # so it needs no further sanitising.
    filename = f"{genUUID()}.mp3"
    file_path = os.path.join("./", "Downloads", filename)
    ydl_opts = {
        "format": "bestaudio/best",
        "outtmpl": file_path,
    }

    task = downloadfile.delay(url=url, ydl_opts=ydl_opts, model_size=model)
    response = {"task_id": task.id, "file_name": video_title}
    transcription_entry = await Transcriptions.objects.create(
        user=user, youtubeLink=url, **response
    )
    return BaseTranscription(**transcription_entry.__dict__)
@transcription_router.get("/transcriptions")
async def get_user_transcriptions(
    user: UserSchema = Depends(get_token_owner),
):
    """Return the transcriptions stored for the requesting user.

    Args:
        user: Owner of the request, resolved by ``get_token_owner``.

    Returns:
        A list of ``BaseTranscription`` objects, one per stored row.
    """
    transcriptions = await Transcriptions.objects.filter(user=user.id).all()
    # Identity check against None: avoids dispatching to the model's own
    # comparison operators just to skip empty entries.
    return [
        BaseTranscription(**row.__dict__)
        for row in transcriptions
        if row is not None
    ]
@transcription_router.post("/delete/{task_id}")
async def delete_transcription(
    task_id: str,
    user: UserSchema = Depends(get_token_owner),
):
    """Delete one of the requesting user's transcriptions by task id.

    Looks the row up by both user and ``task_id``. When found, the row is
    deleted, the matching task is revoked with ``terminate=True`` and
    ``delete_documents`` is called for the task id.

    Returns:
        A ``{"code", "message", "payload"}`` dict with code 200 on success or
        404 when no matching row exists.
    """
    owned_by_user = Transcriptions.objects.filter(user=user.id)
    record = await owned_by_user.filter(task_id=task_id).first()

    if not record:
        return {
            "code": 404,
            "message": f"task {task_id} does not exist",
            "payload": None,
        }

    await record.delete()
    AsyncResult(task_id).revoke(terminate=True)
    await delete_documents(task_id=task_id)
    return {"code": 200, "message": f"deleted {task_id}", "payload": None}
@transcription_router.post("/url/")
async def url_task(
    url: str,
    model: str = Query(
        "tiny",
        enum=["tiny", "small", "medium", "base", "large-v2"],
        description="Whisper model Sizes",
    ),
    user: UserSchema = Depends(get_token_owner),
):
    """Queue a download of ``url`` and register a transcription row for it.

    The target file gets a random short name with a ``wav`` extension and is
    placed by the worker task under the ``/tmp/Downloads`` directory.

    Returns:
        A dict with the generated ``file_name`` and the queued ``task_id``.
    """
    file_name = genUUID() + "." + "wav"
    download_dir = os.path.join("/tmp/", "Downloads")

    # Hand the download over to the worker.
    queued = downloadUrl.delay(
        link=url,
        download_dir=download_dir,
        filename=file_name,
        model_size=model,
    )

    # Persist the entry so the task can be looked up by its id later.
    await Transcriptions.objects.create(
        task_id=queued.id, user=user, file_name=file_name
    )
    print(queued.id)
    return {"file_name": file_name, "task_id": queued.id}
@transcription_router.post("/uploadfile/")
async def create_file(
    background_tasks: BackgroundTasks,
    file: UploadFile,
    model: str = Query(
        "tiny",
        enum=["tiny", "small", "medium", "base", "large-v2"],
        description="Whisper model Sizes",
    ),
    user: UserSchema = Depends(get_token_owner),
):
    """Store an uploaded file on disk and queue it for transcription.

    Args:
        background_tasks: FastAPI background task queue for this request.
        file: The uploaded media file.
        model: Whisper model size forwarded to the worker task.
        user: Owner of the request, resolved by ``get_token_owner``.

    Returns:
        A dict with ``file_size``, the client's ``file_name`` and the queued
        ``task_id``; or a dict with only ``message`` when writing the upload
        to disk fails.
    """
    # The client-supplied filename is untrusted. Keep only the alphanumeric
    # characters of its last dot-separated piece so the name written to disk
    # can never contain path separators or "..".
    raw_extension = (file.filename or "").rsplit(".", 1)[-1]
    extension = re.sub(r"[^A-Za-z0-9]", "", raw_extension)
    short_id = genUUID()
    file_name = f"{short_id}.{extension}" if extension else short_id

    # Write the file to disk asynchronously, 1 MiB at a time.
    try:
        async with aiofiles.open(file_name, "wb") as f:
            while contents := await file.read(1024 * 1024):
                await f.write(contents)
    except Exception as e:
        # Best-effort cleanup so a failed upload does not leave a truncated
        # file behind; the file may not have been created at all.
        try:
            os.remove(file_name)
        except OSError:
            pass
        return {
            "message": f"There was an error uploading the file, error message {str(e)} "
        }
    finally:
        await file.close()

    # celery task
    task = transcription_task.delay(file_name, model)

    # create a transcription entry
    await Transcriptions.objects.create(
        task_id=task.id, user=user, file_name=file_name
    )

    background_tasks.add_task(
        perform_background_task, file_name, file=file, task_id=task.id
    )
    return {
        "file_size": file.size,
        "file_name": file.filename,
        "task_id": task.id,
    }
@transcription_router.get("/tasks/{task_id}")
async def get_status(task_id: str):
    """Report the progress of a transcription task.

    Args:
        task_id: Identifier of the task whose transcription row is looked up.

    Returns:
        * ``{"payload": None, "message": "Nothing found", "code": 200}`` when
          no row exists or the task result cannot be parsed as
          ``TranscriptionMetadata``.
        * A ``BaseTranscription`` with ``percentage`` filled in when the row
          or the task carries usable data.
        * A raw ``task_id`` / ``task_status`` / ``task_result`` dict while the
          task has no result yet.
    """
    entry: Transcriptions = await Transcriptions.objects.filter(task_id=task_id).first()
    if entry is None:
        return {"payload": None, "message": "Nothing found", "code": 200}

    result = BaseTranscription(**entry.__dict__)
    if result.status == "SUCCESS":
        # Already finalised in the database; no need to consult the task.
        result.percentage = "100"
        return result

    task_result = AsyncResult(task_id)
    # Read status and result exactly once (status first) so the branch taken
    # below and the payload it parses come from the same pair of reads rather
    # than from repeated property accesses.
    task_status = task_result.status
    task_payload = task_result.result

    if task_payload is None:
        return {
            "task_id": task_id,
            "task_status": task_status,
            "task_result": task_payload,
        }

    if task_status == "SUCCESS":
        trans = TranscriptionMetadata(**task_payload)
        percentage = "100"
        await entry.update(**trans.dict())
    else:
        try:
            _trans = TranscriptionMetadata(**task_payload)
            percentage = _trans.percentage
        except Exception as e:
            # The payload is not usable as metadata (for example it is not a
            # mapping); report it the same way as a missing entry.
            print(e)
            return {"payload": None, "message": "Nothing found", "code": 200}
        await entry.update(**_trans.dict())

    result = BaseTranscription(**entry.__dict__)
    result.percentage = percentage
    return result
|