File size: 7,015 Bytes
591d823
252d749
ad7e461
e666955
 
 
 
 
 
07fd3f6
af5c58a
7fc9e46
385a3d3
252d749
4fceeff
61061b5
591d823
 
61061b5
4fceeff
dd2695b
b7c9b59
5781d73
 
252d749
 
 
 
 
62b5ac4
252d749
7fc5d88
7fc9e46
 
 
 
252d749
7fc5d88
385a3d3
fec71d6
 
c51d144
 
 
 
 
591d823
fec71d6
7fc9e46
ad7e461
 
 
 
 
 
7fc9e46
 
 
ad7e461
fec71d6
 
af5c58a
fbe8b8e
 
 
 
dd2695b
af5c58a
 
7fc9e46
af5c58a
7fc9e46
af5c58a
 
4fceeff
af5c58a
385a3d3
 
af5c58a
385a3d3
af5c58a
89d221a
7fc5d88
fec71d6
b8952b7
fec71d6
65f65a4
 
385a3d3
 
e666955
 
 
 
5425bdf
9ec30bc
 
 
e666955
 
 
b560dd6
 
 
 
 
65f65a4
 
 
 
 
d7d0d8e
 
 
 
5781d73
d7d0d8e
 
 
 
 
 
 
b560dd6
 
6dc1517
8fa2b64
6dc1517
 
 
 
 
 
 
 
 
 
59681d8
6dc1517
4fceeff
6dc1517
 
 
 
 
e981d74
6dc1517
 
 
 
 
 
 
 
 
 
252d749
78f1df3
4ac9f00
78f1df3
 
f61cd1c
4be78c4
f61cd1c
78f1df3
591d823
78f1df3
7fc5d88
 
252d749
5600a35
70889e0
7fc9e46
39a0fff
70889e0
7b68e58
8240ca0
 
 
 
70889e0
 
252d749
 
7fc9e46
252d749
61061b5
ee482de
7fc9e46
ee482de
7fc5d88
 
 
07fd3f6
 
 
 
4ada698
07fd3f6
252d749
 
 
 
a068f90
65f65a4
 
 
4766734
f200120
7fc9e46
44d30a3
252d749
b0b473d
09ded1f
 
 
 
 
 
 
c1f12e1
681194b
7fc9e46
55d7fe6
c1f12e1
65f65a4
 
7fc9e46
24031eb
 
65f65a4
55d7fe6
b8952b7
65f65a4
7fc9e46
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
from fastapi import APIRouter, status, Depends, UploadFile, File, Query, BackgroundTasks
from typing_extensions import Annotated
from urllib.parse import urlparse, parse_qs
from .Schemas import (
    UserDetails,
    TranscriptionMetadata,
    TranscriptionResult,
    BaseTranscription,
)
from App import bot
import aiofiles, os, re
import uuid
import tempfile
from celery.result import AsyncResult
from App.Worker import transcription_task, downloadfile,downloadUrl
from App.Users.Model import User
from App.Users.UserRoutes import get_token_owner
from App.Users.Schemas import UserSchema
from .Model import Transcriptions
from .Utils.fastapi_tasks import perform_background_task
import yt_dlp
from fastapi_jwt_auth import AuthJWT

from App.Embedding.utils.Initialize import delete_documents

# from .Model import User
# from sqlalchemy import and_


# Router on which every endpoint in this module is registered (via the
# @transcription_router.<method> decorators below); tagged "Transcription".
transcription_router = APIRouter(tags=["Transcription"])


def genUUID():
    """Return a short random token: the first six characters of a fresh UUID4 string."""
    return str(uuid.uuid4())[:6]


def _extract_youtube_video_id(url: str):
    """Return the video id carried by *url*, or ``None`` when it has none.

    Two shapes are recognised: a ``v`` query parameter
    (``.../watch?v=<id>``) and the short share form ``https://youtu.be/<id>``.
    """
    parsed_url = urlparse(url)
    # parse_qs drops blank values by default, so "?v=" counts as missing.
    candidates = parse_qs(parsed_url.query).get("v")
    if candidates:
        return candidates[0]

    host = (parsed_url.hostname or "").lower()
    if host in ("youtu.be", "www.youtu.be"):
        # Short links keep the id in the first path segment.
        return parsed_url.path.strip("/").split("/")[0] or None
    return None


@transcription_router.get("/download-audio")
async def download_audio(
    url: str,
    model: str = Query(
        "tiny",
        enum=["tiny", "small", "medium", "base", "large-v2"],
        description="Whisper model Sizes",
    ),
    user: UserSchema = Depends(get_token_owner),
):
    """Queue a download-and-transcribe job for a YouTube video.

    The incoming link is normalised to ``https://www.youtube.com/watch?v=<id>``,
    the video title is looked up with yt-dlp (no download happens here), the
    ``downloadfile`` worker task is queued, and a ``Transcriptions`` row is
    created for the user.

    Args:
        url: Link to the video; must carry the video id (see
            ``_extract_youtube_video_id``).
        model: Whisper model size forwarded to the worker task.
        user: Owner of the request token, resolved by ``get_token_owner``.

    Returns:
        A ``BaseTranscription`` built from the newly created row, or a
        ``{"code": 400, ...}`` payload when the user is missing or the link
        carries no video id.
    """
    # Reject early, before any URL or network work is done.
    if user is None:
        return {"code": 400, "message": "doesn't exist", "payload": None}

    video_id = _extract_youtube_video_id(url)
    if video_id is None:
        # Previously this fell through and queued "watch?v=None".
        return {"code": 400, "message": "invalid YouTube url", "payload": None}
    url = f"https://www.youtube.com/watch?v={video_id}"

    # Metadata lookup only (download=False).
    # NOTE(review): extract_info is a synchronous call inside an async
    # handler; consider moving it off the event loop.
    with yt_dlp.YoutubeDL({"quiet": True}) as ydl:
        info_dict = ydl.extract_info(url, download=False)
    video_title = (info_dict or {}).get("title", None)

    # The on-disk name is a random token, not the video title, so it needs no
    # sanitising; the title is only stored as the entry's display name.
    filename = f"{genUUID()}.mp3"
    file_path = os.path.join("./", "Downloads", filename)

    ydl_opts = {
        "format": "bestaudio/best",
        "outtmpl": file_path,
    }

    task = downloadfile.delay(url=url, ydl_opts=ydl_opts, model_size=model)
    response = {"task_id": task.id, "file_name": video_title}
    transcription_entry = await Transcriptions.objects.create(
        user=user, youtubeLink=url, **response
    )
    return BaseTranscription(**transcription_entry.__dict__)


@transcription_router.get("/transcriptions")
async def get_user_transcriptions(
    user: UserSchema = Depends(get_token_owner),
):
    """List the transcriptions that belong to the authenticated user.

    Args:
        user: Owner of the request token, resolved by ``get_token_owner``.

    Returns:
        A list of ``BaseTranscription`` objects, one per stored row whose
        ``user`` matches ``user.id``.
    """
    transcriptions = await Transcriptions.objects.filter(user=user.id).all()
    # Identity check on purpose: `!= None` would dispatch to whatever
    # comparison methods the model class defines.
    return [
        BaseTranscription(**row.__dict__)
        for row in transcriptions
        if row is not None
    ]


@transcription_router.post("/delete/{task_id}")
async def delete_transcription(
    task_id: str,
    user: UserSchema = Depends(get_token_owner),
):
    """Delete one of the caller's transcriptions and stop its worker task.

    Looks the row up by ``task_id`` among the rows owned by ``user``. When it
    exists, the row is deleted, the task is revoked with ``terminate=True``
    and ``delete_documents`` is called for the same task id.

    Returns:
        A ``{"code", "message", "payload"}`` dict: code 200 on deletion,
        code 404 when the user has no transcription with that task id.
    """
    owned_by_user = Transcriptions.objects.filter(user=user.id)
    transcript = await owned_by_user.filter(task_id=task_id).first()

    if not transcript:
        return {
            "code": 404,
            "message": f"task {task_id} does not exist",
            "payload": None,
        }

    await transcript.delete()
    AsyncResult(task_id).revoke(terminate=True)
    await delete_documents(task_id=task_id)
    return {"code": 200, "message": f"deleted {task_id}", "payload": None}


@transcription_router.post("/url/")
async def url_task(
    url: str,
    model: str = Query(
        "tiny",
        enum=["tiny", "small", "medium", "base", "large-v2"],
        description="Whisper model Sizes",
    ),
    user: UserSchema = Depends(get_token_owner),
):
    """Queue a download-and-transcribe job for a file reachable at ``url``.

    A random ``<token>.wav`` name is generated, the ``downloadUrl`` worker
    task is queued with ``/tmp/Downloads`` as its target directory, and a
    ``Transcriptions`` row is created for the user.

    Returns:
        A dict with the generated ``file_name`` and the queued ``task_id``.
    """
    file_name = f"{genUUID()}.wav"
    download_dir = os.path.join("/tmp/", "Downloads")

    # Hand the actual download over to the worker.
    queued = downloadUrl.delay(
        link=url,
        download_dir=download_dir,
        filename=file_name,
        model_size=model,
    )

    # Record the job so it shows up in the user's transcription list.
    await Transcriptions.objects.create(
        task_id=queued.id, user=user, file_name=file_name
    )
    print(queued.id)
    return {"file_name": file_name, "task_id": queued.id}





@transcription_router.post("/uploadfile/")
async def create_file(
    background_tasks: BackgroundTasks,
    file: UploadFile,
    model: str = Query(
        "tiny",
        enum=["tiny", "small", "medium", "base", "large-v2"],
        description="Whisper model Sizes",
    ),
    user: UserSchema = Depends(get_token_owner),
):
    """Store an uploaded file on disk and queue it for transcription.

    The upload is copied to ``<random token><original extension>`` in the
    working directory, ``transcription_task`` is queued for that file, a
    ``Transcriptions`` row is created for the user, and
    ``perform_background_task`` is scheduled to run after the response.

    Args:
        background_tasks: FastAPI background-task collector for this request.
        file: The uploaded file.
        model: Whisper model size forwarded to the worker task.
        user: Owner of the request token, resolved by ``get_token_owner``.

    Returns:
        A dict with ``file_size``, the client's ``file_name`` and the queued
        ``task_id``; or a dict with only ``message`` when writing the upload
        to disk failed.
    """
    # `filename` is client-controlled and may be missing. Keep only the
    # extension of its last path component so no directory separators can end
    # up in the name we write to.
    client_name = (file.filename or "").replace("\\", "/")
    suffix = os.path.splitext(os.path.basename(client_name))[1]
    file_name = f"{genUUID()}{suffix}"

    # Write the file to disk asynchronously, 1 MiB at a time.
    try:
        async with aiofiles.open(file_name, "wb") as f:
            while contents := await file.read(1024 * 1024):
                await f.write(contents)

    except Exception as e:
        # Best effort: do not leave a truncated upload behind. The file may
        # not exist at all if opening it was what failed.
        try:
            os.remove(file_name)
        except OSError:
            pass
        return {
            "message": f"There was an error uploading the file, error message {str(e)}  "
        }
    finally:
        await file.close()

    # celery task
    task = transcription_task.delay(file_name, model)

    # create a transcription entry
    await Transcriptions.objects.create(
        task_id=task.id, user=user, file_name=file_name
    )
    # NOTE(review): `file` has already been closed above when the background
    # task receives it — confirm perform_background_task does not read from it.
    background_tasks.add_task(
        perform_background_task, file_name, file=file, task_id=task.id
    )
    return {
        "file_size": file.size,
        "file_name": file.filename,
        "task_id": task.id,
    }


@transcription_router.get("/tasks/{task_id}")
async def get_status(task_id: str):
    """Report the state of a transcription job, syncing it into the database.

    Flow:
      1. No stored entry for ``task_id`` -> "Nothing found" payload.
      2. Stored entry already marked ``SUCCESS`` -> returned as is with
         ``percentage`` forced to ``"100"``.
      3. Otherwise the task backend is consulted. With no result yet, the raw
         task status is returned. With a result, it is parsed as
         ``TranscriptionMetadata``, written onto the stored entry, and the
         refreshed entry is returned.

    Args:
        task_id: Id of the worker task, as returned when the job was queued.

    Returns:
        A ``BaseTranscription``; or a ``{"task_id", "task_status",
        "task_result"}`` dict while the task has no result; or a
        ``{"payload", "message", "code"}`` dict when nothing usable is found.
    """
    entry: Transcriptions = await Transcriptions.objects.filter(task_id=task_id).first()
    if entry is None:
        return {"payload": None, "message": "Nothing found", "code": 200}

    result = BaseTranscription(**entry.__dict__)
    if result.status == "SUCCESS":
        result.percentage = "100"
        return result

    task_result = AsyncResult(task_id)
    # Snapshot status and result once, status first, so every decision below
    # is made on the same values instead of re-reading the backend each time
    # (presumably each attribute access can hit the backend while the task is
    # still running — see the Celery AsyncResult docs).
    task_status = task_result.status
    task_payload = task_result.result
    if task_payload is None:
        return {
            "task_id": task_id,
            "task_status": task_status,
            "task_result": task_payload,
        }

    if task_status == "SUCCESS":
        trans = TranscriptionMetadata(**task_payload)
        percentage = "100"
    else:
        # For unfinished or failed tasks the payload may not have the
        # expected shape (it may not even be a mapping); treat that as
        # "nothing to report".
        try:
            trans = TranscriptionMetadata(**task_payload)
            percentage = trans.percentage
        except Exception as e:
            print(e)
            return {"payload": None, "message": "Nothing found", "code": 200}

    await entry.update(**trans.dict())

    result = BaseTranscription(**entry.__dict__)
    result.percentage = percentage
    return result