# copyright_checker / audio.py
# aliasgerovs's picture
# Updated audio.py
# 173f4a0
# raw
# history blame
# 738 Bytes
import requests
import json
import time
import yaml
import yt_dlp
import assemblyai as aai
from dotenv import load_dotenv
import os
# Load variables from a local .env file (if present) into the process
# environment so the API key lookup below can see them.
load_dotenv()

# Parse the project configuration; safe_load only builds plain Python objects.
with open("config.yaml", "r") as file:
    params = yaml.safe_load(file)

# Configure the API key BEFORE constructing the Transcriber, matching the
# order used in the AssemblyAI SDK documentation. Creating the Transcriber
# first risks its client being built without the key.
# A missing ASSEMBLYAI_API_KEY raises KeyError here, at import time.
aai.settings.api_key = os.environ['ASSEMBLYAI_API_KEY']
transcriber = aai.Transcriber()
def assemblyai_transcribe(audio_url):
    """Transcribe the audio track of a media URL with AssemblyAI.

    The URL is resolved with yt-dlp (metadata only, nothing is downloaded),
    the last listed "audio only" m4a format is selected, and its direct
    stream URL is handed to the module-level ``transcriber``.

    Args:
        audio_url: Page/media URL understood by yt-dlp. ``None`` or an empty
            string means "nothing to transcribe".

    Returns:
        The transcript text, or ``""`` when no URL was supplied.
        NOTE(review): the SDK may report ``text`` as ``None`` when a
        transcription fails -- confirm how callers should handle that.

    Raises:
        ValueError: If yt-dlp reports no "audio only" m4a format for the URL.
    """
    # Treat None and "" alike: there is nothing to look up.
    if not audio_url:
        return ""

    with yt_dlp.YoutubeDL() as ydl:
        info = ydl.extract_info(audio_url, download=False)

    # Tolerate a missing result or a missing "formats" list instead of
    # failing with a TypeError/KeyError.
    formats = (info or {}).get("formats") or []

    # Scan from the end of the list, as the original loop did, and take the
    # first audio-only m4a entry that actually carries a stream URL.
    stream_url = next(
        (
            fmt["url"]
            for fmt in reversed(formats)
            if fmt.get("resolution") == "audio only"
            and fmt.get("ext") == "m4a"
            and fmt.get("url")
        ),
        None,
    )
    if stream_url is None:
        # Previously this path crashed with an UnboundLocalError on `url`.
        raise ValueError(
            f"No audio-only m4a format available for {audio_url!r}"
        )

    transcript = transcriber.transcribe(stream_url)
    return transcript.text