Spaces:

Capx
/

popo-podcast

Sleeping

Adarsh Shirawalmath

Upload

0720db6 6 months ago

7.58 kB

	from fastapi import FastAPI, HTTPException
	from fastapi.responses import StreamingResponse
	from pydantic import BaseModel, HttpUrl
	import os
	import logging
	import json
	import io
	import time
	from collections import deque
	import yt_dlp
	from openai import OpenAI
	from deepgram import Deepgram
	import asyncio
	import google.generativeai as genai
	from pytubefix import YouTube

	# Configure the root logger to emit INFO-level records and above, and create
	# the module-level logger used by the functions in this file.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# Application object; the @app.post(...) decorators in this file register
	# their routes on it.
	app = FastAPI()

	# Service credentials are read from the environment at import time.
	# os.getenv returns None for an unset variable, and that value is handed
	# straight to the client constructors/configuration calls below.
	DEEPGRAM_API_KEY = os.getenv("DEEPGRAM_API_KEY")
	deepgram = Deepgram(DEEPGRAM_API_KEY)

	OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
	openai_client = OpenAI(api_key=OPENAI_API_KEY)

	GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
	genai.configure(api_key=GOOGLE_API_KEY)

	class VideoURL(BaseModel):
	    # Request model pairing a video URL with a requested summary format.
	    # NOTE(review): no function visible in this file consumes this model.

	    # Declared as pydantic's HttpUrl type rather than a plain string.
	    url: HttpUrl
	    # Free-form string; presumably one of the length labels that
	    # generate_summary compares against -- confirm with the caller.
	    summary_length: str

	class ChatMessage(BaseModel):
	    # Request body of the /chat endpoint.

	    # Name interpolated into the role-play system prompt.
	    speaker: str
	    # Text forwarded to the model as the user turn.
	    message: str
	    # Podcast transcript; the /chat handler embeds its first 2000 characters
	    # in the system prompt.
	    transcript: str

	class VideoRequest(BaseModel):
	    # Request body of the /transcribe endpoint.

	    # Video address, kept as a plain string and passed unchanged to the
	    # downloader by the handler.
	    url: str

	# Sliding-window budget: at most RATE_LIMIT recorded requests within the
	# last RATE_WINDOW seconds. request_timestamps holds time.time() values,
	# oldest on the left.
	RATE_LIMIT = 15
	RATE_WINDOW = 60
	request_timestamps = deque()


	def is_rate_limited():
	    """Report whether the request window is currently full.

	    Entries older than ``RATE_WINDOW`` seconds are removed from the left of
	    ``request_timestamps`` before counting, so calling this function mutates
	    the deque.

	    Returns:
	        ``True`` when at least ``RATE_LIMIT`` timestamps remain in the
	        window, ``False`` otherwise.
	    """
	    now = time.time()
	    while request_timestamps:
	        oldest_age = now - request_timestamps[0]
	        if not oldest_age > RATE_WINDOW:
	            # The oldest entry is still inside the window, so all newer
	            # ones are too.
	            break
	        request_timestamps.popleft()
	    return len(request_timestamps) >= RATE_LIMIT

	async def transcribe_audio(audio_file):
	    """Submit a local audio file to Deepgram's prerecorded transcription call.

	    Args:
	        audio_file: Path of the file to open in binary mode.

	    Returns:
	        The value returned by ``deepgram.transcription.prerecorded``, or
	        ``None`` if any exception is raised (the error is logged first).
	    """
	    request_options = {
	        "diarize": True,
	        "punctuate": True,
	        "paragraphs": True,
	        "model": 'general',
	        "tier": 'enhanced',
	    }
	    try:
	        # The file handle is passed as the buffer, so it has to stay open
	        # until the awaited call completes.
	        with open(audio_file, 'rb') as stream:
	            payload = {'buffer': stream, 'mimetype': 'audio/mp3'}
	            return await deepgram.transcription.prerecorded(payload, request_options)
	    except Exception as e:
	        logger.error(f"Error transcribing audio: {str(e)}")
	        return None

	# Length-specific opening instructions. Any summary_length value that is not
	# a key here falls back to the 500-word paragraph instruction.
	_SUMMARY_INSTRUCTIONS = {
	    "100 words - bullet points": (
	        "Summarize the following podcast in about 100 words using bullet points. "
	        "Focus only on the main content and key points discussed in the podcast."
	    ),
	    "250 words - TL;DR": (
	        "Provide a TL;DR summary of the following podcast in about 250 words. "
	        "Concentrate on the core content and main ideas presented in the podcast."
	    ),
	}
	_DEFAULT_SUMMARY_INSTRUCTION = (
	    "Summarize the following podcast in about 500 words using paragraphs. "
	    "Emphasize the primary topics and key discussions from the podcast."
	)


	def generate_summary(text, video_description, summary_length):
	    """Summarize a podcast transcript, preferring Gemini and falling back to OpenAI.

	    Args:
	        text: Transcript to summarize.
	        video_description: Description text placed ahead of the transcript
	            in the prompt.
	        summary_length: ``"100 words - bullet points"``,
	            ``"250 words - TL;DR"``, or anything else for the 500-word
	            paragraph form.

	    Returns:
	        The summary text. An empty string is returned if the OpenAI fallback
	        answers without any text content.

	    Raises:
	        Any exception raised by the OpenAI client during the fallback call
	        propagates to the caller.
	    """
	    instruction = _SUMMARY_INSTRUCTIONS.get(summary_length, _DEFAULT_SUMMARY_INSTRUCTION)
	    prompt = (
	        f"{instruction} Here's the video description followed by the transcript:"
	        f"\n\nVideo Description:\n{video_description}\n\nTranscript:\n{text}"
	    )

	    if is_rate_limited():
	        logger.info("Gemini rate limit reached. Falling back to GPT-4O-mini.")
	    else:
	        try:
	            model = genai.GenerativeModel('gemini-1.5-flash')
	            # Record the attempt before sending it: a request that fails
	            # (for example because the remote quota is exhausted) must still
	            # count against the local window, otherwise repeated failures
	            # never trigger the limiter.
	            request_timestamps.append(time.time())
	            response = model.generate_content(prompt)
	            return response.text
	        except Exception as e:
	            logger.error(f"Error with Gemini model: {str(e)}. Falling back to GPT-4O-mini.")

	    completion = openai_client.chat.completions.create(
	        model="gpt-4o-mini",
	        messages=[
	            {"role": "system", "content": "You are a helpful assistant that summarizes podcasts concisely and accurately, focusing on the main content and key points discussed."},
	            {"role": "user", "content": prompt},
	        ],
	    )
	    # The message content is optional in the API response; guard so a
	    # missing value does not raise AttributeError on .strip().
	    content = completion.choices[0].message.content
	    return (content or "").strip()

	def _strip_code_fence(raw):
	    """Return *raw* without a surrounding Markdown code fence, if it has one."""
	    cleaned = raw.strip()
	    if not cleaned.startswith("```"):
	        return cleaned
	    # Drop the opening fence line ("```" or "```json"). If the fence and the
	    # payload share a single line, only the three backticks are removed.
	    _opening, newline, remainder = cleaned.partition("\n")
	    cleaned = remainder if newline else cleaned[3:]
	    cleaned = cleaned.rstrip()
	    if cleaned.endswith("```"):
	        cleaned = cleaned[:-3]
	    return cleaned.strip()


	def generate_quiz(text, video_description):
	    """Ask the OpenAI chat model for a 10-question multiple-choice quiz.

	    Args:
	        text: Podcast transcript; only its first 4000 characters are sent.
	        video_description: Description text placed ahead of the transcript
	            in the prompt.

	    Returns:
	        The parsed JSON value from the model's reply (the prompt requests an
	        array of objects with 'question', 'choices' and 'correct_answer'
	        keys), or an empty list when the reply cannot be parsed as JSON.

	    Raises:
	        Any exception raised by the OpenAI client propagates to the caller.
	    """
	    prompt = f"Create a quiz with 10 multiple-choice questions based on the following podcast. Each question should have 4 options (A, B, C, D) with only one correct answer. Focus on the main content and key points discussed in the podcast. Format the output as a JSON array of objects, where each object represents a question with 'question', 'choices', and 'correct_answer' keys. Here's the video description followed by the transcript:\n\nVideo Description:\n{video_description}\n\nTranscript:\n{text[:4000]}"

	    response = openai_client.chat.completions.create(
	        model="gpt-4o-mini",
	        messages=[
	            {"role": "system", "content": "You are an expert at creating engaging and informative quizzes based on podcast content, focusing on the main topics and key points discussed. Output your response as a valid JSON array."},
	            {"role": "user", "content": prompt},
	        ],
	    )

	    # The message content is optional in the API response; treat a missing
	    # value as an empty reply so it takes the "could not parse" path.
	    raw = response.choices[0].message.content or ""
	    try:
	        # Replies are frequently wrapped in a ```json ... ``` fence, which
	        # json.loads rejects; unwrap it before parsing.
	        return json.loads(_strip_code_fence(raw))
	    except json.JSONDecodeError as e:
	        logger.error(f"Error parsing quiz data: {str(e)}")
	        logger.error(f"Raw response: {raw}")
	        return []

	def _download_audio(url):
	    """Download *url* with yt-dlp and return the path of the extracted MP3."""
	    ydl_opts = {
	        'format': 'bestaudio/best',
	        'postprocessors': [{
	            'key': 'FFmpegExtractAudio',
	            'preferredcodec': 'mp3',
	            'preferredquality': '192',
	        }],
	        'outtmpl': 'downloads/%(id)s.%(ext)s',
	    }
	    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
	        info_dict = ydl.extract_info(url, download=True)
	        source_path = ydl.prepare_filename(info_dict)
	    # prepare_filename reports the name of the stream as downloaded (e.g.
	    # .webm/.m4a). The FFmpegExtractAudio post-processor writes the result
	    # next to it with an .mp3 extension, so that is the file to hand back.
	    return os.path.splitext(source_path)[0] + ".mp3"


	@app.post("/transcribe")
	async def transcribe_video(request: VideoRequest):
	    """Download the audio track of a video and return the MP3's local path.

	    Args:
	        request: Body carrying the video ``url``.

	    Returns:
	        ``{"audio_file": <path>}`` pointing at the extracted MP3 under
	        ``downloads/``.

	    Raises:
	        HTTPException: status 500 when yt-dlp reports a download error, when
	            the MP3 is missing after the download, or on any other failure.
	    """
	    url = request.url
	    try:
	        # Prepare the token file required by pytubefix
	        data = {
	            "access_token": os.getenv("ACCESS_TOKEN"),
	            "refresh_token": os.getenv("REFRESH_TOKEN"),
	            "expires": 1823266077,  # future timestamp to prevent token refresh issues
	        }
	        token_file_path = "/tmp/token.json"
	        with open(token_file_path, "w") as f:
	            json.dump(data, f)

	        # Instantiate YouTube with OAuth credentials.
	        # NOTE(review): the resulting object is never used; the call is kept
	        # in case construction has effects this handler relies on -- confirm
	        # and remove together with the token file if it does not.
	        YouTube(url, use_oauth=True, allow_oauth_cache=True, token_file=token_file_path)

	        # The download and ffmpeg conversion are blocking; run them in the
	        # default executor so the event loop keeps serving other requests.
	        loop = asyncio.get_running_loop()
	        audio_file_path = await loop.run_in_executor(None, _download_audio, url)

	        # Check if the audio file exists
	        if not os.path.exists(audio_file_path):
	            raise HTTPException(status_code=500, detail="Failed to download the audio")

	        # Return the path of the downloaded audio file
	        return {"audio_file": audio_file_path}

	    except HTTPException:
	        # Already the response we want; do not let the generic handler
	        # below re-wrap it.
	        raise
	    except yt_dlp.utils.DownloadError as e:
	        raise HTTPException(status_code=500, detail=f"Download error: {str(e)}") from e
	    except Exception as e:
	        raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}") from e

	@app.post("/generate_audio_summary")
	async def generate_audio_summary(summary: str):
	    # Synthesizes `summary` with OpenAI text-to-speech (model "tts-1", voice
	    # "alloy") and streams the resulting bytes back with the audio/mp3
	    # media type.
	    speech = openai_client.audio.speech.create(
	        input=summary,
	        voice="alloy",
	        model="tts-1",
	    )
	    # The complete payload is wrapped in an in-memory buffer and handed to
	    # StreamingResponse as a file-like object.
	    return StreamingResponse(io.BytesIO(speech.content), media_type="audio/mp3")

	@app.post("/chat")
	async def chat_with_personality(chat_message: ChatMessage):
	    """Answer a user message in the voice of a podcast speaker.

	    Args:
	        chat_message: Body carrying the ``speaker`` to impersonate, the
	            user's ``message`` and the podcast ``transcript`` (only its
	            first 2000 characters are placed in the system prompt).

	    Returns:
	        ``{"response": <reply text>}``; the text is empty if the model
	        answers without any text content.

	    Raises:
	        Any exception raised by the OpenAI client propagates to the caller.
	    """
	    prompt = f"You are roleplaying as {chat_message.speaker}, a podcast guest. Respond to the user's message in character, based on the content of the podcast. Here's the full transcript for context: {chat_message.transcript[:2000]}"

	    response = openai_client.chat.completions.create(
	        model="gpt-4o",
	        messages=[
	            {"role": "system", "content": prompt},
	            {"role": "user", "content": chat_message.message},
	        ],
	    )

	    # The message content is optional in the API response; guard so a
	    # missing value does not raise AttributeError on .strip().
	    reply = response.choices[0].message.content
	    return {"response": (reply or "").strip()}


	# hello