# Page-banner residue captured with the code (not part of the program):
# Spaces status at capture time: Runtime error
import gradio as gr | |
from youtube_transcript_api import YouTubeTranscriptApi | |
import openai | |
from urllib.parse import urlparse, parse_qs | |
import re | |
import os | |
import logging | |
# Append every record at DEBUG level and above to app.log, formatted as
# "<logger name> - <LEVEL> - <message>".
logging.basicConfig(
    level=logging.DEBUG,
    filename='app.log',
    filemode='a',
    format='%(name)s - %(levelname)s - %(message)s',
)
def get_video_id_from_url(url):
    """
    Extracts the video ID from a YouTube URL.

    Recognised forms:
      * any URL carrying a ``v`` query parameter (e.g. ``/watch?v=<id>``);
      * ``youtu.be/<id>`` short links;
      * ``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>`` and ``/v/<id>``
        paths on youtube.com / youtube-nocookie.com hosts.

    Args:
        url: URL text as entered by the user. Surrounding whitespace is
            ignored and a missing scheme is tolerated.

    Returns:
        The video ID string, or None when `url` is not a string, is blank,
        cannot be parsed, or contains no recognisable video ID.
    """
    if not isinstance(url, str) or not url.strip():
        logging.warning(f"No video ID found in URL: {url}")
        return None

    text = url.strip()
    video_id = None
    try:
        url_data = urlparse(text)
        if not url_data.scheme and not url_data.netloc:
            # Scheme-less input such as "youtu.be/<id>" parses as a bare
            # path; re-parse it as a network-path reference so the host
            # is recognised.
            url_data = urlparse("//" + text)

        query_ids = parse_qs(url_data.query).get("v")
        if query_ids:
            # The "v" parameter wins regardless of host.
            video_id = query_ids[0]
        else:
            host = (url_data.hostname or "").lower()
            segments = [part for part in url_data.path.split("/") if part]
            on_youtube = (
                host in ("youtube.com", "youtube-nocookie.com")
                or host.endswith((".youtube.com", ".youtube-nocookie.com"))
            )
            if host in ("youtu.be", "www.youtu.be"):
                if segments:
                    video_id = segments[0]
            elif (
                on_youtube
                and len(segments) >= 2
                and segments[0] in ("embed", "shorts", "live", "v")
            ):
                video_id = segments[1]
    except ValueError as e:
        # urlparse / parse_qs signal malformed input with ValueError.
        logging.error(f"Error extracting video ID from URL {url}: {e}")
        return None

    if video_id:
        logging.info(f"Video ID {video_id} extracted from URL.")
        return video_id

    logging.warning(f"No video ID found in URL: {url}")
    return None
def get_transcript_data_and_pause_count(video_id):
    """
    Retrieves the English transcript for the given video ID, calculates the
    total duration, and estimates the number of pauses.

    Args:
        video_id: YouTube video ID passed to
            YouTubeTranscriptApi.get_transcript.

    Returns:
        A 3-tuple ``(full_transcript, total_duration, pauses)``:
          * full_transcript: every segment's text joined with single spaces;
          * total_duration: end of the last segment (start + duration)
            floor-divided by 60 -- i.e. whole minutes, assuming the API
            reports times in seconds (TODO confirm);
          * pauses: number of consecutive segment pairs in which the later
            segment starts after the earlier one has ended.
        ``(None, None, None)`` is returned when the transcript is empty or
        any error occurs, so callers can always unpack three values.
    """
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
        if not transcript:
            # Previously this path fell off the end and returned a bare None,
            # which broke the caller's 3-way unpacking.
            logging.warning(f"Transcript for video ID {video_id} is empty.")
            return None, None, None

        last_segment = transcript[-1]
        total_duration = last_segment['start'] + last_segment['duration']

        # Estimate the number of pauses: a gap exists wherever a segment
        # begins strictly after the previous segment's end time.
        pauses = sum(
            1
            for previous, current in zip(transcript, transcript[1:])
            if current['start'] > previous['start'] + previous['duration']
        )

        full_transcript = " ".join(segment['text'] for segment in transcript)
        logging.info(f"Transcript retrieved successfully for video ID {video_id}.")
        return full_transcript, total_duration // 60, pauses
    except Exception as e:
        # Boundary with a third-party API: any failure (network, missing
        # captions, malformed segment) is reported as "no transcript".
        logging.error(f"Failed to retrieve transcript for video ID {video_id}. Error: {e}")
        return None, None, None
def analyze_transcript(url):
    """
    Evaluates a YouTube video's transcript with GPT-4 and returns the model's
    reply as text.

    The prompt template is read from 'prompt.txt' and filled positionally
    with ``str.format``: ``{0}`` = full transcript, ``{1}`` = estimated pause
    count, ``{2}`` = total duration as returned by
    get_transcript_data_and_pause_count.

    Args:
        url: YouTube video URL entered by the user.

    Returns:
        The model's stripped reply on success; otherwise a human-readable
        error message. This function always returns a string and never
        raises, so it can be wired directly to a text output.
    """
    try:
        with open('prompt.txt', 'r', encoding='utf-8') as file:
            prompt = file.read()
    except (OSError, UnicodeError) as e:
        logging.error(f"Error opening or reading from 'prompt.txt': {e}")
        return "Error processing the prompt file."

    try:
        video_id = get_video_id_from_url(url)
        if not video_id:
            logging.error("Invalid URL provided.")
            return "Invalid URL. Please enter a valid YouTube video URL."

        full_transcript, total_duration, pauses = get_transcript_data_and_pause_count(
            video_id)
        if full_transcript is None:  # If there was an error retrieving the transcript
            logging.error("Error retrieving the transcript.")
            # Return a message rather than the (None) pause count, which
            # left the user with an empty result and no explanation.
            return ("Could not retrieve an English transcript for this video. "
                    "Please check that the video exists and has captions.")

        # Define the prompt for GPT evaluation based on the rubric
        prompt = prompt.format(full_transcript, pauses, total_duration)

        # Using the new OpenAI client structure; the key is read from the
        # 'OpenAIKey' environment variable.
        client = openai.OpenAI(api_key=os.getenv('OpenAIKey'))
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt},
            ],
        )

        content = response.choices[0].message.content
        if content is None:
            # Avoid an opaque AttributeError from calling .strip() on None.
            logging.error("The model returned no content.")
            return "The model returned an empty response. Please try again."
        return content.strip()
    except Exception as e:
        # Top-level boundary for the UI callback: report instead of crashing.
        logging.error(f"An error occurred during the analysis: {e}")
        return f"An error occurred during the processing. {e}"
# Gradio interface: a single text box takes the video URL, analyze_transcript
# processes it, and the returned text is shown in a second text box.
_url_box = gr.Textbox(label="Enter YouTube Video URL")
_recommendation_box = gr.Textbox(label="Interview Recommendation")

iface = gr.Interface(
    fn=analyze_transcript,
    inputs=_url_box,
    outputs=_recommendation_box,
    description=(
        "This app evaluates a YouTube video interview transcript against a "
        "specific rubric to recommend if the person should receive an interview."
    ),
)

# Start serving the app.
iface.launch()