"""Gradio app that evaluates a YouTube interview video from its transcript.

The transcript is fetched for the given video URL, a rough pause count and
duration are derived from the transcript segments, and the result is sent to
an OpenAI chat model using the template stored in ``prompt.txt``.
"""

import logging
import os
import re
from urllib.parse import parse_qs, urlparse

import gradio as gr
import openai
from youtube_transcript_api import YouTubeTranscriptApi

logging.basicConfig(
    filename='app.log',
    filemode='a',
    format='%(name)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)

# Matches path-style video links such as /embed/<id>, /shorts/<id>,
# /live/<id> and /v/<id>; group 1 is the video ID.
_PATH_VIDEO_ID_RE = re.compile(r"^/(?:embed|shorts|live|v)/([^/?#]+)")

# Hosts on which a path-style video ID is accepted (subdomains included).
_YOUTUBE_HOSTS = ("youtube.com", "youtube-nocookie.com")


def _is_host_or_subdomain(host, domain):
    """Return True if *host* equals *domain* or is one of its subdomains."""
    return host == domain or host.endswith("." + domain)


def get_video_id_from_url(url):
    """
    Extracts the video ID from the YouTube URL.

    The ``v`` query parameter is used when present (on any host, as before).
    Otherwise the ID is taken from the path of ``youtu.be/<id>`` short links
    and of ``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>`` and ``/v/<id>``
    links on youtube.com / youtube-nocookie.com.

    Returns the video ID string, or None when no ID can be found or the URL
    cannot be parsed. Failures are logged, never raised.
    """
    try:
        url_data = urlparse((url or "").strip())

        video_id = None
        query_ids = parse_qs(url_data.query).get("v")
        if query_ids:
            video_id = query_ids[0]
        else:
            host = (url_data.hostname or "").lower()
            if _is_host_or_subdomain(host, "youtu.be"):
                # Short links carry the ID as the first path segment.
                video_id = url_data.path.lstrip("/").split("/")[0]
            elif any(_is_host_or_subdomain(host, d) for d in _YOUTUBE_HOSTS):
                match = _PATH_VIDEO_ID_RE.match(url_data.path)
                if match:
                    video_id = match.group(1)

        if video_id:
            logging.info("Video ID %s extracted from URL.", video_id)
            return video_id

        logging.warning("No video ID found in URL: %s", url)
        return None
    except Exception as e:
        logging.error("Error extracting video ID from URL %s: %s", url, e)
        return None


def get_transcript_data_and_pause_count(video_id):
    """
    Retrieves the transcript for the given video ID, calculates the total
    duration, and estimates the number of pauses.

    A pause is counted whenever a segment starts after the previous segment's
    ``start + duration``.

    Returns a tuple ``(full_transcript, total_duration, pauses)`` where
    ``total_duration`` is the end time of the last segment floor-divided by 60
    (presumably seconds to whole minutes; confirm against the transcript
    API). Returns ``(None, None, None)`` when the transcript is empty or
    cannot be retrieved or processed.
    """
    try:
        transcript = YouTubeTranscriptApi.get_transcript(
            video_id, languages=['en'])

        if not transcript:
            # Previously this fell through and returned a bare None, which
            # broke the caller's three-way unpacking.
            logging.warning(
                "Transcript for video ID %s is empty.", video_id)
            return None, None, None

        last_segment = transcript[-1]
        total_duration = last_segment['start'] + last_segment['duration']

        # Estimate the number of pauses: gaps between consecutive segments.
        pauses = sum(
            1
            for previous, current in zip(transcript, transcript[1:])
            if current['start'] > previous['start'] + previous['duration']
        )

        full_transcript = " ".join(segment['text'] for segment in transcript)
        logging.info(
            "Transcript retrieved successfully for video ID %s.", video_id)
        return full_transcript, total_duration // 60, pauses
    except Exception as e:
        logging.error(
            "Failed to retrieve transcript for video ID %s. Error: %s",
            video_id, e)
        return None, None, None


def analyze_transcript(url):
    """
    Analyzes the YouTube video transcript for total length and estimates the
    number of pauses, then asks the OpenAI chat model for an evaluation.

    The template read from ``prompt.txt`` is formatted with the transcript
    text, the pause count and the duration as positional arguments, in that
    order. The API key is read from the ``OpenAIKey`` environment variable.

    Always returns a string for display: either the model's answer or an
    error message. Exceptions are logged and reported, never raised.
    """
    try:
        with open('prompt.txt', 'r', encoding='utf-8') as file:
            prompt = file.read()
    except Exception as e:
        logging.error("Error opening or reading from 'prompt.txt': %s", e)
        return "Error processing the prompt file."

    try:
        video_id = get_video_id_from_url(url)
        if not video_id:
            logging.error("Invalid URL provided.")
            return "Invalid URL. Please enter a valid YouTube video URL."

        full_transcript, total_duration, pauses = (
            get_transcript_data_and_pause_count(video_id))

        if full_transcript is None:
            # Transcript retrieval failed; report it instead of returning
            # the (None) pause count, which left the output box empty.
            logging.error("Error retrieving the transcript.")
            return (
                "Could not retrieve an English transcript for this video. "
                "Please check that the video exists and has captions."
            )

        # Define the prompt for GPT evaluation based on the rubric
        prompt = prompt.format(full_transcript, pauses, total_duration)

        # Using the new OpenAI client structure
        client = openai.OpenAI(api_key=os.getenv('OpenAIKey'))
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt},
            ],
        )

        decision = response.choices[0].message.content.strip()
        return decision
    except Exception as e:
        logging.error("An error occurred during the analysis: %s", e)
        return f"An error occurred during the processing. {e}"


# Gradio interface
iface = gr.Interface(
    fn=analyze_transcript,
    inputs=gr.Textbox(label="Enter YouTube Video URL"),
    outputs=gr.Textbox(label="Interview Recommendation"),
    description="This app evaluates a YouTube video interview transcript against a specific rubric to recommend if the person should receive an interview."
)

# Launch the app
iface.launch()