Spaces:
Runtime error
Runtime error
File size: 4,051 Bytes
ab41f96 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
import gradio as gr
from youtube_transcript_api import YouTubeTranscriptApi
import openai
from urllib.parse import urlparse, parse_qs
import re
import os
import logging
# Route every record at DEBUG level and above to app.log, appending to the
# file across runs instead of truncating it.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(name)s - %(levelname)s - %(message)s',
    filename='app.log',
    filemode='a',
)
def get_video_id_from_url(url):
    """
    Extracts the video ID from a YouTube URL.

    Recognised forms, checked in this order:

    * a ``v`` query parameter, e.g. ``https://www.youtube.com/watch?v=<id>``
      (accepted on any host);
    * short links, ``https://youtu.be/<id>``;
    * path-based links on a YouTube host: ``/embed/<id>``, ``/shorts/<id>``,
      ``/live/<id>`` and ``/v/<id>``.

    Surrounding whitespace is ignored and a missing scheme is tolerated
    (``youtu.be/<id>`` works as well as ``https://youtu.be/<id>``).

    Args:
        url: The URL text to inspect.

    Returns:
        The video ID as a string, or None when the input is empty, is not a
        string, cannot be parsed, or holds no recognisable video ID. This
        function does not raise.
    """
    if not isinstance(url, str) or not url.strip():
        logging.warning(f"No video ID found in URL: {url}")
        return None

    cleaned = url.strip()
    video_id = None
    try:
        url_data = urlparse(cleaned)
        if not url_data.scheme and not url_data.netloc:
            # Without a scheme the host ends up inside the path; re-parse as
            # a network location so the host-based rules below can apply.
            url_data = urlparse(f"//{cleaned}")

        candidates = parse_qs(url_data.query).get("v")
        if candidates:
            video_id = candidates[0]
        else:
            host = (url_data.hostname or "").lower()
            if host.startswith("www."):
                host = host[len("www."):]
            segments = [part for part in url_data.path.split("/") if part]

            if host == "youtu.be":
                # Short link: the ID is the first path segment.
                if segments:
                    video_id = segments[0]
            elif (
                host == "youtube.com"
                or host.endswith(".youtube.com")
                or host == "youtube-nocookie.com"
            ):
                # Path-based link: /<kind>/<id>
                if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
                    video_id = segments[1]
    except ValueError as e:
        # urlparse rejects malformed network locations (e.g. an unclosed
        # IPv6 bracket) with ValueError.
        logging.error(f"Error extracting video ID from URL {url}: {e}")
        return None

    if video_id:
        logging.info(f"Video ID {video_id} extracted from URL.")
        return video_id

    logging.warning(f"No video ID found in URL: {url}")
    return None
def get_transcript_data_and_pause_count(video_id):
    """
    Retrieves the English transcript for the given video ID and summarises it.

    Each transcript segment is read as a mapping with ``'text'``, ``'start'``
    and ``'duration'`` keys.

    Args:
        video_id: The video ID passed to
            ``YouTubeTranscriptApi.get_transcript``.

    Returns:
        A 3-tuple ``(full_transcript, total_duration, pauses)``:

        * ``full_transcript``: every segment's text joined by single spaces;
        * ``total_duration``: end of the last segment (``start + duration``)
          floor-divided by 60 (presumably seconds to whole minutes; confirm
          the unit against the transcript library);
        * ``pauses``: the number of adjacent segment pairs in which the later
          segment starts after the earlier one has ended.

        ``(None, None, None)`` is returned when the transcript cannot be
        retrieved, is empty, or is malformed. This function does not raise.
    """
    try:
        # NOTE(review): newer youtube-transcript-api releases appear to
        # replace this static call with an instance-level fetch(); confirm
        # against the version this app pins.
        transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])

        if not transcript:
            # Always hand back a 3-tuple: the caller unpacks the result, so a
            # bare None here would surface as a TypeError there.
            logging.warning(f"Transcript for video ID {video_id} is empty.")
            return None, None, None

        last_segment = transcript[-1]
        total_duration = last_segment['start'] + last_segment['duration']

        # Estimate the number of pauses: a gap exists wherever a segment
        # begins after the previous segment's end time.
        pauses = sum(
            1
            for previous, current in zip(transcript, transcript[1:])
            if current['start'] > previous['start'] + previous['duration']
        )

        full_transcript = " ".join(segment['text'] for segment in transcript)
        logging.info(f"Transcript retrieved successfully for video ID {video_id}.")
        return full_transcript, total_duration // 60, pauses
    except Exception as e:
        # Boundary handler: the transcript client can fail in many ways
        # (captions disabled, no English track, network errors), and callers
        # rely on the None-triple instead of an exception.
        logging.error(f"Failed to retrieve transcript for video ID {video_id}. Error: {e}")
        return None, None, None
def analyze_transcript(url):
    """
    Evaluates a YouTube video's transcript with GPT-4 and returns the reply.

    Steps, in order:

    1. read the prompt template from ``prompt.txt`` in the working directory;
    2. extract the video ID from ``url``;
    3. fetch the transcript text, total duration and pause count;
    4. fill the template's positional placeholders with
       ``(full_transcript, pauses, total_duration)``;
    5. send the filled prompt to the OpenAI chat completions API (model
       ``gpt-4``, API key read from the ``OpenAIKey`` environment variable).

    Args:
        url: The YouTube video URL entered by the user.

    Returns:
        A string in every case: the model's reply with surrounding
        whitespace stripped, or a human-readable error message if any step
        fails.
    """
    try:
        with open('prompt.txt', 'r', encoding='utf-8') as file:
            prompt = file.read()
    except (OSError, UnicodeError) as e:
        logging.error(f"Error opening or reading from 'prompt.txt': {e}")
        return "Error processing the prompt file."

    try:
        video_id = get_video_id_from_url(url)
        if not video_id:
            logging.error("Invalid URL provided.")
            return "Invalid URL. Please enter a valid YouTube video URL."

        full_transcript, total_duration, pauses = get_transcript_data_and_pause_count(
            video_id)
        if full_transcript is None:  # If there was an error retrieving the transcript
            logging.error("Error retrieving the transcript.")
            # Return a message, not the (None) pause count, so the output
            # box tells the user what went wrong.
            return ("Error retrieving the transcript. "
                    "Please check that the video has English captions.")

        # Fill the rubric prompt; argument order must match the template's
        # placeholders: transcript, pauses, duration.
        prompt = prompt.format(full_transcript, pauses, total_duration)

        # Using the new OpenAI client structure
        client = openai.OpenAI(api_key=os.getenv('OpenAIKey'))
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt},
            ],
        )
        decision = response.choices[0].message.content.strip()
        return decision
    except Exception as e:
        # Top-level boundary for the UI callback: record the traceback and
        # report the failure as text rather than letting it propagate.
        logging.exception(f"An error occurred during the analysis: {e}")
        return f"An error occurred during the processing. {e}"
# Gradio interface: a single text box in (the video URL) and a single text
# box out (whatever analyze_transcript returns).
url_input = gr.Textbox(label="Enter YouTube Video URL")
recommendation_output = gr.Textbox(label="Interview Recommendation")

iface = gr.Interface(
    fn=analyze_transcript,
    inputs=url_input,
    outputs=recommendation_output,
    description="This app evaluates a YouTube video interview transcript against a specific rubric to recommend if the person should receive an interview.",
)

# Launch the app as soon as this module is executed.
iface.launch()
|