File size: 4,051 Bytes
ab41f96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import gradio as gr
from youtube_transcript_api import YouTubeTranscriptApi
import openai
from urllib.parse import urlparse, parse_qs
import re
import os
import logging

# Root logger setup: records at DEBUG and above are appended to app.log.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(name)s - %(levelname)s - %(message)s',
    filename='app.log',
    filemode='a',
)


def get_video_id_from_url(url):
    """
    Extracts the video ID from a YouTube URL.

    Recognised URL shapes:
      * watch URLs carrying the ID in the ``v`` query parameter
        (``https://www.youtube.com/watch?v=<id>``) -- checked first, on any host;
      * short links (``https://youtu.be/<id>``);
      * path-based YouTube URLs (``/embed/<id>``, ``/shorts/<id>``,
        ``/live/<id>``, ``/v/<id>``).

    Surrounding whitespace is ignored and a missing scheme
    (``youtu.be/<id>``) is tolerated.

    Args:
        url: The URL text to inspect.

    Returns:
        The video ID as a string, or None when the input is not a string,
        cannot be parsed, or contains no recognisable video ID.
    """
    # Path prefixes whose *next* path segment is the video ID.
    id_path_prefixes = ("embed", "shorts", "live", "v")
    youtube_hosts = ("youtube.com", "youtube-nocookie.com")

    try:
        if not isinstance(url, str):
            logging.warning("No video ID found in URL: %s", url)
            return None

        # Pasted URLs frequently carry stray spaces/newlines that would
        # otherwise end up inside the extracted ID.
        text = url.strip()
        parsed = urlparse(text)

        # Preferred source: the ``v`` query parameter of a watch URL.
        candidates = parse_qs(parsed.query).get("v")
        video_id = candidates[0] if candidates else None

        if video_id is None:
            # Without a scheme urlparse puts the host into the path, so
            # re-parse scheme-less input as a network-location reference.
            if not parsed.scheme and not parsed.netloc:
                parsed = urlparse("//" + text)

            host = (parsed.hostname or "").lower()
            segments = [part for part in parsed.path.split("/") if part]

            if host in ("youtu.be", "www.youtu.be"):
                if segments:
                    video_id = segments[0]
            elif host in youtube_hosts or host.endswith(
                    tuple("." + name for name in youtube_hosts)):
                if len(segments) >= 2 and segments[0] in id_path_prefixes:
                    video_id = segments[1]

        if video_id:
            logging.info("Video ID %s extracted from URL.", video_id)
            return video_id

        logging.warning("No video ID found in URL: %s", url)
        return None
    except Exception as e:
        # Malformed input (e.g. an invalid port or IPv6 literal) must never
        # propagate to the UI; report it as "no ID".
        logging.error("Error extracting video ID from URL %s: %s", url, e)
        return None


def get_transcript_data_and_pause_count(video_id):
    """
    Retrieves the English transcript for the given video ID and summarises it.

    Each transcript segment is read through its ``'text'``, ``'start'`` and
    ``'duration'`` keys (times presumably in seconds -- confirm against the
    transcript library). A pause is counted whenever a segment starts strictly
    after the previous segment's ``start + duration``.

    Args:
        video_id: The YouTube video ID whose transcript should be fetched.

    Returns:
        A tuple ``(full_transcript, total_duration, pauses)`` where
        ``full_transcript`` is all segment texts joined by single spaces,
        ``total_duration`` is the end time of the last segment floor-divided
        by 60, and ``pauses`` is the number of gaps between consecutive
        segments. ``(None, None, None)`` is returned when the transcript is
        empty or when retrieval/processing fails, so callers can always
        unpack the result.
    """
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])

        if not transcript:
            # Previously this path fell off the end of the function and
            # returned a bare None, which broke tuple unpacking in callers.
            logging.warning(f"Transcript for video ID {video_id} is empty.")
            return None, None, None

        last_segment = transcript[-1]
        total_duration = last_segment['start'] + last_segment['duration']

        # Estimate the number of pauses: compare each segment's start with
        # the end of the segment immediately before it.
        pauses = sum(
            1
            for previous, current in zip(transcript, transcript[1:])
            if current['start'] > previous['start'] + previous['duration']
        )

        full_transcript = " ".join(segment['text'] for segment in transcript)
        logging.info(f"Transcript retrieved successfully for video ID {video_id}.")
        return full_transcript, total_duration // 60, pauses
    except Exception as e:
        # Any failure (network, missing captions, malformed segment) is
        # reported to the caller as the all-None tuple.
        logging.error(f"Failed to retrieve transcript for video ID {video_id}. Error: {e}")
        return None, None, None

def analyze_transcript(url):
    """
    Evaluates a YouTube video's transcript with GPT and returns the verdict.

    Steps: read the prompt template from ``prompt.txt``, extract the video ID
    from ``url``, fetch the transcript summary (full text, duration, pause
    count), fill the template with those three values in that positional
    order, and send the result to the chat completions API.

    Args:
        url: The YouTube video URL entered in the UI.

    Returns:
        The model's response text on success; otherwise a human-readable
        error message. This function always returns a string and never
        raises, because its return value is displayed directly in the
        output text box.
    """
    try:
        # Explicit encoding so the template reads the same on every platform.
        with open('prompt.txt', 'r', encoding='utf-8') as file:
            prompt_template = file.read()
    except Exception as e:
        logging.error(f"Error opening or reading from 'prompt.txt': {e}")
        return "Error processing the prompt file."

    try:
        video_id = get_video_id_from_url(url)
        if not video_id:
            logging.error("Invalid URL provided.")
            return "Invalid URL. Please enter a valid YouTube video URL."

        transcript_data = get_transcript_data_and_pause_count(video_id)

        # Treat both a missing result and an all-None tuple as failure, and
        # tell the user about it (the old code returned the None pause count,
        # which rendered as an empty output box).
        if not transcript_data or transcript_data[0] is None:
            logging.error("Error retrieving the transcript.")
            return ("Could not retrieve an English transcript for this video. "
                    "Please check that the video exists and has captions.")

        full_transcript, total_duration, pauses = transcript_data

        # Fill the rubric prompt: {0}=transcript, {1}=pauses, {2}=duration.
        prompt = prompt_template.format(full_transcript, pauses, total_duration)

        # OpenAI client object API; the key is read from the 'OpenAIKey'
        # environment variable.
        client = openai.OpenAI(api_key=os.getenv('OpenAIKey'))
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt}
            ],
        )

        content = response.choices[0].message.content
        if content is None:
            # The API may return a message without text content.
            logging.error("The model returned an empty response.")
            return "The model returned an empty response. Please try again."
        return content.strip()
    except Exception as e:
        logging.error(f"An error occurred during the analysis: {e}")
        return f"An error occurred during the processing. {e}"


# Gradio UI: one text box for the video URL in, one text box for the
# recommendation out, wired to analyze_transcript.
iface = gr.Interface(
    description="This app evaluates a YouTube video interview transcript against a specific rubric to recommend if the person should receive an interview.",
    fn=analyze_transcript,
    inputs=gr.Textbox(label="Enter YouTube Video URL"),
    outputs=gr.Textbox(label="Interview Recommendation"),
)

# Start serving the interface.
iface.launch()