import os
from pydub import AudioSegment
import openai
from openai import OpenAI
import feedparser
from pathlib import Path
import wikipedia
import json
import streamlit as st
import requests

# Module-level OpenAI client shared by every function in this file that calls
# the API (transcription and chat completions). It is constructed with no
# arguments, so credentials and endpoint come from the SDK's defaults
# (presumably the OPENAI_API_KEY environment variable — TODO confirm for the
# deployment environment).
client = OpenAI()

# def load_whisper_api(audio):

#     '''Transcribe YT audio to text using Open AI API'''

#     import openai
#     file = open(audio, "rb")
#     transcript = openai.Audio.translate("whisper-1", file)

#     return transcript

@st.cache_data
def _transcribe_audio_cached(_audio, content_digest):
    """Send one audio file to the Whisper API and return the transcript text.

    Args:
        _audio: Path of the audio file to upload. The leading underscore tells
            Streamlit not to include it in the cache key.
        content_digest: Hex digest of the file's bytes. It is not used in the
            body; it exists purely so the cache is keyed on the audio content
            rather than on the file name.

    Returns:
        Whatever the API returns for ``response_format="text"``.
    """
    # The context manager closes the handle even if the API call raises.
    with open(_audio, "rb") as audio_file:
        return client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
            response_format="text",
        )


def load_whisper_api(audio):
    """Transcribe an audio file to text using the OpenAI Whisper API.

    Results are cached by the file's *content*, not its path. Callers in this
    file reuse fixed file names (e.g. ``podcast_episode.mp3``) for different
    episodes, so a path-keyed cache would hand back the transcript of whichever
    episode was stored under that name first.

    Args:
        audio: Path (``str`` or ``os.PathLike``) of the audio file.

    Returns:
        The transcript as returned by the API in ``text`` format.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    import hashlib

    digest = hashlib.sha256()
    with open(audio, "rb") as audio_file:
        # Hash in 1 MiB blocks so large files are not loaded into memory at once.
        for block in iter(lambda: audio_file.read(1024 * 1024), b""):
            digest.update(block)

    return _transcribe_audio_cached(audio, digest.hexdigest())

@st.cache_data
def get_transcribe_podcast(rss_url, local_path='/data/'):
    """Download a podcast episode and return its transcript as text.

    Despite the parameter name, ``rss_url`` is fetched directly and the response
    body is saved as the MP3 file; no RSS feed is parsed here.

    Files up to 24 MiB are sent to Whisper in a single request. Larger files
    are split into 20-minute MP3 chunks that are transcribed one by one and
    joined with single spaces.

    Args:
        rss_url: URL whose response body is the episode audio (MP3).
        local_path: Existing directory in which ``podcast_episode.mp3`` is
            written. The directory is not created by this function.

    Returns:
        The transcript text.

    Raises:
        requests.RequestException: If the download fails, times out or returns
            an HTTP error status.
        OSError: If the episode file cannot be written.
    """
    import tempfile

    # Stay below the API's upload ceiling with some headroom; this matches the
    # "24mb" figure shown to the user below.
    max_upload_bytes = 24 * 1024 * 1024
    # PyDub handles time in milliseconds
    chunk_length_ms = 20 * 60 * 1000
    # (connect, read) seconds; the read timeout applies per socket read, not to
    # the whole download, so long episodes are still fine.
    download_timeout = (10, 60)

    st.info("Starting Podcast Transcription Function...")
    print("Feed URL: ", rss_url)
    print("Local Path:", local_path)

    episode_path = Path(local_path).joinpath("podcast_episode.mp3")

    st.info("Downloading the podcast episode...")

    with requests.get(rss_url, stream=True, timeout=download_timeout) as r:
        # Check the status before opening the target so a failed request does
        # not truncate a previously downloaded file.
        r.raise_for_status()
        print(f'episode path {episode_path}')

        with open(episode_path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)

    st.info("Podcast Episode downloaded")

    # Perform the transcription
    st.info("Starting podcast transcription")

    size_bytes = os.path.getsize(episode_path)
    audio_size = round(size_bytes / (1024 * 1024), 1)
    print(f'audio size: {audio_size}')

    # Compare exact byte counts: a rounded megabyte value lets files slightly
    # over the threshold slip through.
    if size_bytes <= max_upload_bytes:
        results = load_whisper_api(episode_path)
    else:
        st.info('File size larger than 24mb, applying chunking and transcription')

        song = AudioSegment.from_file(episode_path, format='mp3')
        transcriptions = []

        # Chunks live in a throwaway directory so they neither depend on the
        # current working directory being writable nor accumulate on disk.
        with tempfile.TemporaryDirectory() as chunk_dir:
            for i, chunk in enumerate(song[::chunk_length_ms]):
                chunk_path = os.path.join(chunk_dir, f'chunk_{i}.mp3')
                # export() returns the open output handle; close it so the data
                # is flushed and the directory can be removed afterwards.
                chunk.export(chunk_path, format='mp3').close()
                transcriptions.append(load_whisper_api(chunk_path))

        # Join with spaces: a comma at every chunk boundary would insert
        # punctuation that was never spoken.
        results = ' '.join(text.strip() for text in transcriptions)

    # Return the transcribed text
    st.info("Podcast transcription completed, returning results...")

    return results

@st.cache_data
def get_podcast_summary(podcast_transcript):
    """Ask the chat model for a bullet-point summary of a podcast transcript.

    The summary instructions are prepended to the transcript and sent as a
    single user message, alongside a short system message.

    Args:
        podcast_transcript: Transcript text to summarize.

    Returns:
        The content of the first choice in the model's reply.
    """
    # Spelled out line by line so the whitespace that is part of the prompt
    # (leading indents, trailing spaces) is explicit.
    summary_instructions = (
        "\n"
        "    You are a podcast analyst and your main task is to summarize the key and important points of\n"
        "    the podcast for a busy professional by highlighting the main and important points\n"
        "    to ensure the professional has a sufficient summary of the podcast. Include any questions you consider important or \n"
        "    any points that warrant further investigation.\n"
        "    \n"
        "    Please use bulletpoints.\n"
        "    \n"
        "    "
    )

    conversation = [
        {"role": "system", "content": "You are a helpful podcast analyzer assistant"},
        {"role": "user", "content": summary_instructions + podcast_transcript},
    ]

    response = client.chat.completions.create(
        model="gpt-4-turbo-preview",
        messages=conversation,
    )

    return response.choices[0].message.content

@st.cache_data
def get_podcast_guest(podcast_transcript):
    """Extract the guest's name, organization and title from a transcript.

    The transcript is sent as the only user message and the model is forced to
    answer through the ``get_podcast_guest_information`` function schema; the
    JSON arguments of that call are then parsed.

    Args:
        podcast_transcript: Transcript text to analyze.

    Returns:
        A ``(guest_name, guest_organization, guest_title)`` tuple of strings.
        Any field the model does not supply (only ``guest_name`` is required by
        the schema) is returned as ``""`` rather than ``None``, so all three
        values can be used as strings.

    Raises:
        json.JSONDecodeError: If the model's function-call arguments are not
            valid JSON.
    """
    guest_fields = ("guest_name", "guest_organization", "guest_title")

    guest_function = {
        "name": "get_podcast_guest_information",
        "description": "Get information on the podcast guest using their full name and the name of the organization they are part of to search for them on Wikipedia or Google",
        "parameters": {
            "type": "object",
            "properties": {
                "guest_name": {
                    "type": "string",
                    "description": "The full name of the guest who is being interviewed in the podcast",
                },
                "guest_organization": {
                    "type": "string",
                    "description": "The name or details of the organization that the podcast guest belongs to, works for or runs",
                },
                "guest_title": {
                    "type": "string",
                    "description": "The title, designation or role the podcast guest holds or type of work that the podcast guest in the organization does",
                },
            },
            "required": ["guest_name"],
        },
    }

    completion = client.chat.completions.create(
        model="gpt-4-turbo-preview",
        messages=[{"role": "user", "content": podcast_transcript}],
        functions=[guest_function],
        function_call={"name": "get_podcast_guest_information"},
    )

    response_message = completion.choices[0].message.function_call

    print(f'func res: {response_message}')

    if not response_message:
        return ("", "", "")

    function_args = json.loads(response_message.arguments)

    # `or ""` covers both a missing key and an explicit null from the model.
    return tuple(function_args.get(field) or "" for field in guest_fields)

@st.cache_data
def get_podcast_highlights(podcast_transcript):
    """Ask the chat model to pick out key moments from a podcast transcript.

    Args:
        podcast_transcript: Transcript text to analyze.

    Returns:
        The content of the first choice in the model's reply.
    """
    instructPrompt = """
    Extract some key moments in the podcast. These are typically interesting insights from the guest or critical questions that the host might have put forward. It could also be a discussion on a hot topic or controversial opinion
"""
    request = instructPrompt + podcast_transcript

    # Send the instructions together with the transcript. Passing the bare
    # transcript gives the model no indication that highlights are wanted.
    chatOutput = client.chat.completions.create(
        model="gpt-4-turbo-preview",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": request},
        ],
    )

    return chatOutput.choices[0].message.content

@st.cache_data
def process_podcast(url, path='/data/'):
    """Run the full podcast pipeline and collect the results in a dict.

    The episode is transcribed first; the transcript is then passed, in order,
    to the summary, guest-extraction and highlights helpers.

    Args:
        url: Forwarded to ``get_transcribe_podcast`` as the episode URL.
        path: Forwarded to ``get_transcribe_podcast`` as the download directory.

    Returns:
        A dict with the keys ``podcast_details`` (the transcript),
        ``podcast_summary``, ``podcast_guest``, ``podcast_guest_org``,
        ``podcast_guest_title`` and ``podcast_highlights``.
    """
    transcript = get_transcribe_podcast(url, path)
    summary = get_podcast_summary(transcript)
    # get_podcast_guest returns a (name, organization, title) triple.
    guest_name, guest_org, guest_title = get_podcast_guest(transcript)
    highlights = get_podcast_highlights(transcript)

    return {
        'podcast_details': transcript,
        'podcast_summary': summary,
        'podcast_guest': guest_name,
        'podcast_guest_org': guest_org,
        'podcast_guest_title': guest_title,
        'podcast_highlights': highlights,
    }