Spaces:

nickmuchi
/

PodcastGPT

Sleeping

App Files Files Community

nickmuchi committed on Feb 11, 2024

Commit

cc8b68a

verified ·

1 Parent(s): f616749

Create functions.py

Browse files

Files changed (1) hide show

functions.py +204 -0

functions.py ADDED Viewed

	@@ -0,0 +1,204 @@

+import os
+from pydub import AudioSegment
+import openai
+from openai import OpenAI
+import feedparser
+from pathlib import Path
+import wikipedia
+import json
+openai_audio = OpenAI()
+# def load_whisper_api(audio):
+#     '''Transcribe YT audio to text using Open AI API'''
+#     import openai
+#     file = open(audio, "rb")
+#     transcript = openai.Audio.translate("whisper-1", file)
+#     return transcript
+@st.cache_data
+def load_whisper_api(audio):
+    '''Transcribe YT audio to text using Open AI API'''
+    file = open(audio, "rb")
+    transcript = openai_audio.audio.transcriptions.create(model="whisper-1", file=file,response_format="text")
+    return transcript
+@st.cache_data
+def get_transcribe_podcast(rss_url, local_path):
+    st.info("Starting Podcast Transcription Function...")
+    print("Feed URL: ", rss_url)
+    print("Local Path:", local_path)
+    # Download the podcast episode by parsing the RSS feed
+    p = Path(local_path)
+    p.mkdir(exist_ok=True)
+    st.info("Downloading the podcast episode...")
+    with requests.get(rss_url, stream=True) as r:
+        r.raise_for_status()
+        episode_path = p.joinpath(episode_name)
+    with open(episode_path, 'wb') as f:
+        for chunk in r.iter_content(chunk_size=8192):
+            f.write(chunk)
+    st.info("Podcast Episode downloaded")
+    # Perform the transcription
+    st.info("Starting podcast transcription")
+    audio_file = local_path + episode_name
+    #Get size of audio file
+    audio_size = round(os.path.getsize(audio_file)/(1024*1024),1)
+  #Check if file is > 24mb, if not then use Whisper API
+    if audio_size <= 25:
+        #Use whisper API
+        results = load_whisper_api(audio_file)['text']
+    else:
+        st.info('File size larger than 24mb, applying chunking and transcription')
+        song = AudioSegment.from_file(audio_file, format='mp3')
+        # PyDub handles time in milliseconds
+        twenty_minutes = 20 * 60 * 1000
+        chunks = song[::twenty_minutes]
+        transcriptions = []
+        for i, chunk in enumerate(chunks):
+            chunk.export(f'chunk_{i}.mp3', format='mp3')
+            transcriptions.append(load_whisper_api(f'chunk_{i}.mp3')['text'])
+        results = ','.join(transcriptions)
+    # Return the transcribed text
+    st.info("Podcast transcription completed, returning results...")
+    return results
+@st.cache_data
+def get_podcast_summary(podcast_transcript):
+    instructPrompt = """
+    You are a podcast analyst and your main task is to summarize the key and important points of
+    the podcast for a busy professional by highlighting the main and important points
+    to ensure the professional has a sufficient summary of the podcast. Include any questions you consider important or
+    any points that warrant further investigation.
+    Please use bulletpoints.
+    """
+    request = instructPrompt + podcast_transcript
+    chatOutput = openai.ChatCompletion.create(model="gpt-3.5-turbo-16k",
+                                            messages=[{"role": "system", "content": "You are a helpful assistant."},
+                                                      {"role": "user", "content": request}
+                                                      ]
+                                            )
+    podcastSummary = chatOutput.choices[0].message.content
+    return podcastSummary
+@st.cache_data
+def get_podcast_guest(podcast_transcript):
+    '''Get guest name, professional title, organization name'''
+  completion = openai.ChatCompletion.create(
+    model="gpt-3.5-turbo-16k",
+    messages=[{"role": "user", "content": podcast_transcript}],
+    functions=[
+    {
+        "name": "get_podcast_guest_information",
+        "description": "Get information on the podcast guest using their full name and the name of the organization they are part of to search for them on Wikipedia or Google",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "guest_name": {
+                    "type": "string",
+                    "description": "The full name of the guest who is being interviewed in the podcast",
+                },
+                "guest_organization": {
+                    "type": "string",
+                    "description": "The name or details of the organization that the podcast guest belongs to, works for or runs",
+                },
+                "guest_title": {
+                    "type": "string",
+                    "description": "The title, designation or role the podcast guest holds or type of work that the podcast guest in the organization does",
+                },
+            },
+            "required": ["guest_name"],
+        },
+    }
+],
+    function_call={"name": "get_podcast_guest_information"}
+)
+  podcast_guest = ""
+  podcast_guest_org = ""
+  podcast_guest_title = ""
+  response_message = completion["choices"][0]["message"]
+  if response_message.get("function_call"):
+    function_name = response_message["function_call"]["name"]
+    function_args = json.loads(response_message["function_call"]["arguments"])
+    podcast_guest=function_args.get("guest_name")
+    podcast_guest_org=function_args.get("guest_organization")
+    podcast_guest_title=function_args.get("guest_title")
+  return (podcast_guest,podcast_guest_org,podcast_guest_title)
+@st.cache_data
+def get_podcast_highlights(podcast_transcript):
+    instructPrompt = """
+    Extract some key moments in the podcast. These are typically interesting insights from the guest or critical questions that the host might have put forward. It could also be a discussion on a hot topic or controversial opinion
+"""
+    request = instructPrompt + podcast_transcript
+    chatOutput = openai.ChatCompletion.create(model="gpt-3.5-turbo-16k",
+                                            messages=[{"role": "system", "content": "You are a helpful assistant."},
+                                                      {"role": "user", "content": podcast_transcript}
+                                                      ]
+                                            )
+    podcastHighlights = chatOutput.choices[0].message.content
+    return podcastHighlights
+@st.cache_data
+def process_podcast(url, path):
+    '''Get podcast transcription into json'''
+    output = {}
+    podcast_details = get_transcribe_podcast.call(url, path)
+    podcast_summary = get_podcast_summary.call(podcast_details)
+    podcast_guest_details = get_podcast_guest.call(podcast_details)
+    podcast_highlights = get_podcast_highlights.call(podcast_details)
+    output['podcast_details'] = podcast_details
+    output['podcast_summary'] = podcast_summary
+    output['podcast_guest'] = podcast_guest_details[0]
+    output['podcast_guest_org'] = podcast_guest_details[1]
+    output['podcast_guest_title'] = podcast_guest_details[2]
+    output['podcast_highlights'] = podcast_highlights
+    return output