File size: 2,592 Bytes
4edf1b3
 
 
 
1c0e7e6
4edf1b3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fafbbfd
4edf1b3
fc132ae
fafbbfd
 
 
4edf1b3
 
 
 
 
 
b5bc106
4edf1b3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fafbbfd
 
 
 
dc11b83
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import json
import re
import textwrap
import openai
import gradio as gr

def gpt3_completion(prompt, engine='text-davinci-002', temp=0.6, top_p=1.0, tokens=1000, freq_pen=0.25, pres_pen=0.0, stop=['<<END>>']):
    """Request one completion from the OpenAI Completion endpoint.

    Args:
        prompt: Text sent to the model.
        engine: Name of the completion engine to use.
        temp: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        tokens: Upper bound on the number of tokens generated.
        freq_pen: Frequency penalty.
        pres_pen: Presence penalty.
        stop: Sequence(s) at which generation stops. The default list is
            only read here, never mutated, so sharing it between calls is
            safe.

    Returns:
        The text of the first choice, stripped, with every run of
        whitespace (including newlines) collapsed to a single space.

    Any exception raised by the OpenAI client propagates to the caller.
    """
    # Forward the caller's sampling settings so that explicit arguments
    # actually influence the request.
    response = openai.Completion.create(
        engine=engine,
        prompt=prompt,
        temperature=temp,
        max_tokens=tokens,
        top_p=top_p,
        frequency_penalty=freq_pen,
        presence_penalty=pres_pen,
        stop=stop,
    )
    text = response['choices'][0]['text'].strip()
    # Raw string: '\s' in a normal literal is an invalid escape sequence.
    return re.sub(r'\s+', ' ', text)

# Cue timing pair. Accepts the WebVTT form "start --> end" as well as two
# timestamps separated only by blanks.
_CUE_TIMING = re.compile(
    r"\d\d:\d\d:\d\d\.\d\d\d[ \t]+(?:-->[ \t]+)?\d\d:\d\d:\d\d\.\d\d\d"
)


def _speaker_label(index):
    """Return the short label for the speaker at *index* (0-based).

    The first 23 speakers get a single letter; later ones get the letter
    followed by a cycle number ("A1", "B1", ...) so the labels never run out.
    """
    letters = "ABCDEFGHIKLMNOPQRSTVXYZ"
    cycle, position = divmod(index, len(letters))
    if cycle == 0:
        return letters[position]
    return letters[position] + str(cycle)


def _speaker_names(lines):
    """Return the distinct text-before-the-first-colon prefixes, in first-seen order."""
    seen = {}
    for line in lines:
        if ':' not in line:
            continue
        name = line.split(':', 1)[0].strip()
        # An empty name would make the replacement step match everywhere.
        if name:
            seen.setdefault(name, None)
    return list(seen)


def _replace_names(text, labels):
    """Replace every occurrence of each key of *labels* in *text* with its label."""
    if not labels:
        return text
    # Single pass, longest name first: a short name can neither clobber part
    # of a longer one nor re-match text produced by an earlier replacement.
    ordered = sorted(labels, key=len, reverse=True)
    pattern = re.compile("|".join(re.escape(name) for name in ordered))
    return pattern.sub(lambda match: labels[match.group(0)], text)


def summarize_podcast(podcast_file):
    """Summarize an uploaded transcript chunk by chunk.

    The file is read, the "WEBVTT" marker, cue timings and blank lines are
    removed, and every detected speaker name (the text before the first
    colon of a line) is replaced by a short label. The result is cut into
    pieces of at most 8000 characters and each piece is sent to
    ``gpt3_completion`` together with the list of speakers.

    Args:
        podcast_file: Object whose ``name`` attribute is the path of the
            transcript file, or ``None`` when nothing was uploaded.

    Returns:
        The per-chunk summaries, each on its own line and prefixed with
        "chunk : <n>". An empty string when there is nothing to summarize.

    Raises:
        RuntimeError: If there is text to summarize but no OpenAI API key
            is configured.
    """
    import os

    if podcast_file is None:
        return ""

    # utf-8-sig also swallows a leading byte-order mark, which would
    # otherwise survive in front of the "WEBVTT" marker.
    with open(podcast_file.name, "r", encoding="utf-8-sig", errors="replace") as f:
        transcript = f.read()

    transcript = _CUE_TIMING.sub("", transcript.replace("WEBVTT", ""))
    lines = [line for line in transcript.splitlines() if line.strip()]
    transcript = "\n".join(lines)

    names = _speaker_names(lines)
    labels = {name: _speaker_label(index) for index, name in enumerate(names)}
    speaker_header = "".join(
        "Speaker " + str(number) + " " + name + "\n"
        for number, name in enumerate(names, start=1)
    )
    transcript = speaker_header + _replace_names(transcript, labels)

    chunks = textwrap.wrap(transcript, 8000)
    if not chunks:
        return ""

    # The key comes from the environment so that no secret lives in the
    # source tree; a key already set on the client is left alone.
    api_key = os.environ.get("OPENAI_API_KEY")
    if api_key:
        openai.api_key = api_key
    elif not getattr(openai, "api_key", None):
        raise RuntimeError(
            "Set the OPENAI_API_KEY environment variable before summarizing."
        )

    summaries = []
    for count, chunk in enumerate(chunks, start=1):
        prompt = (
            speaker_header
            + "\n\n"
            + "        Summarize the portion of the podcast. The summary should be around 200 words. use the Name instead of A, B, C...\n"
            + "        Podcast: \n"
            + "        "
            + str(chunk)
            + "'\n\n"
        )
        summaries.append("chunk : " + str(count) + gpt3_completion(prompt))

    return "".join("\n" + summary for summary in summaries)

# Gradio front end: a transcript upload goes in, the summary text comes out.
title = "Podcast Summarizer"
description = "Summarize your podcast into a few key points using this app."

# Input and output widgets shown on the page.
inputs = gr.inputs.File(label="Upload podcast file")
outputs = gr.outputs.Textbox(label="Summary")

# Build the interface around summarize_podcast and start serving it.
gr.Interface(
    fn=summarize_podcast,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
).launch()