abdullah040 committed on
Commit
782d96b
·
1 Parent(s): 8d41dc1

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -0
app.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import re
3
+ import textwrap
4
+ import openai
5
+ import gradio as gr
6
+
7
def gpt3_completion(prompt, engine='text-davinci-002', temp=0.6, top_p=1.0, tokens=1000, freq_pen=0.25, pres_pen=0.0, stop=['<<END>>']):
    """Request a completion for ``prompt`` and return it as single-line text.

    All sampling arguments are forwarded to ``openai.Completion.create``.

    Args:
        prompt: Text sent to the model.
        engine: Name of the completion engine to use.
        temp: Sampling temperature.
        top_p: Nucleus-sampling cutoff.
        tokens: Upper bound on the number of generated tokens.
        freq_pen: Frequency penalty.
        pres_pen: Presence penalty.
        stop: Stop sequence(s). The default list is only read, never
            mutated, so sharing it across calls is safe.

    Returns:
        The text of the first returned choice, stripped, with every run of
        whitespace collapsed to a single space.
    """
    response = openai.Completion.create(
        engine=engine,
        prompt=prompt,
        temperature=temp,
        max_tokens=tokens,
        top_p=top_p,
        frequency_penalty=freq_pen,
        presence_penalty=pres_pen,
        stop=stop,
    )
    text = response['choices'][0]['text'].strip()
    # Raw string: '\s' in a plain literal is an invalid escape sequence.
    return re.sub(r'\s+', ' ', text)
23
+
24
def summarize_podcast(podcast):
    """Summarize an uploaded VTT transcript chunk by chunk.

    The transcript is stripped of VTT markup, each detected speaker name is
    replaced by a short label, the text is wrapped into chunks of at most
    8000 characters, and every chunk is summarized with ``gpt3_completion``.

    Args:
        podcast: File-like object whose ``read()`` returns the VTT content
            as ``bytes`` (decoded with the default codec) or ``str``.

    Returns:
        A string holding one entry per chunk, each preceded by a newline and
        prefixed with ``"chunk : <n>"``. Empty when the transcript is empty.
    """
    import os  # local import so this block does not depend on file-level edits

    raw = podcast.read()
    transcript = raw.decode() if isinstance(raw, bytes) else raw
    transcript = _strip_vtt_markup(transcript)

    # Build the speaker legend while swapping each name for its label.
    legend_lines = []
    for index, name in enumerate(_speaker_names(transcript)):
        transcript = transcript.replace(name, _speaker_label(index))
        legend_lines.append("Speaker " + str(index + 1) + " " + name + "\n")
    sp = "".join(legend_lines)
    transcript = sp + transcript

    # Credentials come from the environment; secrets must never be committed
    # to source control.
    api_key = os.environ.get("OPENAI_API_KEY")
    if api_key:
        openai.api_key = api_key

    summaries = []
    for number, chunk in enumerate(textwrap.wrap(transcript, 8000), start=1):
        prompt = sp + """\n
Summarize the portion of the podcast. The summary should be around 200 words. use the Name instead of A, B, C...
Podcast:

""" + str(chunk) + """'\n\n"""
        summaries.append("chunk : " + str(number) + gpt3_completion(prompt))
    return "".join("\n" + summary for summary in summaries)


def _strip_vtt_markup(raw):
    """Remove the WEBVTT header, blank lines and cue timing ranges."""
    text = raw.replace("WEBVTT", "")
    text = "\n".join(filter(None, text.splitlines()))
    # Accept both "start end" and the standard WebVTT "start --> end" form.
    return re.sub(
        r"\d\d:\d\d:\d\d\.\d\d\d[ \t]+(?:-->[ \t]+)?\d\d:\d\d:\d\d\.\d\d\d",
        "",
        text,
    )


def _speaker_names(transcript):
    """Return the distinct non-empty prefixes before ':' in first-seen order."""
    prefixes = (line.split(':')[0] for line in transcript.split('\n') if ':' in line)
    # Empty prefixes are skipped: str.replace("", label) would insert the
    # label between every character of the transcript.
    return list(dict.fromkeys(prefix for prefix in prefixes if prefix))


def _speaker_label(index):
    """Return the short label for the speaker at ``index`` (0-based)."""
    letters = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "K", "L", "M",
               "N", "O", "P", "Q", "R", "S", "T", "V", "X", "Y", "Z"]
    if index < len(letters):
        return letters[index]
    # More speakers than letters: fall back to a numbered label instead of
    # raising IndexError.
    return "S" + str(index + 1)
68
+
69
# Example transcript location; not referenced by the code visible in this file.
sample_url = "https://www.example.com/sample.vtt"

# Gradio widgets: a file upload for the transcript and a text box for the result.
input_file = gr.inputs.File(label="Upload your podcast in VTT format.")
output_text = gr.outputs.Textbox(label="Podcast Summary")

# Wire the summarizer to the widgets.
iface = gr.Interface(
    fn=summarize_podcast,
    inputs=input_file,
    outputs=output_text,
    title="Podcast Summarizer",
    description="This tool summarizes a podcast in VTT format, providing a summary of each chunk of text.",
)

# Start the web app when the module is executed.
iface.launch()