ahujasherry18 committed on
Commit
fdb5d13
·
verified ·
1 Parent(s): cb85a43

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -40
app.py CHANGED
@@ -1,64 +1,59 @@
 
1
  from youtube_transcript_api import YouTubeTranscriptApi
2
  from youtube_transcript_api.formatters import TextFormatter
3
- import re
4
  import torch
5
  import gradio as gr
6
  from transformers import pipeline
7
 
8
- # model_path = ("../Model/models--sshleifer--distilbart-cnn-12-6/snapshots"
9
- # "/a4f8f3ea906ed274767e9906dbaede7531d660ff")
10
- # text_summary = pipeline("summarization", model=model_path
11
- # ,torch_dtype=torch.bfloat16)
12
-
13
  text_summary = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", torch_dtype=torch.bfloat16)
14
 
15
- def summary(input):
 
 
 
 
 
16
  output = text_summary(input)
17
  return output[0]['summary_text']
18
 
19
- # Function to extract video ID from the YouTube URL
20
  def extract_video_id(url):
21
- video_id = None
22
- # Updated regex to handle extra parameters in the YouTube URL
23
- regex = r"(?:https?:\/\/)?(?:www\.)?(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|\S*[\?&]v=|\S*[\?&]v%2F)?([\w-]+))|(?:youtu\.be\/([\w-]+))"
24
  match = re.search(regex, url)
25
  if match:
26
- video_id = match.group(1) if match.group(1) else match.group(2)
27
- return video_id
 
28
 
29
- # Function to get transcript for a YouTube video
30
- def get_transcript(video_url):
31
  video_id = extract_video_id(video_url)
32
- if video_id:
33
- try:
34
- transcript = YouTubeTranscriptApi.get_transcript(video_id)
35
 
36
- # Formatting transcript into plain text
37
- formatter = TextFormatter()
38
- formatted_transcript = formatter.format_transcript(transcript)
39
- summary_text = summary(formatted_transcript)
40
- return summary_text
41
 
42
- except Exception as e:
43
- return f"Error retrieving transcript: {e}"
44
- else:
45
- return "Invalid YouTube URL."
46
 
47
- # Main function to take URL input and fetch the transcript
48
- # if __name__ == "__main__":
49
- # youtube_url = input("Enter the YouTube video URL: ")
50
- # transcript = get_transcript(youtube_url)
51
- # print("\nTranscript:\n")
52
- # print(transcript)
53
 
54
 
55
- gr.close_all()
 
 
56
 
57
- # demo = gr.Interface(fn=summary,inputs=["text"],outputs=["text"])
58
 
59
- demo = gr.Interface(fn = get_transcript,
60
- inputs=[gr.Textbox(label="Input youtube url to summarize",lines=1)],
61
- outputs=[gr.Textbox(label="Summarized Text ",lines=4)],
62
- title="@SherryAhuja Project : Youtube Video Summarizer",
63
- description="This AI application will be used to summarize the Youtube Video Script")
 
64
  demo.launch()
 
1
+ import re
2
  from youtube_transcript_api import YouTubeTranscriptApi
3
  from youtube_transcript_api.formatters import TextFormatter
 
4
  import torch
5
  import gradio as gr
6
  from transformers import pipeline
7
 
 
 
 
 
 
# Summarization pipeline, constructed once at module load and reused by
# summary() for every request.
text_summary = pipeline(
    "summarization",
    model="sshleifer/distilbart-cnn-12-6",
    torch_dtype=torch.bfloat16,
)
9
 
10
+ # model_path = ("../Models/models--sshleifer--distilbart-cnn-12-6/snapshots"
11
+ # "/a4f8f3ea906ed274767e9906dbaede7531d660ff")
12
+ # text_summary = pipeline("summarization", model=model_path,
13
+ # torch_dtype=torch.bfloat16)
14
+
def summary(input):
    """Summarize text with the module-level ``text_summary`` pipeline.

    Args:
        input: Plain text to summarize. The parameter name shadows the
            builtin but is kept so existing callers keep working.

    Returns:
        The ``summary_text`` field of the first result produced by the
        pipeline.
    """
    # Video transcripts can be longer than the model's maximum input
    # length; ask the pipeline to truncate instead of failing on them.
    output = text_summary(input, truncation=True)
    return output[0]['summary_text']
18
 
 
def extract_video_id(url):
    """Return the 11-character YouTube video ID contained in ``url``.

    Recognized forms are ``youtube.com`` watch URLs (``?v=`` / ``&v=``),
    ``/v/``, ``/e/``, ``/embed/``, ``/shorts/`` and ``/live/`` paths, and
    ``youtu.be`` short links.

    Args:
        url: Text expected to contain a YouTube URL.

    Returns:
        The video ID string, or ``None`` when ``url`` is not a non-empty
        string or no ID can be found in it.
    """
    # Guard against None / non-text input so re.search cannot raise TypeError.
    if not isinstance(url, str) or not url:
        return None

    regex = (
        r"(?:youtube\.com/(?:[^/\n\s]+/\S+/"
        r"|(?:v|e(?:mbed)?|shorts|live)/"
        r"|\S*?[?&]v=)"
        r"|youtu\.be/)"
        r"([a-zA-Z0-9_-]{11})"
    )
    match = re.search(regex, url)
    return match.group(1) if match else None
26
+
27
 
def get_youtube_transcript(video_url):
    """Fetch the transcript of a YouTube video and return a summary of it.

    Args:
        video_url: URL of the YouTube video.

    Returns:
        The summary text on success. On failure a human-readable message
        is returned instead of raising, so the UI can display it:
        ``"Video ID could not be extracted."`` when no video ID is found,
        or ``"An error occurred: ..."`` for any error raised while
        fetching, formatting or summarizing the transcript.
    """
    video_id = extract_video_id(video_url)
    if not video_id:
        return "Video ID could not be extracted."

    try:
        # youtube-transcript-api 1.x replaced the static ``get_transcript``
        # with an instance-level ``fetch``; prefer it when available and
        # fall back to the legacy call on older releases.
        if hasattr(YouTubeTranscriptApi, "fetch"):
            transcript = YouTubeTranscriptApi().fetch(video_id)
        else:
            transcript = YouTubeTranscriptApi.get_transcript(video_id)

        # Flatten the transcript into plain text before summarizing.
        text_transcript = TextFormatter().format_transcript(transcript)
        return summary(text_transcript)
    except Exception as e:
        # Deliberate catch-all: the message is shown to the user in the UI.
        return f"An error occurred: {e}"
 
 
 
45
 
46
 
47
+ # Example URL (Replace this with the actual URL when using the script)
48
+ # video_url = "https://youtu.be/5PibknhIsTc"
49
+ # print(get_youtube_transcript(video_url))
50
 
# Shut down any Gradio apps that are still running before starting this one.
gr.close_all()

# One textbox in (the video URL), one textbox out (the summary).
# To summarize raw text instead, the interface could be built as:
#   gr.Interface(fn=summary, inputs="text", outputs="text")
demo = gr.Interface(
    fn=get_youtube_transcript,
    inputs=[gr.Textbox(label="Input YouTube Url to summarize", lines=1)],
    outputs=[gr.Textbox(label="Summarized text", lines=4)],
    title="@GenAILearniverse Project 2: YouTube Script Summarizer",
    description="THIS APPLICATION WILL BE USED TO SUMMARIZE THE YOUTUBE VIDEO SCRIPT.",
)
demo.launch()