hermanda committed on
Commit
77bc72b
·
verified ·
1 Parent(s): 19f1cf0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -39
app.py CHANGED
@@ -4,27 +4,54 @@ import subprocess
4
  import os
5
  import shutil
6
  import gradio as gr
 
 
7
 
8
- def download_subtitles(url):
9
  # Execute the bash script and capture the output
10
- result = subprocess.run(
11
- ['bash', 'download_subtitles.sh', url],
12
- check=True,
13
- text=True,
14
- stdout=subprocess.PIPE,
15
- stderr=subprocess.PIPE
16
- )
17
-
18
- # Extract the last line from stdout which is the directory name
19
- print(result.stdout)
20
- stdout_lines = result.stdout.strip().split('\n')
21
- directory = stdout_lines[-1].strip()
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  # Verify the directory exists
24
  if not os.path.isdir(directory):
25
  raise FileNotFoundError(f"Directory {directory} does not exist")
26
- print(os.listdir(directory))
27
  # Find the .srt file in the directory
 
28
  srt_files = [f for f in os.listdir(directory) if f.endswith('.srt')]
29
  if not srt_files:
30
  raise FileNotFoundError(f"No .srt file found in {directory}")
@@ -81,32 +108,33 @@ def srt_to_text(input_file):
81
  # print(response.text)
82
 
83
  def get_transcript_text(url):
84
- try:
85
- print("Downloading subtitles...")
86
- subtitlesfile = download_subtitles(url)
87
- print("Extracting text from subtitles...")
88
- video_text = srt_to_text(subtitlesfile)
89
- print("Cleaning up...")
90
- cleanup_directory(os.path.dirname(subtitlesfile))
91
- return video_text
92
- except Exception as e:
93
- raise gr.Error(f"Error retrieving transcript: {e}")
94
 
95
  def summarize_video(url, prompt):
96
- try:
97
- video_text = get_transcript_text(url)
98
-
99
- client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
100
- final_prompt = prompt + "\n" + video_text
101
- response = client.models.generate_content(
102
- model='gemini-2.0-flash',
103
- contents=final_prompt,
104
- )
105
- summary = response.text
106
-
107
- return summary
108
- except Exception as e:
109
- return f"An error occurred: {str(e)}"
 
110
 
111
  with gr.Blocks() as app:
112
  gr.Markdown("# YouTube Video Summarizer")
@@ -114,7 +142,7 @@ with gr.Blocks() as app:
114
  with gr.Row():
115
  with gr.Column(scale=5):
116
  url_input = gr.Textbox(label="YouTube URL", placeholder="Enter YouTube URL here...")
117
- with gr.Column(scale=5):
118
  summarize_btn = gr.Button("Summarize", variant="primary")
119
 
120
  default_prompt = """Summarize the following text chronologically, make it long, use markdown:"""
 
4
  import os
5
  import shutil
6
  import gradio as gr
7
+ import uuid
8
+ import subprocess
9
 
10
+ def download_subtitles(video_url):
11
  # Execute the bash script and capture the output
12
+ # result = subprocess.run(
13
+ # ['bash', 'download_subtitles.sh', url],
14
+ # check=True,
15
+ # text=True,
16
+ # stdout=subprocess.PIPE,
17
+ # stderr=subprocess.PIPE
18
+ # )
 
 
 
 
 
19
 
20
+ # # Extract the last line from stdout which is the directory name
21
+ # stdout_lines = result.stdout.strip().split('\n')
22
+ # directory = stdout_lines[-1].strip()
23
+
24
+ uuid_dir = str(uuid.uuid4())
25
+
26
+ # First command for auto-generated subtitles
27
+ subprocess.run([
28
+ "yt-dlp",
29
+ "--write-auto-subs",
30
+ "--sub-lang", "en",
31
+ "--convert-subs", "srt",
32
+ "--skip-download",
33
+ "-P", f"home:{uuid_dir}",
34
+ video_url
35
+ ], check=True)
36
+
37
+ # Second command for regular subtitles
38
+ subprocess.run([
39
+ "yt-dlp",
40
+ "--write-subs",
41
+ "--sub-lang", "en",
42
+ "--convert-subs", "srt",
43
+ "--skip-download",
44
+ "-P", f"home:{uuid_dir}",
45
+ video_url
46
+ ], check=True)
47
+
48
+ directory = uuid_dir
49
  # Verify the directory exists
50
  if not os.path.isdir(directory):
51
  raise FileNotFoundError(f"Directory {directory} does not exist")
52
+
53
  # Find the .srt file in the directory
54
+ print( os.listdir(directory))
55
  srt_files = [f for f in os.listdir(directory) if f.endswith('.srt')]
56
  if not srt_files:
57
  raise FileNotFoundError(f"No .srt file found in {directory}")
 
108
  # print(response.text)
109
 
110
  def get_transcript_text(url):
111
+ # try:
112
+ print("Downloading subtitles...")
113
+ subtitlesfile = download_subtitles(url)
114
+ print("Extracting text from subtitles...")
115
+ video_text = srt_to_text(subtitlesfile)
116
+ print("Cleaning up...")
117
+ cleanup_directory(os.path.dirname(subtitlesfile))
118
+ return video_text
119
+ # except Exception as e:
120
+ # raise gr.Error(f"Error retrieving transcript: {e}")
121
 
122
  def summarize_video(url, prompt):
123
+ # try:
124
+ video_text = get_transcript_text(url)
125
+
126
+ client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
127
+ final_prompt = prompt + "\n" + video_text
128
+ print("Generating summary...")
129
+ response = client.models.generate_content(
130
+ model='gemini-2.0-flash',
131
+ contents=final_prompt,
132
+ )
133
+ summary = response.text
134
+
135
+ return summary
136
+ # except Exception as e:
137
+ # return f"An error occurred: {str(e)}"
138
 
139
  with gr.Blocks() as app:
140
  gr.Markdown("# YouTube Video Summarizer")
 
142
  with gr.Row():
143
  with gr.Column(scale=5):
144
  url_input = gr.Textbox(label="YouTube URL", placeholder="Enter YouTube URL here...")
145
+ with gr.Column(scale=1):
146
  summarize_btn = gr.Button("Summarize", variant="primary")
147
 
148
  default_prompt = """Summarize the following text chronologically, make it long, use markdown:"""