mdeniz1 committed on
Commit
480ec28
·
1 Parent(s): 30ce89b

new youtube loader

Browse files
Files changed (1) hide show
  1. app.py +9 -20
app.py CHANGED
@@ -7,29 +7,18 @@ import os
7
 
8
  api_key = os.getenv('GROQ_API_KEY')
9
 
10
- def get_transcript(video_id):
11
- def transcript_to_string(transcript):
12
- transcript_text = '\n'.join([item['text'] for item in transcript])
13
- return transcript_text
14
-
15
- try:
16
- transcripts = YouTubeTranscriptApi.list_transcripts(video_id)
17
-
18
- for transcript in transcripts:
19
- if transcript.is_translatable:
20
- transcript = transcript.fetch()
21
- return transcript_to_string(transcript)
22
-
23
- transcript = transcripts.find_manually_created_transcript(['en', 'tr']).fetch()
24
- return transcript_to_string(transcript)
25
-
26
- except Exception as e:
27
- return "Sorry, the video cannot be transcribed."
28
 
29
 
30
  def summarize_video(video_url, language):
31
- video_id = video_url.split('v=')[-1].split('&')[0]
32
- transcript = get_transcript(video_id)
33
 
34
  model = ChatGroq(
35
  model="llama-3.1-70b-versatile",
 
7
 
8
  api_key = os.getenv('GROQ_API_KEY')
9
 
10
+ def get_transcript(video_url):
11
+ from langchain_community.document_loaders import YoutubeLoader
12
+ loader = YoutubeLoader.from_youtube_url(
13
+ video_url, add_video_info=False
14
+ )
15
+ document = loader.load()
16
+ transcript = document[0].page_content
17
+ return transcript
 
 
 
 
 
 
 
 
 
 
18
 
19
 
20
  def summarize_video(video_url, language):
21
+ transcript = get_transcript(video_url)
 
22
 
23
  model = ChatGroq(
24
  model="llama-3.1-70b-versatile",