GoodML committed on
Commit
e0ba642
·
verified ·
1 Parent(s): 29d18fd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -35
app.py CHANGED
@@ -88,63 +88,57 @@ def process_audio():
88
  print(f"Temporary WAV file deleted: {temp_audio_path}")
89
 
90
 
91
- # @app.route('/process-youtube', methods=['POST'])
92
- # def process_youtube():
93
- # youtube_url = request.json.get('youtube_url')
94
 
95
- # if not youtube_url:
96
- # return jsonify({"error": "No YouTube URL provided"}), 400
97
 
98
- # try:
99
-
100
-
101
- # # Extract the video ID from the YouTube URL
102
- # video_id = youtube_url.split("v=")[-1].split("&")[0]
103
-
104
- # # Fetch the transcript for the given video ID
105
- # transcript_data = YouTubeTranscriptApi.get_transcript(video_id)
106
-
107
- # # Print transcript to console
108
- # print(f"Transcript for video ID {video_id}:")
109
- # transcript = " ".join([segment['text'] for segment in transcript_data])
110
-
111
-
112
-
113
- # # Send the transcript to the Gemini API for structured data
114
- # structured_data = query_gemini_api(transcript)
115
-
116
-
117
- # # Return the structured data
118
- # return jsonify(structured_data)
119
 
120
- # except Exception as e:
121
- # return jsonify({"error": str(e)}), 500
122
 
 
 
 
 
 
 
 
 
 
 
 
 
123
 
124
- import logging
125
- logging.basicConfig(level=logging.DEBUG)
126
 
127
  @app.route('/process-youtube', methods=['POST'])
128
  def process_youtube():
129
  youtube_url = request.json.get('youtube_url')
130
-
131
  if not youtube_url:
132
  return jsonify({"error": "No YouTube URL provided"}), 400
133
 
134
  try:
135
  # Extract the video ID from the YouTube URL
136
- video_id = youtube_url.split("v=")[-1].split("&")[0]
 
137
  logging.debug(f"Processing video ID: {video_id}")
138
-
139
  try:
140
  # Fetch transcript
141
- transcript_data = YouTubeTranscriptApi.get_transcript(video_id)
 
 
 
 
 
 
 
 
142
  except Exception as e:
143
  logging.error(f"Error fetching transcript for {video_id}: {e}")
144
  return jsonify({"error": f"Could not retrieve transcript for video {video_id}: {str(e)}"}), 500
145
 
146
  # Concatenate transcript
147
- transcript = " ".join([segment['text'] for segment in transcript_data])
148
  logging.debug(f"Transcript: {transcript}")
149
 
150
  # Send to Gemini API
 
88
  print(f"Temporary WAV file deleted: {temp_audio_path}")
89
 
90
 
 
 
 
91
 
 
 
92
 
93
+ import logging
94
+ logging.basicConfig(level=logging.DEBUG)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
+ from urllib.parse import urlparse, parse_qs
 
97
 
98
def extract_video_id(youtube_url):
    """
    Extracts the video ID from a YouTube URL.

    Recognized forms:
      * any URL carrying a ``v`` query parameter
        (e.g. ``https://www.youtube.com/watch?v=<id>&t=10s``)
      * short links: ``https://youtu.be/<id>``
      * path-based links on youtube.com / youtube-nocookie.com:
        ``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>``, ``/v/<id>``

    URLs written without a scheme (``youtu.be/<id>``) are accepted too.

    Args:
        youtube_url: The URL string to inspect.

    Returns:
        The video ID as a string, or ``None`` when the input is not a string
        or no ID can be found.
    """
    # Imported locally so the helper works regardless of module-level imports.
    from urllib.parse import urlparse, parse_qs

    # Non-string input used to be swallowed by a blanket ``except``; be explicit.
    if not isinstance(youtube_url, str):
        return None

    candidate = youtube_url.strip()
    try:
        parsed_url = urlparse(candidate)
        # Without a scheme, urlparse puts the host into ``path``; re-parse as a
        # network-path reference so the host is recognized.
        if not parsed_url.netloc and "://" not in candidate:
            parsed_url = urlparse("//" + candidate)
    except ValueError as e:
        # urlparse raises ValueError on malformed netlocs (e.g. bad IPv6 brackets).
        print(f"Error extracting video ID: {e}")
        return None

    # Preferred source: the ``v`` query parameter (kept host-agnostic, as before).
    video_id = parse_qs(parsed_url.query).get('v', [None])[0]
    if video_id:
        return video_id

    host = (parsed_url.hostname or "").lower()
    if host.startswith("www."):
        host = host[len("www."):]
    segments = [part for part in parsed_url.path.split("/") if part]

    # Short links carry the ID as the first path segment.
    if host == "youtu.be":
        return segments[0] if segments else None

    is_youtube_host = (
        host in ("youtube.com", "youtube-nocookie.com")
        or host.endswith(".youtube.com")
    )
    if (
        is_youtube_host
        and len(segments) >= 2
        and segments[0] in ("embed", "shorts", "live", "v")
    ):
        return segments[1]

    return None
110
 
 
 
111
 
112
  @app.route('/process-youtube', methods=['POST'])
113
  def process_youtube():
114
  youtube_url = request.json.get('youtube_url')
115
+
116
  if not youtube_url:
117
  return jsonify({"error": "No YouTube URL provided"}), 400
118
 
119
  try:
120
  # Extract the video ID from the YouTube URL
121
+ video_id = extract_video_id(youtube_url)
122
+
123
  logging.debug(f"Processing video ID: {video_id}")
124
+
125
  try:
126
  # Fetch transcript
127
+ # transcript_data = YouTubeTranscriptApi.get_transcript(video_id)
128
+ transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
129
+ for transcript in transcript_list:
130
+ # translating the transcript will return another transcript object
131
+ req_transcript = transcript.translate('en').fetch())
132
+
133
+ transcript = req[0].get('text')
134
+
135
+
136
  except Exception as e:
137
  logging.error(f"Error fetching transcript for {video_id}: {e}")
138
  return jsonify({"error": f"Could not retrieve transcript for video {video_id}: {str(e)}"}), 500
139
 
140
  # Concatenate transcript
141
+ # transcript = " ".join([segment['text'] for segment in transcript_data])
142
  logging.debug(f"Transcript: {transcript}")
143
 
144
  # Send to Gemini API