Spaces:
Sleeping
Sleeping
Update processors/input_processor.py
Browse files
processors/input_processor.py
CHANGED
@@ -27,16 +27,12 @@ class ContentProcessor:
|
|
27 |
return pages
|
28 |
|
29 |
def process_youtube(self, video_url):
|
30 |
-
# Extract video ID from URL
|
31 |
video_id = self._extract_video_id(video_url)
|
32 |
if not video_id:
|
33 |
-
raise ValueError("
|
34 |
|
35 |
try:
|
36 |
-
# Get transcript directly using youtube_transcript_api
|
37 |
transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
|
38 |
-
|
39 |
-
# Combine all transcript pieces
|
40 |
full_transcript = " ".join([entry['text'] for entry in transcript_list])
|
41 |
|
42 |
# Create a document-like structure
|
@@ -46,11 +42,12 @@ class ContentProcessor:
|
|
46 |
metadata={"source": video_url}
|
47 |
)
|
48 |
|
49 |
-
# Split the document
|
50 |
return self.text_splitter.split_documents([doc])
|
51 |
|
|
|
|
|
52 |
except Exception as e:
|
53 |
-
raise Exception(f"
|
54 |
|
55 |
def _extract_video_id(self, url):
|
56 |
# Handle different YouTube URL formats
|
|
|
27 |
return pages
|
28 |
|
29 |
def process_youtube(self, video_url):
|
|
|
30 |
video_id = self._extract_video_id(video_url)
|
31 |
if not video_id:
|
32 |
+
raise ValueError("This appears to be an invalid YouTube URL. Please check the URL and try again.")
|
33 |
|
34 |
try:
|
|
|
35 |
transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
|
|
|
|
|
36 |
full_transcript = " ".join([entry['text'] for entry in transcript_list])
|
37 |
|
38 |
# Create a document-like structure
|
|
|
42 |
metadata={"source": video_url}
|
43 |
)
|
44 |
|
|
|
45 |
return self.text_splitter.split_documents([doc])
|
46 |
|
47 |
+
except TranscriptsDisabled:
|
48 |
+
raise Exception("This video does not have subtitles/captions enabled. Please try a different video that has captions available.")
|
49 |
except Exception as e:
|
50 |
+
raise Exception(f"Unable to get transcript: {str(e)}. Please ensure the video has captions enabled.")
|
51 |
|
52 |
def _extract_video_id(self, url):
|
53 |
# Handle different YouTube URL formats
|