Suppress MoviePy console output
Browse files- ttv_web_scraper.py +9 -2
ttv_web_scraper.py
CHANGED
@@ -18,6 +18,10 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
|
18 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
19 |
logger = logging.getLogger(__name__)
|
20 |
|
|
|
|
|
|
|
|
|
21 |
CACHE_DIR = "cache/"
|
22 |
DB_METADATA_FILE = os.path.join(CACHE_DIR, "db_metadata.json")
|
23 |
SUBJECTS = [
|
@@ -115,7 +119,8 @@ def parse_transcript(content: str) -> List[TranscriptSegment]:
|
|
115 |
parsed_segments = []
|
116 |
saved_info = None
|
117 |
|
118 |
-
segments = [segment.strip() for segment in re.split(r'(<br><br>.*?\((?:\d{2}:)?\d{2}:\d{2}\):<br>)', content) if segment.strip()]
|
|
|
119 |
|
120 |
for i, segment in enumerate(segments):
|
121 |
speaker_info = extract_speaker_info(segment)
|
@@ -226,7 +231,9 @@ async def process_urls(urls: List[str]) -> List[Optional[VideoInfo]]:
|
|
226 |
return await asyncio.gather(*[process_url(url) for url in urls])
|
227 |
|
228 |
|
229 |
-
def db_save_metadata_sets(processed_urls: Set[str], speakers: Set[str], companies: Dict[str, Set[str]], sentiments: Set[str], subjects: Set[str]):
|
|
|
|
|
230 |
metadata = {
|
231 |
'processed_urls': list(processed_urls),
|
232 |
'speakers': list(speakers),
|
|
|
18 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
19 |
logger = logging.getLogger(__name__)
|
20 |
|
21 |
+
# Configure logging to suppress MoviePy's console output
|
22 |
+
logging.getLogger("moviepy").setLevel(logging.WARNING)
|
23 |
+
|
24 |
+
|
25 |
CACHE_DIR = "cache/"
|
26 |
DB_METADATA_FILE = os.path.join(CACHE_DIR, "db_metadata.json")
|
27 |
SUBJECTS = [
|
|
|
119 |
parsed_segments = []
|
120 |
saved_info = None
|
121 |
|
122 |
+
segments = [segment.strip() for segment in re.split(r'(<br><br>.*?\((?:\d{2}:)?\d{2}:\d{2}\):<br>)',
|
123 |
+
content) if segment.strip()]
|
124 |
|
125 |
for i, segment in enumerate(segments):
|
126 |
speaker_info = extract_speaker_info(segment)
|
|
|
231 |
return await asyncio.gather(*[process_url(url) for url in urls])
|
232 |
|
233 |
|
234 |
+
def db_save_metadata_sets(processed_urls: Set[str], speakers: Set[str],
|
235 |
+
companies: Dict[str, Set[str]],
|
236 |
+
sentiments: Set[str], subjects: Set[str]):
|
237 |
metadata = {
|
238 |
'processed_urls': list(processed_urls),
|
239 |
'speakers': list(speakers),
|