eusholli committed on
Commit 810b5d2 · 1 Parent(s): 03ec9cb

suppress moviepy console output

Files changed (1)
  1. ttv_web_scraper.py +9 -2
ttv_web_scraper.py CHANGED
@@ -18,6 +18,10 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false"
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
 
+# Configure logging to suppress MoviePy's console output
+logging.getLogger("moviepy").setLevel(logging.WARNING)
+
+
 CACHE_DIR = "cache/"
 DB_METADATA_FILE = os.path.join(CACHE_DIR, "db_metadata.json")
 SUBJECTS = [
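
For reference, the one-line fix raises the "moviepy" logger above INFO so its log records stop printing to the console. A minimal sketch of the same idea, assuming MoviePy 1.x import paths (clip.mp4 and out.mp4 are placeholder filenames); note that MoviePy's progress bars come from proglog rather than the logging module, so write_videofile(..., logger=None) is the per-call way to silence those as well:

    import logging
    from moviepy.editor import VideoFileClip  # MoviePy 1.x import path

    # Same idea as the commit: drop MoviePy's INFO-level log records
    logging.getLogger("moviepy").setLevel(logging.WARNING)

    clip = VideoFileClip("clip.mp4")              # placeholder input file
    # Progress bars bypass the logging module; logger=None turns them off per call
    clip.write_videofile("out.mp4", logger=None)  # placeholder output file
    clip.close()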
@@ -115,7 +119,8 @@ def parse_transcript(content: str) -> List[TranscriptSegment]:
     parsed_segments = []
     saved_info = None
 
-    segments = [segment.strip() for segment in re.split(r'(<br><br>.*?\((?:\d{2}:)?\d{2}:\d{2}\):<br>)', content) if segment.strip()]
+    segments = [segment.strip() for segment in re.split(r'(<br><br>.*?\((?:\d{2}:)?\d{2}:\d{2}\):<br>)',
+                                                        content) if segment.strip()]
 
     for i, segment in enumerate(segments):
         speaker_info = extract_speaker_info(segment)
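
For context on the reflowed re.split call: because the pattern is wrapped in a capturing group, re.split keeps each matched speaker header in the output, so the resulting list alternates header, spoken text, header, spoken text. A self-contained illustration with made-up transcript content (the Alice/Bob lines are hypothetical):

    import re

    PATTERN = r'(<br><br>.*?\((?:\d{2}:)?\d{2}:\d{2}\):<br>)'
    content = "<br><br>Alice (00:12):<br>Hello everyone.<br><br>Bob (01:02:03):<br>Thanks, Alice."

    segments = [s.strip() for s in re.split(PATTERN, content) if s.strip()]
    # The capturing group keeps the delimiters, so headers and text alternate:
    # ['<br><br>Alice (00:12):<br>', 'Hello everyone.',
    #  '<br><br>Bob (01:02:03):<br>', 'Thanks, Alice.']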
@@ -226,7 +231,9 @@ async def process_urls(urls: List[str]) -> List[Optional[VideoInfo]]:
     return await asyncio.gather(*[process_url(url) for url in urls])
 
 
-def db_save_metadata_sets(processed_urls: Set[str], speakers: Set[str], companies: Dict[str, Set[str]], sentiments: Set[str], subjects: Set[str]):
+def db_save_metadata_sets(processed_urls: Set[str], speakers: Set[str],
+                          companies: Dict[str, Set[str]],
+                          sentiments: Set[str], subjects: Set[str]):
     metadata = {
         'processed_urls': list(processed_urls),
         'speakers': list(speakers),
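
One note on the rewrapped signature: the body immediately converts each set to a list because Python sets are not JSON-serializable. A minimal sketch of that constraint (the sample data is hypothetical; sorted() just makes the output order deterministic):

    import json

    speakers = {"Alice", "Bob"}  # hypothetical sample data
    # json.dumps({"speakers": speakers}) would raise TypeError: sets are not JSON serializable
    print(json.dumps({"speakers": sorted(speakers)}))  # {"speakers": ["Alice", "Bob"]}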
 