avans06 committed on
Commit
f7c9c25
1 Parent(s): 0d5acc7

Minor adjustments to vad.py

Browse files
Files changed (1) hide show
  1. src/vad.py +10 -2
src/vad.py CHANGED
@@ -1,4 +1,4 @@
1
- from abc import ABC, abstractmethod
2
  from collections import Counter, deque
3
  import os
4
  import time
@@ -421,7 +421,8 @@ class AbstractTranscription(ABC):
421
  sub_text = ""
422
  sub_words = []
423
  word_length = 0
424
-
 
425
  for idx, word in enumerate(segment_words):
426
  word2 = segment_words[idx + 1] if idx + 1 < len(segment_words) else None
427
  # Adjust start and end
@@ -430,12 +431,18 @@ class AbstractTranscription(ABC):
430
 
431
  if "start" not in sub_segment:
432
  sub_segment["start"] = float(word["start"])
 
 
433
 
434
  sub_text += word["word"]
435
  sub_words.append(word)
436
  word_length += len_wide(word["word"])
437
  if (sub_text.rstrip().endswith(".") or
438
  (word_length > 90 and (sub_text.rstrip().endswith(",") or sub_text.rstrip().endswith("?"))) or
 
 
 
 
439
  (word_length > 120 and word2 and (word2["word"].lstrip().startswith(",") or ((word2["word"].strip() in ["and", "or", "but"])))) or
440
  (word_length > 180 and sub_text.endswith(" "))):
441
  sub_segment["text"] = sub_text
@@ -446,6 +453,7 @@ class AbstractTranscription(ABC):
446
  sub_text = ""
447
  sub_words = []
448
  word_length = 0
 
449
  if "start" in sub_segment:
450
  sub_segment["text"] = sub_text
451
  sub_segment["end"] = float(word["end"])
 
1
+ from abc import ABC, abstractmethod
2
  from collections import Counter, deque
3
  import os
4
  import time
 
421
  sub_text = ""
422
  sub_words = []
423
  word_length = 0
424
+ is_wide = False
425
+
426
  for idx, word in enumerate(segment_words):
427
  word2 = segment_words[idx + 1] if idx + 1 < len(segment_words) else None
428
  # Adjust start and end
 
431
 
432
  if "start" not in sub_segment:
433
  sub_segment["start"] = float(word["start"])
434
+ if not is_wide and len(word["word"]) > 1:
435
+ is_wide = True
436
 
437
  sub_text += word["word"]
438
  sub_words.append(word)
439
  word_length += len_wide(word["word"])
440
  if (sub_text.rstrip().endswith(".") or
441
  (word_length > 90 and (sub_text.rstrip().endswith(",") or sub_text.rstrip().endswith("?"))) or
442
+ (word_length > 80 and is_wide and (
443
+ sub_text.rstrip().endswith(",") or sub_text.rstrip().endswith("?") or
444
+ sub_text.rstrip().endswith("、") or sub_text.rstrip().endswith("。"))) or
445
+ (word_length > 90 and is_wide and sub_text.endswith(" ")) or
446
  (word_length > 120 and word2 and (word2["word"].lstrip().startswith(",") or ((word2["word"].strip() in ["and", "or", "but"])))) or
447
  (word_length > 180 and sub_text.endswith(" "))):
448
  sub_segment["text"] = sub_text
 
453
  sub_text = ""
454
  sub_words = []
455
  word_length = 0
456
+ is_wide = False
457
  if "start" in sub_segment:
458
  sub_segment["text"] = sub_text
459
  sub_segment["end"] = float(word["end"])