imorcillo committed on
Commit
e04575a
·
verified ·
1 Parent(s): 143de70

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -3
app.py CHANGED
@@ -42,24 +42,29 @@ def fix_italian_transcription(transcription):
42
  "uno studente", "uno psicologo", "uno zio",
43
  "di autore", "a uomo", "su imperatore", "con amico", "per artista"
44
  }
 
45
  transcription = re.sub(r"\b(un|l|d|s|t|m|c|n|quest|all|dall|nell|sull|coll|pell)\s+(?=[aeiouhàèìòùáéíóú])", r"\1'", transcription)
46
  transcription = re.sub(r"\b(s|t|m|c|n)\s+(?=è\b|ha\b|hanno\b)", r"\1'", transcription)
47
  transcription = re.sub(r"\bpo\b", "po'", transcription)
48
  transcription = re.sub(r"\b(senz) ([aeiou])", r"\1'\2", transcription)
49
- pattern_numbers = r"\b(trenta|quaranta|cinquanta|sessanta|settanta|ottanta|novanta)\s+(?=anni|ore)\b"
50
- replacement_numbers = lambda m: m.group(1)[:-1] + "’" + m.group(0).split()[1]
 
51
  transcription = re.sub(pattern_numbers, replacement_numbers, transcription)
 
52
  for phrase in no_elision_cases:
53
  fixed = phrase.replace(" ", "'")
54
  transcription = transcription.replace(fixed, phrase)
 
55
  return transcription
56
 
57
  def transcribe_mic(audio_microphone, language):
58
  print("Transcription microphone")
59
  transcription = transcribe_base(audio_microphone, language)
60
-
61
  if language=="it":
62
  transcription = fix_italian_transcription(transcription)
 
63
 
64
  return transcription
65
  #return transcribe_base(audio_microphone, language)
 
42
  "uno studente", "uno psicologo", "uno zio",
43
  "di autore", "a uomo", "su imperatore", "con amico", "per artista"
44
  }
45
+
46
  transcription = re.sub(r"\b(un|l|d|s|t|m|c|n|quest|all|dall|nell|sull|coll|pell)\s+(?=[aeiouhàèìòùáéíóú])", r"\1'", transcription)
47
  transcription = re.sub(r"\b(s|t|m|c|n)\s+(?=è\b|ha\b|hanno\b)", r"\1'", transcription)
48
  transcription = re.sub(r"\bpo\b", "po'", transcription)
49
  transcription = re.sub(r"\b(senz) ([aeiou])", r"\1'\2", transcription)
50
+
51
+ pattern_numbers = r"\b(trenta|quaranta|cinquanta|sessanta|settanta|ottanta|novanta)\s+(anni|ore)\b"
52
+ replacement_numbers = lambda m: m.group(1)[:-1] + "’" + m.group(2)
53
  transcription = re.sub(pattern_numbers, replacement_numbers, transcription)
54
+
55
  for phrase in no_elision_cases:
56
  fixed = phrase.replace(" ", "'")
57
  transcription = transcription.replace(fixed, phrase)
58
+
59
  return transcription
60
 
61
  def transcribe_mic(audio_microphone, language):
62
  print("Transcription microphone")
63
  transcription = transcribe_base(audio_microphone, language)
64
+ print(transcription)
65
  if language=="it":
66
  transcription = fix_italian_transcription(transcription)
67
+ print(transcription)
68
 
69
  return transcription
70
  #return transcribe_base(audio_microphone, language)