imorcillo committed on
Commit
ad75ddd
·
verified ·
1 Parent(s): 48bba9a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -1
app.py CHANGED
@@ -40,7 +40,16 @@ def transcribe_mic(audio_microphone, language):
40
  print("Transcription microphone")
41
  transcription = transcribe_base(audio_microphone, language)
42
  if language=="it":
43
- transcription=re.sub(r"\b([ldvc]) ([aeiouáéíóúàèìòù])", r"\1'\2", transcription)
 
 
 
 
 
 
 
 
 
44
  return transcription
45
  #return transcribe_base(audio_microphone, language)
46
 
 
40
  print("Transcription microphone")
41
  transcription = transcribe_base(audio_microphone, language)
42
  if language=="it":
43
+ no_elision_cases = {
44
+ "un autore", "un artista", "un uomo", "un amico", "un imperatore",
45
+ "uno studente", "uno psicologo", "uno zio",
46
+ "di autore", "a uomo", "su imperatore", "con amico", "per artista"
47
+ }
48
+ transcription = re.sub(r"\b(un|l|d|s|t|m|c|n|quest|all|dall|nell|sull|coll|pell)\s+(?=[aeiouhàèìòùáéíóú])", r"\1'", transcription)
49
+ transcription = re.sub(r"\b(s|t|m|c|n)\s+(?=è\b|ha\b|hanno\b)", r"\1'", transcription)
50
+ for phrase in no_elision_cases:
51
+ fixed = phrase.replace(" ", "'")
52
+ transcription = transcription.replace(fixed, phrase)
53
  return transcription
54
  #return transcribe_base(audio_microphone, language)
55