Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -42,24 +42,29 @@ def fix_italian_transcription(transcription):
|
|
42 |
"uno studente", "uno psicologo", "uno zio",
|
43 |
"di autore", "a uomo", "su imperatore", "con amico", "per artista"
|
44 |
}
|
|
|
45 |
transcription = re.sub(r"\b(un|l|d|s|t|m|c|n|quest|all|dall|nell|sull|coll|pell)\s+(?=[aeiouhàèìòùáéíóú])", r"\1'", transcription)
|
46 |
transcription = re.sub(r"\b(s|t|m|c|n)\s+(?=è\b|ha\b|hanno\b)", r"\1'", transcription)
|
47 |
transcription = re.sub(r"\bpo\b", "po'", transcription)
|
48 |
transcription = re.sub(r"\b(senz) ([aeiou])", r"\1'\2", transcription)
|
49 |
-
|
50 |
-
|
|
|
51 |
transcription = re.sub(pattern_numbers, replacement_numbers, transcription)
|
|
|
52 |
for phrase in no_elision_cases:
|
53 |
fixed = phrase.replace(" ", "'")
|
54 |
transcription = transcription.replace(fixed, phrase)
|
|
|
55 |
return transcription
|
56 |
|
57 |
def transcribe_mic(audio_microphone, language):
    """Transcribe microphone audio, post-processing Italian output.

    The audio and language are handed to ``transcribe_base`` unchanged.
    When ``language`` equals ``"it"`` the result is additionally passed
    through ``fix_italian_transcription`` before being returned; for any
    other language the ``transcribe_base`` result is returned as-is.
    """
    print("Transcription microphone")
    raw_text = transcribe_base(audio_microphone, language)
    # Only the Italian path gets the elision/apostrophe clean-up.
    return fix_italian_transcription(raw_text) if language == "it" else raw_text
|
|
|
42 |
"uno studente", "uno psicologo", "uno zio",
|
43 |
"di autore", "a uomo", "su imperatore", "con amico", "per artista"
|
44 |
}
|
45 |
+
|
46 |
transcription = re.sub(r"\b(un|l|d|s|t|m|c|n|quest|all|dall|nell|sull|coll|pell)\s+(?=[aeiouhàèìòùáéíóú])", r"\1'", transcription)
|
47 |
transcription = re.sub(r"\b(s|t|m|c|n)\s+(?=è\b|ha\b|hanno\b)", r"\1'", transcription)
|
48 |
transcription = re.sub(r"\bpo\b", "po'", transcription)
|
49 |
transcription = re.sub(r"\b(senz) ([aeiou])", r"\1'\2", transcription)
|
50 |
+
|
51 |
+
pattern_numbers = r"\b(trenta|quaranta|cinquanta|sessanta|settanta|ottanta|novanta)\s+(anni|ore)\b"
|
52 |
+
replacement_numbers = lambda m: m.group(1)[:-1] + "’" + m.group(2)
|
53 |
transcription = re.sub(pattern_numbers, replacement_numbers, transcription)
|
54 |
+
|
55 |
for phrase in no_elision_cases:
|
56 |
fixed = phrase.replace(" ", "'")
|
57 |
transcription = transcription.replace(fixed, phrase)
|
58 |
+
|
59 |
return transcription
|
60 |
|
61 |
def transcribe_mic(audio_microphone, language):
    """Transcribe microphone audio, post-processing Italian output.

    The audio and language are handed to ``transcribe_base`` unchanged and
    the result is printed. When ``language`` equals ``"it"`` the result is
    passed through ``fix_italian_transcription`` and printed again. The
    (possibly corrected) transcription is returned.
    """
    print("Transcription microphone")
    text = transcribe_base(audio_microphone, language)
    print(text)
    if language == "it":
        text = fix_italian_transcription(text)
        # NOTE(review): indentation was lost in the diff view this was taken
        # from; the second print is assumed to belong to the Italian branch
        # (showing the corrected text) -- confirm against app.py.
        print(text)
    return text
|