raihanrifaldi committed on
Commit
718aed2
·
1 Parent(s): 8bedf81

update source_languages

Browse files
Files changed (1) hide show
  1. app.py +100 -100
app.py CHANGED
@@ -29,105 +29,105 @@ import psutil
29
 
30
  whisper_models = ["tiny", "base", "small", "medium", "large-v1", "large-v2"]
31
  source_languages = {
32
- "en": "English",
33
- "zh": "Chinese",
34
- "de": "German",
35
- "es": "Spanish",
36
- "ru": "Russian",
37
- "ko": "Korean",
38
- "fr": "French",
39
- "ja": "Japanese",
40
- "pt": "Portuguese",
41
- "tr": "Turkish",
42
- "pl": "Polish",
43
- "ca": "Catalan",
44
- "nl": "Dutch",
45
- "ar": "Arabic",
46
- "sv": "Swedish",
47
- "it": "Italian",
48
- "id": "Indonesian",
49
- "hi": "Hindi",
50
- "fi": "Finnish",
51
- "vi": "Vietnamese",
52
- "he": "Hebrew",
53
- "uk": "Ukrainian",
54
- "el": "Greek",
55
- "ms": "Malay",
56
- "cs": "Czech",
57
- "ro": "Romanian",
58
- "da": "Danish",
59
- "hu": "Hungarian",
60
- "ta": "Tamil",
61
- "no": "Norwegian",
62
- "th": "Thai",
63
- "ur": "Urdu",
64
- "hr": "Croatian",
65
- "bg": "Bulgarian",
66
- "lt": "Lithuanian",
67
- "la": "Latin",
68
- "mi": "Maori",
69
- "ml": "Malayalam",
70
- "cy": "Welsh",
71
- "sk": "Slovak",
72
- "te": "Telugu",
73
- "fa": "Persian",
74
- "lv": "Latvian",
75
- "bn": "Bengali",
76
- "sr": "Serbian",
77
- "az": "Azerbaijani",
78
- "sl": "Slovenian",
79
- "kn": "Kannada",
80
- "et": "Estonian",
81
- "mk": "Macedonian",
82
- "br": "Breton",
83
- "eu": "Basque",
84
- "is": "Icelandic",
85
- "hy": "Armenian",
86
- "ne": "Nepali",
87
- "mn": "Mongolian",
88
- "bs": "Bosnian",
89
- "kk": "Kazakh",
90
- "sq": "Albanian",
91
- "sw": "Swahili",
92
- "gl": "Galician",
93
- "mr": "Marathi",
94
- "pa": "Punjabi",
95
- "si": "Sinhala",
96
- "km": "Khmer",
97
- "sn": "Shona",
98
- "yo": "Yoruba",
99
- "so": "Somali",
100
- "af": "Afrikaans",
101
- "oc": "Occitan",
102
- "ka": "Georgian",
103
- "be": "Belarusian",
104
- "tg": "Tajik",
105
- "sd": "Sindhi",
106
- "gu": "Gujarati",
107
- "am": "Amharic",
108
- "yi": "Yiddish",
109
- "lo": "Lao",
110
- "uz": "Uzbek",
111
- "fo": "Faroese",
112
- "ht": "Haitian creole",
113
- "ps": "Pashto",
114
- "tk": "Turkmen",
115
- "nn": "Nynorsk",
116
- "mt": "Maltese",
117
- "sa": "Sanskrit",
118
- "lb": "Luxembourgish",
119
- "my": "Myanmar",
120
- "bo": "Tibetan",
121
- "tl": "Tagalog",
122
- "mg": "Malagasy",
123
- "as": "Assamese",
124
- "tt": "Tatar",
125
- "haw": "Hawaiian",
126
- "ln": "Lingala",
127
- "ha": "Hausa",
128
- "ba": "Bashkir",
129
- "jw": "Javanese",
130
- "su": "Sundanese",
131
  }
132
 
133
  source_language_list = [key[0] for key in source_languages.items()]
@@ -351,7 +351,7 @@ video_in = gr.Video(label="Video file", mirror_webcam=False)
351
  youtube_url_in = gr.Textbox(label="Youtube url", lines=1, interactive=True)
352
  df_init = pd.DataFrame(columns=['Start', 'End', 'Speaker', 'Text'])
353
  memory = psutil.virtual_memory()
354
- selected_source_lang = gr.Dropdown(choices=source_language_list, type="value", value="en", label="Spoken language in video", interactive=True)
355
  selected_whisper_model = gr.Dropdown(choices=whisper_models, type="value", value="base", label="Selected Whisper model", interactive=True)
356
  number_speakers = gr.Number(precision=0, value=0, label="Input number of speakers for better results. If value=0, model will automatic find the best number of speakers", interactive=True)
357
  system_info = gr.Markdown(f"*Memory: {memory.total / (1024 * 1024 * 1024):.2f}GB, used: {memory.percent}%, available: {memory.available / (1024 * 1024 * 1024):.2f}GB*")
 
29
 
30
  whisper_models = ["tiny", "base", "small", "medium", "large-v1", "large-v2"]
31
  source_languages = {
32
+ "English": "English",
33
+ # "zh": "Chinese",
34
+ # "de": "German",
35
+ # "es": "Spanish",
36
+ # "ru": "Russian",
37
+ # "ko": "Korean",
38
+ # "fr": "French",
39
+ "Japan": "Japanese",
40
+ # "pt": "Portuguese",
41
+ # "tr": "Turkish",
42
+ # "pl": "Polish",
43
+ # "ca": "Catalan",
44
+ # "nl": "Dutch",
45
+ # "ar": "Arabic",
46
+ # "sv": "Swedish",
47
+ # "it": "Italian",
48
+ "Indonesia": "Indonesian"
49
+ # "hi": "Hindi",
50
+ # "fi": "Finnish",
51
+ # "vi": "Vietnamese",
52
+ # "he": "Hebrew",
53
+ # "uk": "Ukrainian",
54
+ # "el": "Greek",
55
+ # "ms": "Malay",
56
+ # "cs": "Czech",
57
+ # "ro": "Romanian",
58
+ # "da": "Danish",
59
+ # "hu": "Hungarian",
60
+ # "ta": "Tamil",
61
+ # "no": "Norwegian",
62
+ # "th": "Thai",
63
+ # "ur": "Urdu",
64
+ # "hr": "Croatian",
65
+ # "bg": "Bulgarian",
66
+ # "lt": "Lithuanian",
67
+ # "la": "Latin",
68
+ # "mi": "Maori",
69
+ # "ml": "Malayalam",
70
+ # "cy": "Welsh",
71
+ # "sk": "Slovak",
72
+ # "te": "Telugu",
73
+ # "fa": "Persian",
74
+ # "lv": "Latvian",
75
+ # "bn": "Bengali",
76
+ # "sr": "Serbian",
77
+ # "az": "Azerbaijani",
78
+ # "sl": "Slovenian",
79
+ # "kn": "Kannada",
80
+ # "et": "Estonian",
81
+ # "mk": "Macedonian",
82
+ # "br": "Breton",
83
+ # "eu": "Basque",
84
+ # "is": "Icelandic",
85
+ # "hy": "Armenian",
86
+ # "ne": "Nepali",
87
+ # "mn": "Mongolian",
88
+ # "bs": "Bosnian",
89
+ # "kk": "Kazakh",
90
+ # "sq": "Albanian",
91
+ # "sw": "Swahili",
92
+ # "gl": "Galician",
93
+ # "mr": "Marathi",
94
+ # "pa": "Punjabi",
95
+ # "si": "Sinhala",
96
+ # "km": "Khmer",
97
+ # "sn": "Shona",
98
+ # "yo": "Yoruba",
99
+ # "so": "Somali",
100
+ # "af": "Afrikaans",
101
+ # "oc": "Occitan",
102
+ # "ka": "Georgian",
103
+ # "be": "Belarusian",
104
+ # "tg": "Tajik",
105
+ # "sd": "Sindhi",
106
+ # "gu": "Gujarati",
107
+ # "am": "Amharic",
108
+ # "yi": "Yiddish",
109
+ # "lo": "Lao",
110
+ # "uz": "Uzbek",
111
+ # "fo": "Faroese",
112
+ # "ht": "Haitian creole",
113
+ # "ps": "Pashto",
114
+ # "tk": "Turkmen",
115
+ # "nn": "Nynorsk",
116
+ # "mt": "Maltese",
117
+ # "sa": "Sanskrit",
118
+ # "lb": "Luxembourgish",
119
+ # "my": "Myanmar",
120
+ # "bo": "Tibetan",
121
+ # "tl": "Tagalog",
122
+ # "mg": "Malagasy",
123
+ # "as": "Assamese",
124
+ # "tt": "Tatar",
125
+ # "haw": "Hawaiian",
126
+ # "ln": "Lingala",
127
+ # "ha": "Hausa",
128
+ # "ba": "Bashkir",
129
+ # "jw": "Javanese",
130
+ # "su": "Sundanese",
131
  }
132
 
133
  source_language_list = [key[0] for key in source_languages.items()]
 
351
  youtube_url_in = gr.Textbox(label="Youtube url", lines=1, interactive=True)
352
  df_init = pd.DataFrame(columns=['Start', 'End', 'Speaker', 'Text'])
353
  memory = psutil.virtual_memory()
354
+ selected_source_lang = gr.Dropdown(choices=source_language_list, type="value", value="Indonesia", label="Spoken language in video", interactive=True)
355
  selected_whisper_model = gr.Dropdown(choices=whisper_models, type="value", value="base", label="Selected Whisper model", interactive=True)
356
  number_speakers = gr.Number(precision=0, value=0, label="Input number of speakers for better results. If value=0, model will automatic find the best number of speakers", interactive=True)
357
  system_info = gr.Markdown(f"*Memory: {memory.total / (1024 * 1024 * 1024):.2f}GB, used: {memory.percent}%, available: {memory.available / (1024 * 1024 * 1024):.2f}GB*")