Prathamesh Sarjerao Vaidya committed on
Commit
94b1ab5
·
1 Parent(s): 896d872

optimize model-preloader and add GitHub Action for syncing the repo to Google Drive

Browse files
.github/workflows/check.yml CHANGED
@@ -1,5 +1,5 @@
1
- name: Check file size
2
- on:
3
  pull_request:
4
  branches: [main]
5
  workflow_dispatch:
@@ -15,3 +15,125 @@ jobs:
15
  uses: ActionsDesk/lfs-warning@v2.0
16
  with:
17
  filesizelimit: 1073741824 # 1GB
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Check file size and sync to Google Drive
2
+ on:
3
  pull_request:
4
  branches: [main]
5
  workflow_dispatch:
 
15
  uses: ActionsDesk/lfs-warning@v2.0
16
  with:
17
  filesizelimit: 1073741824 # 1GB
18
+
19
+ sync-to-drive-on-pr:
20
+ runs-on: ubuntu-latest
21
+ needs: check-file-size
22
+ if: github.event_name == 'pull_request'
23
+ steps:
24
+ - uses: actions/checkout@v3
25
+ with:
26
+ lfs: true
27
+
28
+ # Pull LFS files
29
+ - name: Pull LFS files
30
+ run: |
31
+ git lfs install
32
+ git lfs pull
33
+
34
+ # Install pandoc for MD to PDF conversion
35
+ - name: Install pandoc
36
+ run: |
37
+ sudo apt-get update
38
+ sudo apt-get install -y pandoc texlive-latex-base texlive-fonts-recommended texlive-latex-extra
39
+
40
+ # Convert MD files to PDF
41
+ - name: Convert MD to PDF
42
+ run: |
43
+ mkdir -p pdf_output
44
+ find . -name "*.md" -not -path "./.git/*" -not -path "./pdf_output/*" | while read file; do
45
+ relative_path="${file#./}"
46
+ pdf_path="pdf_output/${relative_path%.md}.pdf"
47
+ mkdir -p "$(dirname "$pdf_path")"
48
+ pandoc "$file" -o "$pdf_path" --pdf-engine=pdflatex
49
+ echo "Converted $file to $pdf_path"
50
+ done
51
+
52
+ # Set up Python for Google Drive upload
53
+ - name: Set up Python
54
+ uses: actions/setup-python@v4
55
+ with:
56
+ python-version: '3.9'
57
+
58
+ # Install Python dependencies
59
+ - name: Install Python dependencies
60
+ run: |
61
+ pip install google-auth google-auth-oauthlib google-auth-httplib2 google-api-python-client
62
+
63
+ # Create and run upload script
64
+ - name: Upload to Google Drive (PR Preview)
65
+ env:
66
+ GOOGLE_CREDENTIALS: ${{ secrets.GOOGLE_CREDENTIALS }}
67
+ run: |
68
+ cat > upload_to_drive.py << 'EOF'
69
+ import os
70
+ import json
71
+ from google.oauth2 import service_account
72
+ from googleapiclient.discovery import build
73
+ from googleapiclient.http import MediaFileUpload
74
+ import mimetypes
75
+
76
+ credentials_json = os.environ['GOOGLE_CREDENTIALS']
77
+ credentials_info = json.loads(credentials_json)
78
+ credentials = service_account.Credentials.from_service_account_info(credentials_info)
79
+
80
+ service = build('drive', 'v3', credentials=credentials)
81
+ FOLDER_ID = '1-8HJcWxsUUQIj9OMXQeoeULS06RA9Hg9'
82
+
83
+ def get_mime_type(file_path):
84
+ mime_type, _ = mimetypes.guess_type(file_path)
85
+ return mime_type or 'application/octet-stream'
86
+
87
+ def upload_file(file_path, parent_folder_id, drive_service):
88
+ file_name = f"PR_PREVIEW_{os.path.basename(file_path)}"
89
+
90
+ query = f"name='{file_name}' and '{parent_folder_id}' in parents and trashed=false"
91
+ results = drive_service.files().list(q=query).execute()
92
+ items = results.get('files', [])
93
+
94
+ media = MediaFileUpload(file_path, mimetype=get_mime_type(file_path), resumable=True)
95
+
96
+ if items:
97
+ file_id = items[0]['id']
98
+ updated_file = drive_service.files().update(
99
+ fileId=file_id,
100
+ media_body=media
101
+ ).execute()
102
+ print(f'Updated PR Preview: {file_name}')
103
+ else:
104
+ file_metadata = {
105
+ 'name': file_name,
106
+ 'parents': [parent_folder_id]
107
+ }
108
+ file = drive_service.files().create(
109
+ body=file_metadata,
110
+ media_body=media,
111
+ fields='id'
112
+ ).execute()
113
+ print(f'Uploaded PR Preview: {file_name}')
114
+
115
+ def upload_directory(local_path, parent_folder_id, drive_service, exclude_dirs=None):
116
+ if exclude_dirs is None:
117
+ exclude_dirs = ['.git', '.github', 'node_modules', '__pycache__']
118
+
119
+ for root, dirs, files in os.walk(local_path):
120
+ dirs[:] = [d for d in dirs if d not in exclude_dirs]
121
+
122
+ for file in files:
123
+ file_path = os.path.join(root, file)
124
+ try:
125
+ upload_file(file_path, parent_folder_id, drive_service)
126
+ except Exception as e:
127
+ print(f'Error uploading {file_path}: {e}')
128
+
129
+ print("Starting PR preview upload to Google Drive...")
130
+ upload_directory('.', FOLDER_ID, service)
131
+
132
+ if os.path.exists('pdf_output'):
133
+ print("Uploading converted PDF files...")
134
+ upload_directory('pdf_output', FOLDER_ID, service)
135
+
136
+ print("PR preview upload completed!")
137
+ EOF
138
+
139
+ python upload_to_drive.py
.github/workflows/main.yml CHANGED
@@ -1,11 +1,11 @@
1
- name: Sync to Hugging Face hub
2
  on:
3
  push:
4
  branches: [main]
5
  workflow_dispatch:
6
 
7
  jobs:
8
- sync-to-hub:
9
  runs-on: ubuntu-latest
10
  steps:
11
  - uses: actions/checkout@v3
@@ -14,16 +14,140 @@ jobs:
14
  lfs: true
15
 
16
  # Ensure Git LFS is installed and fetch binary files
17
- # Try Pull LFS files
18
  - name: Pull LFS files
19
  run: |
20
  git lfs install
21
  git lfs pull
22
 
23
- - name: Push to hub
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  env:
25
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
26
  run: |
27
  git config user.name "github-actions"
28
  git config user.email "[email protected]"
29
- git push --force https://prathameshv07:$HF_TOKEN@huggingface.co/spaces/prathameshv07/Multilingual-Audio-Intelligence-System main
 
1
+ name: Sync to Hugging Face hub and Google Drive
2
  on:
3
  push:
4
  branches: [main]
5
  workflow_dispatch:
6
 
7
  jobs:
8
+ sync-to-hub-and-drive:
9
  runs-on: ubuntu-latest
10
  steps:
11
  - uses: actions/checkout@v3
 
14
  lfs: true
15
 
16
  # Ensure Git LFS is installed and fetch binary files
 
17
  - name: Pull LFS files
18
  run: |
19
  git lfs install
20
  git lfs pull
21
 
22
+ # Install pandoc for MD to PDF conversion
23
+ - name: Install pandoc
24
+ run: |
25
+ sudo apt-get update
26
+ sudo apt-get install -y pandoc texlive-latex-base texlive-fonts-recommended texlive-latex-extra
27
+
28
+ # Convert MD files to PDF
29
+ - name: Convert MD to PDF
30
+ run: |
31
+ mkdir -p pdf_output
32
+ find . -name "*.md" -not -path "./.git/*" -not -path "./pdf_output/*" | while read file; do
33
+ # Get the relative path and change extension to .pdf
34
+ relative_path="${file#./}"
35
+ pdf_path="pdf_output/${relative_path%.md}.pdf"
36
+
37
+ # Create directory structure in pdf_output
38
+ mkdir -p "$(dirname "$pdf_path")"
39
+
40
+ # Convert MD to PDF
41
+ pandoc "$file" -o "$pdf_path" --pdf-engine=pdflatex
42
+ echo "Converted $file to $pdf_path"
43
+ done
44
+
45
+ # Set up Python for Google Drive upload
46
+ - name: Set up Python
47
+ uses: actions/setup-python@v4
48
+ with:
49
+ python-version: '3.9'
50
+
51
+ # Install Python dependencies
52
+ - name: Install Python dependencies
53
+ run: |
54
+ pip install google-auth google-auth-oauthlib google-auth-httplib2 google-api-python-client
55
+
56
+ # Create Google Drive upload script
57
+ - name: Create upload script
58
+ run: |
59
+ cat > upload_to_drive.py << 'EOF'
60
+ import os
61
+ import json
62
+ from google.oauth2 import service_account
63
+ from googleapiclient.discovery import build
64
+ from googleapiclient.http import MediaFileUpload
65
+ import mimetypes
66
+
67
+ # Load credentials from environment
68
+ credentials_json = os.environ['GOOGLE_CREDENTIALS']
69
+ credentials_info = json.loads(credentials_json)
70
+ credentials = service_account.Credentials.from_service_account_info(credentials_info)
71
+
72
+ # Build the Drive service
73
+ service = build('drive', 'v3', credentials=credentials)
74
+
75
+ # Target folder ID
76
+ FOLDER_ID = '1-8HJcWxsUUQIj9OMXQeoeULS06RA9Hg9'
77
+
78
+ def get_mime_type(file_path):
79
+ mime_type, _ = mimetypes.guess_type(file_path)
80
+ return mime_type or 'application/octet-stream'
81
+
82
+ def upload_file(file_path, parent_folder_id, drive_service):
83
+ file_name = os.path.basename(file_path)
84
+
85
+ # Check if file already exists
86
+ query = f"name='{file_name}' and '{parent_folder_id}' in parents and trashed=false"
87
+ results = drive_service.files().list(q=query).execute()
88
+ items = results.get('files', [])
89
+
90
+ media = MediaFileUpload(file_path, mimetype=get_mime_type(file_path), resumable=True)
91
+
92
+ if items:
93
+ # Update existing file
94
+ file_id = items[0]['id']
95
+ updated_file = drive_service.files().update(
96
+ fileId=file_id,
97
+ media_body=media
98
+ ).execute()
99
+ print(f'Updated: {file_name} (ID: {updated_file.get("id")})')
100
+ else:
101
+ # Create new file
102
+ file_metadata = {
103
+ 'name': file_name,
104
+ 'parents': [parent_folder_id]
105
+ }
106
+ file = drive_service.files().create(
107
+ body=file_metadata,
108
+ media_body=media,
109
+ fields='id'
110
+ ).execute()
111
+ print(f'Uploaded: {file_name} (ID: {file.get("id")})')
112
+
113
+ def upload_directory(local_path, parent_folder_id, drive_service, exclude_dirs=None):
114
+ if exclude_dirs is None:
115
+ exclude_dirs = ['.git', '.github', 'node_modules', '__pycache__']
116
+
117
+ for root, dirs, files in os.walk(local_path):
118
+ # Remove excluded directories
119
+ dirs[:] = [d for d in dirs if d not in exclude_dirs]
120
+
121
+ for file in files:
122
+ file_path = os.path.join(root, file)
123
+ try:
124
+ upload_file(file_path, parent_folder_id, drive_service)
125
+ except Exception as e:
126
+ print(f'Error uploading {file_path}: {e}')
127
+
128
+ # Upload all files to Google Drive
129
+ print("Starting upload to Google Drive...")
130
+ upload_directory('.', FOLDER_ID, service)
131
+
132
+ # Upload PDF files if they exist
133
+ if os.path.exists('pdf_output'):
134
+ print("Uploading converted PDF files...")
135
+ upload_directory('pdf_output', FOLDER_ID, service)
136
+
137
+ print("Upload completed!")
138
+ EOF
139
+
140
+ # Upload to Google Drive
141
+ - name: Upload to Google Drive
142
+ env:
143
+ GOOGLE_CREDENTIALS: ${{ secrets.GOOGLE_CREDENTIALS }}
144
+ run: python upload_to_drive.py
145
+
146
+ # Push to Hugging Face (original functionality)
147
+ - name: Push to Hugging Face hub
148
  env:
149
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
150
  run: |
151
  git config user.name "github-actions"
152
  git config user.email "[email protected]"
153
+ git push --force https://prathameshv07:$HF_TOKEN@huggingface.co/spaces/prathameshv07/Multilingual-Audio-Intelligence-System main
demo_results/film_podcast_results.json ADDED
@@ -0,0 +1,381 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "segments": [
3
+ {
4
+ "speaker": "SPEAKER_01",
5
+ "start_time": 6.308468750000001,
6
+ "end_time": 13.46346875,
7
+ "text": "Le film intitulé « The Social Network » traite de la création du site Facebook par Marc Zuckerberg.",
8
+ "translated_text": "The film entitled \"The Social Network\" deals with the creation of the Facebook site by Marc Zuckerberg.",
9
+ "language": "fr"
10
+ },
11
+ {
12
+ "speaker": "SPEAKER_01",
13
+ "start_time": 13.98659375,
14
+ "end_time": 18.47534375,
15
+ "text": "et des problèmes judiciaires que cela a comporté pour le créateur de ceci.",
16
+ "translated_text": "and the judicial problems that this involved for the creator of this.",
17
+ "language": "fr"
18
+ },
19
+ {
20
+ "speaker": "SPEAKER_01",
21
+ "start_time": 19.09971875,
22
+ "end_time": 21.49596875,
23
+ "text": "Ce film est très réaliste et très intéressant.",
24
+ "translated_text": "This film is very realistic and very interesting.",
25
+ "language": "fr"
26
+ },
27
+ {
28
+ "speaker": "SPEAKER_04",
29
+ "start_time": 25.74846875,
30
+ "end_time": 30.405968750000003,
31
+ "text": "La semaine dernière, j'ai été au cinéma voir Paranormal Activity 2.",
32
+ "translated_text": "Last week, I went to the movies to see Paranormal Activity 2.",
33
+ "language": "fr"
34
+ },
35
+ {
36
+ "speaker": "SPEAKER_04",
37
+ "start_time": 31.08096875,
38
+ "end_time": 33.35909375,
39
+ "text": "Ce film est un film d'horreur.",
40
+ "translated_text": "This movie is a horror movie.",
41
+ "language": "fr"
42
+ },
43
+ {
44
+ "speaker": "SPEAKER_04",
45
+ "start_time": 34.28721875,
46
+ "end_time": 42.032843750000005,
47
+ "text": "Même s'il s'agit du deuxième film, il se déroule avant le premier et nous importe des informations sur celui-ci.",
48
+ "translated_text": "Even if it is the second film, it takes place before the first one and imports information about it.",
49
+ "language": "fr"
50
+ },
51
+ {
52
+ "speaker": "SPEAKER_03",
53
+ "start_time": 46.43721875,
54
+ "end_time": 48.86721875,
55
+ "text": "Récemment, j'ai vu le film V-Battery",
56
+ "translated_text": "Recently, I saw the movie V-Battery",
57
+ "language": "fr"
58
+ },
59
+ {
60
+ "speaker": "SPEAKER_03",
61
+ "start_time": 49.15409375,
62
+ "end_time": 50.63909375,
63
+ "text": "qui raconte l'histoire des 4 Rébouins.",
64
+ "translated_text": "which tells the story of the 4 Rebouins.",
65
+ "language": "fr"
66
+ },
67
+ {
68
+ "speaker": "SPEAKER_03",
69
+ "start_time": 51.212843750000005,
70
+ "end_time": 52.39409375,
71
+ "text": "Part pour les égages.",
72
+ "translated_text": "Get out of here for the lights.",
73
+ "language": "fr"
74
+ },
75
+ {
76
+ "speaker": "SPEAKER_03",
77
+ "start_time": 53.05221875,
78
+ "end_time": 55.07721875,
79
+ "text": "pour intérer la vie de garçon de l'un d'autre.",
80
+ "translated_text": "to interest each other's boy's life.",
81
+ "language": "fr"
82
+ },
83
+ {
84
+ "speaker": "SPEAKER_03",
85
+ "start_time": 56.12346875,
86
+ "end_time": 57.439718750000004,
87
+ "text": "qui va se marier prochainement.",
88
+ "translated_text": "who's about to get married.",
89
+ "language": "fr"
90
+ },
91
+ {
92
+ "speaker": "SPEAKER_03",
93
+ "start_time": 58.65471875,
94
+ "end_time": 60.84846875,
95
+ "text": "Histoire se déroule donc à Las Vegas.",
96
+ "translated_text": "History takes place in Las Vegas.",
97
+ "language": "fr"
98
+ },
99
+ {
100
+ "speaker": "SPEAKER_03",
101
+ "start_time": 60.899093750000006,
102
+ "end_time": 62.299718750000004,
103
+ "text": "Et après une folle nuit.",
104
+ "translated_text": "And after a crazy night.",
105
+ "language": "fr"
106
+ },
107
+ {
108
+ "speaker": "SPEAKER_03",
109
+ "start_time": 62.721593750000004,
110
+ "end_time": 65.23596875,
111
+ "text": "Il se réveille au petit matin sans se souvenir de l'aveil.",
112
+ "translated_text": "He wakes up in the early morning without remembering the confession.",
113
+ "language": "fr"
114
+ },
115
+ {
116
+ "speaker": "SPEAKER_03",
117
+ "start_time": 67.58159375000001,
118
+ "end_time": 70.43346875,
119
+ "text": "Le problème est qu'ils ont perdu leur...",
120
+ "translated_text": "The problem is, they lost their...",
121
+ "language": "fr"
122
+ },
123
+ {
124
+ "speaker": "SPEAKER_03",
125
+ "start_time": 70.68659375,
126
+ "end_time": 72.44159375000001,
127
+ "text": "Leur ami qui doit se marier prochainement.",
128
+ "translated_text": "Their friend who's due to get married soon.",
129
+ "language": "fr"
130
+ },
131
+ {
132
+ "speaker": "SPEAKER_03",
133
+ "start_time": 73.25159375,
134
+ "end_time": 75.24284375,
135
+ "text": "Je vous laisse donc imaginer la suite.",
136
+ "translated_text": "So I'll let you imagine the next one.",
137
+ "language": "fr"
138
+ },
139
+ {
140
+ "speaker": "SPEAKER_03",
141
+ "start_time": 76.08659375,
142
+ "end_time": 76.64346875000001,
143
+ "text": "qui est...",
144
+ "translated_text": "Which is...",
145
+ "language": "fr"
146
+ },
147
+ {
148
+ "speaker": "SPEAKER_03",
149
+ "start_time": 76.89659375000001,
150
+ "end_time": 78.41534375,
151
+ "text": "pour ma part très amusante.",
152
+ "translated_text": "For my part, very amusing.",
153
+ "language": "fr"
154
+ },
155
+ {
156
+ "speaker": "SPEAKER_03",
157
+ "start_time": 79.39409375000001,
158
+ "end_time": 81.01409375,
159
+ "text": "Ce n'est pas le film que je préfère.",
160
+ "translated_text": "It's not the movie I like best.",
161
+ "language": "fr"
162
+ },
163
+ {
164
+ "speaker": "SPEAKER_03",
165
+ "start_time": 81.45284375,
166
+ "end_time": 82.92096875,
167
+ "text": "Mais c'est un moment agréable.",
168
+ "translated_text": "But it's a nice time.",
169
+ "language": "fr"
170
+ },
171
+ {
172
+ "speaker": "SPEAKER_00",
173
+ "start_time": 87.52784375,
174
+ "end_time": 94.93596875,
175
+ "text": "Dernièrement, j'ai vu un film qui s'appelle Paranormal Activity 2.",
176
+ "translated_text": "Recently, I saw a movie called Paranormal Activity 2.",
177
+ "language": "fr"
178
+ },
179
+ {
180
+ "speaker": "SPEAKER_00",
181
+ "start_time": 95.25659375000001,
182
+ "end_time": 97.04534375,
183
+ "text": "Il s'agit d'un film d'horreur qui est accessible aux jeunes publics.",
184
+ "translated_text": "It is a horror film that is accessible to young audiences.",
185
+ "language": "fr"
186
+ },
187
+ {
188
+ "speaker": "SPEAKER_00",
189
+ "start_time": 97.80471875,
190
+ "end_time": 106.81596875000001,
191
+ "text": "Gros ne trouve pas de scène choquante.",
192
+ "translated_text": "Fat doesn't find a shocking scene.",
193
+ "language": "fr"
194
+ },
195
+ {
196
+ "speaker": "SPEAKER_00",
197
+ "start_time": 107.59221875,
198
+ "end_time": 115.96221875,
199
+ "text": "Il s'agit de l'histoire d'une famille américaine qui vit près de l'eau sans pacifique sur la côte ouest",
200
+ "translated_text": "This is the story of an American family living near the unpeaceful water on the west coast.",
201
+ "language": "fr"
202
+ },
203
+ {
204
+ "speaker": "SPEAKER_00",
205
+ "start_time": 116.72159375000001,
206
+ "end_time": 122.84721875000001,
207
+ "text": "et qui se trouve en proie à des phénomènes paranormaux.",
208
+ "translated_text": "and is plagued by paranormal phenomena.",
209
+ "language": "fr"
210
+ },
211
+ {
212
+ "speaker": "SPEAKER_07",
213
+ "start_time": 127.62284375,
214
+ "end_time": 128.95596875,
215
+ "text": "Peu à peu, tous les membres de cette famille vont disparaître mystérieusement, les uns après les autres, sans que l'on y trouve de véritables explications.",
216
+ "translated_text": "Gradually, all the members of this family will disappear mysteriously, one after another, without any real explanation.",
217
+ "language": "fr"
218
+ },
219
+ {
220
+ "speaker": "SPEAKER_07",
221
+ "start_time": 129.14159375,
222
+ "end_time": 130.71096875,
223
+ "text": "Il s'agit d'un bon film, rythmé et agréable à suivre, devant lequel on ne s'ennuie pas une seule seconde.",
224
+ "translated_text": "It is a good film, rhythmic and pleasant to follow, before which one does not miss one second.",
225
+ "language": "fr"
226
+ },
227
+ {
228
+ "speaker": "SPEAKER_07",
229
+ "start_time": 131.40284375000002,
230
+ "end_time": 135.79034375,
231
+ "text": "La semaine dernière",
232
+ "translated_text": "Last week",
233
+ "language": "fr"
234
+ },
235
+ {
236
+ "speaker": "SPEAKER_07",
237
+ "start_time": 136.39784375000002,
238
+ "end_time": 139.14846875,
239
+ "text": "J'ai vu le film La Ravre.",
240
+ "translated_text": "I saw the movie La Ravre.",
241
+ "language": "fr"
242
+ },
243
+ {
244
+ "speaker": "SPEAKER_02",
245
+ "start_time": 143.48534375,
246
+ "end_time": 145.47659375,
247
+ "text": "il parle de la 2e guerre mondiale et de la vie des Juifs en France.",
248
+ "translated_text": "He talks about the Second World War and the life of the Jews in France.",
249
+ "language": "fr"
250
+ },
251
+ {
252
+ "speaker": "SPEAKER_02",
253
+ "start_time": 145.71284375000002,
254
+ "end_time": 147.04596875000001,
255
+ "text": "Ce film est très intéressant et très réaliste.",
256
+ "translated_text": "This film is very interesting and very realistic.",
257
+ "language": "fr"
258
+ },
259
+ {
260
+ "speaker": "SPEAKER_02",
261
+ "start_time": 147.21471875,
262
+ "end_time": 148.91909375,
263
+ "text": "Récemment, j'ai vu Inception.",
264
+ "translated_text": "Recently, I saw Inception.",
265
+ "language": "fr"
266
+ },
267
+ {
268
+ "speaker": "SPEAKER_02",
269
+ "start_time": 149.20596875,
270
+ "end_time": 153.76221875000002,
271
+ "text": "Ce film est intéressant.",
272
+ "translated_text": "This movie is interesting.",
273
+ "language": "fr"
274
+ },
275
+ {
276
+ "speaker": "SPEAKER_02",
277
+ "start_time": 154.52159375000002,
278
+ "end_time": 155.88846875000002,
279
+ "text": "pour ces graphismes et ce qu'on scénario.",
280
+ "translated_text": "for these graphics and what we're scenarioing.",
281
+ "language": "fr"
282
+ },
283
+ {
284
+ "speaker": "SPEAKER_02",
285
+ "start_time": 156.58034375,
286
+ "end_time": 158.38596875000002,
287
+ "text": "L'idée de directrice est tendance d'entrer dans les rêves d'une personne pour y implanter une idée.",
288
+ "translated_text": "The idea of a director tends to enter a person's dreams to implant an idea.",
289
+ "language": "fr"
290
+ },
291
+ {
292
+ "speaker": "SPEAKER_05",
293
+ "start_time": 162.73971875,
294
+ "end_time": 166.85721875000002,
295
+ "text": "Les acteurs sont très bons.",
296
+ "translated_text": "The actors are very good.",
297
+ "language": "fr"
298
+ },
299
+ {
300
+ "speaker": "SPEAKER_05",
301
+ "start_time": 167.49846875,
302
+ "end_time": 177.04971875,
303
+ "text": "Et le film regarde les bannes hittées.",
304
+ "translated_text": "And the movie looks at the hit banners.",
305
+ "language": "fr"
306
+ },
307
+ {
308
+ "speaker": "SPEAKER_06",
309
+ "start_time": 181.31909375,
310
+ "end_time": 184.94721875000002,
311
+ "text": "La semaine dernière, je suis allée au cinéma pour voir paranormal activité.",
312
+ "translated_text": "Last week, I went to the movies to see paranormal activity.",
313
+ "language": "fr"
314
+ },
315
+ {
316
+ "speaker": "SPEAKER_06",
317
+ "start_time": 185.50409375,
318
+ "end_time": 191.19096875000002,
319
+ "text": "Ce film d'horreur apportait me le paranormal.",
320
+ "translated_text": "This horror movie brought me the paranormal.",
321
+ "language": "fr"
322
+ },
323
+ {
324
+ "speaker": "SPEAKER_06",
325
+ "start_time": 191.88284375,
326
+ "end_time": 198.12659375,
327
+ "text": "En effet, une famille subit des phénomènes paranormaux.",
328
+ "translated_text": "Indeed, a family undergoes paranormal phenomena.",
329
+ "language": "fr"
330
+ },
331
+ {
332
+ "speaker": "SPEAKER_08",
333
+ "start_time": 202.81784375,
334
+ "end_time": 205.07909375000003,
335
+ "text": "Tout au long du film, l'angoisse et le suspect, Reign.",
336
+ "translated_text": "Throughout the movie, anxiety and the suspect, Reign.",
337
+ "language": "fr"
338
+ },
339
+ {
340
+ "speaker": "SPEAKER_08",
341
+ "start_time": 205.31534375,
342
+ "end_time": 207.27284375000002,
343
+ "text": "Le dernier film que j'ai vu au cinéma était Wall Street.",
344
+ "translated_text": "The last movie I saw in the movies was Wall Street.",
345
+ "language": "fr"
346
+ },
347
+ {
348
+ "speaker": "SPEAKER_08",
349
+ "start_time": 208.23471875,
350
+ "end_time": 211.66034375,
351
+ "text": "J'ai trouvé ce film très intéressant car il parlait de l'univers financier pendant la crise.",
352
+ "translated_text": "I found this film very interesting because it was about the financial universe during the crisis.",
353
+ "language": "fr"
354
+ },
355
+ {
356
+ "speaker": "SPEAKER_08",
357
+ "start_time": 212.53784375,
358
+ "end_time": 214.07346875000002,
359
+ "text": "Il y avait aussi une histoire de famille qui rajoutait de la romance dans ce film qui décrit un monde très masculin.",
360
+ "translated_text": "There was also a family story that added romance in this film that describes a very masculine world.",
361
+ "language": "fr"
362
+ },
363
+ {
364
+ "speaker": "SPEAKER_08",
365
+ "start_time": 214.27596875,
366
+ "end_time": 215.40659375,
367
+ "text": "La semaine dernière, je suis allée au cinéma.",
368
+ "translated_text": "Last week, I went to the movies.",
369
+ "language": "fr"
370
+ }
371
+ ],
372
+ "summary": {
373
+ "total_duration": 230.478,
374
+ "num_speakers": 9,
375
+ "num_segments": 46,
376
+ "languages": [
377
+ "fr"
378
+ ],
379
+ "processing_time": 401.4734380245209
380
+ }
381
+ }
demo_results/yuri_kizaki_results.json ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "segments": [
3
+ {
4
+ "speaker": "SPEAKER_00",
5
+ "start_time": 0.40221875,
6
+ "end_time": 4.77284375,
7
+ "text": "音声メッセージが既存のウェブサイトを超えたコミュニケーションを実現。",
8
+ "translated_text": "The audio message will bring out communication beyond the existing website.",
9
+ "language": "ja"
10
+ },
11
+ {
12
+ "speaker": "SPEAKER_00",
13
+ "start_time": 5.5153437499999995,
14
+ "end_time": 7.388468750000001,
15
+ "text": "目で見るだけだったウェブサイトに",
16
+ "translated_text": "I'm going to show you what I'm doing.",
17
+ "language": "ja"
18
+ },
19
+ {
20
+ "speaker": "SPEAKER_00",
21
+ "start_time": 7.624718750000001,
22
+ "end_time": 9.852218750000002,
23
+ "text": "音声情報をインクルードすることで",
24
+ "translated_text": "We're going to be able to do that in the next video.",
25
+ "language": "ja"
26
+ },
27
+ {
28
+ "speaker": "SPEAKER_00",
29
+ "start_time": 10.274093750000002,
30
+ "end_time": 12.31596875,
31
+ "text": "情報に新しい価値を与え",
32
+ "translated_text": "And that's what we're going to do.",
33
+ "language": "ja"
34
+ },
35
+ {
36
+ "speaker": "SPEAKER_00",
37
+ "start_time": 12.36659375,
38
+ "end_time": 14.72909375,
39
+ "text": "他者との差別化に効果を発揮します",
40
+ "translated_text": "It's not just about being different from other people.",
41
+ "language": "ja"
42
+ },
43
+ {
44
+ "speaker": "SPEAKER_00",
45
+ "start_time": 15.67409375,
46
+ "end_time": 16.06221875,
47
+ "text": "また!",
48
+ "translated_text": "Again!",
49
+ "language": "ja"
50
+ },
51
+ {
52
+ "speaker": "SPEAKER_00",
53
+ "start_time": 16.33221875,
54
+ "end_time": 21.58034375,
55
+ "text": "文字やグラフィックだけでは伝えることの難しかった感情やニュアンスを表現し",
56
+ "translated_text": "It's not just writing, it's graphic.",
57
+ "language": "ja"
58
+ },
59
+ {
60
+ "speaker": "SPEAKER_00",
61
+ "start_time": 22.06971875,
62
+ "end_time": 24.44909375,
63
+ "text": "ユーザーの興味と理解を深めます。",
64
+ "translated_text": "It will enhance the user's interest and understanding.",
65
+ "language": "ja"
66
+ },
67
+ {
68
+ "speaker": "SPEAKER_00",
69
+ "start_time": 25.47846875,
70
+ "end_time": 25.832843750000002,
71
+ "text": "見る",
72
+ "translated_text": "See.",
73
+ "language": "ja"
74
+ },
75
+ {
76
+ "speaker": "SPEAKER_00",
77
+ "start_time": 26.204093750000002,
78
+ "end_time": 26.65971875,
79
+ "text": "聞く",
80
+ "translated_text": "Listen.",
81
+ "language": "ja"
82
+ },
83
+ {
84
+ "speaker": "SPEAKER_00",
85
+ "start_time": 26.96346875,
86
+ "end_time": 28.617218750000003,
87
+ "text": "理解するウェブサイトへ",
88
+ "translated_text": "To a website that understands.",
89
+ "language": "ja"
90
+ },
91
+ {
92
+ "speaker": "SPEAKER_00",
93
+ "start_time": 29.24159375,
94
+ "end_time": 31.90784375,
95
+ "text": "音声メッセージが人の心を動かします",
96
+ "translated_text": "And that's what I'm talking about.",
97
+ "language": "ja"
98
+ }
99
+ ],
100
+ "summary": {
101
+ "total_duration": 32.366,
102
+ "num_speakers": 1,
103
+ "num_segments": 12,
104
+ "languages": [
105
+ "ja"
106
+ ],
107
+ "processing_time": 88.7896044254303
108
+ }
109
+ }
model_preloader.py CHANGED
@@ -1,14 +1,12 @@
1
  #!/usr/bin/env python3
2
  """
3
- Model Preloader for Multilingual Audio Intelligence System
4
 
5
- This module handles downloading and initializing all AI models before the application starts.
6
- It provides progress tracking, caching, and error handling for model loading.
7
-
8
- Models loaded:
9
- - pyannote.audio for speaker diarization
10
- - faster-whisper for speech recognition
11
- - mBART50 for neural machine translation
12
  """
13
 
14
  import os
@@ -41,7 +39,7 @@ logger = logging.getLogger(__name__)
41
  console = Console()
42
 
43
  class ModelPreloader:
44
- """Comprehensive model preloader with progress tracking and caching."""
45
 
46
  def __init__(self, cache_dir: str = "./model_cache", device: str = "auto"):
47
  self.cache_dir = Path(cache_dir)
@@ -96,6 +94,154 @@ class ModelPreloader:
96
  }
97
  }
98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  def get_system_info(self) -> Dict[str, Any]:
100
  """Get system information for optimal model loading."""
101
  return {
@@ -173,18 +319,28 @@ class ModelPreloader:
173
  return None
174
 
175
  def load_whisper_model(self, task_id: str) -> Optional[WhisperModel]:
176
- """Load Whisper speech recognition model."""
177
  try:
178
  console.print(f"[yellow]Loading Whisper model (small)...[/yellow]")
179
 
180
  # Determine compute type based on device
181
  compute_type = "int8" if self.device == "cpu" else "float16"
 
 
 
 
 
 
 
 
 
182
 
 
183
  model = WhisperModel(
184
  "small",
185
  device=self.device,
186
  compute_type=compute_type,
187
- download_root=str(self.cache_dir / "whisper")
188
  )
189
 
190
  # Test the model with a dummy audio array
@@ -203,93 +359,23 @@ class ModelPreloader:
203
  return None
204
 
205
  def load_mbart_model(self, task_id: str) -> Optional[Dict[str, Any]]:
206
- """Load mBART translation model."""
207
- try:
208
- console.print(f"[yellow]Loading mBART translation model...[/yellow]")
209
-
210
- model_name = "facebook/mbart-large-50-many-to-many-mmt"
211
- cache_path = self.cache_dir / "mbart"
212
- cache_path.mkdir(exist_ok=True)
213
-
214
- # Load tokenizer
215
- tokenizer = AutoTokenizer.from_pretrained(
216
- model_name,
217
- cache_dir=str(cache_path)
218
- )
219
-
220
- # Load model
221
- model = AutoModelForSeq2SeqLM.from_pretrained(
222
- model_name,
223
- cache_dir=str(cache_path),
224
- torch_dtype=torch.float32 if self.device == "cpu" else torch.float16
225
- )
226
-
227
- if self.device != "cpu":
228
- model = model.to(self.device)
229
-
230
- # Test the model
231
- test_input = tokenizer("Hello world", return_tensors="pt")
232
- if self.device != "cpu":
233
- test_input = {k: v.to(self.device) for k, v in test_input.items()}
234
-
235
- with torch.no_grad():
236
- output = model.generate(**test_input, max_length=10)
237
-
238
- console.print(f"[green]✓ mBART model loaded successfully on {self.device}[/green]")
239
-
240
- return {
241
- "model": model,
242
- "tokenizer": tokenizer
243
- }
244
-
245
- except Exception as e:
246
- console.print(f"[red]✗ Failed to load mBART model: {e}[/red]")
247
- logger.error(f"mBART loading failed: {e}")
248
- return None
249
 
250
  def load_opus_mt_model(self, task_id: str, model_name: str) -> Optional[Dict[str, Any]]:
251
- """Load Opus-MT translation model."""
252
- try:
253
- console.print(f"[yellow]Loading Opus-MT model: {model_name}...[/yellow]")
254
-
255
- cache_path = self.cache_dir / "opus_mt" / model_name.replace("/", "--")
256
- cache_path.mkdir(parents=True, exist_ok=True)
257
-
258
- # Load tokenizer
259
- tokenizer = AutoTokenizer.from_pretrained(
260
- model_name,
261
- cache_dir=str(cache_path)
262
- )
263
-
264
- # Load model
265
- model = AutoModelForSeq2SeqLM.from_pretrained(
266
- model_name,
267
- cache_dir=str(cache_path),
268
- torch_dtype=torch.float32 if self.device == "cpu" else torch.float16
269
- )
270
-
271
- if self.device != "cpu":
272
- model = model.to(self.device)
273
-
274
- # Test the model
275
- test_input = tokenizer("Hello world", return_tensors="pt")
276
- if self.device != "cpu":
277
- test_input = {k: v.to(self.device) for k, v in test_input.items()}
278
-
279
- with torch.no_grad():
280
- output = model.generate(**test_input, max_length=10)
281
-
282
- console.print(f"[green]✓ {model_name} loaded successfully on {self.device}[/green]")
283
-
284
- return {
285
- "model": model,
286
- "tokenizer": tokenizer
287
- }
288
-
289
- except Exception as e:
290
- console.print(f"[red]✗ Failed to load {model_name}: {e}[/red]")
291
- logger.error(f"Opus-MT loading failed: {e}")
292
- return None
293
 
294
  def preload_all_models(self) -> Dict[str, Any]:
295
  """Preload all models with progress tracking."""
@@ -465,4 +551,4 @@ def main():
465
 
466
  if __name__ == "__main__":
467
  success = main()
468
- sys.exit(0 if success else 1)
 
1
  #!/usr/bin/env python3
2
  """
3
+ Model Preloader for Multilingual Audio Intelligence System - Enhanced Version
4
 
5
+ Key improvements:
6
+ 1. Smart local cache detection with corruption checking
7
+ 2. Fallback to download if local files don't exist or are corrupted
8
+ 3. Better error handling and retry mechanisms
9
+ 4. Consistent approach across all model types
 
 
10
  """
11
 
12
  import os
 
39
  console = Console()
40
 
41
  class ModelPreloader:
42
+ """Comprehensive model preloader with enhanced local cache detection."""
43
 
44
  def __init__(self, cache_dir: str = "./model_cache", device: str = "auto"):
45
  self.cache_dir = Path(cache_dir)
 
94
  }
95
  }
96
 
97
def check_local_model_files(self, model_name: str, model_type: str) -> bool:
    """
    Check whether a usable local copy of a model exists under ``self.cache_dir``.

    The check is purely file-based: it locates the model's ``snapshots``
    directory, picks the most recently modified snapshot and verifies that
    every expected file is present and non-empty.

    Args:
        model_name: Hub identifier of the model (e.g. ``"org/name"``). It is
            used to build the cache path for ``"mbart"`` and ``"opus_mt"``;
            the ``"whisper"`` branch always inspects the
            ``Systran/faster-whisper-small`` cache entry.
        model_type: One of ``"whisper"``, ``"mbart"``, ``"opus_mt"`` or
            ``"pyannote"``.

    Returns:
        True if the newest snapshot contains all required files, False
        otherwise. ``"pyannote"`` and unknown types always yield False so the
        caller falls back to the library's own cache handling. Any error
        raised while inspecting the file system is logged as a warning and
        also reported as False.
    """

    def _is_non_empty(path) -> bool:
        # A zero-byte file is treated the same as a missing one.
        return path.exists() and path.stat().st_size > 0

    def _newest_snapshot(repo_dir):
        # Return the most recently modified snapshot directory, or None.
        snapshots_dir = repo_dir / "snapshots"
        if not snapshots_dir.exists():
            return None
        candidates = [entry for entry in snapshots_dir.iterdir() if entry.is_dir()]
        if not candidates:
            return None
        return max(candidates, key=lambda entry: entry.stat().st_mtime)

    try:
        if model_type == "whisper":
            repo_dir = self.cache_dir / "whisper" / "models--Systran--faster-whisper-small"
            required_files = ("config.json", "model.bin", "tokenizer.json", "vocabulary.txt")
            # model.bin is already part of the required files.
            weight_files = ()
        elif model_type in ("mbart", "opus_mt"):
            flat_name = model_name.replace("/", "--")
            if model_type == "mbart":
                repo_dir = self.cache_dir / "mbart" / f"models--{flat_name}"
            else:
                # Opus-MT models get one extra directory level per model.
                repo_dir = self.cache_dir / "opus_mt" / flat_name / f"models--{flat_name}"
            required_files = ("config.json", "tokenizer_config.json")
            # Weights may be stored in either format; one of them is enough.
            weight_files = ("pytorch_model.bin", "model.safetensors")
        else:
            # "pyannote" (and anything unknown): the cache path is not
            # predicted here, so report "not found" and let the loader decide.
            return False

        snapshot_path = _newest_snapshot(repo_dir)
        if snapshot_path is None:
            return False

        if not all(_is_non_empty(snapshot_path / name) for name in required_files):
            return False

        if not weight_files:
            return True
        return any(_is_non_empty(snapshot_path / name) for name in weight_files)

    except Exception as e:
        # Deliberately best-effort: a failed check only means "download again".
        logger.warning(f"Error checking local files for {model_name}: {e}")
        return False
175
+
176
def load_transformers_model_with_cache_check(self, model_name: str, cache_path: Path, model_type: str = "seq2seq") -> Optional[Dict[str, Any]]:
    """
    Load a seq2seq Transformers model, preferring a valid local cache.

    If ``check_local_model_files`` reports a usable cache, the tokenizer and
    model are first loaded with ``local_files_only=True``. When no cache is
    found, or the cache-only load fails, they are loaded again without that
    restriction. The model is then moved to ``self.device`` (unless that is
    ``"cpu"``) and exercised with a short ``generate`` call as a smoke test.

    Args:
        model_name: Hub identifier of the model to load.
        cache_path: Directory passed to ``from_pretrained`` as ``cache_dir``.
        model_type: Kept for interface compatibility; currently unused, the
            model is always loaded via ``AutoModelForSeq2SeqLM``.

    Returns:
        ``{"model": ..., "tokenizer": ...}`` on success, or None if loading or
        the smoke test fails (the error is printed and logged).
    """
    # Half precision is only requested when not running on the CPU.
    dtype = torch.float32 if self.device == "cpu" else torch.float16

    def _load_pair(local_only: bool):
        # Load tokenizer and model with identical cache settings.
        loaded_tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            cache_dir=str(cache_path),
            local_files_only=local_only
        )
        loaded_model = AutoModelForSeq2SeqLM.from_pretrained(
            model_name,
            cache_dir=str(cache_path),
            local_files_only=local_only,
            torch_dtype=dtype
        )
        return loaded_tokenizer, loaded_model

    try:
        # The cache layout differs between mBART and Opus-MT models.
        cache_kind = "mbart" if "mbart" in model_name.lower() else "opus_mt"
        has_local_files = self.check_local_model_files(model_name, cache_kind)

        if has_local_files:
            console.print(f"[green]Found valid local cache for {model_name}, loading from cache...[/green]")
            try:
                tokenizer, model = _load_pair(local_only=True)
                console.print(f"[green]✓ Successfully loaded {model_name} from local cache[/green]")
            except Exception as e:
                console.print(f"[yellow]Local cache load failed for {model_name}, will download: {e}[/yellow]")
                has_local_files = False  # Force download

        if not has_local_files:
            console.print(f"[yellow]No valid local cache for {model_name}, downloading...[/yellow]")
            tokenizer, model = _load_pair(local_only=False)
            console.print(f"[green]✓ Successfully downloaded and loaded {model_name}[/green]")

        # Move to device if needed
        if self.device != "cpu":
            model = model.to(self.device)

        # Smoke test: a short generation proves model and tokenizer work together.
        test_input = tokenizer("Hello world", return_tensors="pt")
        if self.device != "cpu":
            test_input = {k: v.to(self.device) for k, v in test_input.items()}

        with torch.no_grad():
            # Only success/failure matters; the generated tokens are discarded.
            model.generate(**test_input, max_length=10)

        return {
            "model": model,
            "tokenizer": tokenizer
        }

    except Exception as e:
        console.print(f"[red]✗ Failed to load {model_name}: {e}[/red]")
        logger.error(f"Model loading failed for {model_name}: {e}")
        return None
244
+
245
  def get_system_info(self) -> Dict[str, Any]:
246
  """Get system information for optimal model loading."""
247
  return {
 
319
  return None
320
 
321
  def load_whisper_model(self, task_id: str) -> Optional[WhisperModel]:
322
+ """Load Whisper speech recognition model with enhanced cache checking."""
323
  try:
324
  console.print(f"[yellow]Loading Whisper model (small)...[/yellow]")
325
 
326
  # Determine compute type based on device
327
  compute_type = "int8" if self.device == "cpu" else "float16"
328
+ whisper_cache_dir = self.cache_dir / "whisper"
329
+
330
+ # Check if we have valid local files
331
+ has_local_files = self.check_local_model_files("small", "whisper")
332
+
333
+ if has_local_files:
334
+ console.print(f"[green]Found valid local Whisper cache, loading from cache...[/green]")
335
+ else:
336
+ console.print(f"[yellow]No valid local Whisper cache found, will download...[/yellow]")
337
 
338
+ # faster-whisper handles caching automatically, but we specify our cache dir
339
  model = WhisperModel(
340
  "small",
341
  device=self.device,
342
  compute_type=compute_type,
343
+ download_root=str(whisper_cache_dir)
344
  )
345
 
346
  # Test the model with a dummy audio array
 
359
  return None
360
 
361
  def load_mbart_model(self, task_id: str) -> Optional[Dict[str, Any]]:
362
+ """Load mBART translation model with enhanced cache checking."""
363
+ console.print(f"[yellow]Loading mBART translation model...[/yellow]")
364
+
365
+ model_name = "facebook/mbart-large-50-many-to-many-mmt"
366
+ cache_path = self.cache_dir / "mbart"
367
+ cache_path.mkdir(exist_ok=True)
368
+
369
+ return self.load_transformers_model_with_cache_check(model_name, cache_path, "seq2seq")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
 
371
  def load_opus_mt_model(self, task_id: str, model_name: str) -> Optional[Dict[str, Any]]:
372
+ """Load Opus-MT translation model with enhanced cache checking."""
373
+ console.print(f"[yellow]Loading Opus-MT model: {model_name}...[/yellow]")
374
+
375
+ cache_path = self.cache_dir / "opus_mt" / model_name.replace("/", "--")
376
+ cache_path.mkdir(parents=True, exist_ok=True)
377
+
378
+ return self.load_transformers_model_with_cache_check(model_name, cache_path, "seq2seq")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
379
 
380
  def preload_all_models(self) -> Dict[str, Any]:
381
  """Preload all models with progress tracking."""
 
551
 
552
  if __name__ == "__main__":
553
  success = main()
554
+ sys.exit(0 if success else 1)