openfree committed on
Commit
c32ffb2
·
verified ·
1 Parent(s): ff04ecb

Delete app-old-backup.py

Browse files
Files changed (1) hide show
  1. app-old-backup.py +0 -355
app-old-backup.py DELETED
@@ -1,355 +0,0 @@
1
- import spaces
2
- import logging
3
- from datetime import datetime
4
- from pathlib import Path
5
- import gradio as gr
6
- import torch
7
- import torchaudio
8
- import os
9
- import requests
10
- from transformers import pipeline
11
- import tempfile
12
- import numpy as np
13
- from einops import rearrange
14
- import cv2
15
- from scipy.io import wavfile
16
- import librosa
17
- import json
18
- from typing import Optional, Tuple, List
19
- import atexit
20
-
21
- try:
22
- import mmaudio
23
- except ImportError:
24
- os.system("pip install -e .")
25
- import mmaudio
26
-
27
- from mmaudio.eval_utils import (ModelConfig, all_model_cfg, generate, load_video, make_video,
28
- setup_eval_logging)
29
- from mmaudio.model.flow_matching import FlowMatching
30
- from mmaudio.model.networks import MMAudio, get_my_mmaudio
31
- from mmaudio.model.sequence_config import SequenceConfig
32
- from mmaudio.model.utils.features_utils import FeaturesUtils
33
-
34
# Logging configuration
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
log = logging.getLogger()

# Device selection. On CUDA, TF32 is allowed for matmul/cuDNN and the cuDNN
# benchmark mode is switched on.
if torch.cuda.is_available():
    device = torch.device("cuda")
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True
    torch.backends.cudnn.benchmark = True
else:
    device = torch.device("cpu")

dtype = torch.bfloat16

# Model configuration: pick the 'large_44k_v2' entry and fetch it if needed.
model: ModelConfig = all_model_cfg['large_44k_v2']
model.download_if_needed()
output_dir = Path('./output/gradio')

setup_eval_logging()

# Korean -> English translator (pinned to the CPU) and Pixabay API settings.
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en", device="cpu")
# SECURITY: an API key should not live in source control. Prefer supplying it
# through the PIXABAY_API_KEY environment variable; the literal below is kept
# only as a backward-compatible fallback and should be rotated and removed.
PIXABAY_API_KEY = os.environ.get("PIXABAY_API_KEY") or "33492762-a28a596ec4f286f84cd328b17"
62
-
63
-
64
-
65
def cleanup_temp_files():
    """Best-effort removal of ``.mp4`` / ``.flac`` entries from the temp directory.

    Scans the directory returned by ``tempfile.gettempdir()`` (non-recursively)
    and tries to delete every entry whose name ends in ``.mp4`` or ``.flac``.
    Entries that cannot be removed (already gone, permission denied, or a
    directory with a matching name) are skipped.

    NOTE(review): this matches *any* such file in the shared temp directory,
    not only files created by this app.
    """
    temp_dir = tempfile.gettempdir()
    for name in os.listdir(temp_dir):
        if not name.endswith(('.mp4', '.flac')):
            continue
        try:
            os.remove(os.path.join(temp_dir, name))
        except OSError:
            # Only filesystem errors are expected here; anything else (for
            # example KeyboardInterrupt) should not be silently swallowed.
            continue
73
-
74
- atexit.register(cleanup_temp_files)
75
-
76
def get_model() -> tuple[MMAudio, FeaturesUtils, SequenceConfig]:
    """Build the MMAudio network and its feature utilities on ``device``.

    The network is created from ``model.model_name``, moved to the module-level
    ``device`` / ``dtype``, switched to eval mode and populated with the weights
    stored at ``model.model_path``. A ``FeaturesUtils`` instance is built from
    the checkpoints referenced by ``model`` (without the VAE encoder) and moved
    to the same device and dtype.

    Returns:
        A ``(net, feature_utils, seq_cfg)`` tuple, where ``seq_cfg`` is
        ``model.seq_cfg``.
    """
    from contextlib import nullcontext

    # torch.cuda.device() only accepts CUDA devices, so entering it with the CPU
    # fallback device would raise. Use a no-op context in that case.
    device_ctx = torch.cuda.device(device) if device.type == "cuda" else nullcontext()
    with device_ctx:
        seq_cfg = model.seq_cfg
        net: MMAudio = get_my_mmaudio(model.model_name).to(device, dtype).eval()
        net.load_weights(torch.load(model.model_path, map_location=device, weights_only=True))
        log.info(f'Loaded weights from {model.model_path}')

        feature_utils = FeaturesUtils(
            tod_vae_ckpt=model.vae_path,
            synchformer_ckpt=model.synchformer_ckpt,
            enable_conditions=True,
            mode=model.mode,
            bigvgan_vocoder_ckpt=model.bigvgan_16k_path,
            need_vae_encoder=False
        ).to(device, dtype).eval()

    return net, feature_utils, seq_cfg
93
-
94
# Build the network, feature utilities and sequence config once at import time;
# the generation callbacks below use these module-level objects.
net, feature_utils, seq_cfg = get_model()
95
-
96
-
97
# Gradio callback for the video search tab.
@torch.no_grad()
def search_videos(query):
    """Translate *query* if needed and return matching Pixabay video URLs.

    The query goes through ``translate_prompt`` and is then handed to
    ``search_pixabay_videos`` together with ``PIXABAY_API_KEY``. Any exception
    is logged and results in an empty list.
    """
    results = []
    try:
        english_query = translate_prompt(query)
        results = search_pixabay_videos(english_query, PIXABAY_API_KEY)
    except Exception as err:
        logging.error(f"Video search error: {err}")
    return results
107
-
108
# Inclusive code-point ranges treated as Korean script. The previous single
# range 0x3131-0xD7A3 also covered the CJK ideograph blocks, so Chinese or
# Japanese kanji text was wrongly routed through the Korean translator.
_HANGUL_RANGES = (
    (0x3131, 0x318E),  # Hangul Compatibility Jamo
    (0xAC00, 0xD7A3),  # Hangul Syllables
)


def _contains_hangul(text):
    """Return True if any character of *text* falls in ``_HANGUL_RANGES``."""
    return any(lo <= ord(ch) <= hi for ch in text for lo, hi in _HANGUL_RANGES)


def translate_prompt(text):
    """Translate *text* to English when it contains Korean characters.

    Args:
        text: The prompt entered by the user; may be empty or ``None``.

    Returns:
        The ``translation_text`` produced by the module-level ``translator``
        when *text* contains Hangul; otherwise *text* unchanged. If anything
        goes wrong the error is logged and *text* is returned as-is.
    """
    try:
        if text and _contains_hangul(text):
            # Translation runs without gradient tracking.
            with torch.no_grad():
                translation = translator(text)[0]['translation_text']
            return translation
        return text
    except Exception as e:
        logging.error(f"Translation error: {e}")
        return text
120
-
121
# NOTE: `device` (including the TF32 / cuDNN flags) and the CPU-pinned
# `translator` pipeline are already configured near the top of this module.
# The copy that used to live here repeated the same device selection and loaded
# the "Helsinki-NLP/opus-mt-ko-en" pipeline a second time, which only cost
# start-up time and memory, so it has been removed.
134
-
135
-
136
-
137
def search_pixabay_videos(query, api_key):
    """Search the Pixabay video API and return video URLs for the hits.

    Args:
        query: Search terms, sent as the ``q`` parameter.
        api_key: Pixabay API key, sent as the ``key`` parameter.

    Returns:
        A list with one URL per usable hit (up to 40 hits are requested). An
        empty list is returned for a non-200 response, when there are no hits,
        or when any error occurs (the error is logged).
    """
    base_url = "https://pixabay.com/api/videos/"
    params = {
        "key": api_key,
        "q": query,
        "per_page": 40,
    }
    try:
        # Without a timeout a stalled connection would block the UI callback
        # indefinitely.
        response = requests.get(base_url, params=params, timeout=15)
        if response.status_code != 200:
            logging.error(f"Pixabay API error: HTTP {response.status_code}")
            return []

        urls = []
        for hit in response.json().get('hits', []):
            renditions = hit.get('videos', {})
            # A rendition can be listed with an empty URL when that size is not
            # available (NOTE(review): confirm against the Pixabay API docs);
            # fall back to the next smaller one instead of emitting "".
            for size in ('large', 'medium', 'small', 'tiny'):
                url = renditions.get(size, {}).get('url')
                if url:
                    urls.append(url)
                    break
        return urls
    except Exception as e:
        logging.error(f"Pixabay API error: {e}")
        return []
154
-
155
-
156
@spaces.GPU
@torch.inference_mode()
def video_to_audio(video: gr.Video, prompt: str, negative_prompt: str, seed: int, num_steps: int,
                   cfg_strength: float, duration: float):
    """Generate audio for *video* and return the path of the muxed result.

    Args:
        video: The input video as delivered by the Gradio video component.
        prompt: Text prompt; translated to English when it contains Korean.
        negative_prompt: Negative text prompt; translated the same way.
        seed: Seed for the random generator.
        num_steps: Number of Euler steps used by the flow-matching sampler.
        cfg_strength: Guidance strength forwarded to ``generate``.
        duration: Requested duration in seconds; replaced by the value that
            ``load_video`` returns.

    Returns:
        Path to a temporary ``.mp4`` file written by ``make_video``.
    """
    prompt = translate_prompt(prompt)
    negative_prompt = translate_prompt(negative_prompt)

    # Number inputs can arrive as floats, but the generator seed and the step
    # count have to be integers.
    seed = int(seed)
    num_steps = int(num_steps)

    rng = torch.Generator(device=device)
    rng.manual_seed(seed)
    fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps)

    clip_frames, sync_frames, duration = load_video(video, duration)
    # Add a leading dimension of size 1 to both frame tensors.
    clip_frames = clip_frames.unsqueeze(0)
    sync_frames = sync_frames.unsqueeze(0)
    # The shared sequence config and the network are resized for this request.
    seq_cfg.duration = duration
    net.update_seq_lengths(seq_cfg.latent_seq_len, seq_cfg.clip_seq_len, seq_cfg.sync_seq_len)

    audios = generate(clip_frames,
                      sync_frames, [prompt],
                      negative_text=[negative_prompt],
                      feature_utils=feature_utils,
                      net=net,
                      fm=fm,
                      rng=rng,
                      cfg_strength=cfg_strength)
    audio = audios.float().cpu()[0]

    # Reserve a unique output path and close the handle before it is written to.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmp:
        video_save_path = tmp.name
    make_video(video,
               video_save_path,
               audio,
               sampling_rate=seq_cfg.sampling_rate,
               duration_sec=seq_cfg.duration)
    return video_save_path
190
-
191
@spaces.GPU
@torch.inference_mode()
def text_to_audio(prompt: str, negative_prompt: str, seed: int, num_steps: int, cfg_strength: float,
                  duration: float):
    """Generate audio from a text prompt and return the path of the saved file.

    Args:
        prompt: Text prompt; translated to English when it contains Korean.
        negative_prompt: Negative text prompt; translated the same way.
        seed: Seed for the random generator.
        num_steps: Number of Euler steps used by the flow-matching sampler.
        cfg_strength: Guidance strength forwarded to ``generate``.
        duration: Duration in seconds, written to the shared ``seq_cfg``.

    Returns:
        Path to a temporary ``.flac`` file written with ``torchaudio.save``.
    """
    prompt = translate_prompt(prompt)
    negative_prompt = translate_prompt(negative_prompt)

    # Number inputs can arrive as floats, but the generator seed and the step
    # count have to be integers.
    seed = int(seed)
    num_steps = int(num_steps)

    rng = torch.Generator(device=device)
    rng.manual_seed(seed)
    fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps)

    # No visual conditioning in text-only mode.
    clip_frames = sync_frames = None
    # The shared sequence config and the network are resized for this request.
    seq_cfg.duration = duration
    net.update_seq_lengths(seq_cfg.latent_seq_len, seq_cfg.clip_seq_len, seq_cfg.sync_seq_len)

    audios = generate(clip_frames,
                      sync_frames, [prompt],
                      negative_text=[negative_prompt],
                      feature_utils=feature_utils,
                      net=net,
                      fm=fm,
                      rng=rng,
                      cfg_strength=cfg_strength)
    audio = audios.float().cpu()[0]

    # Reserve a unique output path and close the handle before it is written to.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.flac') as tmp:
        audio_save_path = tmp.name
    torchaudio.save(audio_save_path, audio, seq_cfg.sampling_rate)
    return audio_save_path
219
-
220
-
221
-
222
# Custom CSS shared by every tab: dark gradient container, card-style
# input/output panels with hover effects, blue gradient buttons, and dark
# form fields, labels and gallery.
custom_css = """
.gradio-container {
    background: linear-gradient(45deg, #1a1a1a, #2a2a2a);
    border-radius: 15px;
    box-shadow: 0 8px 32px rgba(0,0,0,0.3);
    color: #e0e0e0;
}

.input-container, .output-container {
    background: rgba(40, 40, 40, 0.95);
    backdrop-filter: blur(10px);
    border-radius: 10px;
    padding: 20px;
    transform-style: preserve-3d;
    transition: transform 0.3s ease;
    border: 1px solid rgba(255, 255, 255, 0.1);
}

.input-container:hover {
    transform: translateZ(20px);
    box-shadow: 0 8px 32px rgba(0,0,0,0.5);
}

.gallery-item {
    transition: transform 0.3s ease;
    border-radius: 8px;
    overflow: hidden;
    background: #2a2a2a;
}

.gallery-item:hover {
    transform: scale(1.05);
    box-shadow: 0 4px 15px rgba(0,0,0,0.4);
}

.tabs {
    background: rgba(30, 30, 30, 0.95);
    border-radius: 10px;
    padding: 10px;
    border: 1px solid rgba(255, 255, 255, 0.05);
}

button {
    background: linear-gradient(45deg, #2196F3, #1976D2);
    border: none;
    border-radius: 5px;
    transition: all 0.3s ease;
    color: white;
}

button:hover {
    transform: translateY(-2px);
    box-shadow: 0 4px 15px rgba(33,150,243,0.3);
}

/* 텍스트 입력 필드 스타일 */
textarea, input[type="text"], input[type="number"] {
    background: rgba(30, 30, 30, 0.95) !important;
    border: 1px solid rgba(255, 255, 255, 0.1) !important;
    color: #e0e0e0 !important;
    border-radius: 5px !important;
}

/* 레이블 스타일 */
label {
    color: #e0e0e0 !important;
}

/* 갤러리 그리드 스타일 */
.gallery {
    background: rgba(30, 30, 30, 0.95);
    padding: 15px;
    border-radius: 10px;
    border: 1px solid rgba(255, 255, 255, 0.05);
}
"""
299
-
300
# Text-to-audio tab: two prompt boxes plus the sampling parameters, wired to
# text_to_audio and rendered as an audio player.
text_to_audio_tab = gr.Interface(
    fn=text_to_audio,
    inputs=[
        gr.Textbox(label="Prompt(한글지원)"),
        gr.Textbox(label="Negative Prompt"),
        # precision=0 makes the component hand integers to the callback; the
        # seed and the step count are used as integers downstream.
        gr.Number(label="Seed", value=0, precision=0),
        gr.Number(label="Steps", value=25, precision=0),
        gr.Number(label="Guidance Scale", value=4.5),
        gr.Number(label="Duration (sec)", value=8),
    ],
    outputs=gr.Audio(label="Generated Audio"),
    css=custom_css
)
313
-
314
-
315
# Video-to-audio tab: an input video, two prompt boxes (the negative prompt
# defaults to "music") plus the sampling parameters, wired to video_to_audio.
video_to_audio_tab = gr.Interface(
    fn=video_to_audio,
    inputs=[
        gr.Video(label="Input Video"),
        gr.Textbox(label="Prompt(한글지원)"),
        gr.Textbox(label="Negative Prompt", value="music"),
        # precision=0 makes the component hand integers to the callback; the
        # seed and the step count are used as integers downstream.
        gr.Number(label="Seed", value=0, precision=0),
        gr.Number(label="Steps", value=25, precision=0),
        gr.Number(label="Guidance Scale", value=4.5),
        gr.Number(label="Duration (sec)", value=8),
    ],
    outputs=gr.Video(label="Generated Result"),
    css=custom_css
)
329
-
330
# Video search tab (labels in English): a single query box wired to
# search_videos, with the results shown in a 4-column gallery. api_name=False
# is passed so no named API endpoint is requested for this interface.
video_search_tab = gr.Interface(
    fn=search_videos,
    inputs=gr.Textbox(label="Search Query(한글지원)"),
    outputs=gr.Gallery(label="Search Results", columns=4, rows=20),
    css=custom_css,
    api_name=False
)
338
-
339
-
340
-
341
# App-level stylesheet: hide the page footer, then append the shared theme.
css = """
footer {
    visibility: hidden;
}
""" + custom_css  # combine the footer rule with the existing custom_css
347
-
348
# Entry point: assemble the three tabs into one tabbed app and serve it.
if __name__ == "__main__":
    tabs = [video_search_tab, video_to_audio_tab, text_to_audio_tab]
    tab_titles = ["Video Search", "Video-to-Audio", "Text-to-Audio"]
    demo = gr.TabbedInterface(
        tabs,
        tab_titles,
        theme="Yntec/HaleyCH_Theme_Orange",
        css=css,
    )
    # output_dir is passed as an allowed path for the served app.
    demo.launch(allowed_paths=[output_dir])