# Spaces: Build error
# Speech-to-image demo script.
#
# Loads one audio sample from a small LibriSpeech test dataset, builds a
# Stable Diffusion pipeline that is given a Whisper model and processor as its
# speech components, runs the pipeline on the raw audio array, and displays
# the first generated image with matplotlib.
import torch
import matplotlib.pyplot as plt
from datasets import load_dataset
from diffusers import DiffusionPipeline
from transformers import (
    WhisperForConditionalGeneration,
    WhisperProcessor,
)

device = "cuda" if torch.cuda.is_available() else "cpu"
# float16 weights are meant for GPU inference; fall back to float32 on CPU,
# where half-precision ops may be unsupported or very slow.
dtype = torch.float16 if device == "cuda" else torch.float32

ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
audio_sample = ds[3]
# Lower-cased reference transcript of the sample; not passed to the pipeline,
# kept only so it can be inspected next to the generated image.
text = audio_sample["text"].lower()
speech_data = audio_sample["audio"]["array"]

model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small").to(device)
processor = WhisperProcessor.from_pretrained("openai/whisper-small")

diffuser_pipeline = DiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    # NOTE(review): presumably a pipeline file that sits next to this script;
    # confirm the path resolves from the working directory.
    custom_pipeline="audio_to_image_pipeline.py",
    speech_model=model,
    speech_processor=processor,
    torch_dtype=dtype,
)
diffuser_pipeline.enable_attention_slicing()
diffuser_pipeline = diffuser_pipeline.to(device)

output = diffuser_pipeline(speech_data)
plt.imshow(output.images[0])
# imshow only draws onto the current figure; show() is what actually renders
# it when this file is run as a plain script rather than in a notebook.
plt.show()