Update app.py
app.py CHANGED
@@ -3,58 +3,42 @@ import os
 import gradio as gr
 from groq import Groq
 from deep_translator import GoogleTranslator
-import pickle
 from diffusers import StableDiffusionPipeline
-import matplotlib.pyplot as plt
 import torch
-from huggingface_hub import InferenceApi
-from transformers.utils import move_cache
-
-# Migrate the cache manually
-move_cache()
-
 
 # Set up Groq API key
 api_key = os.getenv("GROQ_API_KEY")
 client = Groq(api_key=api_key)
 
+# Set device: CUDA if available, else CPU
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+# Load Whisper model (if using locally, else use API as in original code)
+# This is assuming you're using Whisper locally, if not, the client API is used.
+whisper_model = whisper.load_model("base")
+
+# Model IDs for Stable Diffusion pipelines
 model_id1 = "dreamlike-art/dreamlike-diffusion-1.0"
 model_id2 = "stabilityai/stable-diffusion-xl-base-1.0"
 
-#
-
-
-# Conditionally load the model based on the device (if GPU, use float16, if CPU, omit it)
-if device.type == 'cuda':
-    pipe = StableDiffusionPipeline.from_pretrained(model_id2, torch_dtype=torch.float16, use_safetensors=True)
+# Initialize Stable Diffusion pipeline based on device
+if torch.cuda.is_available():
+    pipe = StableDiffusionPipeline.from_pretrained(model_id2, torch_dtype=torch.float16)
 else:
     pipe = StableDiffusionPipeline.from_pretrained(model_id2)  # Omit torch_dtype for CPU
 
 # Move model to the selected device (either GPU or CPU)
 pipe = pipe.to(device)
 
-
-extreme bokeh, dainty figure, long hair straight down, torn kawaii shirt and baggy jeans
-"""
-
-# Generate the image
-image = pipe(prompt).images[0]
-
-# Function to transcribe, translate, and analyze sentiment
+# Function to process audio (transcription, translation, image generation)
 def process_audio(audio_path, image_option):
     if audio_path is None:
         return "Please upload an audio file.", None, None, None
 
-    # Step 1: Transcribe audio
+    # Step 1: Transcribe audio using Whisper
     try:
-        with open(audio_path, "rb") as file:
-            transcription = client.audio.transcriptions.create(
-                file=(os.path.basename(audio_path), file.read()),
-                model="whisper-large-v3",
-                language="ta",
-                response_format="verbose_json",
-            )
-        tamil_text = transcription.text
+        transcription = whisper_model.transcribe(audio_path)
+        tamil_text = transcription['text']
     except Exception as e:
         return f"An error occurred during transcription: {str(e)}", None, None, None
 
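Note: the updated code calls whisper.load_model() but this hunk does not add a matching import, and it drops the language="ta" hint that the removed Groq transcription call passed. A minimal sketch of the likely fix, assuming the openai-whisper package (the helper name transcribe_tamil is illustrative, not part of the app):

import whisper  # required by whisper.load_model(); not added in this hunk

# Load once at startup; "base" trades accuracy for speed and memory.
whisper_model = whisper.load_model("base")

def transcribe_tamil(audio_path):
    # transcribe() accepts a language hint, mirroring the language="ta"
    # that the removed Groq API call passed for Tamil audio.
    result = whisper_model.transcribe(audio_path, language="ta")
    return result["text"]

Without the hint, Whisper auto-detects the language from the first 30 seconds of audio, which can be less reliable with the smaller "base" model.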
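Also worth flagging, though unchanged by this commit: both sides load stabilityai/stable-diffusion-xl-base-1.0 through StableDiffusionPipeline, while SDXL checkpoints are normally loaded via DiffusionPipeline.from_pretrained, which picks the matching pipeline class from the repo's model_index.json. A sketch of that variant, under the same device logic as the new code:

import torch
from diffusers import DiffusionPipeline

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# DiffusionPipeline resolves to StableDiffusionXLPipeline for SDXL repos.
if device.type == 'cuda':
    pipe = DiffusionPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
    )
else:
    pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
pipe = pipe.to(device)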