saranbalan committed
Commit abccd5b · verified · 1 Parent(s): 56d0e1d

Update app.py

Files changed (1):
  1. app.py +15 -31
app.py CHANGED
@@ -3,58 +3,42 @@ import os
 import gradio as gr
 from groq import Groq
 from deep_translator import GoogleTranslator
-import pickle
+import whisper  # needed for whisper.load_model below
 from diffusers import StableDiffusionPipeline
-import matplotlib.pyplot as plt
 import torch
-from huggingface_hub import InferenceApi
-from transformers.utils import move_cache
-
-# Migrate the cache manually
-move_cache()
-
 
 # Set up Groq API key
 api_key = os.getenv("GROQ_API_KEY")
 client = Groq(api_key=api_key)
 
+# Set device: CUDA if available, else CPU
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+# Load the Whisper model locally (replaces the Groq transcription API call below)
+whisper_model = whisper.load_model("base")
+
+# Model IDs for Stable Diffusion pipelines
 model_id1 = "dreamlike-art/dreamlike-diffusion-1.0"
 model_id2 = "stabilityai/stable-diffusion-xl-base-1.0"
 
-# Check if CUDA (GPU) is available and set the device accordingly
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-
-# Conditionally load the model based on the device (if GPU, use float16, if CPU, omit it)
-if device.type == 'cuda':
-    pipe = StableDiffusionPipeline.from_pretrained(model_id2, torch_dtype=torch.float16, use_safetensors=True)
+# Initialize Stable Diffusion pipeline based on device
+if torch.cuda.is_available():
+    pipe = StableDiffusionPipeline.from_pretrained(model_id2, torch_dtype=torch.float16)
 else:
     pipe = StableDiffusionPipeline.from_pretrained(model_id2)  # Omit torch_dtype for CPU
 
 # Move model to the selected device (either GPU or CPU)
 pipe = pipe.to(device)
 
-prompt = """dreamlikeart, a grungy woman with rainbow hair, travelling between dimensions, dynamic pose, happy, soft eyes and narrow chin,
-extreme bokeh, dainty figure, long hair straight down, torn kawaii shirt and baggy jeans
-"""
-
-# Generate the image
-image = pipe(prompt).images[0]
-
-# Function to transcribe, translate, and analyze sentiment
+# Function to process audio (transcription, translation, image generation)
 def process_audio(audio_path, image_option):
     if audio_path is None:
         return "Please upload an audio file.", None, None, None
 
-    # Step 1: Transcribe audio
+    # Step 1: Transcribe audio using Whisper
     try:
-        with open(audio_path, "rb") as file:
-            transcription = client.audio.transcriptions.create(
-                file=(os.path.basename(audio_path), file.read()),
-                model="whisper-large-v3",
-                language="ta",
-                response_format="verbose_json",
-            )
-        tamil_text = transcription.text
+        transcription = whisper_model.transcribe(audio_path)
+        tamil_text = transcription['text']
     except Exception as e:
         return f"An error occurred during transcription: {str(e)}", None, None, None
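For reference, the new local transcription path can be exercised on its own. A minimal sketch, assuming the openai-whisper package (pip install openai-whisper) and ffmpeg are installed; the audio file name is a hypothetical placeholder:

import whisper

# Load the same checkpoint size the commit uses at module import time.
model = whisper.load_model("base")

# transcribe() accepts a path to any ffmpeg-readable file and returns a dict;
# its "text" key is what process_audio assigns to tamil_text.
result = model.transcribe("tamil_sample.wav", language="ta")
print(result["text"])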
 
 
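The translation step named in the updated function comment lies outside this hunk; for completeness, a minimal sketch of the Tamil-to-English call that deep_translator's GoogleTranslator exposes, with a hypothetical sample string standing in for tamil_text:

from deep_translator import GoogleTranslator

# Translate Whisper's Tamil output ("ta") to English ("en").
tamil_text = "வணக்கம், எப்படி இருக்கிறீர்கள்?"  # hypothetical sample input
english_text = GoogleTranslator(source="ta", target="en").translate(tamil_text)
print(english_text)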
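The device-conditional pipeline setup can likewise be tested in isolation. A minimal sketch: it swaps in the generic DiffusionPipeline loader, since the stabilityai/stable-diffusion-xl-base-1.0 repo declares an SDXL pipeline class that the generic loader resolves automatically, and the prompt is hypothetical:

import torch
from diffusers import DiffusionPipeline

# Same device logic as app.py: float16 on GPU, default float32 on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_id = "stabilityai/stable-diffusion-xl-base-1.0"

if device.type == "cuda":
    pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
else:
    pipe = DiffusionPipeline.from_pretrained(model_id)
pipe = pipe.to(device)

# Hypothetical prompt purely for illustration.
image = pipe("a serene coastal village at dawn").images[0]
image.save("output.png")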