saranbalan committed
Commit abccd5b · verified · 1 Parent(s): 56d0e1d

Update app.py

Files changed (1):
  1. app.py +15 -31
app.py CHANGED
@@ -3,58 +3,42 @@ import os
 import gradio as gr
 from groq import Groq
 from deep_translator import GoogleTranslator
-import pickle
+import whisper  # needed for whisper.load_model below
 from diffusers import StableDiffusionPipeline
-import matplotlib.pyplot as plt
 import torch
-from huggingface_hub import InferenceApi
-from transformers.utils import move_cache
-
-# Migrate the cache manually
-move_cache()
-
 
 # Set up Groq API key
 api_key = os.getenv("GROQ_API_KEY")
 client = Groq(api_key=api_key)
 
+# Set device: CUDA if available, else CPU
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+# Load the Whisper model locally (replaces the Groq transcription API call below)
+whisper_model = whisper.load_model("base")
+
+# Model IDs for Stable Diffusion pipelines
 model_id1 = "dreamlike-art/dreamlike-diffusion-1.0"
 model_id2 = "stabilityai/stable-diffusion-xl-base-1.0"
 
-# Check if CUDA (GPU) is available and set the device accordingly
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-
-# Conditionally load the model based on the device (if GPU, use float16, if CPU, omit it)
-if device.type == 'cuda':
-    pipe = StableDiffusionPipeline.from_pretrained(model_id2, torch_dtype=torch.float16, use_safetensors=True)
+# Initialize Stable Diffusion pipeline based on device
+if torch.cuda.is_available():
+    pipe = StableDiffusionPipeline.from_pretrained(model_id2, torch_dtype=torch.float16)
 else:
     pipe = StableDiffusionPipeline.from_pretrained(model_id2)  # Omit torch_dtype for CPU
 
 # Move model to the selected device (either GPU or CPU)
 pipe = pipe.to(device)
 
-prompt = """dreamlikeart, a grungy woman with rainbow hair, travelling between dimensions, dynamic pose, happy, soft eyes and narrow chin,
-extreme bokeh, dainty figure, long hair straight down, torn kawaii shirt and baggy jeans
-"""
-
-# Generate the image
-image = pipe(prompt).images[0]
-
-# Function to transcribe, translate, and analyze sentiment
+# Function to process audio (transcription, translation, image generation)
 def process_audio(audio_path, image_option):
     if audio_path is None:
         return "Please upload an audio file.", None, None, None
 
-    # Step 1: Transcribe audio
+    # Step 1: Transcribe audio using Whisper
     try:
-        with open(audio_path, "rb") as file:
-            transcription = client.audio.transcriptions.create(
-                file=(os.path.basename(audio_path), file.read()),
-                model="whisper-large-v3",
-                language="ta",
-                response_format="verbose_json",
-            )
-        tamil_text = transcription.text
+        transcription = whisper_model.transcribe(audio_path)
+        tamil_text = transcription['text']
     except Exception as e:
         return f"An error occurred during transcription: {str(e)}", None, None, None
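For reference, the new local transcription path can be exercised on its own. A minimal sketch, assuming the openai-whisper package (pip install openai-whisper) and ffmpeg are installed; the audio file name is a hypothetical placeholder:

import whisper

# Load the same checkpoint size the commit uses at module import time.
model = whisper.load_model("base")

# transcribe() accepts a path to any ffmpeg-readable file and returns a dict;
# its "text" key is what process_audio assigns to tamil_text.
result = model.transcribe("tamil_sample.wav", language="ta")
print(result["text"])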
 
 
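The translation step named in the updated function comment lies outside this hunk; for completeness, a minimal sketch of the Tamil-to-English call that deep_translator's GoogleTranslator exposes, with a hypothetical sample string standing in for tamil_text:

from deep_translator import GoogleTranslator

# Translate Whisper's Tamil output ("ta") to English ("en").
tamil_text = "வணக்கம், எப்படி இருக்கிறீர்கள்?"  # hypothetical sample input
english_text = GoogleTranslator(source="ta", target="en").translate(tamil_text)
print(english_text)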
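The device-conditional pipeline setup can likewise be tested in isolation. A minimal sketch: it swaps in the generic DiffusionPipeline loader, since the stabilityai/stable-diffusion-xl-base-1.0 repo declares an SDXL pipeline class that the generic loader resolves automatically, and the prompt is hypothetical:

import torch
from diffusers import DiffusionPipeline

# Same device logic as app.py: float16 on GPU, default float32 on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_id = "stabilityai/stable-diffusion-xl-base-1.0"

if device.type == "cuda":
    pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
else:
    pipe = DiffusionPipeline.from_pretrained(model_id)
pipe = pipe.to(device)

# Hypothetical prompt purely for illustration.
image = pipe("a serene coastal village at dawn").images[0]
image.save("output.png")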