saranbalan committed
Commit 4b526a8 · verified · 1 Parent(s): ce1d9ec

Update app.py

Files changed (1)
  app.py  +34 -23
app.py CHANGED
@@ -1,11 +1,14 @@
 import whisper
-import os
 import gradio as gr
 from groq import Groq
 from deep_translator import GoogleTranslator
 from diffusers import StableDiffusionPipeline
+import os
 import torch
-from huggingface_hub import login
+import openai
+
+# # Replace with your OpenAI API key
+# openai.api_key = "https://huggingface.co/EleutherAI/gpt-neo-2.7B/resolve/main/model.safetensors"
 
 # Set up Groq API key
 api_key = os.getenv("GROQ_API_KEY")
@@ -27,16 +30,18 @@ except Exception as e:
 # Set device: CUDA if available, else CPU
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
-# Load Whisper model
-whisper_model = whisper.load_model("base")
+model_id1 = "dreamlike-art/dreamlike-diffusion-1.0"
+pipe = StableDiffusionPipeline.from_pretrained(model_id1, torch_dtype=torch.float16, use_safetensors=True)
+pipe = pipe.to("cuda")
+
+        … temperature=0.7,
+        )
+        return response['choices'][0]['message']['content'].strip()
+    except Exception as e:
+        return f"An error occurred during text generation: {str(e)}"
 
-# Model IDs for Stable Diffusion pipelines
-# model_id1 = "dreamlike-art/dreamlike-diffusion-1.0"
-# model_id2 = "stabilityai/stable-diffusion-xl-base-1.0"
-restricted_model_id = "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors" # Model to access using HF_API_KEY
 
-# Function to transcribe, translate, and analyze sentiment
-def process_audio(audio_path, image_option):
+def process_audio(audio_path, image_option, creative_text_option):
     if audio_path is None:
         return "Please upload an audio file.", None, None, None
 
@@ -52,7 +57,7 @@ def process_audio(audio_path, image_option):
         tamil_text = transcription.text
     except Exception as e:
         return f"An error occurred during transcription: {str(e)}", None, None, None
-
+
     # Step 2: Translate Tamil to English
     try:
         translator = GoogleTranslator(source='ta', target='en')
@@ -60,38 +65,44 @@ def process_audio(audio_path, image_option):
     except Exception as e:
         return tamil_text, f"An error occurred during translation: {str(e)}", None, None
 
-    # Step 3: Generate image (if selected)
+    # Step 3: Generate creative text (if selected)
+    creative_text = None
+    if creative_text_option == "Generate Creative Text":
+        creative_text = generate_creative_text(translation)
+
+    # Step 4: Generate image (if selected)
     image = None
     if image_option == "Generate Image":
         try:
-            # Use the Hugging Face API key to load the restricted model for image generation
-            pipe = StableDiffusionPipeline.from_pretrained(restricted_model_id, torch_dtype=torch.float16, token=HF_API_KEY)
-            pipe = pipe.to(device)
+            model_id1 = "dreamlike-art/dreamlike-diffusion-1.0"
+            pipe = StableDiffusionPipeline.from_pretrained(model_id1, torch_dtype=torch.float16, use_safetensors=True)
+            pipe = pipe.to("cuda")
             image = pipe(translation).images[0]
         except Exception as e:
-            print(f"Image generation error: {str(e)}") # Add error logging
-            return tamil_text, translation, f"An error occurred during image generation: {str(e)}", None
+            return tamil_text, translation, creative_text, f"An error occurred during image generation: {str(e)}"
 
-    return tamil_text, translation, image
+    return tamil_text, translation, creative_text, image
 
 # Create Gradio interface
 with gr.Blocks(theme=gr.themes.Base()) as iface:
-    gr.Markdown("# Audio Transcription, Translation, and Image Generation")
+    gr.Markdown("# Audio Transcription, Translation, Image & Creative Text Generation")
     with gr.Row():
         with gr.Column():
             audio_input = gr.Audio(type="filepath", label="Upload Audio File")
             image_option = gr.Dropdown(["Generate Image", "Skip Image"], label="Image Generation", value="Generate Image")
+            creative_text_option = gr.Dropdown(["Generate Creative Text", "Skip Creative Text"], label="Creative Text Generation", value="Generate Creative Text")
            submit_button = gr.Button("Process Audio")
         with gr.Column():
-            tamil_text_output = gr.Textbox(label="Tamil Transcription")
+            tamil_text_output = gr.Textbox(label="Tamil Transcription")
            translation_output = gr.Textbox(label="English Translation")
+            creative_text_output = gr.Textbox(label="Creative Text")
            image_output = gr.Image(label="Generated Image")
-
     submit_button.click(
         fn=process_audio,
-        inputs=[audio_input, image_option],
-        outputs=[tamil_text_output, translation_output, image_output]
+        inputs=[audio_input, image_option, creative_text_option],
+        outputs=[tamil_text_output, translation_output, creative_text_output, image_output]
    )
 
 # Launch the interface
 iface.launch()
+
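
Note on the collapsed helper: the second hunk hides most of the new generate_creative_text function; only its tail (the temperature=0.7 argument, the response['choices'][0]['message']['content'] parsing, and the except clause) is visible, while process_audio calls it as creative_text = generate_creative_text(translation). A minimal sketch of what the hidden portion might look like, assuming the legacy (pre-1.0) openai.ChatCompletion.create API that the dict-style response access implies, with a placeholder model and prompt that are not part of the commit:

import openai

# Hypothetical reconstruction: only the last few lines of this helper appear in the diff.
# The model name and prompt below are assumptions, not part of the committed code.
def generate_creative_text(prompt):
    try:
        response = openai.ChatCompletion.create(  # legacy pre-1.0 client call
            model="gpt-3.5-turbo",  # assumed; the actual model is collapsed in the diff
            messages=[{"role": "user", "content": f"Write a short creative piece based on: {prompt}"}],
            temperature=0.7,
        )
        return response['choices'][0]['message']['content'].strip()
    except Exception as e:
        return f"An error occurred during text generation: {str(e)}"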