saranbalan committed
Commit 4b526a8 · verified · 1 Parent(s): ce1d9ec

Update app.py

Files changed (1)
  app.py  +34 -23
app.py CHANGED
@@ -1,11 +1,14 @@
 import whisper
-import os
 import gradio as gr
 from groq import Groq
 from deep_translator import GoogleTranslator
 from diffusers import StableDiffusionPipeline
+import os
 import torch
-from huggingface_hub import login
+import openai
+
+# # Replace with your OpenAI API key
+# openai.api_key = "https://huggingface.co/EleutherAI/gpt-neo-2.7B/resolve/main/model.safetensors"
 
 # Set up Groq API key
 api_key = os.getenv("GROQ_API_KEY")
@@ -27,16 +30,18 @@ except Exception as e:
 # Set device: CUDA if available, else CPU
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
-# Load Whisper model
-whisper_model = whisper.load_model("base")
+model_id1 = "dreamlike-art/dreamlike-diffusion-1.0"
+pipe = StableDiffusionPipeline.from_pretrained(model_id1, torch_dtype=torch.float16, use_safetensors=True)
+pipe = pipe.to("cuda")
+
+        … temperature=0.7,
+        )
+        return response['choices'][0]['message']['content'].strip()
+    except Exception as e:
+        return f"An error occurred during text generation: {str(e)}"
 
-# Model IDs for Stable Diffusion pipelines
-# model_id1 = "dreamlike-art/dreamlike-diffusion-1.0"
-# model_id2 = "stabilityai/stable-diffusion-xl-base-1.0"
-restricted_model_id = "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors" # Model to access using HF_API_KEY
 
-# Function to transcribe, translate, and analyze sentiment
-def process_audio(audio_path, image_option):
+def process_audio(audio_path, image_option, creative_text_option):
     if audio_path is None:
         return "Please upload an audio file.", None, None, None
 
@@ -52,7 +57,7 @@ def process_audio(audio_path, image_option):
         tamil_text = transcription.text
     except Exception as e:
         return f"An error occurred during transcription: {str(e)}", None, None, None
-
+
     # Step 2: Translate Tamil to English
     try:
         translator = GoogleTranslator(source='ta', target='en')
@@ -60,38 +65,44 @@ def process_audio(audio_path, image_option):
     except Exception as e:
         return tamil_text, f"An error occurred during translation: {str(e)}", None, None
 
-    # Step 3: Generate image (if selected)
+    # Step 3: Generate creative text (if selected)
+    creative_text = None
+    if creative_text_option == "Generate Creative Text":
+        creative_text = generate_creative_text(translation)
+
+    # Step 4: Generate image (if selected)
     image = None
     if image_option == "Generate Image":
         try:
-            # Use the Hugging Face API key to load the restricted model for image generation
-            pipe = StableDiffusionPipeline.from_pretrained(restricted_model_id, torch_dtype=torch.float16, token=HF_API_KEY)
-            pipe = pipe.to(device)
+            model_id1 = "dreamlike-art/dreamlike-diffusion-1.0"
+            pipe = StableDiffusionPipeline.from_pretrained(model_id1, torch_dtype=torch.float16, use_safetensors=True)
+            pipe = pipe.to("cuda")
             image = pipe(translation).images[0]
         except Exception as e:
-            print(f"Image generation error: {str(e)}") # Add error logging
-            return tamil_text, translation, f"An error occurred during image generation: {str(e)}", None
+            return tamil_text, translation, creative_text, f"An error occurred during image generation: {str(e)}"
 
-    return tamil_text, translation, image
+    return tamil_text, translation, creative_text, image
 
 # Create Gradio interface
 with gr.Blocks(theme=gr.themes.Base()) as iface:
-    gr.Markdown("# Audio Transcription, Translation, and Image Generation")
+    gr.Markdown("# Audio Transcription, Translation, Image & Creative Text Generation")
     with gr.Row():
         with gr.Column():
             audio_input = gr.Audio(type="filepath", label="Upload Audio File")
             image_option = gr.Dropdown(["Generate Image", "Skip Image"], label="Image Generation", value="Generate Image")
+            creative_text_option = gr.Dropdown(["Generate Creative Text", "Skip Creative Text"], label="Creative Text Generation", value="Generate Creative Text")
            submit_button = gr.Button("Process Audio")
         with gr.Column():
-            tamil_text_output = gr.Textbox(label="Tamil Transcription")
+            tamil_text_output = gr.Textbox(label="Tamil Transcription")
            translation_output = gr.Textbox(label="English Translation")
+            creative_text_output = gr.Textbox(label="Creative Text")
            image_output = gr.Image(label="Generated Image")
-
     submit_button.click(
         fn=process_audio,
-        inputs=[audio_input, image_option],
-        outputs=[tamil_text_output, translation_output, image_output]
+        inputs=[audio_input, image_option, creative_text_option],
+        outputs=[tamil_text_output, translation_output, creative_text_output, image_output]
    )
 
 # Launch the interface
 iface.launch()
+
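
Note on the collapsed helper: the second hunk hides most of the new generate_creative_text function; only its tail (the temperature=0.7 argument, the response['choices'][0]['message']['content'] parsing, and the except clause) is visible, while process_audio calls it as creative_text = generate_creative_text(translation). A minimal sketch of what the hidden portion might look like, assuming the legacy (pre-1.0) openai.ChatCompletion.create API that the dict-style response access implies, with a placeholder model and prompt that are not part of the commit:

import openai

# Hypothetical reconstruction: only the last few lines of this helper appear in the diff.
# The model name and prompt below are assumptions, not part of the committed code.
def generate_creative_text(prompt):
    try:
        response = openai.ChatCompletion.create(  # legacy pre-1.0 client call
            model="gpt-3.5-turbo",  # assumed; the actual model is collapsed in the diff
            messages=[{"role": "user", "content": f"Write a short creative piece based on: {prompt}"}],
            temperature=0.7,
        )
        return response['choices'][0]['message']['content'].strip()
    except Exception as e:
        return f"An error occurred during text generation: {str(e)}"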