not-lain committed
Commit dc95e97 · 1 Parent(s): b21d0d9

soft reset

Files changed (1):
  1. app.py +368 -232
app.py CHANGED
@@ -1,19 +1,21 @@
 import gradio as gr
 import spaces
 import torch
-from loadimg import load_img
+from loadimg import load_img  # Assuming loadimg.py exists with load_img function
 from torchvision import transforms
 from transformers import AutoModelForImageSegmentation, pipeline
 from diffusers import FluxFillPipeline
 from PIL import Image, ImageOps
-
-# from sam2.sam2_image_predictor import SAM2ImagePredictor
 import numpy as np
 from simple_lama_inpainting import SimpleLama
 from contextlib import contextmanager
-# import whisperx
 import gc
 
+# --- Add Translation Imports ---
+from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
+
+
+# --- Utility Functions ---
 @contextmanager
 def float32_high_matmul_precision():
     torch.set_float32_matmul_precision("high")
@@ -23,14 +25,33 @@ def float32_high_matmul_precision():
         torch.set_float32_matmul_precision("highest")
 
 
-pipe = FluxFillPipeline.from_pretrained(
-    "black-forest-labs/FLUX.1-Fill-dev", torch_dtype=torch.bfloat16
-).to("cuda")
+# --- Model Loading ---
+# Use context manager for precision during model loading if needed
+with float32_high_matmul_precision():
+    pipe = FluxFillPipeline.from_pretrained(
+        "black-forest-labs/FLUX.1-Fill-dev", torch_dtype=torch.bfloat16
+    ).to("cuda")
 
-birefnet = AutoModelForImageSegmentation.from_pretrained(
-    "ZhengPeng7/BiRefNet", trust_remote_code=True
-)
-birefnet.to("cuda")
+    birefnet = AutoModelForImageSegmentation.from_pretrained(
+        "ZhengPeng7/BiRefNet", trust_remote_code=True
+    ).to("cuda")
+
+simple_lama = SimpleLama()  # Initialize Lama globally if used often
+
+# --- Translation Model and Tokenizer Loading ---
+translation_model_name = "facebook/mbart-large-50-many-to-many-mmt"
+try:
+    translation_model = MBartForConditionalGeneration.from_pretrained(
+        translation_model_name
+    ).to("cuda")  # Move to GPU
+    translation_tokenizer = MBart50TokenizerFast.from_pretrained(translation_model_name)
+except Exception as e:
+    print(f"Error loading translation model/tokenizer: {e}")
+    # Consider exiting or disabling the translation tab if loading fails
+    translation_model = None
+    translation_tokenizer = None
+
+# --- Image Processing Functions ---
 
 transform_image = transforms.Compose(
     [
@@ -49,7 +70,6 @@ def prepare_image_and_mask(
     padding_right=0,
 ):
     image = load_img(image).convert("RGB")
-    # expand image (left,top,right,bottom)
     background = ImageOps.expand(
         image,
         border=(padding_left, padding_top, padding_right, padding_bottom),
@@ -77,19 +97,19 @@ def outpaint(
     background, mask = prepare_image_and_mask(
         image, padding_top, padding_bottom, padding_left, padding_right
     )
-
-    result = pipe(
-        prompt=prompt,
-        height=background.height,
-        width=background.width,
-        image=background,
-        mask_image=mask,
-        num_inference_steps=num_inference_steps,
-        guidance_scale=guidance_scale,
-    ).images[0]
-
+    with (
+        float32_high_matmul_precision()
+    ):  # Apply precision context if needed for inference
+        result = pipe(
+            prompt=prompt,
+            height=background.height,
+            width=background.width,
+            image=background,
+            mask_image=mask,
+            num_inference_steps=num_inference_steps,
+            guidance_scale=guidance_scale,
+        ).images[0]
     result = result.convert("RGBA")
-
     return result
 
 
@@ -102,275 +122,391 @@ def inpaint(
 ):
     background = image.convert("RGB")
     mask = mask.convert("L")
-
-    result = pipe(
-        prompt=prompt,
-        height=background.height,
-        width=background.width,
-        image=background,
-        mask_image=mask,
-        num_inference_steps=num_inference_steps,
-        guidance_scale=guidance_scale,
-    ).images[0]
-
+    with (
+        float32_high_matmul_precision()
+    ):  # Apply precision context if needed for inference
+        result = pipe(
+            prompt=prompt,
+            height=background.height,
+            width=background.width,
+            image=background,
+            mask_image=mask,
+            num_inference_steps=num_inference_steps,
+            guidance_scale=guidance_scale,
+        ).images[0]
     result = result.convert("RGBA")
-
     return result
 
 
 def rmbg(image=None, url=None):
-    if image is None:
-        image = url
-    image = load_img(image).convert("RGB")
-    image_size = image.size
-    input_images = transform_image(image).unsqueeze(0).to("cuda")
+    if image is None and url:
+        # Basic check for URL format, improve as needed
+        if not url.startswith(("http://", "https://")):
+            return "Invalid URL provided."
+        image = url  # load_img should handle URLs if configured correctly
+    elif image is None:
+        return "Please provide an image or a URL."
+
+    try:
+        image_pil = load_img(image).convert("RGB")
+    except Exception as e:
+        return f"Error loading image: {e}"
+
+    image_size = image_pil.size
+    input_images = transform_image(image_pil).unsqueeze(0).to("cuda")
     with float32_high_matmul_precision():
-        # Prediction
         with torch.no_grad():
             preds = birefnet(input_images)[-1].sigmoid().cpu()
         pred = preds[0].squeeze()
         pred_pil = transforms.ToPILImage()(pred)
        mask = pred_pil.resize(image_size)
-    image.putalpha(mask)
-    return image
-
-
-# def mask_generation(image=None, d=None):
-#     # use bfloat16 for the entire notebook
-#     # torch.autocast("cuda", dtype=torch.bfloat16).__enter__()
-#     # # turn on tfloat32 for Ampere GPUs (https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices)
-#     # if torch.cuda.get_device_properties(0).major >= 8:
-#     #     torch.backends.cuda.matmul.allow_tf32 = True
-#     #     torch.backends.cudnn.allow_tf32 = True
-#     d = eval(d)  # convert this to dictionary
-#     with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16):
-#         predictor = SAM2ImagePredictor.from_pretrained("facebook/sam2.1-hiera-large")
-#         predictor.set_image(image)
-#         input_point = np.array(d["input_points"])
-#         input_label = np.array(d["input_labels"])
-#         masks, scores, logits = predictor.predict(
-#             point_coords=input_point,
-#             point_labels=input_label,
-#             multimask_output=True,
-#         )
-#         sorted_ind = np.argsort(scores)[::-1]
-#         masks = masks[sorted_ind]
-#         scores = scores[sorted_ind]
-#         logits = logits[sorted_ind]
-
-#         out = []
-#         for i in range(len(masks)):
-#             m = Image.fromarray(masks[i] * 255).convert("L")
-#             comp = Image.composite(image, m, m)
-#             out.append((comp, f"image {i}"))
-
-#         return out
+    image_pil.putalpha(mask)
+    # Clean up GPU memory if needed
+    del input_images, preds, pred
+    torch.cuda.empty_cache()
+    gc.collect()
+    return image_pil
 
 
 def erase(image=None, mask=None):
-    simple_lama = SimpleLama()
-    image = load_img(image)
-    mask = load_img(mask).convert("L")
-    return simple_lama(image, mask)
-
-
-# def transcribe(audio):
-#     if audio is None:
-#         raise gr.Error("No audio file submitted!")
-
-#     device = "cuda" if torch.cuda.is_available() else "cpu"
-#     compute_type = "float16"
-#     batch_size = 8  # reduced batch size to be conservative with memory
-
-#     try:
-#         # 1. Load model and transcribe
-#         model = whisperx.load_model("large-v2", device, compute_type=compute_type)
-#         audio_input = whisperx.load_audio(audio)
-#         result = model.transcribe(audio_input, batch_size=batch_size)
-
-#         # Clear GPU memory
-#         del model
-#         gc.collect()
-#         torch.cuda.empty_cache()
-
-#         # 2. Align whisper output
-#         model_a, metadata = whisperx.load_align_model(language_code=result["language"], device=device)
-#         result = whisperx.align(result["segments"], model_a, metadata, audio_input, device, return_char_alignments=False)
-
-#         # Clear GPU memory
-#         del model_a
-#         gc.collect()
-#         torch.cuda.empty_cache()
-
-#         # 3. Assign speaker labels
-#         diarize_model = whisperx.DiarizationPipeline(device=device)
-#         diarize_segments = diarize_model(audio_input)
-
-#         # Combine transcription with speaker diarization
-#         result = whisperx.assign_word_speakers(diarize_segments, result)
-
-#         # Format output with speaker labels and timestamps
-#         formatted_text = []
-#         for segment in result["segments"]:
-#             if not isinstance(segment, dict):
-#                 continue
-
-#             speaker = f"[Speaker {segment.get('speaker', 'Unknown')}]"
-#             start_time = f"{float(segment.get('start', 0)):.2f}"
-#             end_time = f"{float(segment.get('end', 0)):.2f}"
-#             text = segment.get('text', '').strip()
-#             formatted_text.append(f"[{start_time}s - {end_time}s] {speaker}: {text}")
-
-#         return "\n".join(formatted_text)
-
-#     except Exception as e:
-#         raise gr.Error(f"Transcription failed: {str(e)}")
-#     finally:
-#         # Ensure GPU memory is cleared even if an error occurs
-#         gc.collect()
-#         torch.cuda.empty_cache()
-
-
-@spaces.GPU(duration=120)
+    if image is None or mask is None:
+        return "Please provide both an image and a mask."
+    try:
+        image_pil = load_img(image)
+        mask_pil = load_img(mask).convert("L")
+        result = simple_lama(image_pil, mask_pil)
+        # Clean up
+        gc.collect()
+        return result
+    except Exception as e:
+        return f"Error during erase operation: {e}"
+
+
+# --- Translation Functionality ---
+
+# Language Mapping
+lang_data = {
+    "Arabic": "ar_AR",
+    "Czech": "cs_CZ",
+    "German": "de_DE",
+    "English": "en_XX",
+    "Spanish": "es_XX",
+    "Estonian": "et_EE",
+    "Finnish": "fi_FI",
+    "French": "fr_XX",
+    "Gujarati": "gu_IN",
+    "Hindi": "hi_IN",
+    "Italian": "it_IT",
+    "Japanese": "ja_XX",
+    "Kazakh": "kk_KZ",
+    "Korean": "ko_KR",
+    "Lithuanian": "lt_LT",
+    "Latvian": "lv_LV",
+    "Burmese": "my_MM",
+    "Nepali": "ne_NP",
+    "Dutch": "nl_XX",
+    "Romanian": "ro_RO",
+    "Russian": "ru_RU",
+    "Sinhala": "si_LK",
+    "Turkish": "tr_TR",
+    "Vietnamese": "vi_VN",
+    "Chinese": "zh_CN",
+    "Afrikaans": "af_ZA",
+    "Azerbaijani": "az_AZ",
+    "Bengali": "bn_IN",
+    "Persian": "fa_IR",
+    "Hebrew": "he_IL",
+    "Croatian": "hr_HR",
+    "Indonesian": "id_ID",
+    "Georgian": "ka_GE",
+    "Khmer": "km_KH",
+    "Macedonian": "mk_MK",
+    "Malayalam": "ml_IN",
+    "Mongolian": "mn_MN",
+    "Marathi": "mr_IN",
+    "Polish": "pl_PL",
+    "Pashto": "ps_AF",
+    "Portuguese": "pt_XX",
+    "Swedish": "sv_SE",
+    "Swahili": "sw_KE",
+    "Tamil": "ta_IN",
+    "Telugu": "te_IN",
+    "Thai": "th_TH",
+    "Tagalog": "tl_XX",
+    "Ukrainian": "uk_UA",
+    "Urdu": "ur_PK",
+    "Xhosa": "xh_ZA",
+    "Galician": "gl_ES",
+    "Slovene": "sl_SI",
+}
+language_names = sorted(list(lang_data.keys()))
+
+
+def translate_text(text_to_translate, source_language_name, target_language_name):
+    """
+    Translates text using the loaded mBART model.
+    """
+    if translation_model is None or translation_tokenizer is None:
+        return "Translation model not loaded. Cannot perform translation."
+    if not text_to_translate:
+        return "Please enter text to translate."
+    if not source_language_name:
+        return "Please select a source language."
+    if not target_language_name:
+        return "Please select a target language."
+
+    try:
+        source_lang_code = lang_data[source_language_name]
+        target_lang_code = lang_data[target_language_name]
+
+        translation_tokenizer.src_lang = source_lang_code
+        encoded_text = translation_tokenizer(text_to_translate, return_tensors="pt").to(
+            "cuda"
+        )  # Move input to GPU
+        target_lang_id = translation_tokenizer.lang_code_to_id[target_lang_code]
+
+        # Generate translation on GPU
+        with torch.no_grad():  # Use no_grad for inference
+            generated_tokens = translation_model.generate(
+                **encoded_text, forced_bos_token_id=target_lang_id, max_length=200
+            )
+
+        translated_text = translation_tokenizer.batch_decode(
+            generated_tokens, skip_special_tokens=True
+        )
+
+        # Clean up GPU memory
+        del encoded_text, generated_tokens
+        torch.cuda.empty_cache()
+        gc.collect()
+
+        return translated_text[0]
+
+    except KeyError as e:
+        return f"Error: Language code not found for {e}. Check language mappings."
+    except Exception as e:
+        print(f"Translation error: {e}")
+        # Clean up GPU memory on error too
+        torch.cuda.empty_cache()
+        gc.collect()
+        return f"An error occurred during translation: {e}"
+
+
+# --- Main Function Router (for image tasks) ---
+# Note: Translation uses its own function directly
+@spaces.GPU(duration=120)  # Keep GPU decorator if needed for image tasks
 def main(*args):
     api_num = args[0]
     args = args[1:]
-    if api_num == 1:
-        return rmbg(*args)
-    elif api_num == 2:
-        return outpaint(*args)
-    elif api_num == 3:
-        return inpaint(*args)
-    # elif api_num == 4:
-    #     return mask_generation(*args)
-    elif api_num == 5:
-        return erase(*args)
-    # elif api_num == 6:
-    #     return transcribe(*args)
+    gc.collect()  # Try to collect garbage before starting task
+    torch.cuda.empty_cache()  # Clear cache before starting task
 
+    result = None
+    try:
+        if api_num == 1:
+            result = rmbg(*args)
+        elif api_num == 2:
+            result = outpaint(*args)
+        elif api_num == 3:
+            result = inpaint(*args)
+        # elif api_num == 4:  # Keep commented out as in original
+        #     return mask_generation(*args)
+        elif api_num == 5:
+            result = erase(*args)
+        else:
+            result = "Invalid API number."
+    except Exception as e:
+        print(f"Error in main task routing (api_num={api_num}): {e}")
+        result = f"An error occurred: {e}"
+    finally:
+        # Ensure memory cleanup happens even if there's an error
+        gc.collect()
+        torch.cuda.empty_cache()
+
+    return result
+
+
+# --- Define Gradio Interfaces for Each Tab ---
 
+# Image Task Tabs
 rmbg_tab = gr.Interface(
     fn=main,
     inputs=[
-        gr.Number(1, interactive=False),
-        "image",
-        gr.Text("", label="url"),
+        gr.Number(1, interactive=False, visible=False),  # Hide API number
+        gr.Image(label="Input Image", type="pil", sources=["upload", "clipboard"]),
+        gr.Text(label="Or Image URL (optional)"),
     ],
-    outputs=["image"],
+    outputs=gr.Image(label="Output Image", type="pil"),
+    title="Remove Background",
+    description="Upload an image or provide a URL to remove its background.",
     api_name="rmbg",
-    examples=[[1, "./assets/Inpainting mask.png", ""]],
+    # examples=[[1, "./assets/sample_rmbg.png", ""]],  # Update example path if needed
     cache_examples=False,
-    description="pass an image or a url of an image",
 )
 
 outpaint_tab = gr.Interface(
     fn=main,
     inputs=[
-        gr.Number(2, interactive=False),
-        gr.Image(label="image", type="pil"),
-        gr.Number(label="padding top"),
-        gr.Number(label="padding bottom"),
-        gr.Number(label="padding left"),
-        gr.Number(label="padding right"),
-        gr.Text(label="prompt"),
-        gr.Number(value=50, label="num_inference_steps"),
-        gr.Number(value=28, label="guidance_scale"),
+        gr.Number(2, interactive=False, visible=False),
+        gr.Image(label="Input Image", type="pil", sources=["upload", "clipboard"]),
+        gr.Number(value=0, label="Padding Top (pixels)"),
+        gr.Number(value=0, label="Padding Bottom (pixels)"),
+        gr.Number(value=0, label="Padding Left (pixels)"),
+        gr.Number(value=0, label="Padding Right (pixels)"),
+        gr.Text(
+            label="Prompt (optional)",
+            info="Describe what to fill the extended area with",
+        ),
+        gr.Slider(
+            minimum=10, maximum=100, step=1, value=28, label="Inference Steps"
+        ),  # Use slider for steps
+        gr.Slider(
+            minimum=1, maximum=100, step=1, value=50, label="Guidance Scale"
+        ),  # Use slider for guidance
     ],
-    outputs=["image"],
+    outputs=gr.Image(label="Outpainted Image", type="pil"),
+    title="Outpainting",
+    description="Extend an image by adding padding and filling the new area using a diffusion model.",
     api_name="outpainting",
-    examples=[[2, "./assets/rocket.png", 100, 0, 0, 0, "", 50, 28]],
+    # examples=[[2, "./assets/rocket.png", 100, 0, 0, 0, "", 28, 50]],  # Update example path
     cache_examples=False,
 )
 
-
 inpaint_tab = gr.Interface(
     fn=main,
     inputs=[
-        gr.Number(3, interactive=False),
-        gr.Image(label="image", type="pil"),
-        gr.Image(label="mask", type="pil"),
-        gr.Text(label="prompt"),
-        gr.Number(value=50, label="num_inference_steps"),
-        gr.Number(value=28, label="guidance_scale"),
+        gr.Number(3, interactive=False, visible=False),
+        gr.Image(label="Input Image", type="pil", sources=["upload", "clipboard"]),
+        gr.Image(
+            label="Mask Image (White=Inpaint Area)",
+            type="pil",
+            sources=["upload", "clipboard"],
+        ),
+        gr.Text(
+            label="Prompt (optional)", info="Describe what to fill the masked area with"
+        ),
+        gr.Slider(minimum=10, maximum=100, step=1, value=28, label="Inference Steps"),
+        gr.Slider(minimum=1, maximum=100, step=1, value=50, label="Guidance Scale"),
     ],
-    outputs=["image"],
+    outputs=gr.Image(label="Inpainted Image", type="pil"),
+    title="Inpainting",
+    description="Fill in the white areas of a mask applied to an image using a diffusion model.",
     api_name="inpaint",
-    examples=[[3, "./assets/rocket.png", "./assets/Inpainting mask.png"]],
+    # examples=[[3, "./assets/rocket.png", "./assets/Inpainting_mask.png", "", 28, 50]],  # Update example paths
     cache_examples=False,
-    description="it is recommended that you use https://github.com/la-voliere/react-mask-editor when creating an image mask in JS and then inverse it before sending it to this space",
 )
 
-
-# sam2_tab = gr.Interface(
-#     main,
-#     inputs=[
-#         gr.Number(4, interactive=False),
-#         gr.Image(type="pil"),
-#         gr.Text(),
-#     ],
-#     outputs=gr.Gallery(),
-#     examples=[
-#         [
-#             4,
-#             "./assets/truck.jpg",
-#             '{"input_points": [[500, 375], [1125, 625]], "input_labels": [1, 0]}',
-#         ]
-#     ],
-#     api_name="sam2",
-#     cache_examples=False,
-# )
-
 erase_tab = gr.Interface(
-    main,
+    fn=main,
     inputs=[
-        gr.Number(5, interactive=False),
-        gr.Image(type="pil"),
-        gr.Image(type="pil"),
-    ],
-    outputs=gr.Image(),
-    examples=[
-        [
-            5,
-            "./assets/rocket.png",
-            "./assets/Inpainting mask.png",
-        ]
+        gr.Number(5, interactive=False, visible=False),
+        gr.Image(label="Input Image", type="pil", sources=["upload", "clipboard"]),
+        gr.Image(
+            label="Mask Image (White=Erase Area)",
+            type="pil",
+            sources=["upload", "clipboard"],
+        ),
     ],
+    outputs=gr.Image(label="Result Image", type="pil"),
+    title="Erase Object (LAMA)",
+    description="Erase objects from an image based on a mask using the LaMa inpainting model.",
    api_name="erase",
+    # examples=[[5, "./assets/rocket.png", "./assets/Inpainting_mask.png"]],  # Update example paths
     cache_examples=False,
 )
 
-transcribe_tab = gr.Interface(
-    fn=main,
-    inputs=[
-        gr.Number(value=6, interactive=False),  # API number
-        gr.Audio(type="filepath", label="Audio File"),
-    ],
-    outputs=gr.Textbox(label="Transcription"),
-    title="Audio Transcription",
-    description="Upload an audio file to extract text using WhisperX with speaker diarization",
-    api_name="transcribe",
-    examples=[]
-)
 
+# --- Define Translation Tab using gr.Blocks ---
+with gr.Blocks() as translation_tab:
+    gr.Markdown(
+        """
+        ## Multilingual Translation (mBART-50)
+        Translate text between 50 different languages.
+        Select the source and target languages, enter your text, and click Translate.
+        """
+    )
+    with gr.Row():
+        with gr.Column(scale=1):
+            source_lang_dropdown = gr.Dropdown(
+                label="Source Language",
+                choices=language_names,
+                info="Select the language of your input text.",
+            )
+            target_lang_dropdown = gr.Dropdown(
+                label="Target Language",
+                choices=language_names,
+                info="Select the language you want to translate to.",
+            )
+        with gr.Column(scale=2):
+            input_textbox = gr.Textbox(
+                label="Text to Translate",
+                lines=6,  # Increased lines
+                placeholder="Enter text here...",
+            )
+            translate_button = gr.Button(
+                "Translate", variant="primary"
+            )  # Added variant
+            output_textbox = gr.Textbox(
+                label="Translated Text",
+                lines=6,  # Increased lines
+                interactive=False,  # Make output read-only
+            )
+
+    # Connect Components to the translation function directly
+    translate_button.click(
+        fn=translate_text,
+        inputs=[input_textbox, source_lang_dropdown, target_lang_dropdown],
+        outputs=output_textbox,
+        api_name="translate",  # Add API name for the translation endpoint
+    )
+
+    # Add Translation Examples
+    gr.Examples(
+        examples=[
+            [
+                "संयुक्त राष्ट्र के प्रमुख का कहना है कि सीरिया में कोई सैन्य समाधान नहीं है",
+                "Hindi",
+                "French",
+            ],
+            [
+                "الأمين العام للأمم المتحدة يقول إنه لا يوجد حل عسكري في سوريا.",
+                "Arabic",
+                "English",
+            ],
+            [
+                "Le chef de l'ONU affirme qu'il n'y a pas de solution militaire en Syrie.",
+                "French",
+                "German",
+            ],
+            ["Hello world! How are you today?", "English", "Spanish"],
+            ["Guten Tag!", "German", "Japanese"],
+            ["これはテストです", "Japanese", "English"],
+        ],
+        inputs=[input_textbox, source_lang_dropdown, target_lang_dropdown],
+        outputs=output_textbox,
+        fn=translate_text,
+        cache_examples=False,
+    )
+
+# --- Combine all tabs ---
 demo = gr.TabbedInterface(
     [
         rmbg_tab,
         outpaint_tab,
         inpaint_tab,
-        # sam2_tab,
         erase_tab,
-        transcribe_tab,
+        translation_tab,  # Add the translation tab
+        # sam2_tab,  # Keep commented out
     ],
     [
-        "remove background",
-        "outpainting",
-        "inpainting",
+        "Remove Background",  # Tab title
+        "Outpainting",  # Tab title
+        "Inpainting",  # Tab title
+        "Erase (LAMA)",  # Tab title
+        "Translate",  # Tab title for translation
         # "sam2",
-        "erase",
-        # "transcribe",
     ],
-    title="Utilities that require GPU",
+    title="Image & Text Utilities (GPU)",  # Updated title
 )
 
-
 demo.launch()
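
Since this commit wires api_name="translate" to the new Blocks click handler, the endpoint should be callable programmatically alongside the existing image endpoints. The following is a minimal client-side sketch, not part of the commit, assuming gradio_client is installed and the Space is reachable at the placeholder URL below; the positional arguments follow the handler's inputs list [input_textbox, source_lang_dropdown, target_lang_dropdown].

from gradio_client import Client

# Placeholder URL: replace with the actual Space URL or the local
# address printed by demo.launch().
client = Client("http://127.0.0.1:7860/")

# Argument order mirrors the click handler's inputs:
# text to translate, source language name, target language name.
result = client.predict(
    "Hello world! How are you today?",
    "English",
    "Spanish",
    api_name="/translate",
)
print(result)  # expected: the translated string returned by translate_text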