Spaces:

shukdevdatta123
/

EngToJap-2.0

Running

File size: 7,400 Bytes

import openai
import os
import streamlit as st
from PIL import Image
from gtts import gTTS
import tempfile
import shutil
import re
import speech_recognition as sr  # Library for voice recognition
import sounddevice as sd

def _request_chat_text(messages):
    """Send one chat-completion request and return the stripped reply text."""
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=messages,
        max_tokens=300,
        temperature=0.5
    )
    return response.choices[0].message['content'].strip()


def translate_to_japanese(api_key, text):
    """
    Translate English text to Japanese using OpenAI's API and fetch its Romaji.

    Two chat-completion requests are made: the first translates ``text`` into
    Japanese, the second asks for the Romaji (Latin-script pronunciation) of
    that translation.

    Args:
        api_key: OpenAI API key; assigned to ``openai.api_key`` before the
            requests are sent.
        text: English text to translate.

    Returns:
        A 2-tuple. On success it is ``(japanese_translation, pronunciation)``.
        On any failure (missing key, empty text, API error, unexpected error)
        it is ``(error_message, None)``, so callers can always unpack two
        values and treat a falsy second element as the failure signal.
    """
    # Validation failures use the same (message, None) shape as API failures.
    # Returning a bare string here would make `a, b = translate_to_japanese()`
    # raise ValueError in the caller instead of showing the message.
    if not api_key:
        return "Error: API key is missing.", None
    # Whitespace-only input carries nothing to translate; reject it up front
    # rather than spending an API call on it.
    if not text or (isinstance(text, str) and not text.strip()):
        return "Error: Input text is empty.", None

    # Set the OpenAI API key
    openai.api_key = api_key

    messages_translation = [
        {"role": "system", "content": "You are a helpful translator."},
        {"role": "user", "content": f"Translate the following English text to Japanese:\n\n{text}"}
    ]

    try:
        # First request: English -> Japanese.
        japanese_translation = _request_chat_text(messages_translation)

        # Second request: Romaji for the translation obtained above.
        messages_pronunciation = [
            {"role": "system", "content": "You are a helpful assistant who provides the Romaji (Japanese pronunciation in Latin script) of Japanese text."},
            {"role": "user", "content": f"Provide the Romaji pronunciation for the following Japanese text:\n\n{japanese_translation}"}
        ]
        pronunciation = _request_chat_text(messages_pronunciation)

        return japanese_translation, pronunciation

    except openai.error.OpenAIError as e:
        return f"OpenAI API error: {str(e)}", None
    except Exception as e:
        # Top-level boundary for the UI: report the failure as a message
        # instead of crashing the Streamlit script.
        return f"An unexpected error occurred: {str(e)}", None

def clean_pronunciation(pronunciation_text):
    """Strip the model's introductory phrase from a Romaji reply.

    If the text starts with "Sure! The Romaji pronunciation for the Japanese
    text ... is" (optionally followed by colons), that lead-in is removed.
    The lead-in is matched lazily on a single line, so it ends at the first
    "is" that follows it. The remaining text is returned with surrounding
    whitespace trimmed; text without the lead-in is only trimmed.
    """
    lead_in = re.match(
        r"^Sure! The Romaji pronunciation for the Japanese text.*?is[:]*",
        pronunciation_text,
    )
    if lead_in is None:
        remainder = pronunciation_text
    else:
        # Keep only what follows the matched lead-in.
        remainder = pronunciation_text[lead_in.end():]
    return remainder.strip()

def generate_audio_from_text(text):
    """Synthesize speech for ``text`` with gTTS and return the MP3 file path.

    The audio is generated with the Japanese voice (``lang='ja'``) and saved
    to a temporary ``.mp3`` file created with ``delete=False``, so the file
    outlives this call; removing it is left to the caller.

    Args:
        text: Text to be spoken.

    Returns:
        Path of the temporary MP3 file that was written.
    """
    tts = gTTS(text, lang='ja')  # 'ja' for Japanese language

    # Reserve a unique path, then close our own handle before gTTS writes to
    # it by name. Leaving the handle open leaks a file descriptor on every
    # call, and on Windows an open NamedTemporaryFile cannot be reopened.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio_file:
        audio_path = temp_audio_file.name

    tts.save(audio_path)
    return audio_path

def transcribe_voice_input():
    """Record five seconds of audio and transcribe it with Google's recognizer.

    Audio is captured through ``sounddevice`` as mono 16-bit samples at
    44100 Hz, wrapped in a ``speech_recognition.AudioData`` object, and sent
    to ``Recognizer.recognize_google``. Progress and errors are reported in
    the Streamlit UI.

    Returns:
        The transcribed text, or ``None`` when the speech could not be
        understood or the recognition service could not be reached.
    """
    recognizer = sr.Recognizer()

    # Set parameters for sounddevice (e.g., 44100 Hz, 1 channel)
    duration = 5  # Seconds to record
    fs = 44100  # Sampling frequency

    st.info("Please speak now...")

    # Record the audio using sounddevice
    audio_data = sd.rec(int(duration * fs), samplerate=fs, channels=1, dtype='int16')
    sd.wait()  # Wait until recording is finished

    # AudioData is a plain container, not a context manager, so it is built
    # directly instead of being entered with `with`. The sample width is
    # 2 bytes because the recording dtype is int16.
    audio = sr.AudioData(audio_data.tobytes(), fs, 2)

    try:
        text = recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        st.error("Sorry, I could not understand the audio. Please try again.")
        return None
    except sr.RequestError:
        st.error("Could not request results from Google Speech Recognition service.")
        return None

    st.success(f"Transcribed text: {text}")
    return text

# ---- Streamlit page setup ----

# The OpenAI key comes from the environment (configured as a Hugging Face
# secret); it is None when the variable is not set.
api_key = os.environ.get("OPENAI_API_KEY")

# Page heading and a short description of what the app does.
st.title("English to Japanese Translator with Pronunciation")
st.markdown("Translate English text into Japanese and get its pronunciation (Romaji). You can input text or use voice.")

# Banner image; "Untitled.png" is resolved relative to the working directory.
translateimg = Image.open("Untitled.png")
st.image(translateimg, use_container_width=True)

# Free-form English input consumed by the "Translate from Text" button.
english_text = st.text_area("Enter the English text to translate")

# Text workflow: translate the text-area content, show the result, offer it
# as a downloadable text file, and play the pronunciation audio.
if st.button("Translate from Text"):
    if api_key and english_text:
        japanese_text, pronunciation = translate_to_japanese(api_key, english_text)
        if pronunciation:
            cleaned_pronunciation = clean_pronunciation(pronunciation)
            st.markdown("### Translation Result:")
            st.write(f"**English Text:** {english_text}")
            st.write(f"**Japanese Output:** {japanese_text}")
            st.write(f"**Pronunciation:** {cleaned_pronunciation}")

            # Text offered for download.
            result_text = f"English Text: {english_text}\n\nJapanese Translation: {japanese_text}\nPronunciation: {cleaned_pronunciation}"

            # Serve the download from memory as UTF-8 bytes. Writing a shared
            # "translation_result.txt" with the platform default encoding can
            # raise UnicodeEncodeError on Japanese text, and concurrent
            # sessions would overwrite each other's file.
            st.download_button(
                label="Download Translation Result",
                data=result_text.encode("utf-8"),
                file_name="translation_result.txt",
                mime="text/plain"
            )

            # Generate audio for pronunciation
            audio_file_path = generate_audio_from_text(cleaned_pronunciation)
            st.audio(audio_file_path, format="audio/mp3")
        else:
            # On failure the first element of the result holds the error message.
            st.error(japanese_text)
    else:
        if not api_key:
            st.error("API key is missing. Please add it as a secret in Hugging Face Settings.")
        else:
            st.error("Please provide text to translate.")

# Voice workflow: record and transcribe speech, translate the transcript,
# then show the result and play the pronunciation audio.
if st.button("Translate from Voice"):
    if not api_key:
        st.error("API key is missing. Please add it as a secret in Hugging Face Settings.")
    else:
        spoken_text = transcribe_voice_input()
        if not spoken_text:
            st.error("Could not transcribe voice input.")
        else:
            japanese_text, pronunciation = translate_to_japanese(api_key, spoken_text)
            if not pronunciation:
                # With no pronunciation, the first element carries the error message.
                st.error(japanese_text)
            else:
                romaji = clean_pronunciation(pronunciation)
                st.markdown("### Translation Result from Voice:")
                st.write(f"**English Text (from Voice):** {spoken_text}")
                st.write(f"**Japanese Output:** {japanese_text}")
                st.write(f"**Pronunciation:** {romaji}")

                # Synthesize the cleaned pronunciation and embed a player for it.
                audio_path = generate_audio_from_text(romaji)
                st.audio(audio_path, format="audio/mp3")