EngToJap-2.0 / app.py
shukdevdatta123's picture
Update app.py
ddec283 verified
raw
history blame
7.4 kB
import openai
import os
import streamlit as st
from PIL import Image
from gtts import gTTS
import tempfile
import shutil
import re
import speech_recognition as sr # Library for voice recognition
import sounddevice as sd
def _chat_completion_text(messages):
    """Send *messages* to the chat model and return the stripped reply text."""
    # NOTE(review): `openai.ChatCompletion` is the pre-1.0 interface of the
    # openai package - confirm the pinned version before upgrading.
    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=messages,
        max_tokens=300,
        temperature=0.5,
    )
    return response.choices[0].message['content'].strip()


def translate_to_japanese(api_key, text):
    """
    Translate English text to Japanese using OpenAI's API and provide pronunciation.

    Two chat requests are made: the first asks for the Japanese translation,
    the second asks for the Romaji of that translation.

    Args:
        api_key: OpenAI API key; assigned to ``openai.api_key``.
        text: English text to translate.

    Returns:
        A 2-tuple. On success it is ``(japanese_translation, pronunciation)``.
        On any failure (missing key, empty text, API error) it is
        ``(error_message, None)``, so callers can always unpack two values
        and test the second one to detect an error.
    """
    # Validate input. These branches return the same (message, None) shape as
    # the API-error branches below; callers unpack the result into two names.
    if not api_key:
        return "Error: API key is missing.", None
    if not text or not text.strip():
        return "Error: Input text is empty.", None

    # Set the OpenAI API key
    openai.api_key = api_key

    # Define the messages for the translation request
    messages_translation = [
        {"role": "system", "content": "You are a helpful translator."},
        {"role": "user", "content": f"Translate the following English text to Japanese:\n\n{text}"}
    ]

    try:
        japanese_translation = _chat_completion_text(messages_translation)

        # Define the messages for the pronunciation (Romaji) request; it is
        # built from the translation, so it must come after the first call.
        messages_pronunciation = [
            {"role": "system", "content": "You are a helpful assistant who provides the Romaji (Japanese pronunciation in Latin script) of Japanese text."},
            {"role": "user", "content": f"Provide the Romaji pronunciation for the following Japanese text:\n\n{japanese_translation}"}
        ]
        pronunciation = _chat_completion_text(messages_pronunciation)

        return japanese_translation, pronunciation
    except openai.error.OpenAIError as e:
        return f"OpenAI API error: {str(e)}", None
    except Exception as e:
        # Top-level boundary for the UI: report the failure instead of crashing.
        return f"An unexpected error occurred: {str(e)}", None
# Strip the model's boilerplate lead-in from a Romaji reply
def clean_pronunciation(pronunciation_text):
    """Return *pronunciation_text* without the "Sure! The Romaji pronunciation..." lead-in.

    Only a lead-in at the very start of the string is removed; surrounding
    whitespace is trimmed from whatever remains. Text that does not start
    with the lead-in is returned trimmed but otherwise unchanged.
    """
    intro_pattern = r"^Sure! The Romaji pronunciation for the Japanese text.*?is[:]*"
    without_intro = re.sub(intro_pattern, "", pronunciation_text)
    return without_intro.strip()
# Function to generate audio file from text using gTTS
def generate_audio_from_text(text):
    """Synthesize *text* with gTTS (language ``'ja'``) into a temporary MP3 file.

    Args:
        text: The text to speak.

    Returns:
        Path of the generated ``.mp3`` file. The file is created with
        ``delete=False``, so it stays on disk until the caller removes it.
    """
    tts = gTTS(text, lang='ja')  # 'ja' for Japanese language
    # Reserve a unique path, then close the handle before gTTS writes to it:
    # leaving it open leaks a file descriptor and keeps the file open while
    # another writer targets the same path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio_file:
        audio_path = temp_audio_file.name
    tts.save(audio_path)
    return audio_path
def transcribe_voice_input():
    """Record five seconds of microphone audio and transcribe it.

    Recording is done with sounddevice (mono, 16-bit, 44100 Hz) and the
    transcription with ``Recognizer.recognize_google``. Progress and errors
    are reported through Streamlit status messages.

    Returns:
        The transcribed text, or ``None`` when the speech could not be
        understood or the recognition service could not be reached.
    """
    recognizer = sr.Recognizer()

    # Set parameters for sounddevice (e.g., 44100 Hz, 1 channel)
    duration = 5  # Seconds to record
    fs = 44100  # Sampling frequency
    sample_width = 2  # Bytes per sample; matches dtype='int16' below

    st.info("Please speak now...")

    # Record the audio using sounddevice
    audio_data = sd.rec(int(duration * fs), samplerate=fs, channels=1, dtype='int16')
    sd.wait()  # Wait until recording is finished

    # Wrap the raw samples for the recognizer. AudioData is a plain data
    # container, not a context manager, so it must not be used in `with`.
    audio = sr.AudioData(audio_data.tobytes(), fs, sample_width)

    try:
        text = recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        st.error("Sorry, I could not understand the audio. Please try again.")
        return None
    except sr.RequestError:
        st.error("Could not request results from Google Speech Recognition service.")
        return None

    st.success(f"Transcribed text: {text}")
    return text
# Streamlit UI
def _play_pronunciation_audio(pronunciation_text):
    """Synthesize *pronunciation_text* and embed it as an MP3 audio player."""
    audio_file_path = generate_audio_from_text(pronunciation_text)
    try:
        # Load the bytes so the temporary file can be removed right away
        # instead of accumulating one file per button click.
        with open(audio_file_path, "rb") as audio_file:
            audio_bytes = audio_file.read()
    finally:
        try:
            os.remove(audio_file_path)
        except OSError:
            # Best-effort cleanup; a leftover temp file must not break the UI.
            pass
    st.audio(audio_bytes, format="audio/mp3")


st.title("English to Japanese Translator with Pronunciation")
st.markdown("Translate English text into Japanese and get its pronunciation (Romaji). You can input text or use voice.")

# Display an image if you have one
translateimg = Image.open("Untitled.png")  # Ensure the file is in the correct directory
st.image(translateimg, use_container_width=True)

# Access the API key from Hugging Face Secrets
api_key = os.getenv("OPENAI_API_KEY")

# Input field for the text
english_text = st.text_area("Enter the English text to translate")

# Button to trigger the translation from text input
if st.button("Translate from Text"):
    if not api_key:
        st.error("API key is missing. Please add it as a secret in Hugging Face Settings.")
    elif not english_text:
        st.error("Please provide text to translate.")
    else:
        japanese_text, pronunciation = translate_to_japanese(api_key, english_text)
        if pronunciation:
            cleaned_pronunciation = clean_pronunciation(pronunciation)

            st.markdown("### Translation Result:")
            st.write(f"**English Text:** {english_text}")
            st.write(f"**Japanese Output:** {japanese_text}")
            st.write(f"**Pronunciation:** {cleaned_pronunciation}")

            # Save the result in a text file
            result_text = f"English Text: {english_text}\n\nJapanese Translation: {japanese_text}\nPronunciation: {cleaned_pronunciation}"

            # Write to a text file. The content includes Japanese characters,
            # so the encoding is fixed to UTF-8 rather than left to the
            # platform's locale default.
            with open("translation_result.txt", "w", encoding="utf-8") as file:
                file.write(result_text)

            # Create a download button for the user to download the file
            with open("translation_result.txt", "rb") as file:
                st.download_button(
                    label="Download Translation Result",
                    data=file,
                    file_name="translation_result.txt",
                    mime="text/plain"
                )

            # Generate audio for pronunciation
            _play_pronunciation_audio(cleaned_pronunciation)
        else:
            st.error(japanese_text)  # Display error message if API call fails

# Button to trigger translation from voice input
if st.button("Translate from Voice"):
    if not api_key:
        st.error("API key is missing. Please add it as a secret in Hugging Face Settings.")
    else:
        voice_input = transcribe_voice_input()
        if not voice_input:
            st.error("Could not transcribe voice input.")
        else:
            japanese_text, pronunciation = translate_to_japanese(api_key, voice_input)
            if pronunciation:
                cleaned_pronunciation = clean_pronunciation(pronunciation)

                st.markdown("### Translation Result from Voice:")
                st.write(f"**English Text (from Voice):** {voice_input}")
                st.write(f"**Japanese Output:** {japanese_text}")
                st.write(f"**Pronunciation:** {cleaned_pronunciation}")

                # Generate audio for pronunciation
                _play_pronunciation_audio(cleaned_pronunciation)
            else:
                st.error(japanese_text)  # Display error message if API call fails