Spaces:
Running
Running
File size: 7,400 Bytes
f0076e7 0502cd7 baae19f e5e7630 6d9788c ddec283 f0076e7 6d9788c f0076e7 402f170 f0076e7 6d9788c f0076e7 40dae95 f0076e7 e5e7630 f0076e7 e5e7630 baae19f 6d9788c ddec283 6d9788c ddec283 6d9788c f0076e7 6d9788c f0076e7 6d9788c c16188d 6d9788c c16188d f0076e7 6d9788c f0076e7 e5e7630 f0076e7 02006d4 f0076e7 e5e7630 6d9788c 02006d4 e5e7630 02006d4 baae19f e5e7630 baae19f f0076e7 6d9788c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 |
import openai
import os
import streamlit as st
from PIL import Image
from gtts import gTTS
import tempfile
import shutil
import re
import speech_recognition as sr # Library for voice recognition
import sounddevice as sd
def translate_to_japanese(api_key, text):
    """
    Translate English text to Japanese using OpenAI's API and provide pronunciation.

    Two chat-completion requests are made: the first asks for the Japanese
    translation of ``text``; the second asks for the Romaji of that translation.

    Args:
        api_key: OpenAI API key; assigned to ``openai.api_key`` before the calls.
        text: English text to translate.

    Returns:
        Always a 2-tuple. On success it is ``(japanese_translation, pronunciation)``.
        On any failure (missing key, empty text, API error, unexpected error) it is
        ``(error_message, None)``, so callers can unpack two values and test the
        second one for truthiness.
    """
    # Validation failures use the same (message, None) shape as the API-error
    # paths below; returning a bare string here would break tuple unpacking
    # in the callers.
    if not api_key:
        return "Error: API key is missing.", None
    if not text or (isinstance(text, str) and not text.strip()):
        # Whitespace-only input carries nothing to translate either.
        return "Error: Input text is empty.", None

    # Set the OpenAI API key
    openai.api_key = api_key

    def _ask(system_prompt, user_prompt):
        """Send one system+user exchange and return the stripped reply text."""
        response = openai.ChatCompletion.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            max_tokens=300,
            temperature=0.5,
        )
        return response.choices[0].message['content'].strip()

    try:
        # First request: the Japanese translation itself.
        japanese_translation = _ask(
            "You are a helpful translator.",
            f"Translate the following English text to Japanese:\n\n{text}",
        )
        # Second request: the Romaji of the translation obtained above.
        pronunciation = _ask(
            "You are a helpful assistant who provides the Romaji (Japanese pronunciation in Latin script) of Japanese text.",
            f"Provide the Romaji pronunciation for the following Japanese text:\n\n{japanese_translation}",
        )
        return japanese_translation, pronunciation
    except openai.error.OpenAIError as e:
        return f"OpenAI API error: {str(e)}", None
    except Exception as e:
        # Last-resort boundary: surface the message to the UI instead of crashing.
        return f"An unexpected error occurred: {str(e)}", None
# Function to clean pronunciation text
def clean_pronunciation(pronunciation_text):
    """
    Strip the canned lead-in from a Romaji reply.

    If the text begins with "Sure! The Romaji pronunciation for the Japanese
    text", everything from there through the first following "is" (plus any
    colons right after it) is removed. Surrounding whitespace is then trimmed.
    Text without that lead-in is only trimmed.
    """
    lead_in = re.compile(r"^Sure! The Romaji pronunciation for the Japanese text.*?is[:]*")
    without_lead_in = lead_in.sub("", pronunciation_text)
    return without_lead_in.strip()
# Function to generate audio file from text using gTTS
def generate_audio_from_text(text):
    """
    Synthesize ``text`` with gTTS (Japanese voice) into a temporary MP3 file.

    Args:
        text: Text handed to gTTS.

    Returns:
        Path of the generated ``.mp3`` file. The file is created with
        ``delete=False``, so the caller is responsible for removing it.

    Raises:
        Whatever ``gTTS`` or ``tts.save`` raises; the temporary file is removed
        before a save error propagates.
    """
    tts = gTTS(text, lang='ja')  # 'ja' for Japanese language
    # Reserve a unique path, then close our handle right away: leaving it open
    # leaks a file descriptor and prevents gTTS from reopening the path on
    # platforms that lock open files.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio_file:
        audio_path = temp_audio_file.name
    try:
        tts.save(audio_path)
    except Exception:
        # Do not leave an empty or partial file behind when synthesis fails.
        os.unlink(audio_path)
        raise
    return audio_path
def transcribe_voice_input():
    """
    Record five seconds of audio and transcribe it with Google Speech Recognition.

    Progress and errors are reported through Streamlit widgets.

    Returns:
        The transcribed text, or ``None`` when recording fails, the speech
        cannot be understood, or the recognition service cannot be reached.
    """
    recognizer = sr.Recognizer()
    # Recording parameters for sounddevice
    duration = 5  # Seconds to record
    fs = 44100  # Sampling frequency
    sample_width = 2  # Bytes per sample; matches the int16 dtype requested below
    st.info("Please speak now...")

    # NOTE(review): sounddevice captures from an input device of the machine
    # running this script, which may not be the viewer's microphone - confirm
    # that this is the intended deployment.
    try:
        audio_data = sd.rec(int(duration * fs), samplerate=fs, channels=1, dtype='int16')
        sd.wait()  # Wait until recording is finished
    except sd.PortAudioError as e:
        st.error(f"Could not record audio: {e}")
        return None

    # sr.AudioData is a plain container, not a context manager, so it must be
    # built directly rather than opened in a `with` statement.
    audio = sr.AudioData(audio_data.tobytes(), fs, sample_width)
    try:
        text = recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        st.error("Sorry, I could not understand the audio. Please try again.")
        return None
    except sr.RequestError:
        st.error("Could not request results from Google Speech Recognition service.")
        return None

    st.success(f"Transcribed text: {text}")
    return text
# Streamlit UI
def _show_translation(heading, english_label, english_source, japanese_text,
                      pronunciation, offer_download=False):
    """Render one successful translation: text, optional download button, and audio."""
    cleaned_pronunciation = clean_pronunciation(pronunciation)
    st.markdown(heading)
    st.write(f"**{english_label}:** {english_source}")
    st.write(f"**Japanese Output:** {japanese_text}")
    st.write(f"**Pronunciation:** {cleaned_pronunciation}")

    if offer_download:
        result_text = f"English Text: {english_source}\n\nJapanese Translation: {japanese_text}\nPronunciation: {cleaned_pronunciation}"
        # Hand the payload over in memory as UTF-8 bytes. Writing it to a fixed
        # file name in the working directory would be shared by every session
        # and would use the platform default encoding for Japanese text.
        st.download_button(
            label="Download Translation Result",
            data=result_text.encode("utf-8"),
            file_name="translation_result.txt",
            mime="text/plain"
        )

    # Generate audio for pronunciation.
    # NOTE(review): the Romaji string is what gets synthesized with the
    # Japanese voice; confirm whether the Japanese text was intended instead.
    audio_file_path = generate_audio_from_text(cleaned_pronunciation)
    try:
        with open(audio_file_path, "rb") as audio_file:
            audio_bytes = audio_file.read()
    finally:
        # The temp file is created with delete=False, so remove it once read.
        os.remove(audio_file_path)
    st.audio(audio_bytes, format="audio/mp3")


st.title("English to Japanese Translator with Pronunciation")
st.markdown("Translate English text into Japanese and get its pronunciation (Romaji). You can input text or use voice.")

# Display the banner image; the file must sit in the working directory.
translateimg = Image.open("Untitled.png")
st.image(translateimg, use_container_width=True)

# Access the API key from Hugging Face Secrets
api_key = os.getenv("OPENAI_API_KEY")

# Input field for the text
english_text = st.text_area("Enter the English text to translate")

# Button to trigger the translation from text input
if st.button("Translate from Text"):
    if not api_key:
        st.error("API key is missing. Please add it as a secret in Hugging Face Settings.")
    elif not english_text:
        st.error("Please provide text to translate.")
    else:
        japanese_text, pronunciation = translate_to_japanese(api_key, english_text)
        if pronunciation:
            _show_translation(
                "### Translation Result:",
                "English Text",
                english_text,
                japanese_text,
                pronunciation,
                offer_download=True,
            )
        else:
            st.error(japanese_text)  # Display error message if API call fails

# Button to trigger translation from voice input
if st.button("Translate from Voice"):
    if not api_key:
        st.error("API key is missing. Please add it as a secret in Hugging Face Settings.")
    else:
        voice_input = transcribe_voice_input()
        if not voice_input:
            st.error("Could not transcribe voice input.")
        else:
            japanese_text, pronunciation = translate_to_japanese(api_key, voice_input)
            if pronunciation:
                _show_translation(
                    "### Translation Result from Voice:",
                    "English Text (from Voice)",
                    voice_input,
                    japanese_text,
                    pronunciation,
                )
            else:
                st.error(japanese_text)  # Display error message if API call fails
|