Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
from transformers import MarianMTModel, MarianTokenizer | |
# Hugging Face model identifier used for English -> Urdu translation.
# A variant that read the name from the "model_name" environment variable
# (Hugging Face secret) is disabled; the name is hard-coded instead.
model_name = 'Helsinki-NLP/opus-mt-en-ur'


@st.cache_resource
def _load_translation_model(name):
    """Load the MarianMT model and tokenizer for ``name`` once and reuse them.

    Streamlit re-executes the script on every widget interaction; caching the
    loaded objects as a shared resource avoids reloading them on each rerun.

    Args:
        name: Model identifier passed to ``from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    return (
        MarianMTModel.from_pretrained(name),
        MarianTokenizer.from_pretrained(name),
    )


model, tokenizer = _load_translation_model(model_name)
def translate_text(text):
    """Translate a single piece of English text to Urdu.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        text: The text to translate. Missing values (``None`` or NaN, as
            produced by pandas for empty CSV cells) and blank strings yield
            ``""`` without calling the model; any other non-string value is
            converted with ``str`` first.

    Returns:
        The decoded translation with special tokens removed, or ``""`` for
        missing/blank input.
    """
    if not isinstance(text, str):
        if text is None or pd.isna(text):
            return ""
        text = str(text)
    if not text.strip():
        return ""

    # truncation=True clips inputs that exceed the tokenizer's maximum length
    # instead of passing an over-long sequence to the model.
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    translated = model.generate(**inputs)
    return tokenizer.decode(translated[0], skip_special_tokens=True)
# Streamlit app


@st.cache_data
def _translate_frame(frame):
    """Return a copy of ``frame`` with translated question/answer columns.

    The result is cached on the frame's contents, so script reruns (for
    example the one triggered by clicking the download button) reuse the
    earlier translation instead of running the model over every row again.

    Args:
        frame: DataFrame containing ``Question`` and ``Answer`` columns.

    Returns:
        A new DataFrame with ``Question_Urdu`` and ``Answer_Urdu`` added.
    """
    result = frame.copy()
    result['Question_Urdu'] = result['Question'].apply(translate_text)
    result['Answer_Urdu'] = result['Answer'].apply(translate_text)
    return result


st.title("Dataset Translator From English to Urdu For Chatbot")
st.title("Upload Csv file for translation into Urdu. Remember Csv file must contain Only Question and Answer Column")

# Upload CSV file
uploaded_file = st.file_uploader("Upload your CSV file", type=["csv"])

if uploaded_file is not None:
    # Read the file into a pandas DataFrame
    data = pd.read_csv(uploaded_file)

    # Translate questions and answers
    if 'Question' in data.columns and 'Answer' in data.columns:
        data = _translate_frame(data)

        # Display the translated dataframe
        st.write(data)

        # Provide option to download the translated CSV
        translated_file = data.to_csv(index=False)
        st.download_button(
            "Download Translated CSV",
            translated_file,
            "Diabetes_Translated_Urdu.csv",
            mime="text/csv",
        )
    else:
        st.error("CSV file must contain 'Question' and 'Answer' columns")