"""Streamlit app that translates a Question/Answer CSV from English to Urdu.

The user uploads a CSV containing ``Question`` and ``Answer`` columns; the app
adds ``Question_Urdu`` and ``Answer_Urdu`` columns produced by a MarianMT
model and offers the result as a CSV download.
"""

import pandas as pd
import streamlit as st
from transformers import MarianMTModel, MarianTokenizer

# Hard-coded checkpoint. (A previously disabled variant read this value from
# the ``model_name`` environment variable instead.)
model_name = 'Helsinki-NLP/opus-mt-en-ur'


@st.cache_resource
def _load_translator(name):
    """Load the MarianMT model and tokenizer for checkpoint *name*.

    Streamlit re-executes the whole script on every user interaction, so the
    load is cached to avoid re-instantiating the model on each rerun.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    return (
        MarianMTModel.from_pretrained(name),
        MarianTokenizer.from_pretrained(name),
    )


model, tokenizer = _load_translator(model_name)


def translate_text(text):
    """Translate *text* from English to Urdu.

    Args:
        text: The value of one CSV cell. Normally a string; missing values
            (``NaN``/``None``) and other scalar types are tolerated.

    Returns:
        The decoded translation as a string. Missing values and blank strings
        are returned unchanged because there is nothing to translate.
    """
    if not isinstance(text, str):
        # Empty CSV cells arrive as float NaN, which the tokenizer rejects.
        if pd.isna(text):
            return text
        text = str(text)
    if not text.strip():
        return text

    # truncation=True keeps over-long inputs within the model's maximum length.
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    translated = model.generate(**inputs)
    return tokenizer.decode(translated[0], skip_special_tokens=True)


@st.cache_data
def _translate_dataset(data):
    """Return a copy of *data* with ``Question_Urdu`` and ``Answer_Urdu`` added.

    Cached so that reruns triggered by widgets (for example clicking the
    download button) do not translate the same dataset again.
    """
    translated = data.copy()
    translated['Question_Urdu'] = translated['Question'].apply(translate_text)
    translated['Answer_Urdu'] = translated['Answer'].apply(translate_text)
    return translated


# Streamlit app
st.title("Dataset Translator From English to Urdu For Chatbot")
st.title("Upload Csv file for translation into Urdu. Remember Csv file must contain Only Question and Answer Column")

# Upload CSV file
uploaded_file = st.file_uploader("Upload your CSV file", type=["csv"])

if uploaded_file:
    try:
        # Read the file into a pandas DataFrame
        data = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        # Show unreadable uploads as a message in the page, not a traceback.
        st.error(f"Could not read the uploaded CSV file: {exc}")
    else:
        # Translate questions and answers
        if 'Question' in data.columns and 'Answer' in data.columns:
            data = _translate_dataset(data)

            # Display the translated dataframe
            st.write(data)

            # Provide option to download the translated CSV
            translated_file = data.to_csv(index=False)
            st.download_button(
                "Download Translated CSV",
                translated_file,
                "Diabetes_Translated_Urdu.csv",
                mime="text/csv",
            )
        else:
            st.error("CSV file must contain 'Question' and 'Answer' columns")