""" Script for streamlit demo
@author: AbinayaM02
"""
# Install necessary libraries
import json

import streamlit as st
from transformers import (
    AutoModelForCausalLM,
    AutoModelWithLMHead,
    AutoTokenizer,
    pipeline,
)
# Read the app configuration (model names keyed by demo page / dataset).
# Explicit UTF-8 avoids depending on the platform's default encoding;
# json.load reads straight from the file object instead of loads(f.read()).
with open("config.json", encoding="utf-8") as f:
    config = json.load(f)
# Page configuration — must be the first streamlit call on the page.
st.set_page_config(
    initial_sidebar_state="expanded",
    layout="wide",
    page_title="Tamil Language Models",
)
# Load the model
@st.cache(allow_output_mutation=True)
def load_model(model_name):
    """Load the causal-LM model and tokenizer for ``model_name``.

    Cached by streamlit (allow_output_mutation because model/tokenizer
    objects are mutable), so reruns reuse the downloaded weights.

    Parameters
    ----------
    model_name : str
        Hugging Face hub id or local path of the pretrained model.

    Returns
    -------
    tuple
        ``(model, tokenizer)`` ready for a text-generation pipeline.
    """
    with st.spinner('Waiting for the model to load.....'):
        # AutoModelWithLMHead is deprecated in transformers; the correct
        # auto class for GPT-2 text generation is AutoModelForCausalLM.
        model = AutoModelForCausalLM.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        st.success('Model loaded!!')
    return model, tokenizer
# Side bar
img = st.sidebar.image("images/tamil_logo.jpg", width=300)
# Choose the model based on selection
page = st.sidebar.selectbox("Model", config["models"])
data = st.sidebar.selectbox("Data", config[page])
# Main page
st.title("Tamil Language Demos")
st.markdown(
"This demo uses [GPT2 trained on Oscar dataset](https://huggingface.co/flax-community/gpt-2-tamil) "
"and [GPT2 trained on Oscar & Indic Corpus dataset] (https://huggingface.co/abinayam/gpt-2-tamil) "
"to show language generation!"
)
# Page dispatch. The two text-generation branches were byte-for-byte
# duplicates (only the dataset — and thus config[data] — differed), so a
# single branch now handles both corpora; the model id comes from
# config[data]. Anything else falls through to the in-progress
# classification page.
if page == 'Text Generation' and data in ('Oscar', 'Oscar + Indic Corpus'):
    st.header('Tamil text generation with GPT2')
    st.markdown('A simple demo using gpt-2-tamil model trained on Oscar data')
    model, tokenizer = load_model(config[data])
    # Default options for the generation widgets
    seed = st.text_input('Starting text', 'அகர முதல எழுதெல்லம்')
    max_len = st.number_input('Length of the sentence', 5, 300, 100)
    gen_bt = st.button('Generate')
    if gen_bt:
        try:
            with st.spinner('Generating...'):
                generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
                # Single sequence; only the generated text is displayed.
                seqs = generator(seed, max_length=max_len)[0]['generated_text']
                st.write(seqs)
        except Exception as e:
            st.exception(f'Exception: {e}')
else:
    st.title('Tamil News classification with Finetuned GPT2')
    st.markdown('In progress')