File size: 3,217 Bytes
40e9898
 
 
 
 
36338f2
40e9898
fb12737
40e9898
 
 
36338f2
40e9898
 
36338f2
 
 
 
 
40e9898
 
 
36338f2
 
 
 
 
 
40e9898
36338f2
2b02259
40e9898
36338f2
 
 
 
 
2b02259
40e9898
 
36338f2
2b02259
40e9898
36338f2
 
2b02259
36338f2
 
 
 
 
 
 
 
 
 
 
2b02259
36338f2
 
 
 
2b02259
36338f2
 
 
 
 
 
 
 
 
 
 
2b02259
36338f2
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
""" Script for streamlit demo
    @author: AbinayaM02
"""

# Standard library
import json

# Third-party
import streamlit as st
from transformers import (
    AutoModelForCausalLM,
    AutoModelWithLMHead,  # deprecated upstream; kept so existing code keeps working
    AutoTokenizer,
    pipeline,
)

# Read the demo configuration (model list and per-model dataset keys).
# json.load consumes the file object directly; explicit encoding avoids
# platform-dependent defaults.
with open("config.json", encoding="utf-8") as f:
    config = json.load(f)

# Page configuration — must run before any other Streamlit command.
st.set_page_config(
    page_title="Tamil Language Models",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Load the model
@st.cache(allow_output_mutation=True)  # NOTE(review): st.cache is deprecated in newer Streamlit; st.cache_resource is the modern equivalent — confirm installed version before switching
def load_model(model_name):
    """Download (or load from cache) a causal-LM model and its tokenizer.

    Parameters
    ----------
    model_name : str
        Hugging Face hub id or local path of the pretrained GPT-2 model.

    Returns
    -------
    tuple
        (model, tokenizer) pair ready to feed to a text-generation pipeline.
    """
    with st.spinner('Waiting for the model to load.....'):
        # AutoModelWithLMHead is deprecated in transformers; for GPT-2-style
        # (causal) models the drop-in replacement is AutoModelForCausalLM.
        model = AutoModelForCausalLM.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
    st.success('Model loaded!!')
    return model, tokenizer

# Sidebar: project logo plus the two selectors that drive the page below.
# `data` options depend on the chosen `page` (looked up in the config).
img = st.sidebar.image("images/tamil_logo.jpg", width=300)
page = st.sidebar.selectbox("Model", config["models"])
data = st.sidebar.selectbox("Data", config[page])

# Main page header. Markdown links must have no space between "](" —
# the original "] (" broke the second link's rendering.
st.title("Tamil Language Demos")
st.markdown(
    "This demo uses [GPT2 trained on Oscar dataset](https://huggingface.co/flax-community/gpt-2-tamil) "
    "and [GPT2 trained on Oscar & Indic Corpus dataset](https://huggingface.co/abinayam/gpt-2-tamil) "
    "to show language generation!"
)

def _run_generation_demo(dataset_key, description):
    """Render the GPT-2 text-generation demo for one dataset.

    Parameters
    ----------
    dataset_key : str
        Key into ``config`` giving the hub id of the model to load.
    description : str
        One-line markdown blurb shown under the header.
    """
    st.header('Tamil text generation with GPT2')
    st.markdown(description)
    model, tokenizer = load_model(config[dataset_key])
    # Default options
    seed = st.text_input('Starting text', 'அகர முதல எழுதெல்லம்')
    #seq_num = st.number_input('Number of sentences to generate ', 1, 20, 5)
    max_len = st.number_input('Length of the sentence', 5, 300, 100)
    gen_bt = st.button('Generate')
    if gen_bt:
        try:
            with st.spinner('Generating...'):
                generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
                seqs = generator(seed, max_length=max_len)[0]['generated_text']  # num_return_sequences=seq_num)
            st.write(seqs)
        except Exception as e:
            st.exception(f'Exception: {e}')


# Dispatch on the sidebar selection. The two generation branches were
# previously duplicated verbatim; they now share _run_generation_demo.
if page == 'Text Generation' and data == 'Oscar':
    _run_generation_demo(data, 'A simple demo using gpt-2-tamil model trained on Oscar data')
elif page == 'Text Generation' and data == "Oscar + Indic Corpus":
    # Fixed copy-paste bug: this blurb previously claimed "Oscar data" only.
    _run_generation_demo(data, 'A simple demo using gpt-2-tamil model trained on Oscar & Indic Corpus data')
else:
    st.title('Tamil News classification with Finetuned GPT2')
    st.markdown('In progress')