# coding=utf-8
# Copyright 2023 The GIRT Authors.
# Lint as: python3


# This Space is based on the AMR-KELEG/ALDi and cis-lmu/GlotLID Spaces.
# GIRT Space

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import streamlit as st
import pandas as pd
import base64


@st.cache_data
def render_svg(svg):
    """Renders the given svg string."""
    b64 = base64.b64encode(svg.encode("utf-8")).decode("utf-8")
    html = rf'<p align="center"> <img src="data:image/svg+xml;base64,{b64}" width="40%"/> </p>'
    c = st.container()
    c.write(html, unsafe_allow_html=True)


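# Cache the model and tokenizer so they are loaded only once per session.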
@st.cache_resource
def load_model(model_name):
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    return model

@st.cache_resource
def load_tokenizer(model_name):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return tokenizer

with st.spinner(text="Please wait while the model is loading..."):

    model = load_model('nafisehNik/girt-t5-base')
    tokenizer = load_tokenizer('nafisehNik/girt-t5-base')


def create_instruction(name, about, title, labels, assignees, headline_type, headline, summary):
    # Replace any empty field with <|MASK|> so the model generates it; an empty
    # summary becomes <|EMPTY|>. Note that `headline` may be a list of strings,
    # which is interpolated into the prompt as-is.
    value_list = [name, about, title, labels, assignees, headline_type, headline]
    value_list = ['<|MASK|>' if not element else element for element in value_list]
    if not summary:
        summary = '<|EMPTY|>'

    instruction = f'name: {value_list[0]}\nabout: {value_list[1]}\ntitle: {value_list[2]}\nlabels: {value_list[3]}\nassignees: {value_list[4]}\nheadlines_type: {value_list[5]}\nheadlines: {value_list[6]}\nsummary: {summary}'
    return instruction

def compute(sample, top_p, top_k, do_sample, max_length, min_length):
    inputs = tokenizer(sample, return_tensors="pt").to('cpu')

    outputs = model.generate(
        **inputs,
        min_length=min_length,
        max_length=max_length,
        do_sample=do_sample,
        top_p=top_p,
        top_k=top_k)

    # Decode without skipping special tokens so GIRT-specific tokens survive,
    # then strip the generic tokenizer tokens manually.
    generated_texts = tokenizer.batch_decode(outputs, skip_special_tokens=False)
    generated_text = generated_texts[0]

    replace_dict = {
        '\n ': '\n',
        '</s>': '',
        '<pad> ': '',
        '<pad>': '',
        '<unk>': ''
    }

    postprocess_text = generated_text
    for key, value in replace_dict.items():
        postprocess_text = postprocess_text.replace(key, value)

    return postprocess_text

st.markdown("[![Duplicate Space](https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14)](https://huggingface.co/spaces/nafisehNik/girt-space?duplicate=true)")

render_svg(open("assets/logo.svg").read())

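# Widen the sidebar so the settings widgets fit without wrapping.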
st.markdown(
    """
    <style>
    [data-testid="stSidebar"][aria-expanded="true"]{
        min-width: 450px;
        max-width: 450px;
    }
    </style>
    """,
    unsafe_allow_html=True)


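# Sidebar: collect issue-template metadata and generation settings.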
with st.sidebar:
    st.title("🔧 Settings")

    with st.expander("🏗 Issue Template Inputs", True):
        
        in_name = st.text_input("Name Metadata: ", placeholder="e.g., Bug Report, Feature Request, or Question", on_change=None)
        in_about = st.text_input("About Metadata: ", placeholder="e.g., File a bug report", on_change=None)

        empty_title = st.checkbox('without title')
        if not empty_title:
            in_title = st.text_input("Title Metadata: ", placeholder="e.g., [Bug]: ", on_change=None)
        else:
            in_title = '<|EMPTY|>'

        empty_labels = st.checkbox('without labels')
        if not empty_labels:
            in_labels = st.text_input("Labels Metadata: ", placeholder="e.g., feature, enhancement", on_change=None)
        else:
            in_labels = '<|EMPTY|>'

        empty_assignees = st.checkbox('without assignees')
        if not empty_assignees:
            in_assignees = st.text_input("Assignees Metadata: ", placeholder="e.g., USER_1, USER_2", on_change=None)
        else:
            in_assignees = '<|EMPTY|>'
            
        # If 'No headlines' is selected, force the headline type and the
        # headlines themselves to be empty.
        in_headline_type = st.selectbox(
            'How would you like your headlines to be formatted?',
            ('**Emphasis**', '# Header', 'No headlines'))

        if in_headline_type != 'No headlines':
            in_headlines = st.text_area("Headlines: ", placeholder="Enter each headline in one line. e.g.,\nWelcome\nConcise Description\nAdditional Info", on_change=None, height=200)
            in_headlines = in_headlines.split('\n')
            in_headlines = [element.strip() for element in in_headlines]
        else:
            in_headline_type = '<|EMPTY|>'
            in_headlines = '<|EMPTY|>'

        # df = pd.DataFrame(
        # [{"headline": "Welcome"},{"headline": "Concise Description"}, {"headline": "Additional Info"}])
        # in_headlines = st.experimental_data_editor(df, num_rows="dynamic")

        in_summary = st.text_area("Summary: ", placeholder="This GitHub issue template is ...", on_change=None, height=200)


    with st.expander("🎛 Model Configs", False):
        max_length_in = st.slider("max_length", 30, 512, 300)
        min_length_in = st.slider("min_length", 0, 300, 30)
        top_p_in = st.slider("top_p", 0.0, 1.0, 0.92)
        top_k_in = st.slider("top_k", 0, 100, 0)

    
    # A distinct key keeps this button from colliding with the tab buttons below.
    clicked = st.button("Submit", key='sidebar_submit')

    # Build the instruction from the sidebar inputs; the tabs below reuse it.
    prompt = create_instruction(in_name, in_about, in_title, in_labels, in_assignees, in_headline_type, in_headlines, in_summary)

    if clicked:
        with st.spinner("Please Wait..."):
            res = compute(prompt, top_p=top_p_in, top_k=top_k_in, do_sample=True, max_length=max_length_in, min_length=min_length_in)
            st.code(res, language="python")

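# Two workflows: a guided designer driven by the sidebar, and a manual prompt.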
tab1, tab2 = st.tabs(["Design GitHub Issue Template", "Manual Prompt"])

with tab1:

    # Generate a template from the structured sidebar inputs.
    clicked = st.button("Submit", key='design')

    if clicked:
        with st.spinner("Please Wait..."):
            res = compute(prompt, top_p=0.92, top_k=0, do_sample=True, max_length=300, min_length=0)
            st.code(res, language="python")


with tab2:

    st.markdown('This tab uses only the prompt you provide here, not the issue template inputs from the sidebar.')

    prompt = st.text_area("Prompt: ", placeholder="Enter your prompt.", on_change=None, height=200)

    clicked = st.button("Submit", key='prompt')

    if clicked and prompt:
        with st.spinner("Please Wait..."):
            res = compute(prompt, top_p=0.92, top_k=0, do_sample=True, max_length=300, min_length=0)
            st.code(res, language="python")