Spaces:

nafisehNik
/

girt-space

Running

File size: 6,973 Bytes

74688de
 
 
 
 
 
 
 
 
 
1f845b3
74688de
 
7e0543d
74688de
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8cfb467
f1f40cb
8cfb467
f1f40cb
8cfb467
 
 
 
 
 
93d78ca
74688de
 
 
 
 
93d78ca
74688de
93d78ca
 
 
74688de
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
08e3335
 
 
 
0008647
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c2d8cf6
8cfb467
c2d8cf6
 
df73138
8cfb467
c2d8cf6
 
 
 
df73138
8cfb467
 
c2d8cf6
b54fe3d
c2d8cf6
8cfb467
 
 
0008647
a15f9dd
 
0008647
 
74386d3
a15f9dd
e7aa0c9
 
a15f9dd
 
 
0008647
 
 
 
 
 
 
 
 
650f9cf
 
 
 
0008647
8cfb467
 
 
 
 
 
81e37bb
8cfb467
0008647
74688de
 
 
fd56584
a15f9dd
 
fd56584
1c313d2
fd56584
 
 
a15f9dd
fd56584
 
 
74688de
 
a0f65dc
e5e710a
 
a0f65dc
 
1c313d2
74688de
93d78ca
 
08e3335
a0f65dc
93d78ca
74688de

# coding=utf-8
# Copyright 2023 The GIRT Authors.
# Lint as: python3


# This Space is based on the AMR-KELEG/ALDi and cis-lmu/GlotLID Spaces.
# GIRT Space

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import streamlit as st
import pandas as pd
import base64


@st.cache_data
def render_svg(svg):
    """Render an SVG string as a centered image on the page.

    The SVG text is base64-encoded, embedded as a ``data:`` URI inside an
    ``<img>`` tag, and written into a new Streamlit container with
    ``unsafe_allow_html=True``.

    Args:
        svg: The SVG document as a string.
    """
    b64 = base64.b64encode(svg.encode("utf-8")).decode("utf-8")
    # HTML attributes are separated by whitespace only; the comma that used to
    # follow the ``src`` value was parsed as a bogus extra attribute.
    html = f'<p align="center"> <img src="data:image/svg+xml;base64,{b64}" width="40%"/> </p>'
    c = st.container()
    c.write(html, unsafe_allow_html=True)


@st.cache_resource
def load_model(model_name):
    """Return the seq2seq model loaded from ``model_name``.

    Decorated with ``st.cache_resource`` so Streamlit can reuse the loaded
    object instead of calling ``from_pretrained`` again.
    """
    return AutoModelForSeq2SeqLM.from_pretrained(model_name)

@st.cache_resource
def load_tokenizer(model_name):
    """Return the tokenizer loaded from ``model_name``.

    Decorated with ``st.cache_resource`` so Streamlit can reuse the loaded
    object instead of calling ``from_pretrained`` again.
    """
    return AutoTokenizer.from_pretrained(model_name)

# Fetch the model and its tokenizer (both loaders are cached) while a
# spinner is displayed; `model` and `tokenizer` are used by `compute`.
checkpoint = 'nafisehNik/girt-t5-base'
with st.spinner(text="Please wait while the model is loading...."):
    model = load_model(checkpoint)
    tokenizer = load_tokenizer(checkpoint)


def create_instruction(name, about, title, labels, assignees, headline_type, headline, summary):
    """Build the text prompt that describes an issue template.

    Each metadata field that is falsy (empty string, ``None``, empty list)
    is replaced by the ``'<|MASK|>'`` marker; a falsy summary is replaced
    by ``'<|EMPTY|>'``. Values are interpolated with ``str()`` formatting,
    so a list passed as ``headline`` appears in its Python list form.

    Args:
        name: Value for the ``name`` line.
        about: Value for the ``about`` line.
        title: Value for the ``title`` line.
        labels: Value for the ``labels`` line.
        assignees: Value for the ``assignees`` line.
        headline_type: Value for the ``headlines_type`` line.
        headline: Value for the ``headlines`` line.
        summary: Value for the ``summary`` line.

    Returns:
        str: Eight ``key: value`` lines joined with newlines, in the order
        name, about, title, labels, assignees, headlines_type, headlines,
        summary.
    """
    # NOTE(review): the sidebar in this file passes `headline` as a list of
    # lines; a list such as [''] is truthy and is therefore not masked.
    value_list = [name, about, title, labels, assignees, headline_type, headline]

    # Fix: the comprehension previously iterated over the undefined name
    # `val_list`, which raised NameError on every call.
    value_list = ['<|MASK|>' if not element else element for element in value_list]
    if not summary:
        summary = '<|EMPTY|>'

    keys = ('name', 'about', 'title', 'labels', 'assignees', 'headlines_type', 'headlines')
    lines = [f'{key}: {value}' for key, value in zip(keys, value_list)]
    lines.append(f'summary: {summary}')
    return '\n'.join(lines)

def compute(sample, top_p, top_k, do_sample, max_length, min_length):
    """Generate text for ``sample`` and strip special-token markers.

    Uses the module-level ``tokenizer`` and ``model``. The prompt is
    tokenized, passed to ``model.generate`` with the given settings, and
    the first decoded sequence is cleaned of the literal markers
    ``</s>``, ``<pad>`` and ``<unk>``; a space following a newline is
    removed as well.

    Args:
        sample: Prompt text handed to the tokenizer.
        top_p: Forwarded to ``model.generate`` as ``top_p``.
        top_k: Forwarded to ``model.generate`` as ``top_k``.
        do_sample: Forwarded to ``model.generate`` as ``do_sample``.
        max_length: Forwarded to ``model.generate`` as ``max_length``.
        min_length: Forwarded to ``model.generate`` as ``min_length``.

    Returns:
        str: The cleaned text of the first generated sequence.
    """
    encoded = tokenizer(sample, return_tensors="pt").to('cpu')

    generation_options = dict(
        min_length=min_length,
        max_length=max_length,
        do_sample=do_sample,
        top_p=top_p,
        top_k=top_k,
    )
    token_ids = model.generate(**encoded, **generation_options).to('cpu')

    # Special tokens are kept while decoding and removed by hand below.
    decoded = tokenizer.batch_decode(token_ids, skip_special_tokens=False)[0]

    # Applied in order: '<pad> ' must be handled before the bare '<pad>',
    # otherwise the trailing space would be left behind.
    cleanup_rules = (
        ('\n ', '\n'),
        ('</s>', ''),
        ('<pad> ', ''),
        ('<pad>', ''),
        ('<unk>', ''),
    )
    for marker, replacement in cleanup_rules:
        decoded = decoded.replace(marker, replacement)

    return decoded

# "Duplicate Space" badge (shields.io image with an inline base64 logo) that links to this Space's duplicate page on Hugging Face.
st.markdown("[![Duplicate Space](https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14)](https://huggingface.co/spaces/nafisehNik/girt-space?duplicate=true)")

# Read the logo through a context manager so the file handle is closed
# instead of being left to the garbage collector.
with open("assets/logo.svg", encoding="utf-8") as logo_file:
    render_svg(logo_file.read())

# Pin the expanded sidebar to a fixed 450px width. The <style> element is
# now explicitly closed; it was previously left open.
st.markdown(
    """
    <style>
    [data-testid="stSidebar"][aria-expanded="true"]{
        min-width: 450px;
        max-width: 450px;
    }
    </style>
    """,
    unsafe_allow_html=True)


# Sidebar: collects the issue-template fields and generation settings,
# builds the prompt from them, and shows the generated template.
with st.sidebar:
    st.title(" 🔧 Settings")

    with st.expander("🏗 Issue Template Inputs", True):

        in_name = st.text_input("Name Metadata: ", placeholder="e.g., Bug Report or Feature Request or Question", on_change=None)
        in_about = st.text_input("About Metadata: ", placeholder="e.g., File a bug report", on_change=None)

        # Each optional field has a "without ..." checkbox; when ticked, the
        # '<|EMPTY|>' marker is used instead of showing a text input.
        empty_title = st.checkbox('without title')
        if not empty_title:
            in_title = st.text_input("Title Metadata: ", placeholder="e.g., [Bug]: ", on_change=None)
        else:
            in_title = '<|EMPTY|>'

        empty_labels = st.checkbox('without labels')
        if not empty_labels:
            in_labels = st.text_input("Labels Metadata: ", placeholder="e.g., feature, enhancement", on_change=None)
        else:
            in_labels = '<|EMPTY|>'

        empty_assignees = st.checkbox('without Assignees')
        if not empty_assignees:
            in_assignees = st.text_input("Assignees Metadata: ", placeholder="e.g., USER_1, USER_2", on_change=None)
        else:
            in_assignees = '<|EMPTY|>'

        # If "No headlines" is selected, both the headline type and the
        # headlines themselves are forced to the empty marker.
        in_headline_type = st.selectbox(
            'How would you like to be your Headlines?',
            ('**Emphasis**', '# Header', 'No headlines'))

        if in_headline_type != 'No headlines':
            in_headlines = st.text_area("Headlines: ", placeholder="Enter each headline in one line. e.g.,\nWelcome\nConcise Description\nAdditional Info", on_change=None, height=200)
            # One headline per input line, with surrounding whitespace removed.
            in_headlines = [element.strip() for element in in_headlines.split('\n')]
        else:
            in_headline_type = '<|EMPTY|>'
            in_headlines = '<|EMPTY|>'

        # df = pd.DataFrame(
        # [{"headline": "Welcome"},{"headline": "Concise Description"}, {"headline": "Additional Info"}])
        # in_headlines = st.experimental_data_editor(df, num_rows="dynamic")

        in_summary = st.text_area("Summary: ", placeholder="This Github Issue Template is ...", on_change=None, height=200)

    with st.expander("🎛 Model Configs", False):
        max_length_in = st.slider("max_length", 30, 512, 300)
        min_length_in = st.slider("min_length", 0, 300, 30)
        top_p_in = st.slider("top_p", 0.0, 1.0, 0.92)
        top_k_in = st.slider("top_k", 0, 100, 0)

    # Widget keys must be unique across the app; 'prompt' is already used by
    # the Submit button in the "Manual Prompt" tab, so this one gets its own.
    clicked = st.button("Submit", key='settings')

    # NOTE(review): `clicked` is not consulted, so generation runs on every
    # script rerun rather than only after the button is pressed.
    with st.spinner("Please Wait..."):
        prompt = create_instruction(in_name, in_about, in_title, in_labels, in_assignees, in_headline_type, in_headlines, in_summary)

        res = compute(prompt, top_p=top_p_in, top_k=top_k_in, do_sample=True, max_length=max_length_in, min_length=min_length_in)
        st.code(res, language="python")

tab1, tab2 = st.tabs(["Design GitHub Issue Template", "Manual Prompt"])

# Fixed sampling settings shared by the generation calls in both tabs.
tab_generation_kwargs = dict(top_p=0.92, top_k=0, do_sample=True, max_length=300, min_length=0)

with tab1:

    template_prompt = filled_prompt = "name:"

    clicked = st.button("Submit", key='design')

    with st.spinner("Please Wait..."):

        # NOTE(review): both prompts are the same literal, so this branch
        # never runs as written.
        if filled_prompt != template_prompt:
            res = compute(prompt, **tab_generation_kwargs)
            st.code(res, language="python")


with tab2:

    st.markdown('This part is only based on the prompt you provide here and not the issue template inputs.')

    # Free-form prompt; it rebinds the module-level `prompt` name.
    prompt = st.text_area("Prompt: ", placeholder="Enter your prompt.", on_change=None, height=200)

    clicked = st.button("Submit", key='prompt')

    with st.spinner("Please Wait..."):

        # Generate whenever the text area is non-empty.
        if prompt:
            res = compute(prompt, **tab_generation_kwargs)
            st.code(res, language="python")