Spaces:
Running
Running
File size: 6,973 Bytes
74688de 1f845b3 74688de 7e0543d 74688de 8cfb467 f1f40cb 8cfb467 f1f40cb 8cfb467 93d78ca 74688de 93d78ca 74688de 93d78ca 74688de 08e3335 0008647 c2d8cf6 8cfb467 c2d8cf6 df73138 8cfb467 c2d8cf6 df73138 8cfb467 c2d8cf6 b54fe3d c2d8cf6 8cfb467 0008647 a15f9dd 0008647 74386d3 a15f9dd e7aa0c9 a15f9dd 0008647 650f9cf 0008647 8cfb467 81e37bb 8cfb467 0008647 74688de fd56584 a15f9dd fd56584 1c313d2 fd56584 a15f9dd fd56584 74688de a0f65dc e5e710a a0f65dc 1c313d2 74688de 93d78ca 08e3335 a0f65dc 93d78ca 74688de |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 |
# coding=utf-8
# Copyright 2023 The GIRT Authors.
# Lint as: python3
# This Space is based on the AMR-KELEG/ALDi and cis-lmu/GlotLID Spaces.
# GIRT Space
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import streamlit as st
import pandas as pd
import base64
@st.cache_data
def render_svg(svg):
    """Render an SVG string as a centered inline image.

    The SVG text is base64-encoded into a ``data:`` URI, wrapped in an
    ``<img>`` tag, and written as raw HTML into a new Streamlit container.

    Args:
        svg: SVG markup as a string.
    """
    encoded = base64.b64encode(svg.encode("utf-8")).decode("utf-8")
    # HTML attributes are separated by whitespace only; the comma that used to
    # follow the src value was malformed markup.
    html = rf'<p align="center"> <img src="data:image/svg+xml;base64,{encoded}" width="40%"/> </p>'
    st.container().write(html, unsafe_allow_html=True)
@st.cache_resource
def load_model(model_name):
    """Return the seq2seq model loaded via ``from_pretrained(model_name)``.

    Wrapped in ``st.cache_resource``.
    """
    return AutoModelForSeq2SeqLM.from_pretrained(model_name)
@st.cache_resource
def load_tokenizer(model_name):
    """Return the tokenizer loaded via ``from_pretrained(model_name)``.

    Wrapped in ``st.cache_resource``.
    """
    return AutoTokenizer.from_pretrained(model_name)
# Model and tokenizer come from the same checkpoint; show a spinner while
# both are being loaded.
MODEL_CHECKPOINT = 'nafisehNik/girt-t5-base'
with st.spinner(text="Please wait while the model is loading...."):
    model = load_model(MODEL_CHECKPOINT)
    tokenizer = load_tokenizer(MODEL_CHECKPOINT)
def create_instruction(name, about, title, labels, assignees, headline_type, headline, summary):
    """Build the model prompt from the issue-template fields.

    Every falsy metadata field (empty string, ``None``, empty list) is
    replaced by ``<|MASK|>``; a falsy summary is replaced by ``<|EMPTY|>``.
    Fields are emitted one per line as ``key: value`` in a fixed order.

    Args:
        name: Template name metadata.
        about: Template "about" metadata.
        title: Template title metadata.
        labels: Template labels metadata.
        assignees: Template assignees metadata.
        headline_type: Headline style, emitted under ``headlines_type``.
        headline: Headlines value, emitted under ``headlines``; formatted
            with ``str()`` semantics, so a list appears as its Python repr.
        summary: Free-text summary of the template.

    Returns:
        The newline-joined instruction string.
    """
    mask_token = '<|MASK|>'
    empty_token = '<|EMPTY|>'
    fields = (
        ('name', name),
        ('about', about),
        ('title', title),
        ('labels', labels),
        ('assignees', assignees),
        ('headlines_type', headline_type),
        ('headlines', headline),
    )
    # The previous implementation iterated an undefined name (`val_list`)
    # here and raised NameError on every call.
    lines = [f'{key}: {value or mask_token}' for key, value in fields]
    lines.append(f'summary: {summary or empty_token}')
    return '\n'.join(lines)
def compute(sample, top_p, top_k, do_sample, max_length, min_length):
    """Generate text for a prompt using the module-level model and tokenizer.

    Args:
        sample: Prompt text passed to the tokenizer.
        top_p: Forwarded to ``model.generate`` as ``top_p``.
        top_k: Forwarded to ``model.generate`` as ``top_k``.
        do_sample: Forwarded to ``model.generate`` as ``do_sample``.
        max_length: Forwarded to ``model.generate`` as ``max_length``.
        min_length: Forwarded to ``model.generate`` as ``min_length`` after
            being capped at ``max_length``.

    Returns:
        The first decoded sequence with ``</s>``, ``<pad>`` and ``<unk>``
        removed and the space following each newline dropped.
    """
    # The UI sliders allow min_length (up to 300) to exceed max_length (down
    # to 30); cap it so generate() is never given an unfeasible length pair.
    min_length = min(min_length, max_length)

    inputs = tokenizer(sample, return_tensors="pt").to('cpu')
    outputs = model.generate(
        **inputs,
        min_length=min_length,
        max_length=max_length,
        do_sample=do_sample,
        top_p=top_p,
        top_k=top_k,
    ).to('cpu')

    # Special tokens are kept while decoding and then stripped by hand below.
    # NOTE(review): presumably so that non-padding markers in the output
    # survive decoding — confirm.
    text = tokenizer.batch_decode(outputs, skip_special_tokens=False)[0]

    # Order matters: '<pad> ' must be removed before the bare '<pad>'.
    replacements = (
        ('\n ', '\n'),
        ('</s>', ''),
        ('<pad> ', ''),
        ('<pad>', ''),
        ('<unk>', ''),
    )
    for old, new in replacements:
        text = text.replace(old, new)
    return text
# Page header: "duplicate this Space" link, logo, and sidebar width override.
# NOTE(review): the link text is empty, so this renders nothing visible —
# presumably a badge image was intended; confirm.
st.markdown("[](https://huggingface.co/spaces/nafisehNik/girt-space?duplicate=true)")

# Read the logo through a context manager so the file handle is closed, and
# decode it as UTF-8 (render_svg re-encodes the text as UTF-8).
with open("assets/logo.svg", encoding="utf-8") as logo_file:
    render_svg(logo_file.read())

# Pin the expanded sidebar to 450px. The <style> element is now closed
# explicitly; the string content stays at column 0 so Markdown does not treat
# it as an indented code block.
st.markdown(
    """
<style>
[data-testid="stSidebar"][aria-expanded="true"]{
min-width: 450px;
max-width: 450px;
}
</style>
""",
    unsafe_allow_html=True)
# Sidebar: collects the issue-template fields and the generation settings.
# NOTE(review): the leading characters in the title/expander labels look like
# mis-encoded emoji; they are kept byte-for-byte because the originals cannot
# be recovered with certainty.
with st.sidebar:
    st.title(" π§ Settings")
    with st.expander("π Issue Template Inputs", True):
        in_name = st.text_input("Name Metadata: ", placeholder="e.g., Bug Report or Feature Request or Question", on_change=None)
        in_about = st.text_input("About Metadata: ", placeholder="e.g., File a bug report", on_change=None)

        # Each optional field can be explicitly marked as absent, in which
        # case the '<|EMPTY|>' marker is sent instead of showing an input.
        empty_title = st.checkbox('without title')
        if not empty_title:
            in_title = st.text_input("Title Metadata: ", placeholder="e.g., [Bug]: ", on_change=None)
        else:
            in_title = '<|EMPTY|>'

        empty_labels = st.checkbox('without labels')
        if not empty_labels:
            in_labels = st.text_input("Labels Metadata: ", placeholder="e.g., feature, enhancement", on_change=None)
        else:
            in_labels = '<|EMPTY|>'

        empty_assignees = st.checkbox('without Assignees')
        if not empty_assignees:
            in_assignees = st.text_input("Assignees Metadata: ", placeholder="e.g., USER_1, USER_2", on_change=None)
        else:
            in_assignees = '<|EMPTY|>'

        # if no headlines is selected, force the headlines to be empty as well.
        in_headline_type = st.selectbox(
            'How would you like to be your Headlines?',
            ('**Emphasis**', '# Header', 'No headlines'))
        if in_headline_type != 'No headlines':
            headlines_text = st.text_area("Headlines: ", placeholder="Enter each headline in one line. e.g.,\nWelcome\nConcise Description\nAdditional Info", on_change=None, height=200)
            stripped_lines = [line.strip() for line in headlines_text.split('\n')]
            # Drop blank lines: an empty text area used to yield [''], which
            # is truthy and therefore was never replaced by the mask token in
            # create_instruction.
            in_headlines = [line for line in stripped_lines if line]
        else:
            in_headline_type = '<|EMPTY|>'
            in_headlines = '<|EMPTY|>'

        # df = pd.DataFrame(
        # [{"headline": "Welcome"},{"headline": "Concise Description"}, {"headline": "Additional Info"}])
        # in_headlines = st.experimental_data_editor(df, num_rows="dynamic")
        in_summary = st.text_area("Summary: ", placeholder="This Github Issue Template is ...", on_change=None, height=200)

    with st.expander("π Model Configs", False):
        max_length_in = st.slider("max_length", 30, 512, 300)
        min_length_in = st.slider("min_length", 0, 300, 30)
        top_p_in = st.slider("top_p", 0.0, 1.0, 0.92)
        top_k_in = st.slider("top_k", 0, 100, 0)

    # The key must be unique across the app; 'prompt' is also used by the
    # Submit button in the "Manual Prompt" tab.
    # NOTE(review): the return value is never checked, so generation below
    # runs on every script rerun, not only after a click — confirm intent.
    clicked = st.button("Submit", key='sidebar_submit')

# Build the prompt from the sidebar inputs and show the generated template.
with st.spinner("Please Wait..."):
    prompt = create_instruction(in_name, in_about, in_title, in_labels, in_assignees, in_headline_type, in_headlines, in_summary)
    res = compute(prompt, top_p=top_p_in, top_k=top_k_in, do_sample=True, max_length=max_length_in, min_length=min_length_in)
    st.code(res, language="python")
# Two tabs: a template-design tab and a free-form prompt tab.
tab1, tab2 = st.tabs(["Design GitHub Issue Template", "Manual Prompt"])

with tab1:
    template_prompt = "name:"
    filled_prompt = "name:"
    clicked = st.button("Submit", key='design')
    with st.spinner("Please Wait..."):
        # NOTE(review): filled_prompt and template_prompt are the same
        # constant, so this branch never runs as written — confirm whether
        # this tab is still meant to generate anything.
        if filled_prompt != template_prompt:
            res = compute(prompt, top_p=0.92, top_k=0, do_sample=True, max_length=300, min_length=0)
            st.code(res, language="python")

with tab2:
    st.markdown('This part is only based on the prompt you provide here and not the issue template inputs.')
    prompt = st.text_area("Prompt: ", placeholder="Enter your prompt.", on_change=None, height=200)
    # Widget keys must be unique across the whole app; 'prompt' is already
    # used by the Submit button that precedes the tabs.
    # NOTE(review): the return value is unused; generation runs whenever the
    # prompt is non-empty.
    clicked = st.button("Submit", key='manual_prompt')
    with st.spinner("Please Wait..."):
        if prompt:
            res = compute(prompt, top_p=0.92, top_k=0, do_sample=True, max_length=300, min_length=0)
            st.code(res, language="python")
|