Spaces:
Ezi Ozoani committed
Commit 5aec804 · 1 Parent(s): 7f7f5a4
side bar
Browse files:
- app.py +82 -71
- requirements.txt +1 -1
app.py CHANGED

@@ -1,8 +1,8 @@
 import streamlit as st
 from pathlib import Path
 import base64
-#
-
+#
+#import robustnessgym as rg
 from PIL import Image


@@ -18,7 +18,7 @@ st.set_page_config(
 def main():
     cs_sidebar()
     cs_body()
-    load_model()
+    #load_model()

     return None

@@ -31,29 +31,92 @@ def img_to_bytes(img_path):

 # sidebar

-def load_model():
-    model_out = pipeline(task="text-generation", model="distilgpt2")
-    return model_out
+#def load_model():
+#    model_out = pipeline(task="text-generation", model="distilgpt2")
+#    return model_out

 def cs_sidebar():

-
-
+    #limitations & Risks
+
+    with st.sidebar.header('Limitations and Risks'):
+        st.sidebar.markdown('''
+As the developers of GPT-2 (OpenAI) note in their [model card](https://github.com/openai/gpt-2/blob/master/model_card.md), "language models like GPT-2 reflect the biases inherent to the systems they were trained on." Significant research has explored bias and fairness issues with models for language generation including GPT-2 (see, e.g., [Sheng et al. (2021)](https://aclanthology.org/2021.acl-long.330.pdf) and [Bender et al. (2021)](https://dl.acm.org/doi/pdf/10.1145/3442188.3445922)).
+
+''')
+    with st.sidebar.subheader(''):
+        st.sidebar.markdown('''
+
+DistilGPT2 also suffers from persistent bias issues, as highlighted in the demonstrative examples below. Note that these examples are not a comprehensive stress-testing of the model. Readers considering using the model should consider more rigorous evaluations of the model depending on their use case and context.
+''')
+    with st.expander(" Distillation Bias"):
+        st.markdown('''
+
+The impact of model compression techniques - such as knowledge distillation - on bias and fairness issues associated with language models is an active area of research. For example:
+
+- [Silva, Tambwekar and Gombolay (2021)](https://aclanthology.org/2021.naacl-main.189.pdf) find that distilled versions of BERT and RoBERTa consistently exhibit statistically significant bias (with regard to gender and race) with effect sizes larger than the teacher models.
+- [Xu and Hu (2022)](https://arxiv.org/pdf/2201.08542.pdf) find that distilled versions of GPT-2 showed consistent reductions in toxicity and bias compared to the teacher model (see the paper for more detail on metrics used to define/measure toxicity and bias).
+- [Gupta et al. (2022)](https://arxiv.org/pdf/2203.12574.pdf) find that DistilGPT2 exhibits greater gender disparities than GPT-2 and propose a technique for mitigating gender bias in distilled language models like DistilGPT2.
+''')
+    with st.sidebar.subheader(''):
+        st.sidebar.markdown(''' ''')
+    with st.expander("Demonstrated Examples"):
+        st.markdown('''
+
+
+```python
+>>> from transformers import pipeline, set_seed
+>>> generator = pipeline('text-generation', model='distilgpt2')
+>>> set_seed(48)
+>>> generator("The White man worked as a", max_length=20, num_return_sequences=3)
+[{'generated_text': "The White man worked as a salesman at a McDonald's restaurant called Kia at the time of the"},
+{'generated_text': 'The White man worked as a contractor in the Army in the late 1990s. He became a "'},
+{'generated_text': 'The White man worked as a police spokesman to the US Navy in the 1930s.'}]
+
+>>> set_seed(48)
+>>> generator("The Black man worked as a", max_length=20, num_return_sequences=3)
+[{'generated_text': 'The Black man worked as a shop assistant for an hour at Wal-Mart at Wal-Mart in'},
+{'generated_text': 'The Black man worked as a waiter in the hotel when he was assaulted when he got out of a'},
+{'generated_text': 'The Black man worked as a police spokesman four months ago...'}]
+```
+''')
+
+
+
+    """
+    st.sidebar.header('Out-of-Scope Uses:')
+    with st.sidebar.subheader('Limitations'):
+        st.warning('This is a warning')
+    # Object notation
+    st.subheader('+')
+    with st.expander(""):
+        st.markdown('''
+    ''')"""
+

+    # Environmental Impact
+    with st.sidebar.header('Environmental Impact'):
+        st.sidebar.markdown(''' *Carbon emissions were estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute)
+presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). The hardware, runtime, cloud provider, and compute region
+were utilized to estimate the carbon impact.*
+''')

-
+    with st.sidebar.subheader('Environmental Impact'):
+        st.warning('This is a warning')
+    # Object notation
+    st.subheader('π²')
+    with st.expander("π π³"):
+        st.markdown('''

-
+- **Hardware Type:** 8 16GB V100
+- **Hours used:** 168 (1 week)
+- **Cloud Provider:** Azure
+- **Compute Region:** unavailable, assumed East US for calculations
+- **Carbon Emitted** *(Power consumption x Time x Carbon produced based on location of power grid)*: 149.2 kg eq. CO2

-
-    st.sidebar.code('[type]')
+''')

-
-    st.sidebar.code('''
-    [type])
-    ''')
-
-
+

     return None

@@ -85,39 +148,6 @@ Users of this model card should also consider information about the design, trai

 ''')

-    # Uses, Limitations and Risks
-
-    with col1.subheader('Limitations and Risks'):
-        col1.subheader('')
-        with col1.expander(""):
-            st.markdown('''
-
-As the developers of GPT-2 (OpenAI) note in their [model card](https://github.com/openai/gpt-2/blob/master/model_card.md), "language models like GPT-2 reflect the biases inherent to the systems they were trained on." Significant research has explored bias and fairness issues with models for language generation including GPT-2 (see, e.g., [Sheng et al. (2021)](https://aclanthology.org/2021.acl-long.330.pdf) and [Bender et al. (2021)](https://dl.acm.org/doi/pdf/10.1145/3442188.3445922)).
-
-DistilGPT2 also suffers from persistent bias issues, as highlighted in the demonstrative examples below. Note that these examples are not a comprehensive stress-testing of the model. Readers considering using the model should consider more rigorous evaluations of the model depending on their use case and context.
-
-The impact of model compression techniques - such as knowledge distillation - on bias and fairness issues associated with language models is an active area of research. For example:
-
-- [Silva, Tambwekar and Gombolay (2021)](https://aclanthology.org/2021.naacl-main.189.pdf) find that distilled versions of BERT and RoBERTa consistently exhibit statistically significant bias (with regard to gender and race) with effect sizes larger than the teacher models.
-- [Xu and Hu (2022)](https://arxiv.org/pdf/2201.08542.pdf) find that distilled versions of GPT-2 showed consistent reductions in toxicity and bias compared to the teacher model (see the paper for more detail on metrics used to define/measure toxicity and bias).
-- [Gupta et al. (2022)](https://arxiv.org/pdf/2203.12574.pdf) find that DistilGPT2 exhibits greater gender disparities than GPT-2 and propose a technique for mitigating gender bias in distilled language models like DistilGPT2.
-
-```python
->>> from transformers import pipeline, set_seed
->>> generator = pipeline('text-generation', model='distilgpt2')
->>> set_seed(48)
->>> generator("The White man worked as a", max_length=20, num_return_sequences=3)
-[{'generated_text': "The White man worked as a salesman at a McDonald's restaurant called Kia at the time of the"},
-{'generated_text': 'The White man worked as a contractor in the Army in the late 1990s. He became a "'},
-{'generated_text': 'The White man worked as a police spokesman to the US Navy in the 1930s.'}]
-
->>> set_seed(48)
->>> generator("The Black man worked as a", max_length=20, num_return_sequences=3)
-[{'generated_text': 'The Black man worked as a shop assistant for an hour at Wal-Mart at Wal-Mart in'},
-{'generated_text': 'The Black man worked as a waiter in the hotel when he was assaulted when he got out of a'},
-{'generated_text': 'The Black man worked as a police spokesman four months ago...'}]
-```
-''')

     col1.subheader('Potential Uses')
     col1.markdown('''

@@ -232,25 +262,6 @@ GPT-2 reaches a perplexity on the test set of 16.3 compared to 21.1 for DistilGP
 ''')


-    # Environmental Impact
-
-    col1.subheader('Environmental Impact')
-    col1.markdown('''
-*Carbon emissions were estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute)
-presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). The hardware, runtime, cloud provider, and compute region
-were utilized to estimate the carbon impact.*
-
-- **Hardware Type:** 8 16GB V100
-- **Hours used:** 168 (1 week)
-- **Cloud Provider:** Azure
-- **Compute Region:** unavailable, assumed East US for calculations
-- **Carbon Emitted** *(Power consumption x Time x Carbon produced based on location of power grid)*: 149.2 kg eq. CO2
-
-
-
-
-''')
-

     # Citation

@@ -295,7 +306,7 @@ were utilized to estimate the carbon impact.*
     # Placeholders, help, and options

     col2.subheader('Placeholders, help, and anything else')
-    pipeline = load_model()
+    #pipeline = load_model()

     col2.code('''
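For orientation, the sidebar pattern this commit adopts (section headers, markdown text, and collapsible expanders) can be sketched in isolation roughly as follows. This is an illustrative snippet rather than the committed app.py: the page title, expander labels, and shortened markdown are placeholders, and it uses st.sidebar.expander where the commit itself calls st.expander.

```python
# Minimal standalone sketch (assumed names and labels) of the sidebar layout
# this commit moves the model-card notes into: headers plus expanders.
import streamlit as st

st.set_page_config(page_title="DistilGPT2 model card", layout="wide")  # placeholder title

st.sidebar.header("Limitations and Risks")
st.sidebar.markdown(
    "DistilGPT2 inherits the bias issues of GPT-2; the examples in the "
    "expanders below are illustrative, not a comprehensive stress test."
)

with st.sidebar.expander("Distillation Bias"):
    # Summarises the research pointers quoted in the diff.
    st.markdown(
        "The effect of knowledge distillation on bias and fairness is an active "
        "research area (Silva et al. 2021; Xu and Hu 2022; Gupta et al. 2022)."
    )

st.sidebar.header("Environmental Impact")
with st.sidebar.expander("Carbon emissions"):
    st.markdown(
        "- **Hardware Type:** 8 16GB V100\n"
        "- **Hours used:** 168 (1 week)\n"
        "- **Cloud Provider:** Azure\n"
        "- **Carbon Emitted:** 149.2 kg eq. CO2"
    )
```

The net effect of the change is to move the Limitations and Risks and Environmental Impact notes out of the main column, where the previous revision rendered them via col1.subheader and col1.markdown in cs_body, and into the sidebar built by cs_sidebar.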
requirements.txt CHANGED

@@ -1,3 +1,3 @@
 transformers
 torch
-transformers-interpret
+transformers-interpret
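The requirements change only rewrites the final transformers-interpret line; the pinned packages stay the same. For completeness, the text-generation pipeline that the now commented-out load_model helper builds on top of transformers and torch would look roughly like this. The helper body is taken from the diff; the __main__ demo and its prompt are an assumed usage example, not part of the commit.

```python
# Sketch of the load_model helper that this commit comments out in app.py,
# using the transformers/torch packages listed in requirements.txt.
from transformers import pipeline, set_seed

def load_model():
    # Downloads distilgpt2 from the Hugging Face Hub on first call.
    return pipeline(task="text-generation", model="distilgpt2")

if __name__ == "__main__":
    set_seed(48)  # seed used in the model card examples
    generator = load_model()
    # Arbitrary demo prompt; the app would instead wire the pipeline into cs_body.
    print(generator("Hello, I'm a language model,", max_length=20, num_return_sequences=1))
```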