jhon parra committed on
Commit b9e3404 · 1 Parent(s): fcc2ea7

app definition

Files changed (3)
  1. .streamlit/config.toml +2 -0
  2. app.py +69 -0
  3. requirements.txt +175 -0
.streamlit/config.toml ADDED
@@ -0,0 +1,2 @@
+ [theme]
+ base = "dark"
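For reference, Streamlit's [theme] section accepts more keys than base; a sketch of an extended dark theme (these are standard Streamlit theme options, but the color value is a placeholder, not part of this commit):

    [theme]
    base = "dark"
    primaryColor = "#1DA1F2"
    font = "sans serif"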
app.py ADDED
@@ -0,0 +1,69 @@
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+ import torch
+ import streamlit as st
+ 
+ 
+ @st.cache_resource
+ def load_models():
+     # cache both checkpoints so Streamlit reruns do not reload them from the Hub
+     return {
+         'uribe': {
+             'tokenizer': AutoTokenizer.from_pretrained("jhonparra18/uribe-twitter-assistant-30ep"),
+             'model': AutoModelForCausalLM.from_pretrained("jhonparra18/uribe-twitter-assistant-30ep")},
+         'petro': {
+             'tokenizer': AutoTokenizer.from_pretrained("jhonparra18/petro-twitter-assistant-30ep"),
+             'model': AutoModelForCausalLM.from_pretrained("jhonparra18/petro-twitter-assistant-30ep")}}
+ 
+ 
+ MODELS = load_models()
+ 
+ # alternative: load both assistants as text-generation pipelines instead
+ # MODELS = {'petro': pipeline('text-generation', model='jhonparra18/petro-twitter-assistant-30ep-large'),
+ #           'uribe': pipeline('text-generation', model='jhonparra18/uribe-twitter-assistant-30ep')}
+ 
+ 
+ def text_completion(tokenizer, model, input_text: str, max_len: int = 100):
+     torch.manual_seed(444)  # fixed seed for reproducible sampling
+     tokenizer.padding_side = "left"  # pad on the left so generation continues from the prompt
+     tokenizer.pad_token = tokenizer.eos_token
+     inputs = tokenizer([input_text], return_tensors="pt", truncation=True, max_length=128)
+     outputs = model.generate(**inputs, do_sample=True, max_length=max_len, top_k=100, top_p=0.95)
+     return tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
+ 
+ 
+ # seed the widget defaults once; the form widgets below read these keys
+ if "input_user_txt" not in st.session_state:
+     st.session_state["input_user_txt"] = "Mi gobierno es"
+ 
+ if "user_max_length" not in st.session_state:
+     st.session_state["user_max_length"] = 100
+ 
+ 
+ st.markdown("<h3 style='text-align: center; color: white;'>Tweet de Político Colombiano: Autocompletado/generación de texto a partir de GPT2</h3>", unsafe_allow_html=True)
+ st.text("")
+ st.markdown("<h3 style='text-align: center; color: white;'>Causal Language Modeling</h3>", unsafe_allow_html=True)
+ st.text("")
+ 
+ 
+ col1, col2 = st.columns(2)
+ 
+ 
+ with col1:
+     with st.form("input_values"):
+         politician = st.selectbox(
+             "Selecciona el político",
+             ("Uribe", "Petro")
+         )
+         st.text("")
+         max_length_text = st.slider('Num Max Tokens', 100, 200, step=10, key="user_max_length")
+         st.text("")
+         input_user_text = st.text_area('Input Text', key="input_user_txt")
+         st.text("")
+         go_button = st.form_submit_button('Generate', use_container_width=True)
+ 
+ 
+ with col2:
+     if go_button:  # only generate on submit, avoiding a rerun on every widget change
+         with st.spinner('Generating Text...'):
+             selected = MODELS[politician.lower()]
+             output_text = text_completion(selected['tokenizer'], selected['model'], input_user_text, max_length_text)
+         st.text_area("output text", output_text, height=380, key="output_text")
+     else:
+         st.text_area("output text", "", height=380, key="output_text")
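As a quick sanity check, the same generation path can be exercised outside Streamlit; a minimal sketch that mirrors text_completion above (the 'petro' checkpoint, prompt, and max_length=120 are illustrative choices, not part of this commit):

    from transformers import AutoTokenizer, AutoModelForCausalLM
    import torch

    repo = "jhonparra18/petro-twitter-assistant-30ep"  # same checkpoint app.py loads for 'petro'
    tokenizer = AutoTokenizer.from_pretrained(repo)
    model = AutoModelForCausalLM.from_pretrained(repo)

    torch.manual_seed(444)  # same fixed seed app.py uses
    inputs = tokenizer(["Mi gobierno es"], return_tensors="pt", truncation=True, max_length=128)
    outputs = model.generate(**inputs, do_sample=True, max_length=120, top_k=100, top_p=0.95)
    print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])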
requirements.txt ADDED
@@ -0,0 +1,175 @@
+ #
+ # This file is autogenerated by pip-compile with Python 3.9
+ # by the following command:
+ #
+ #    pip-compile --output-file=requirements.txt requirements.in
+ #
+ aiohttp==3.8.4
+     # via
+     #   datasets
+     #   fsspec
+ aiosignal==1.3.1
+     # via aiohttp
+ appdirs==1.4.4
+     # via wandb
+ async-timeout==4.0.2
+     # via aiohttp
+ attrs==22.2.0
+     # via aiohttp
+ beautifulsoup4==4.11.2
+     # via snscrape
+ certifi==2022.12.7
+     # via
+     #   requests
+     #   sentry-sdk
+ charset-normalizer==3.0.1
+     # via
+     #   aiohttp
+     #   requests
+ click==8.1.3
+     # via wandb
+ datasets==2.9.0
+     # via -r requirements.in
+ dill==0.3.6
+     # via
+     #   datasets
+     #   multiprocess
+ docker-pycreds==0.4.0
+     # via wandb
+ filelock==3.9.0
+     # via
+     #   huggingface-hub
+     #   snscrape
+     #   transformers
+ frozenlist==1.3.3
+     # via
+     #   aiohttp
+     #   aiosignal
+ fsspec[http]==2023.1.0
+     # via datasets
+ gitdb==4.0.10
+     # via gitpython
+ gitpython==3.1.31
+     # via wandb
+ huggingface-hub==0.12.1
+     # via
+     #   datasets
+     #   transformers
+ idna==3.4
+     # via
+     #   requests
+     #   yarl
+ lxml==4.9.2
+     # via snscrape
+ multidict==6.0.4
+     # via
+     #   aiohttp
+     #   yarl
+ multiprocess==0.70.14
+     # via datasets
+ numpy==1.24.2
+     # via
+     #   datasets
+     #   pandas
+     #   pyarrow
+     #   transformers
+ nvidia-cublas-cu11==11.10.3.66
+     # via
+     #   nvidia-cudnn-cu11
+     #   torch
+ nvidia-cuda-nvrtc-cu11==11.7.99
+     # via torch
+ nvidia-cuda-runtime-cu11==11.7.99
+     # via torch
+ nvidia-cudnn-cu11==8.5.0.96
+     # via torch
+ packaging==23.0
+     # via
+     #   datasets
+     #   huggingface-hub
+     #   transformers
+ pandas==1.5.3
+     # via
+     #   -r requirements.in
+     #   datasets
+ pathtools==0.1.2
+     # via wandb
+ protobuf==4.22.0
+     # via wandb
+ psutil==5.9.4
+     # via wandb
+ pyarrow==11.0.0
+     # via datasets
+ pysocks==1.7.1
+     # via requests
+ python-dateutil==2.8.2
+     # via pandas
+ pytz==2022.7.1
+     # via pandas
+ pyyaml==6.0
+     # via
+     #   datasets
+     #   huggingface-hub
+     #   transformers
+     #   wandb
+ regex==2022.10.31
+     # via transformers
+ requests[socks]==2.28.2
+     # via
+     #   datasets
+     #   fsspec
+     #   huggingface-hub
+     #   responses
+     #   snscrape
+     #   transformers
+     #   wandb
+ responses==0.18.0
+     # via datasets
+ sentry-sdk==1.15.0
+     # via wandb
+ setproctitle==1.3.2
+     # via wandb
+ six==1.16.0
+     # via
+     #   docker-pycreds
+     #   python-dateutil
+ smmap==5.0.0
+     # via gitdb
+ snscrape==0.5.0.20230113
+     # via -r requirements.in
+ soupsieve==2.4
+     # via beautifulsoup4
+ tokenizers==0.13.2
+     # via transformers
+ torch==1.13.1
+     # via -r requirements.in
+ tqdm==4.64.1
+     # via
+     #   datasets
+     #   huggingface-hub
+     #   transformers
+ transformers==4.26.1
+     # via -r requirements.in
+ typing-extensions==4.5.0
+     # via
+     #   huggingface-hub
+     #   torch
+     #   wandb
+ urllib3==1.26.14
+     # via
+     #   requests
+     #   responses
+     #   sentry-sdk
+ wandb==0.13.10
+     # via -r requirements.in
+ wheel==0.38.4
+     # via
+     #   nvidia-cublas-cu11
+     #   nvidia-cuda-runtime-cu11
+ xxhash==3.2.0
+     # via datasets
+ yarl==1.8.2
+     # via aiohttp
+ 
+ # The following packages are considered to be unsafe in a requirements file:
+ # setuptools
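The "-r requirements.in" annotations above identify the direct dependencies, so the (uncommitted) requirements.in presumably contains roughly the following; this is an inferred sketch, not a file from this commit:

    datasets
    pandas
    snscrape
    torch
    transformers
    wandb

Note that streamlit itself is not pinned here; Spaces built with the streamlit SDK get it from the platform.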