ahmadtalha committed on
Commit
8abeb87
·
1 Parent(s): 7a9e4f3

Adding files

Browse files
1. Transformer Models.ipynb ADDED
@@ -0,0 +1,691 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# TRANSFORMER MODELS"
8
+ ]
9
+ },
10
+ {
11
+ "cell_type": "markdown",
12
+ "metadata": {},
13
+ "source": [
14
+ "## Transformers, what can they do?"
15
+ ]
16
+ },
17
+ {
18
+ "cell_type": "markdown",
19
+ "metadata": {},
20
+ "source": [
21
+ "### Sentiment Analysis"
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": 1,
27
+ "metadata": {},
28
+ "outputs": [
29
+ {
30
+ "name": "stderr",
31
+ "output_type": "stream",
32
+ "text": [
33
+ "No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision 714eb0f (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).\n",
34
+ "Using a pipeline without specifying a model name and revision in production is not recommended.\n"
35
+ ]
36
+ },
37
+ {
38
+ "name": "stdout",
39
+ "output_type": "stream",
40
+ "text": [
41
+ "WARNING:tensorflow:From c:\\Users\\ACER\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\tf_keras\\src\\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.\n",
42
+ "\n"
43
+ ]
44
+ },
45
+ {
46
+ "data": {
47
+ "text/plain": [
48
+ "[{'label': 'POSITIVE', 'score': 0.9598049521446228}]"
49
+ ]
50
+ },
51
+ "execution_count": 1,
52
+ "metadata": {},
53
+ "output_type": "execute_result"
54
+ }
55
+ ],
56
+ "source": [
57
+ "from transformers import pipeline\n",
58
+ "\n",
59
+ "classifier = pipeline(\"sentiment-analysis\")\n",
60
+ "classifier(\"I've been waiting for a HuggingFace course my whole life.\")"
61
+ ]
62
+ },
63
+ {
64
+ "cell_type": "code",
65
+ "execution_count": 2,
66
+ "metadata": {},
67
+ "outputs": [
68
+ {
69
+ "data": {
70
+ "text/plain": [
71
+ "[{'label': 'POSITIVE', 'score': 0.9598049521446228},\n",
72
+ " {'label': 'NEGATIVE', 'score': 0.9994558691978455}]"
73
+ ]
74
+ },
75
+ "execution_count": 2,
76
+ "metadata": {},
77
+ "output_type": "execute_result"
78
+ }
79
+ ],
80
+ "source": [
81
+ "# we can pass several sentences\n",
82
+ "classifier(\n",
83
+ " [\"I've been waiting for a HuggingFace course my whole life.\", \"I hate this so much!\"]\n",
84
+ ")"
85
+ ]
86
+ },
87
+ {
88
+ "cell_type": "markdown",
89
+ "metadata": {},
90
+ "source": [
91
+ "### Zero-shot classification"
92
+ ]
93
+ },
94
+ {
95
+ "cell_type": "code",
96
+ "execution_count": 3,
97
+ "metadata": {},
98
+ "outputs": [
99
+ {
100
+ "name": "stderr",
101
+ "output_type": "stream",
102
+ "text": [
103
+ "No model was supplied, defaulted to facebook/bart-large-mnli and revision d7645e1 (https://huggingface.co/facebook/bart-large-mnli).\n",
104
+ "Using a pipeline without specifying a model name and revision in production is not recommended.\n"
105
+ ]
106
+ },
107
+ {
108
+ "data": {
109
+ "application/vnd.jupyter.widget-view+json": {
110
+ "model_id": "13af57499d894e8aa77c7ed39138d3dd",
111
+ "version_major": 2,
112
+ "version_minor": 0
113
+ },
114
+ "text/plain": [
115
+ "model.safetensors: 98%|#########8| 1.60G/1.63G [00:00<?, ?B/s]"
116
+ ]
117
+ },
118
+ "metadata": {},
119
+ "output_type": "display_data"
120
+ },
121
+ {
122
+ "name": "stderr",
123
+ "output_type": "stream",
124
+ "text": [
125
+ "c:\\Users\\ACER\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\huggingface_hub\\file_download.py:147: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\ACER\\.cache\\huggingface\\hub\\models--facebook--bart-large-mnli. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n",
126
+ "To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n",
127
+ " warnings.warn(message)\n"
128
+ ]
129
+ },
130
+ {
131
+ "data": {
132
+ "application/vnd.jupyter.widget-view+json": {
133
+ "model_id": "5184b998013d4eacac2a0e943ebcbfdf",
134
+ "version_major": 2,
135
+ "version_minor": 0
136
+ },
137
+ "text/plain": [
138
+ "tokenizer_config.json: 0%| | 0.00/26.0 [00:00<?, ?B/s]"
139
+ ]
140
+ },
141
+ "metadata": {},
142
+ "output_type": "display_data"
143
+ },
144
+ {
145
+ "data": {
146
+ "application/vnd.jupyter.widget-view+json": {
147
+ "model_id": "af001870e23b4808862f0f4e160327ef",
148
+ "version_major": 2,
149
+ "version_minor": 0
150
+ },
151
+ "text/plain": [
152
+ "vocab.json: 0%| | 0.00/899k [00:00<?, ?B/s]"
153
+ ]
154
+ },
155
+ "metadata": {},
156
+ "output_type": "display_data"
157
+ },
158
+ {
159
+ "data": {
160
+ "application/vnd.jupyter.widget-view+json": {
161
+ "model_id": "743eb773e873441c813a1d13925215cf",
162
+ "version_major": 2,
163
+ "version_minor": 0
164
+ },
165
+ "text/plain": [
166
+ "merges.txt: 0%| | 0.00/456k [00:00<?, ?B/s]"
167
+ ]
168
+ },
169
+ "metadata": {},
170
+ "output_type": "display_data"
171
+ },
172
+ {
173
+ "data": {
174
+ "application/vnd.jupyter.widget-view+json": {
175
+ "model_id": "f29eb797c99242558fe742a00411262c",
176
+ "version_major": 2,
177
+ "version_minor": 0
178
+ },
179
+ "text/plain": [
180
+ "tokenizer.json: 0%| | 0.00/1.36M [00:00<?, ?B/s]"
181
+ ]
182
+ },
183
+ "metadata": {},
184
+ "output_type": "display_data"
185
+ },
186
+ {
187
+ "data": {
188
+ "text/plain": [
189
+ "{'sequence': 'This is a course about the Transformers library.',\n",
190
+ " 'labels': ['education', 'business', 'politics'],\n",
191
+ " 'scores': [0.8719874024391174, 0.09406554698944092, 0.033947039395570755]}"
192
+ ]
193
+ },
194
+ "execution_count": 3,
195
+ "metadata": {},
196
+ "output_type": "execute_result"
197
+ }
198
+ ],
199
+ "source": [
200
+ "from transformers import pipeline\n",
201
+ "\n",
202
+ "classifier = pipeline(\"zero-shot-classification\")\n",
203
+ "\n",
204
+ "classifier(\n",
205
+ " \"This is a course about the Transformers library.\",\n",
206
+ " candidate_labels = [\"education\", \"politics\", \"business\"]\n",
207
+ ")"
208
+ ]
209
+ },
210
+ {
211
+ "cell_type": "markdown",
212
+ "metadata": {},
213
+ "source": [
214
+ "### Text generation"
215
+ ]
216
+ },
217
+ {
218
+ "cell_type": "code",
219
+ "execution_count": 4,
220
+ "metadata": {},
221
+ "outputs": [
222
+ {
223
+ "name": "stderr",
224
+ "output_type": "stream",
225
+ "text": [
226
+ "No model was supplied, defaulted to openai-community/gpt2 and revision 607a30d (https://huggingface.co/openai-community/gpt2).\n",
227
+ "Using a pipeline without specifying a model name and revision in production is not recommended.\n",
228
+ "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n"
229
+ ]
230
+ },
231
+ {
232
+ "data": {
233
+ "text/plain": [
234
+ "[{'generated_text': 'In this course, we will teach you how to build a custom script and a WebScript web server that uses the JQuery 4.3 framework.\\n\\nYou will run up to 60 minutes with a single setup, in our example JQuery J'}]"
235
+ ]
236
+ },
237
+ "execution_count": 4,
238
+ "metadata": {},
239
+ "output_type": "execute_result"
240
+ }
241
+ ],
242
+ "source": [
243
+ "from transformers import pipeline\n",
244
+ "\n",
245
+ "generator = pipeline(\"text-generation\")\n",
246
+ "generator(\"In this course, we will teach you how to\")"
247
+ ]
248
+ },
249
+ {
250
+ "cell_type": "markdown",
251
+ "metadata": {},
252
+ "source": [
253
+ "### Using any model from the Hub in a pipeline"
254
+ ]
255
+ },
256
+ {
257
+ "cell_type": "code",
258
+ "execution_count": 5,
259
+ "metadata": {},
260
+ "outputs": [
261
+ {
262
+ "name": "stderr",
263
+ "output_type": "stream",
264
+ "text": [
265
+ "Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.\n",
266
+ "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n"
267
+ ]
268
+ },
269
+ {
270
+ "data": {
271
+ "text/plain": [
272
+ "[{'generated_text': 'In this course, we will teach you how to implement an API that can only be used by a single user.\\n\\n\\nHere are the slides'},\n",
273
+ " {'generated_text': 'In this course, we will teach you how to put food in order to reduce the risk of heart disease and even kill yourself as part of a program'}]"
274
+ ]
275
+ },
276
+ "execution_count": 5,
277
+ "metadata": {},
278
+ "output_type": "execute_result"
279
+ }
280
+ ],
281
+ "source": [
282
+ "from transformers import pipeline\n",
283
+ "\n",
284
+ "generator = pipeline(\"text-generation\", model=\"distilgpt2\")\n",
285
+ "\n",
286
+ "generator(\n",
287
+ " \"In this course, we will teach you how to\",\n",
288
+ " max_length=30,\n",
289
+ " num_return_sequences=2)"
290
+ ]
291
+ },
292
+ {
293
+ "cell_type": "markdown",
294
+ "metadata": {},
295
+ "source": [
296
+ "### Mask filling"
297
+ ]
298
+ },
299
+ {
300
+ "cell_type": "code",
301
+ "execution_count": 6,
302
+ "metadata": {},
303
+ "outputs": [
304
+ {
305
+ "name": "stderr",
306
+ "output_type": "stream",
307
+ "text": [
308
+ "No model was supplied, defaulted to distilbert/distilroberta-base and revision fb53ab8 (https://huggingface.co/distilbert/distilroberta-base).\n",
309
+ "Using a pipeline without specifying a model name and revision in production is not recommended.\n",
310
+ "Some weights of the model checkpoint at distilbert/distilroberta-base were not used when initializing RobertaForMaskedLM: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n",
311
+ "- This IS expected if you are initializing RobertaForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
312
+ "- This IS NOT expected if you are initializing RobertaForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
313
+ ]
314
+ },
315
+ {
316
+ "data": {
317
+ "text/plain": [
318
+ "[{'score': 0.19198469817638397,\n",
319
+ " 'token': 30412,\n",
320
+ " 'token_str': ' mathematical',\n",
321
+ " 'sequence': 'This course will teach you all about mathematical models.'},\n",
322
+ " {'score': 0.04209211468696594,\n",
323
+ " 'token': 38163,\n",
324
+ " 'token_str': ' computational',\n",
325
+ " 'sequence': 'This course will teach you all about computational models.'}]"
326
+ ]
327
+ },
328
+ "execution_count": 6,
329
+ "metadata": {},
330
+ "output_type": "execute_result"
331
+ }
332
+ ],
333
+ "source": [
334
+ "from transformers import pipeline\n",
335
+ "\n",
336
+ "unmasker = pipeline(\"fill-mask\")\n",
337
+ "unmasker(\"This course will teach you all about <mask> models.\", top_k=2)"
338
+ ]
339
+ },
340
+ {
341
+ "cell_type": "markdown",
342
+ "metadata": {},
343
+ "source": [
344
+ "### Named Entity Recognition"
345
+ ]
346
+ },
347
+ {
348
+ "cell_type": "code",
349
+ "execution_count": 7,
350
+ "metadata": {},
351
+ "outputs": [
352
+ {
353
+ "name": "stderr",
354
+ "output_type": "stream",
355
+ "text": [
356
+ "No model was supplied, defaulted to dbmdz/bert-large-cased-finetuned-conll03-english and revision 4c53496 (https://huggingface.co/dbmdz/bert-large-cased-finetuned-conll03-english).\n",
357
+ "Using a pipeline without specifying a model name and revision in production is not recommended.\n",
358
+ "Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']\n",
359
+ "- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
360
+ "- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
361
+ "c:\\Users\\ACER\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\transformers\\pipelines\\token_classification.py:170: UserWarning: `grouped_entities` is deprecated and will be removed in version v5.0.0, defaulted to `aggregation_strategy=\"AggregationStrategy.SIMPLE\"` instead.\n",
362
+ " warnings.warn(\n"
363
+ ]
364
+ },
365
+ {
366
+ "data": {
367
+ "text/plain": [
368
+ "[{'entity_group': 'PER',\n",
369
+ " 'score': 0.99884915,\n",
370
+ " 'word': 'Ahmad',\n",
371
+ " 'start': 11,\n",
372
+ " 'end': 16},\n",
373
+ " {'entity_group': 'ORG',\n",
374
+ " 'score': 0.9950792,\n",
375
+ " 'word': 'University of Engineering and Technology',\n",
376
+ " 'start': 31,\n",
377
+ " 'end': 71},\n",
378
+ " {'entity_group': 'LOC',\n",
379
+ " 'score': 0.97850055,\n",
380
+ " 'word': 'Lahore',\n",
381
+ " 'start': 73,\n",
382
+ " 'end': 79},\n",
383
+ " {'entity_group': 'ORG',\n",
384
+ " 'score': 0.78072757,\n",
385
+ " 'word': \"Bechelor ' s\",\n",
386
+ " 'start': 95,\n",
387
+ " 'end': 105},\n",
388
+ " {'entity_group': 'ORG',\n",
389
+ " 'score': 0.92247367,\n",
390
+ " 'word': 'Computer Science',\n",
391
+ " 'start': 109,\n",
392
+ " 'end': 125}]"
393
+ ]
394
+ },
395
+ "execution_count": 7,
396
+ "metadata": {},
397
+ "output_type": "execute_result"
398
+ }
399
+ ],
400
+ "source": [
401
+ "from transformers import pipeline\n",
402
+ "\n",
403
+ "ner = pipeline(\"ner\", grouped_entities=True)\n",
404
+ "ner(\"My name is Ahmad and I work at University of Engineering and Technology, Lahore. I was prsuing Bechelor's of Computer Science.\")"
405
+ ]
406
+ },
407
+ {
408
+ "cell_type": "markdown",
409
+ "metadata": {},
410
+ "source": [
411
+ "### Question answering"
412
+ ]
413
+ },
414
+ {
415
+ "cell_type": "code",
416
+ "execution_count": 2,
417
+ "metadata": {},
418
+ "outputs": [
419
+ {
420
+ "name": "stderr",
421
+ "output_type": "stream",
422
+ "text": [
423
+ "No model was supplied, defaulted to distilbert/distilbert-base-cased-distilled-squad and revision 564e9b5 (https://huggingface.co/distilbert/distilbert-base-cased-distilled-squad).\n",
424
+ "Using a pipeline without specifying a model name and revision in production is not recommended.\n"
425
+ ]
426
+ }
427
+ ],
428
+ "source": [
429
+ "from transformers import pipeline\n",
430
+ "\n",
431
+ "question_answerer = pipeline(\"question-answering\")\n",
432
+ "\n",
433
+ "ans = question_answerer(\n",
434
+ " question=\"where do I work?\",\n",
435
+ " context = \"My name is Ahmad and I work at University of Engineering and Technology, Lahore\"\n",
436
+ ")"
437
+ ]
438
+ },
439
+ {
440
+ "cell_type": "code",
441
+ "execution_count": 4,
442
+ "metadata": {},
443
+ "outputs": [
444
+ {
445
+ "data": {
446
+ "text/plain": [
447
+ "'University of Engineering and Technology, Lahore'"
448
+ ]
449
+ },
450
+ "execution_count": 4,
451
+ "metadata": {},
452
+ "output_type": "execute_result"
453
+ }
454
+ ],
455
+ "source": [
456
+ "ans['answer']"
457
+ ]
458
+ },
459
+ {
460
+ "cell_type": "markdown",
461
+ "metadata": {},
462
+ "source": [
463
+ "### Summarization"
464
+ ]
465
+ },
466
+ {
467
+ "cell_type": "code",
468
+ "execution_count": 9,
469
+ "metadata": {},
470
+ "outputs": [
471
+ {
472
+ "name": "stderr",
473
+ "output_type": "stream",
474
+ "text": [
475
+ "No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 and revision a4f8f3e (https://huggingface.co/sshleifer/distilbart-cnn-12-6).\n",
476
+ "Using a pipeline without specifying a model name and revision in production is not recommended.\n"
477
+ ]
478
+ }
479
+ ],
480
+ "source": [
481
+ "from transformers import pipeline\n",
482
+ "\n",
483
+ "summarizer = pipeline(\"summarization\")\n",
484
+ "summary = summarizer(\n",
485
+ " \"\"\"\n",
486
+ " America has changed dramatically during recent years. Not only has the number of \n",
487
+ " graduates in traditional engineering disciplines such as mechanical, civil, \n",
488
+ " electrical, chemical, and aeronautical engineering declined, but in most of \n",
489
+ " the premier American universities engineering curricula now concentrate on \n",
490
+ " and encourage largely the study of engineering science. As a result, there \n",
491
+ " are declining offerings in engineering subjects dealing with infrastructure, \n",
492
+ " the environment, and related issues, and greater concentration on high \n",
493
+ " technology subjects, largely supporting increasingly complex scientific \n",
494
+ " developments. While the latter is important, it should not be at the expense \n",
495
+ " of more traditional engineering.\n",
496
+ "\n",
497
+ " Rapidly developing economies such as China and India, as well as other \n",
498
+ " industrial countries in Europe and Asia, continue to encourage and advance \n",
499
+ " the teaching of engineering. Both China and India, respectively, graduate \n",
500
+ " six and eight times as many traditional engineers as does the United States. \n",
501
+ " Other industrial countries at minimum maintain their output, while America \n",
502
+ " suffers an increasingly serious decline in the number of engineering graduates \n",
503
+ " and a lack of well-educated engineers.\n",
504
+ "\"\"\"\n",
505
+ ")"
506
+ ]
507
+ },
508
+ {
509
+ "cell_type": "code",
510
+ "execution_count": 10,
511
+ "metadata": {},
512
+ "outputs": [
513
+ {
514
+ "name": "stdout",
515
+ "output_type": "stream",
516
+ "text": [
517
+ " America has changed dramatically during recent years . The number of engineering graduates in the U.S. has declined in traditional engineering disciplines such as mechanical, civil, electrical, chemical, and aeronautical engineering . Rapidly developing economies such as China and India continue to encourage and advance the teaching of engineering .\n"
518
+ ]
519
+ }
520
+ ],
521
+ "source": [
522
+ "print(summary[0]['summary_text'])"
523
+ ]
524
+ },
525
+ {
526
+ "cell_type": "markdown",
527
+ "metadata": {},
528
+ "source": [
529
+ "### Translation"
530
+ ]
531
+ },
532
+ {
533
+ "cell_type": "code",
534
+ "execution_count": 11,
535
+ "metadata": {},
536
+ "outputs": [],
537
+ "source": [
538
+ "import sentencepiece"
539
+ ]
540
+ },
541
+ {
542
+ "cell_type": "code",
543
+ "execution_count": 12,
544
+ "metadata": {},
545
+ "outputs": [
546
+ {
547
+ "data": {
548
+ "application/vnd.jupyter.widget-view+json": {
549
+ "model_id": "e7521143fb794a39b66b0f5d00f9fac8",
550
+ "version_major": 2,
551
+ "version_minor": 0
552
+ },
553
+ "text/plain": [
554
+ "source.spm: 0%| | 0.00/802k [00:00<?, ?B/s]"
555
+ ]
556
+ },
557
+ "metadata": {},
558
+ "output_type": "display_data"
559
+ },
560
+ {
561
+ "name": "stderr",
562
+ "output_type": "stream",
563
+ "text": [
564
+ "c:\\Users\\ACER\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\huggingface_hub\\file_download.py:147: UserWarning: `huggingface_hub` cache-system uses symlinks by default to efficiently store duplicated files but your machine does not support them in C:\\Users\\ACER\\.cache\\huggingface\\hub\\models--Helsinki-NLP--opus-mt-fr-en. Caching files will still work but in a degraded version that might require more space on your disk. This warning can be disabled by setting the `HF_HUB_DISABLE_SYMLINKS_WARNING` environment variable. For more details, see https://huggingface.co/docs/huggingface_hub/how-to-cache#limitations.\n",
565
+ "To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development\n",
566
+ " warnings.warn(message)\n"
567
+ ]
568
+ },
569
+ {
570
+ "data": {
571
+ "application/vnd.jupyter.widget-view+json": {
572
+ "model_id": "d658b08296d64e4081ac272272b520d7",
573
+ "version_major": 2,
574
+ "version_minor": 0
575
+ },
576
+ "text/plain": [
577
+ "target.spm: 0%| | 0.00/778k [00:00<?, ?B/s]"
578
+ ]
579
+ },
580
+ "metadata": {},
581
+ "output_type": "display_data"
582
+ },
583
+ {
584
+ "data": {
585
+ "application/vnd.jupyter.widget-view+json": {
586
+ "model_id": "92ea52e7b8d446e7a21d844815c4045b",
587
+ "version_major": 2,
588
+ "version_minor": 0
589
+ },
590
+ "text/plain": [
591
+ "vocab.json: 0%| | 0.00/1.34M [00:00<?, ?B/s]"
592
+ ]
593
+ },
594
+ "metadata": {},
595
+ "output_type": "display_data"
596
+ },
597
+ {
598
+ "name": "stderr",
599
+ "output_type": "stream",
600
+ "text": [
601
+ "c:\\Users\\ACER\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\transformers\\models\\marian\\tokenization_marian.py:175: UserWarning: Recommended: pip install sacremoses.\n",
602
+ " warnings.warn(\"Recommended: pip install sacremoses.\")\n"
603
+ ]
604
+ },
605
+ {
606
+ "data": {
607
+ "text/plain": [
608
+ "[{'translation_text': 'This course is produced by Hugging Face.'}]"
609
+ ]
610
+ },
611
+ "execution_count": 12,
612
+ "metadata": {},
613
+ "output_type": "execute_result"
614
+ }
615
+ ],
616
+ "source": [
617
+ "import sentencepiece\n",
618
+ "from transformers import pipeline\n",
619
+ "\n",
620
+ "translator = pipeline(\"translation\", model=\"Helsinki-NLP/opus-mt-fr-en\")\n",
621
+ "translator(\"Ce cours est produit par Hugging Face.\")"
622
+ ]
623
+ },
624
+ {
625
+ "cell_type": "markdown",
626
+ "metadata": {},
627
+ "source": [
628
+ "## Bias and limitations"
629
+ ]
630
+ },
631
+ {
632
+ "cell_type": "code",
633
+ "execution_count": 13,
634
+ "metadata": {},
635
+ "outputs": [
636
+ {
637
+ "name": "stderr",
638
+ "output_type": "stream",
639
+ "text": [
640
+ "BertForMaskedLM has generative capabilities, as `prepare_inputs_for_generation` is explicitly overwritten. However, it doesn't directly inherit from `GenerationMixin`. From πŸ‘‰v4.50πŸ‘ˆ onwards, `PreTrainedModel` will NOT inherit from `GenerationMixin`, and this model will lose the ability to call `generate` and other related functions.\n",
641
+ " - If you're using `trust_remote_code=True`, you can get rid of this warning by loading the model with an auto class. See https://huggingface.co/docs/transformers/en/model_doc/auto#auto-classes\n",
642
+ " - If you are the owner of the model architecture code, please modify your model class such that it inherits from `GenerationMixin` (after `PreTrainedModel`, otherwise you'll get an exception).\n",
643
+ " - If you are not the owner of the model architecture class, please contact the model code owner to update it.\n",
644
+ "Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']\n",
645
+ "- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
646
+ "- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n"
647
+ ]
648
+ },
649
+ {
650
+ "name": "stdout",
651
+ "output_type": "stream",
652
+ "text": [
653
+ "['carpenter', 'lawyer', 'farmer', 'businessman', 'doctor']\n",
654
+ "['nurse', 'maid', 'teacher', 'waitress', 'prostitute']\n"
655
+ ]
656
+ }
657
+ ],
658
+ "source": [
659
+ "from transformers import pipeline\n",
660
+ "\n",
661
+ "unmasker = pipeline(\"fill-mask\", model=\"bert-base-uncased\")\n",
662
+ "result = unmasker(\"This man works as a [MASK].\")\n",
663
+ "print([r[\"token_str\"] for r in result])\n",
664
+ "\n",
665
+ "result = unmasker(\"This woman works as a [MASK].\")\n",
666
+ "print([r[\"token_str\"] for r in result])"
667
+ ]
668
+ }
669
+ ],
670
+ "metadata": {
671
+ "kernelspec": {
672
+ "display_name": "huggingface-nlp",
673
+ "language": "python",
674
+ "name": "python3"
675
+ },
676
+ "language_info": {
677
+ "codemirror_mode": {
678
+ "name": "ipython",
679
+ "version": 3
680
+ },
681
+ "file_extension": ".py",
682
+ "mimetype": "text/x-python",
683
+ "name": "python",
684
+ "nbconvert_exporter": "python",
685
+ "pygments_lexer": "ipython3",
686
+ "version": "3.10.16"
687
+ }
688
+ },
689
+ "nbformat": 4,
690
+ "nbformat_minor": 2
691
+ }
pages/1_🧠_Sentiment Analysis.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import numpy as np
3
+ import streamlit as st
4
+ from torch.nn import Softmax
5
+ import plotly.graph_objects as go
6
+ from transformers import AutoConfig, AutoTokenizer
7
+ from transformers import AutoModelForSequenceClassification
8
+
9
+
10
# Streamlit page: classify the sentiment of a user-supplied sentence with a
# RoBERTa sequence classifier and plot the class probabilities as a bar chart.
st.set_page_config(
    page_title="Sentiment Analysis",
    page_icon="🧠")

st.write("# Sentiment Analysis")

MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"

# Bars are always drawn in this order, each with its own colour. The names are
# matched case-insensitively against the labels in the model config.
SENTIMENT_BARS = [("Positive", "green"), ("Neutral", "blue"), ("Negative", "red")]

# Upper bound on the tokenized input length.
# NOTE(review): 512 is the usual limit for roberta-base checkpoints - confirm
# against the model card if MODEL is ever changed.
MAX_TOKENS = 512


@st.cache_resource
def load_sentiment_model(model_name):
    """Load tokenizer, config and classifier for *model_name*.

    Streamlit re-executes the whole script on every widget interaction;
    caching the loaded objects avoids re-reading the weights each time.

    Returns:
        A ``(tokenizer, config, model)`` tuple.
    """
    return (
        AutoTokenizer.from_pretrained(model_name),
        AutoConfig.from_pretrained(model_name),
        AutoModelForSequenceClassification.from_pretrained(model_name),
    )


tokenizer, config, model = load_sentiment_model(MODEL)

user_input = st.text_input('What\'s in your mind?')

if st.button("Perform Sentiment Analysis"):
    if not user_input:
        st.warning("Please enter some text!")
    else:
        try:
            st.write("## Sentiment Plot")
            # Truncate so that very long inputs do not overflow the model.
            encoded_input = tokenizer(
                user_input,
                return_tensors='pt',
                truncation=True,
                max_length=MAX_TOKENS,
            )
            # Inference only: no gradients are needed.
            with torch.no_grad():
                logits = model(**encoded_input)[0][0]
            scores = torch.softmax(logits, dim=0).tolist()

            # Look probabilities up by label name instead of relying on a
            # sort order that happens to line up with the axis labels.
            by_label = {
                config.id2label[class_id].lower(): round(score, 4)
                for class_id, score in enumerate(scores)
            }
            probabilities = [by_label[name.lower()] for name, _ in SENTIMENT_BARS]

            # Create the bar chart
            fig = go.Figure(data=[
                go.Bar(
                    x=[name for name, _ in SENTIMENT_BARS],
                    y=probabilities,
                    marker_color=[colour for _, colour in SENTIMENT_BARS],
                    text=probabilities,  # Show values on the bars
                    textposition='auto'
                )
            ])

            # Customize layout
            fig.update_layout(
                xaxis_title="Sentiment Categories",
                yaxis_title="Probability",
                template="plotly_white"
            )

            # Show the figure
            st.plotly_chart(fig, use_container_width=True)
        except Exception as e:
            # Top-level UI boundary: report the failure instead of crashing the page.
            st.error("An error occurred: " + str(e))
pages/2_📝_Fill Mask.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import streamlit as st
3
+ from transformers import pipeline
4
+
5
# Streamlit page: predict the word hidden behind a single `[MASK]` token.
st.set_page_config(
    page_title="Fill Mask",
    page_icon="📝")

st.write("# Fill Mask")

MASK_TOKEN = "[MASK]"


@st.cache_resource
def load_unmasker():
    """Build the fill-mask pipeline once and reuse it across script reruns."""
    return pipeline('fill-mask', model='bert-base-uncased')


unmasker = load_unmasker()

st.write("Enter a sentence with a masked word using `[MASK]`.")
user_input = st.text_input("Input your sentence:", "The capital of France is [MASK].")

num_responses = st.slider("Select the number of predictions:", min_value=1, max_value=20, value=5)

if st.button("Generate Predictions"):
    mask_count = user_input.count(MASK_TOKEN)
    if mask_count == 0:
        st.error("Please include '[MASK]' in your input sentence.")
    elif mask_count > 1:
        # The rendering below substitutes exactly one token, so reject inputs
        # with several masks instead of failing while unpacking the split.
        st.error("Please include only one '[MASK]' in your input sentence.")
    else:
        try:
            st.write("### Predictions:")
            # The sentence around the mask is the same for every prediction.
            text_before, text_after = user_input.split(MASK_TOKEN)
            predictions = unmasker(user_input, top_k=num_responses)
            for prediction in predictions:
                # Back-ticks make Streamlit render the predicted token as code.
                st.write(f"{text_before}`{prediction['token_str']}`{text_after}")
        except Exception as e:
            # Top-level UI boundary: report the failure instead of crashing the page.
            st.error(f"An error occurred: {e}")
pages/3_🚀_Zero Shot Classification.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import streamlit as st
3
+ import plotly.graph_objects as go
4
+ from transformers import pipeline
5
+
6
# Streamlit page: score a text against user-provided candidate labels with a
# zero-shot classifier and plot the score of each label.
st.set_page_config(
    page_title="Zero Shot Classification",
    page_icon="🚀")

# App Title
st.title("Zero-Shot Text Classification")


@st.cache_resource
def load_zero_shot():
    """Build the zero-shot pipeline once and reuse it across script reruns."""
    return pipeline("zero-shot-classification", model="facebook/bart-large-mnli")


# Initialize the zero-shot classification pipeline
zero_shot = load_zero_shot()

# Palette the bar colours are drawn from.
colors = ['rgba(24, 203, 162, 1)', 'rgba(34, 180, 20, 1)', 'rgba(231, 110, 212, 1)', 'rgba(191, 206, 164, 1)', 'rgba(100, 233, 42, 1)',
          'rgba(185, 222, 92, 1)', 'rgba(27, 157, 138, 1)', 'rgba(212, 207, 155, 1)', 'rgba(172, 202, 164, 1)', 'rgba(47, 65, 177, 1)',
          'rgba(26, 44, 233, 1)', 'rgba(65, 242, 9, 1)', 'rgba(171, 50, 253, 1)', 'rgba(125, 201, 227, 1)', 'rgba(135, 196, 15, 1)',
          'rgba(114, 106, 242, 1)', 'rgba(176, 50, 34, 1)', 'rgba(100, 159, 247, 1)', 'rgba(246, 103, 72, 1)', 'rgba(180, 180, 5, 1)',
          'rgba(64, 29, 164, 1)', 'rgba(65, 192, 5, 1)', 'rgba(149, 97, 155, 1)', 'rgba(210, 2, 107, 1)', 'rgba(70, 203, 162, 1)',
          'rgba(68, 74, 64, 1)', 'rgba(164, 42, 173, 1)', 'rgba(220, 37, 239, 1)', 'rgba(76, 89, 84, 1)', 'rgba(29, 190, 84, 1)',
          'rgba(180, 35, 240, 1)', 'rgba(222, 72, 217, 1)', 'rgba(203, 80, 243, 1)', 'rgba(121, 164, 68, 1)', 'rgba(107, 218, 79, 1)',
          'rgba(152, 225, 65, 1)', 'rgba(57, 170, 43, 1)', 'rgba(77, 131, 61, 1)', 'rgba(145, 101, 161, 1)', 'rgba(115, 77, 3, 1)',
          'rgba(29, 159, 63, 1)', 'rgba(71, 105, 200, 1)', 'rgba(98, 78, 55, 1)', 'rgba(242, 159, 60, 1)', 'rgba(175, 67, 54, 1)',
          'rgba(120, 246, 81, 1)', 'rgba(216, 132, 219, 1)', 'rgba(82, 77, 251, 1)', 'rgba(213, 29, 120, 1)', 'rgba(252, 90, 31, 1)',
          'rgba(194, 181, 168, 1)', 'rgba(246, 60, 189, 1)', 'rgba(22, 50, 26, 1)', 'rgba(54, 11, 134, 1)', 'rgba(27, 103, 59, 1)',
          'rgba(234, 96, 187, 1)', 'rgba(167, 157, 215, 1)', 'rgba(104, 1, 252, 1)', 'rgba(76, 121, 131, 1)', 'rgba(65, 250, 218, 1)',
          'rgba(219, 59, 127, 1)', 'rgba(18, 242, 194, 1)', 'rgba(14, 132, 131, 1)', 'rgba(82, 68, 61, 1)', 'rgba(109, 229, 43, 1)',
          'rgba(202, 96, 66, 1)', 'rgba(216, 112, 64, 1)', 'rgba(101, 215, 114, 1)', 'rgba(85, 234, 109, 1)', 'rgba(17, 43, 113, 1)',
          'rgba(104, 132, 5, 1)', 'rgba(23, 177, 214, 1)', 'rgba(112, 131, 160, 1)', 'rgba(142, 43, 188, 1)', 'rgba(189, 61, 176, 1)',
          'rgba(196, 198, 61, 1)', 'rgba(253, 176, 165, 1)', 'rgba(113, 143, 126, 1)', 'rgba(122, 156, 220, 1)', 'rgba(221, 11, 29, 1)',
          'rgba(233, 200, 5, 1)', 'rgba(232, 176, 217, 1)', 'rgba(199, 6, 130, 1)', 'rgba(140, 118, 154, 1)', 'rgba(177, 46, 36, 1)',
          'rgba(244, 81, 66, 1)', 'rgba(94, 99, 24, 1)', 'rgba(159, 90, 50, 1)', 'rgba(67, 144, 236, 1)', 'rgba(78, 202, 143, 1)',
          'rgba(13, 116, 114, 1)', 'rgba(139, 194, 124, 1)', 'rgba(174, 63, 214, 1)', 'rgba(84, 114, 130, 1)', 'rgba(143, 208, 199, 1)',
          'rgba(27, 60, 225, 1)', 'rgba(69, 228, 28, 1)', 'rgba(167, 157, 10, 1)', 'rgba(61, 185, 55, 1)', 'rgba(143, 52, 233, 1)']

colors = np.array(colors)

# Input Section
st.write("Enter a sentence or text to classify and provide possible labels.")

user_input = st.text_input("Input your text:", "Streamlit is an amazing tool for building web apps.")
labels_input = st.text_input("Enter possible labels (comma-separated):", "technology, finance, health")

# Process and Display Results
if st.button("Classify Text"):
    # Split on commas, drop blanks and normalise capitalisation for display.
    labels = [label.strip().title() for label in labels_input.split(",") if label.strip()]
    if not user_input or not labels:
        st.error("Please provide both text and at least one label.")
    else:
        try:
            st.write("## Classification Results:")
            result = zero_shot(user_input, labels)

            # The pipeline returns labels re-ordered by descending score, so
            # the axis must use result['labels'] (not the input order) to keep
            # each bar next to its own score.
            ranked_labels = list(result['labels'])
            probabilities = [round(score, 2) for score in result['scores']]

            # Draw distinct colours whenever the palette is large enough.
            bar_colors = np.random.choice(
                colors,
                len(ranked_labels),
                replace=len(ranked_labels) > len(colors),
            ).tolist()

            fig = go.Figure(data=[
                go.Bar(
                    x=ranked_labels,
                    y=probabilities,
                    marker_color=bar_colors,  # Colors for each category
                    text=probabilities,  # Show values on the bars
                    textposition='auto'
                )
            ])

            # Customize layout
            fig.update_layout(
                xaxis_title="Label",
                yaxis_title="Probability",
                template="seaborn",
            )

            # Show the figure
            st.plotly_chart(fig, use_container_width=True, theme=None)

        except Exception as e:
            # Top-level UI boundary: report the failure instead of crashing the page.
            st.error(f"An error occurred: {e}")
84
+
pages/4_❓_Question Answer.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import pipeline
3
+
4
+
5
st.set_page_config(
    page_title="Question Answer",
    page_icon="❓")

# App Name
st.write("# Question Answer")


# Model
@st.cache_resource
def _load_qa_model():
    """Build the question-answering pipeline once and reuse it across reruns.

    Streamlit re-executes this script on every widget interaction; caching
    the pipeline avoids reloading the model weights each time.
    """
    return pipeline("question-answering", model="distilbert/distilbert-base-cased-distilled-squad")


qa_model = _load_qa_model()


st.write("Provide context and question.")

question = st.text_input("Enter your question:")
context = st.text_input("Enter the context:")

if st.button("Generate Answer"):
    # Both fields are required: warn when EITHER one is missing or blank.
    # (The previous `not (question or context)` only fired when both were empty.)
    if not (question.strip() and context.strip()):
        st.warning("Provide both question and context.")
    else:
        try:
            st.write("## Answer")
            ans = qa_model(question=question, context=context)
            st.write(ans['answer'])
        except Exception as e:
            # Page-level boundary: surface any model failure to the user.
            st.error(f"An error occurred: {e}")
31
+
pages/5_✍️_Text_Summarization.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import pipeline
3
+
4
+
5
st.set_page_config(
    # Browser-tab title for this page (was a copy-paste of the Question Answer page).
    page_title="Text Summarization",
    page_icon="✍️")

st.write("# Text Summarization")


# Model
@st.cache_resource
def _load_summarizer():
    """Build the summarization pipeline once and reuse it across reruns.

    Streamlit re-executes this script on every widget interaction; caching
    the pipeline avoids reloading the model weights each time.
    """
    return pipeline("summarization", model="facebook/bart-large-cnn")


summarizer = _load_summarizer()

user_input = st.text_area("Enter text to summarize")

if st.button("Generate Predictions"):
    # Nothing to summarize: ask for input instead of sending blank text to the model.
    if not user_input.strip():
        st.warning("Provide some text to summarize.")
    else:
        try:
            st.write("## Summary:")
            generated_summary = summarizer(user_input)
            st.write(generated_summary[0]["summary_text"])
        except Exception as e:
            # Page-level boundary: surface any model failure to the user.
            st.error(f"An error occurred: {e}")
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ transformers
2
+ streamlit
3
+ torch
4
+ plotly
🏠_Home.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import streamlit as st
3
+ from transformers import pipeline
4
+
5
# Landing page: configures the browser tab, shows a sidebar hint, and renders
# the welcome text that lists the available demos.
st.set_page_config(
    page_title="Transformers in Action",
    page_icon="🏠",
)

st.sidebar.success("Select a Demo above.")

# The markdown body is kept flush-left inside the string so that no line is
# indented far enough to be rendered as a code block.
st.markdown(
    """
# **Transformers in Action**
**Welcome to the Future of AI!**

Discover the incredible power of modern **Transformer models** and how they can revolutionize the way you approach everyday tasks. Whether you want to analyze sentiment, fill in missing text, or classify data with zero-shot precision, this interactive app provides a seamless playground to explore Hugging Face models in action.

### **What Can You Do Here?**
🧠 **Sentiment Analysis** - Understand emotions in text, from happiness to frustration.
📝 **Fill Mask** - Predict missing words with precision using intelligent language models.
🚀 **Zero-Shot Classification** - Classify text into categories without pre-training.
❓ **Question Answering** - Get instant answers to your queries with context-aware AI.
✍️ **Text Summarization** - Condense lengthy content into concise summaries.

**Ready to experience the magic of AI?**
Pick a task from the left, explore, and bring your ideas to life!

"""
)