File size: 63,341 Bytes
f176e31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import json\n",
    "import pandas as pd\n",
    "import traceback"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.chat_models import ChatOpenAI"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from dotenv import load_dotenv\n",
    "\n",
    "load_dotenv() "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "KEY=os.getenv(\"OPENAI_API_KEY\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "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\n"
     ]
    }
   ],
   "source": [
    "print(KEY)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "llm=ChatOpenAI(openai_api_key=KEY,model_name=\"gpt-3.5-turbo\", temperature=0.5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x00000209CF21F920>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x00000209CF240FB0>, temperature=0.5, openai_api_key='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', openai_proxy='')"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "llm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.chat_models import ChatOpenAI\n",
    "from langchain.prompts import PromptTemplate\n",
    "from langchain.chains import LLMChain\n",
    "from langchain.chains import SequentialChain\n",
    "from langchain.callbacks import get_openai_callback\n",
    "import PyPDF2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "RESPONSE_JSON = {\n",
    "    \"1\": {\n",
    "        \"mcq\": \"multiple choice question\",\n",
    "        \"options\": {\n",
    "            \"a\": \"choice here\",\n",
    "            \"b\": \"choice here\",\n",
    "            \"c\": \"choice here\",\n",
    "            \"d\": \"choice here\",\n",
    "        },\n",
    "        \"correct\": \"correct answer\",\n",
    "    },\n",
    "    \"2\": {\n",
    "        \"mcq\": \"multiple choice question\",\n",
    "        \"options\": {\n",
    "            \"a\": \"choice here\",\n",
    "            \"b\": \"choice here\",\n",
    "            \"c\": \"choice here\",\n",
    "            \"d\": \"choice here\",\n",
    "        },\n",
    "        \"correct\": \"correct answer\",\n",
    "    },\n",
    "    \"3\": {\n",
    "        \"mcq\": \"multiple choice question\",\n",
    "        \"options\": {\n",
    "            \"a\": \"choice here\",\n",
    "            \"b\": \"choice here\",\n",
    "            \"c\": \"choice here\",\n",
    "            \"d\": \"choice here\",\n",
    "        },\n",
    "        \"correct\": \"correct answer\",\n",
    "    },\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "TEMPLATE=\"\"\"\n",
    "Text:{text}\n",
    "You are an expert MCQ maker. Given the above text, it is your job to \\\n",
    "create a quiz  of {number} multiple choice questions for {subject} students in {tone} tone. \n",
    "Make sure the questions are not repeated and check all the questions to be conforming the text as well.\n",
    "Make sure to format your response like  RESPONSE_JSON below  and use it as a guide. \\\n",
    "Ensure to make {number} MCQs\n",
    "### RESPONSE_JSON\n",
    "{response_json}\n",
    "\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "quiz_generation_prompt = PromptTemplate(\n",
    "    input_variables=[\"text\", \"number\", \"subject\", \"tone\", \"response_json\"],\n",
    "    template=TEMPLATE\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "quiz_chain=LLMChain(llm=llm, prompt=quiz_generation_prompt, output_key=\"quiz\", verbose=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "TEMPLATE2=\"\"\"\n",
    "You are an expert english grammarian and writer. Given a Multiple Choice Quiz for {subject} students.\\\n",
    "You need to evaluate the complexity of the question and give a complete analysis of the quiz. Only use at max 50 words for complexity analysis. \n",
    "if the quiz is not at per with the cognitive and analytical abilities of the students,\\\n",
    "update the quiz questions which needs to be changed and change the tone such that it perfectly fits the student abilities\n",
    "Quiz_MCQs:\n",
    "{quiz}\n",
    "\n",
    "Check from an expert English Writer of the above quiz:\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "quiz_evaluation_prompt=PromptTemplate(input_variables=[\"subject\", \"quiz\"], template=TEMPLATE2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "review_chain=LLMChain(llm=llm, prompt=quiz_evaluation_prompt, output_key=\"review\", verbose=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "generate_evaluate_chain=SequentialChain(chains=[quiz_chain, review_chain], input_variables=[\"text\", \"number\", \"subject\", \"tone\", \"response_json\"],\n",
    "                                        output_variables=[\"quiz\", \"review\"], verbose=True,)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "file_path=r\"C:\\Users\\student\\Downloads\\mcqgenerator\\data.txt\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(file_path, 'r') as file:\n",
    "    TEXT = file.read()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The term machine learning was coined in 1959 by Arthur Samuel, an IBM employee and pioneer in the field of computer gaming and artificial intelligence.[9][10] The synonym self-teaching computers was also used in this time period.[11][12]\n",
      "\n",
      "Although the earliest machine learning model was introduced in the 1950s when Arthur Samuel invented a program that calculated the winning chance in checkers for each side, the history of machine learning roots back to decades of human desire and effort to study human cognitive processes.[13] In 1949, Canadian psychologist Donald Hebb published the book The Organization of Behavior, in which he introduced a theoretical neural structure formed by certain interactions among nerve cells.[14] Hebb's model of neurons interacting with one another set a groundwork for how AIs and machine learning algorithms work under nodes, or artificial neurons used by computers to communicate data.[13] Other researchers who have studied human cognitive systems contributed to the modern machine learning technologies as well, including logician Walter Pitts and Warren McCulloch, who proposed the early mathematical models of neural networks to come up with algorithms that mirror human thought processes.[13]\n",
      "\n",
      "By the early 1960s an experimental \"learning machine\" with punched tape memory, called Cybertron, had been developed by Raytheon Company to analyze sonar signals, electrocardiograms, and speech patterns using rudimentary reinforcement learning. It was repetitively \"trained\" by a human operator/teacher to recognize patterns and equipped with a \"goof\" button to cause it to re-evaluate incorrect decisions.[15] A representative book on research into machine learning during the 1960s was Nilsson's book on Learning Machines, dealing mostly with machine learning for pattern classification.[16] Interest related to pattern recognition continued into the 1970s, as described by Duda and Hart in 1973.[17] In 1981 a report was given on using teaching strategies so that an artificial neural network learns to recognize 40 characters (26 letters, 10 digits, and 4 special symbols) from a computer terminal.[18]\n",
      "\n",
      "Tom M. Mitchell provided a widely quoted, more formal definition of the algorithms studied in the machine learning field: \"A computer program is said to learn from experience E with respect to some class of tasks T and performance measure P if its performance at tasks in T, as measured by P, improves with experience E.\"[19] This definition of the tasks in which machine learning is concerned offers a fundamentally operational definition rather than defining the field in cognitive terms. This follows Alan Turing's proposal in his paper \"Computing Machinery and Intelligence\", in which the question \"Can machines think?\" is replaced with the question \"Can machines do what we (as thinking entities) can do?\".[20]\n",
      "\n",
      "Modern-day machine learning has two objectives. One is to classify data based on models which have been developed; the other purpose is to make predictions for future outcomes based on these models. A hypothetical algorithm specific to classifying data may use computer vision of moles coupled with supervised learning in order to train it to classify the cancerous moles. A machine learning algorithm for stock trading may inform the trader of future potential predictions.[21]\n",
      "\n",
      "Relationships to other fields\n",
      "Artificial intelligence\n",
      "\n",
      "Machine learning as subfield of AI[22]\n",
      "As a scientific endeavor, machine learning grew out of the quest for artificial intelligence (AI). In the early days of AI as an academic discipline, some researchers were interested in having machines learn from data. They attempted to approach the problem with various symbolic methods, as well as what were then termed \"neural networks\"; these were mostly perceptrons and other models that were later found to be reinventions of the generalized linear models of statistics.[23] Probabilistic reasoning was also employed, especially in automated medical diagnosis.[24]: 488 \n",
      "\n",
      "However, an increasing emphasis on the logical, knowledge-based approach caused a rift between AI and machine learning. Probabilistic systems were plagued by theoretical and practical problems of data acquisition and representation.[24]: 488  By 1980, expert systems had come to dominate AI, and statistics was out of favor.[25] Work on symbolic/knowledge-based learning did continue within AI, leading to inductive logic programming(ILP), but the more statistical line of research was now outside the field of AI proper, in pattern recognition and information retrieval.[24]: 708–710, 755  Neural networks research had been abandoned by AI and computer science around the same time. This line, too, was continued outside the AI/CS field, as \"connectionism\", by researchers from other disciplines including Hopfield, Rumelhart, and Hinton. Their main success came in the mid-1980s with the reinvention of backpropagation.[24]: 25 \n",
      "\n",
      "Machine learning (ML), reorganized and recognized as its own field, started to flourish in the 1990s. The field changed its goal from achieving artificial intelligence to tackling solvable problems of a practical nature. It shifted focus away from the symbolic approaches it had inherited from AI, and toward methods and models borrowed from statistics, fuzzy logic, and probability theory.[25]\n",
      "\n",
      "Data compression\n",
      "This section is an excerpt from Data compression § Machine learning.[edit]\n",
      "There is a close connection between machine learning and compression. A system that predicts the posterior probabilities of a sequence given its entire history can be used for optimal data compression (by using arithmetic coding on the output distribution). Conversely, an optimal compressor can be used for prediction (by finding the symbol that compresses best, given the previous history). This equivalence has been used as a justification for using data compression as a benchmark for \"general intelligence\".[26][27][28]\n",
      "\n",
      "An alternative view can show compression algorithms implicitly map strings into implicit feature space vectors, and compression-based similarity measures compute similarity within these feature spaces. For each compressor C(.) we define an associated vector space ℵ, such that C(.) maps an input string x, corresponding to the vector norm ||~x||. An exhaustive examination of the feature spaces underlying all compression algorithms is precluded by space; instead, feature vectors chooses to examine three representative lossless compression methods, LZW, LZ77, and PPM.[29]\n",
      "\n",
      "According to AIXI theory, a connection more directly explained in Hutter Prize, the best possible compression of x is the smallest possible software that generates x. For example, in that model, a zip file's compressed size includes both the zip file and the unzipping software, since you can not unzip it without both, but there may be an even smaller combined form.\n",
      "\n",
      "Examples of AI-powered audio/video compression software include VP9, NVIDIA Maxine, AIVC, AccMPEG.[30] Examples of software that can perform AI-powered image compression include OpenCV, TensorFlow, MATLAB's Image Processing Toolbox (IPT) and High-Fidelity Generative Image Compression.[31]\n",
      "\n",
      "In unsupervised machine learning, k-means clustering can be utilized to compress data by grouping similar data points into clusters. This technique simplifies handling extensive datasets that lack predefined labels and finds widespread use in fields such as image compression.[32] Large language models are also capable of lossless data compression.[33]\n",
      "Data mining\n",
      "Machine learning and data mining often employ the same methods and overlap significantly, but while machine learning focuses on prediction, based on known properties learned from the training data, data mining focuses on the discovery of (previously) unknown properties in the data (this is the analysis step of knowledge discovery in databases). Data mining uses many machine learning methods, but with different goals; on the other hand, machine learning also employs data mining methods as \"unsupervised learning\" or as a preprocessing step to improve learner accuracy. Much of the confusion between these two research communities (which do often have separate conferences and separate journals, ECML PKDD being a major exception) comes from the basic assumptions they work with: in machine learning, performance is usually evaluated with respect to the ability to reproduce known knowledge, while in knowledge discovery and data mining (KDD) the key task is the discovery of previously unknown knowledge. Evaluated with respect to known knowledge, an uninformed (unsupervised) method will easily be outperformed by other supervised methods, while in a typical KDD task, supervised methods cannot be used due to the unavailability of training data.\n",
      "\n",
      "Machine learning also has intimate ties to optimization: many learning problems are formulated as minimization of some loss function on a training set of examples. Loss functions express the discrepancy between the predictions of the model being trained and the actual problem instances (for example, in classification, one wants to assign a label to instances, and models are trained to correctly predict the pre-assigned labels of a set of examples).[34]\n",
      "\n",
      "Generalization\n",
      "The difference between optimization and machine learning arises from the goal of generalization: while optimization algorithms can minimize the loss on a training set, machine learning is concerned with minimizing the loss on unseen samples. Characterizing the generalization of various learning algorithms is an active topic of current research, especially for deep learning algorithms.\n",
      "\n",
      "Statistics\n",
      "Machine learning and statistics are closely related fields in terms of methods, but distinct in their principal goal: statistics draws population inferences from a sample, while machine learning finds generalizable predictive patterns.[35] According to Michael I. Jordan, the ideas of machine learning, from methodological principles to theoretical tools, have had a long pre-history in statistics.[36] He also suggested the term data science as a placeholder to call the overall field.[36]\n",
      "\n",
      "Conventional statistical analyses require the a priori selection of a model most suitable for the study data set. In addition, only significant or theoretically relevant variables based on previous experience are included for analysis. In contrast, machine learning is not built on a pre-structured model; rather, the data shape the model by detecting underlying patterns. The more variables (input) used to train the model, the more accurate the ultimate model will be.[37]\n",
      "\n",
      "Leo Breiman distinguished two statistical modeling paradigms: data model and algorithmic model,[38] wherein \"algorithmic model\" means more or less the machine learning algorithms like Random Forest.\n",
      "\n",
      "Some statisticians have adopted methods from machine learning, leading to a combined field that they call statistical learning.[39]\n",
      "\n",
      "Statistical physics\n",
      "Analytical and computational techniques derived from deep-rooted physics of disordered systems can be extended to large-scale problems, including machine learning, e.g., to analyze the weight space of deep neural networks.[40] Statistical physics is thus finding applications in the area of medical diagnostics.[41]\n",
      "\n",
      "Theory\n",
      "Main articles: Computational learning theory and Statistical learning theory\n",
      "A core objective of a learner is to generalize from its experience.[6][42] Generalization in this context is the ability of a learning machine to perform accurately on new, unseen examples/tasks after having experienced a learning data set. The training examples come from some generally unknown probability distribution (considered representative of the space of occurrences) and the learner has to build a general model about this space that enables it to produce sufficiently accurate predictions in new cases.\n",
      "\n",
      "The computational analysis of machine learning algorithms and their performance is a branch of theoretical computer science known as computational learning theory via the Probably Approximately Correct Learning (PAC) model. Because training sets are finite and the future is uncertain, learning theory usually does not yield guarantees of the performance of algorithms. Instead, probabilistic bounds on the performance are quite common. The bias–variance decomposition is one way to quantify generalization error.\n",
      "\n",
      "For the best performance in the context of generalization, the complexity of the hypothesis should match the complexity of the function underlying the data. If the hypothesis is less complex than the function, then the model has under fitted the data. If the complexity of the model is increased in response, then the training error decreases. But if the hypothesis is too complex, then the model is subject to overfitting and generalization will be poorer.[43]\n",
      "\n",
      "In addition to performance bounds, learning theorists study the time complexity and feasibility of learning. In computational learning theory, a computation is considered feasible if it can be done in polynomial time. There are two kinds of time complexity results: Positive results show that a certain class of functions can be learned in polynomial time. Negative results show that certain classes cannot be learned in polynomial time.\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(TEXT)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'{\"1\": {\"mcq\": \"multiple choice question\", \"options\": {\"a\": \"choice here\", \"b\": \"choice here\", \"c\": \"choice here\", \"d\": \"choice here\"}, \"correct\": \"correct answer\"}, \"2\": {\"mcq\": \"multiple choice question\", \"options\": {\"a\": \"choice here\", \"b\": \"choice here\", \"c\": \"choice here\", \"d\": \"choice here\"}, \"correct\": \"correct answer\"}, \"3\": {\"mcq\": \"multiple choice question\", \"options\": {\"a\": \"choice here\", \"b\": \"choice here\", \"c\": \"choice here\", \"d\": \"choice here\"}, \"correct\": \"correct answer\"}}'"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Serialize the Python dictionary into a JSON-formatted string\n",
    "json.dumps(RESPONSE_JSON)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "NUMBER=5 \n",
    "SUBJECT=\"Machine learning\"\n",
    "TONE=\"simple\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\student\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python312\\site-packages\\langchain_core\\_api\\deprecation.py:117: LangChainDeprecationWarning: The function `__call__` was deprecated in LangChain 0.1.0 and will be removed in 0.2.0. Use invoke instead.\n",
      "  warn_deprecated(\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\u001b[1m> Entering new SequentialChain chain...\u001b[0m\n",
      "\n",
      "\n",
      "\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
      "Prompt after formatting:\n",
      "\u001b[32;1m\u001b[1;3m\n",
      "Text:The term machine learning was coined in 1959 by Arthur Samuel, an IBM employee and pioneer in the field of computer gaming and artificial intelligence.[9][10] The synonym self-teaching computers was also used in this time period.[11][12]\n",
      "\n",
      "Although the earliest machine learning model was introduced in the 1950s when Arthur Samuel invented a program that calculated the winning chance in checkers for each side, the history of machine learning roots back to decades of human desire and effort to study human cognitive processes.[13] In 1949, Canadian psychologist Donald Hebb published the book The Organization of Behavior, in which he introduced a theoretical neural structure formed by certain interactions among nerve cells.[14] Hebb's model of neurons interacting with one another set a groundwork for how AIs and machine learning algorithms work under nodes, or artificial neurons used by computers to communicate data.[13] Other researchers who have studied human cognitive systems contributed to the modern machine learning technologies as well, including logician Walter Pitts and Warren McCulloch, who proposed the early mathematical models of neural networks to come up with algorithms that mirror human thought processes.[13]\n",
      "\n",
      "By the early 1960s an experimental \"learning machine\" with punched tape memory, called Cybertron, had been developed by Raytheon Company to analyze sonar signals, electrocardiograms, and speech patterns using rudimentary reinforcement learning. It was repetitively \"trained\" by a human operator/teacher to recognize patterns and equipped with a \"goof\" button to cause it to re-evaluate incorrect decisions.[15] A representative book on research into machine learning during the 1960s was Nilsson's book on Learning Machines, dealing mostly with machine learning for pattern classification.[16] Interest related to pattern recognition continued into the 1970s, as described by Duda and Hart in 1973.[17] In 1981 a report was given on using teaching strategies so that an artificial neural network learns to recognize 40 characters (26 letters, 10 digits, and 4 special symbols) from a computer terminal.[18]\n",
      "\n",
      "Tom M. Mitchell provided a widely quoted, more formal definition of the algorithms studied in the machine learning field: \"A computer program is said to learn from experience E with respect to some class of tasks T and performance measure P if its performance at tasks in T, as measured by P, improves with experience E.\"[19] This definition of the tasks in which machine learning is concerned offers a fundamentally operational definition rather than defining the field in cognitive terms. This follows Alan Turing's proposal in his paper \"Computing Machinery and Intelligence\", in which the question \"Can machines think?\" is replaced with the question \"Can machines do what we (as thinking entities) can do?\".[20]\n",
      "\n",
      "Modern-day machine learning has two objectives. One is to classify data based on models which have been developed; the other purpose is to make predictions for future outcomes based on these models. A hypothetical algorithm specific to classifying data may use computer vision of moles coupled with supervised learning in order to train it to classify the cancerous moles. A machine learning algorithm for stock trading may inform the trader of future potential predictions.[21]\n",
      "\n",
      "Relationships to other fields\n",
      "Artificial intelligence\n",
      "\n",
      "Machine learning as subfield of AI[22]\n",
      "As a scientific endeavor, machine learning grew out of the quest for artificial intelligence (AI). In the early days of AI as an academic discipline, some researchers were interested in having machines learn from data. They attempted to approach the problem with various symbolic methods, as well as what were then termed \"neural networks\"; these were mostly perceptrons and other models that were later found to be reinventions of the generalized linear models of statistics.[23] Probabilistic reasoning was also employed, especially in automated medical diagnosis.[24]: 488 \n",
      "\n",
      "However, an increasing emphasis on the logical, knowledge-based approach caused a rift between AI and machine learning. Probabilistic systems were plagued by theoretical and practical problems of data acquisition and representation.[24]: 488  By 1980, expert systems had come to dominate AI, and statistics was out of favor.[25] Work on symbolic/knowledge-based learning did continue within AI, leading to inductive logic programming(ILP), but the more statistical line of research was now outside the field of AI proper, in pattern recognition and information retrieval.[24]: 708–710, 755  Neural networks research had been abandoned by AI and computer science around the same time. This line, too, was continued outside the AI/CS field, as \"connectionism\", by researchers from other disciplines including Hopfield, Rumelhart, and Hinton. Their main success came in the mid-1980s with the reinvention of backpropagation.[24]: 25 \n",
      "\n",
      "Machine learning (ML), reorganized and recognized as its own field, started to flourish in the 1990s. The field changed its goal from achieving artificial intelligence to tackling solvable problems of a practical nature. It shifted focus away from the symbolic approaches it had inherited from AI, and toward methods and models borrowed from statistics, fuzzy logic, and probability theory.[25]\n",
      "\n",
      "Data compression\n",
      "This section is an excerpt from Data compression § Machine learning.[edit]\n",
      "There is a close connection between machine learning and compression. A system that predicts the posterior probabilities of a sequence given its entire history can be used for optimal data compression (by using arithmetic coding on the output distribution). Conversely, an optimal compressor can be used for prediction (by finding the symbol that compresses best, given the previous history). This equivalence has been used as a justification for using data compression as a benchmark for \"general intelligence\".[26][27][28]\n",
      "\n",
      "An alternative view can show compression algorithms implicitly map strings into implicit feature space vectors, and compression-based similarity measures compute similarity within these feature spaces. For each compressor C(.) we define an associated vector space ℵ, such that C(.) maps an input string x, corresponding to the vector norm ||~x||. An exhaustive examination of the feature spaces underlying all compression algorithms is precluded by space; instead, feature vectors chooses to examine three representative lossless compression methods, LZW, LZ77, and PPM.[29]\n",
      "\n",
      "According to AIXI theory, a connection more directly explained in Hutter Prize, the best possible compression of x is the smallest possible software that generates x. For example, in that model, a zip file's compressed size includes both the zip file and the unzipping software, since you can not unzip it without both, but there may be an even smaller combined form.\n",
      "\n",
      "Examples of AI-powered audio/video compression software include VP9, NVIDIA Maxine, AIVC, AccMPEG.[30] Examples of software that can perform AI-powered image compression include OpenCV, TensorFlow, MATLAB's Image Processing Toolbox (IPT) and High-Fidelity Generative Image Compression.[31]\n",
      "\n",
      "In unsupervised machine learning, k-means clustering can be utilized to compress data by grouping similar data points into clusters. This technique simplifies handling extensive datasets that lack predefined labels and finds widespread use in fields such as image compression.[32] Large language models are also capable of lossless data compression.[33]\n",
      "Data mining\n",
      "Machine learning and data mining often employ the same methods and overlap significantly, but while machine learning focuses on prediction, based on known properties learned from the training data, data mining focuses on the discovery of (previously) unknown properties in the data (this is the analysis step of knowledge discovery in databases). Data mining uses many machine learning methods, but with different goals; on the other hand, machine learning also employs data mining methods as \"unsupervised learning\" or as a preprocessing step to improve learner accuracy. Much of the confusion between these two research communities (which do often have separate conferences and separate journals, ECML PKDD being a major exception) comes from the basic assumptions they work with: in machine learning, performance is usually evaluated with respect to the ability to reproduce known knowledge, while in knowledge discovery and data mining (KDD) the key task is the discovery of previously unknown knowledge. Evaluated with respect to known knowledge, an uninformed (unsupervised) method will easily be outperformed by other supervised methods, while in a typical KDD task, supervised methods cannot be used due to the unavailability of training data.\n",
      "\n",
      "Machine learning also has intimate ties to optimization: many learning problems are formulated as minimization of some loss function on a training set of examples. Loss functions express the discrepancy between the predictions of the model being trained and the actual problem instances (for example, in classification, one wants to assign a label to instances, and models are trained to correctly predict the pre-assigned labels of a set of examples).[34]\n",
      "\n",
      "Generalization\n",
      "The difference between optimization and machine learning arises from the goal of generalization: while optimization algorithms can minimize the loss on a training set, machine learning is concerned with minimizing the loss on unseen samples. Characterizing the generalization of various learning algorithms is an active topic of current research, especially for deep learning algorithms.\n",
      "\n",
      "Statistics\n",
      "Machine learning and statistics are closely related fields in terms of methods, but distinct in their principal goal: statistics draws population inferences from a sample, while machine learning finds generalizable predictive patterns.[35] According to Michael I. Jordan, the ideas of machine learning, from methodological principles to theoretical tools, have had a long pre-history in statistics.[36] He also suggested the term data science as a placeholder to call the overall field.[36]\n",
      "\n",
      "Conventional statistical analyses require the a priori selection of a model most suitable for the study data set. In addition, only significant or theoretically relevant variables based on previous experience are included for analysis. In contrast, machine learning is not built on a pre-structured model; rather, the data shape the model by detecting underlying patterns. The more variables (input) used to train the model, the more accurate the ultimate model will be.[37]\n",
      "\n",
      "Leo Breiman distinguished two statistical modeling paradigms: data model and algorithmic model,[38] wherein \"algorithmic model\" means more or less the machine learning algorithms like Random Forest.\n",
      "\n",
      "Some statisticians have adopted methods from machine learning, leading to a combined field that they call statistical learning.[39]\n",
      "\n",
      "Statistical physics\n",
      "Analytical and computational techniques derived from deep-rooted physics of disordered systems can be extended to large-scale problems, including machine learning, e.g., to analyze the weight space of deep neural networks.[40] Statistical physics is thus finding applications in the area of medical diagnostics.[41]\n",
      "\n",
      "Theory\n",
      "Main articles: Computational learning theory and Statistical learning theory\n",
      "A core objective of a learner is to generalize from its experience.[6][42] Generalization in this context is the ability of a learning machine to perform accurately on new, unseen examples/tasks after having experienced a learning data set. The training examples come from some generally unknown probability distribution (considered representative of the space of occurrences) and the learner has to build a general model about this space that enables it to produce sufficiently accurate predictions in new cases.\n",
      "\n",
      "The computational analysis of machine learning algorithms and their performance is a branch of theoretical computer science known as computational learning theory via the Probably Approximately Correct Learning (PAC) model. Because training sets are finite and the future is uncertain, learning theory usually does not yield guarantees of the performance of algorithms. Instead, probabilistic bounds on the performance are quite common. The bias–variance decomposition is one way to quantify generalization error.\n",
      "\n",
      "For the best performance in the context of generalization, the complexity of the hypothesis should match the complexity of the function underlying the data. If the hypothesis is less complex than the function, then the model has under fitted the data. If the complexity of the model is increased in response, then the training error decreases. But if the hypothesis is too complex, then the model is subject to overfitting and generalization will be poorer.[43]\n",
      "\n",
      "In addition to performance bounds, learning theorists study the time complexity and feasibility of learning. In computational learning theory, a computation is considered feasible if it can be done in polynomial time. There are two kinds of time complexity results: Positive results show that a certain class of functions can be learned in polynomial time. Negative results show that certain classes cannot be learned in polynomial time.\n",
      "\n",
      "\n",
      "You are an expert MCQ maker. Given the above text, it is your job to create a quiz  of 5 multiple choice questions for Machine learning students in simple tone. \n",
      "Make sure the questions are not repeated and check all the questions to be conforming the text as well.\n",
      "Make sure to format your response like  RESPONSE_JSON below  and use it as a guide. Ensure to make 5 MCQs\n",
      "### RESPONSE_JSON\n",
      "{\"1\": {\"mcq\": \"multiple choice question\", \"options\": {\"a\": \"choice here\", \"b\": \"choice here\", \"c\": \"choice here\", \"d\": \"choice here\"}, \"correct\": \"correct answer\"}, \"2\": {\"mcq\": \"multiple choice question\", \"options\": {\"a\": \"choice here\", \"b\": \"choice here\", \"c\": \"choice here\", \"d\": \"choice here\"}, \"correct\": \"correct answer\"}, \"3\": {\"mcq\": \"multiple choice question\", \"options\": {\"a\": \"choice here\", \"b\": \"choice here\", \"c\": \"choice here\", \"d\": \"choice here\"}, \"correct\": \"correct answer\"}}\n",
      "\n",
      "\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n",
      "\n",
      "\n",
      "\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
      "Prompt after formatting:\n",
      "\u001b[32;1m\u001b[1;3m\n",
      "You are an expert english grammarian and writer. Given a Multiple Choice Quiz for Machine learning students.You need to evaluate the complexity of the question and give a complete analysis of the quiz. Only use at max 50 words for complexity analysis. \n",
      "if the quiz is not at per with the cognitive and analytical abilities of the students,update the quiz questions which needs to be changed and change the tone such that it perfectly fits the student abilities\n",
      "Quiz_MCQs:\n",
      "{\n",
      "\"1\": {\n",
      "\"mcq\": \"What term was coined in 1959 by Arthur Samuel to describe the field of computer gaming and artificial intelligence?\",\n",
      "\"options\": {\n",
      "\"a\": \"Artificial Intelligence\",\n",
      "\"b\": \"Machine Learning\",\n",
      "\"c\": \"Data Mining\",\n",
      "\"d\": \"Neural Networks\"\n",
      "},\n",
      "\"correct\": \"b\"\n",
      "},\n",
      "\"2\": {\n",
      "\"mcq\": \"What was the name of the 'learning machine' with punched tape memory developed by Raytheon Company in the early 1960s?\",\n",
      "\"options\": {\n",
      "\"a\": \"Cyberdyne\",\n",
      "\"b\": \"Cybernet\",\n",
      "\"c\": \"Cybertron\",\n",
      "\"d\": \"Cyberlink\"\n",
      "},\n",
      "\"correct\": \"c\"\n",
      "},\n",
      "\"3\": {\n",
      "\"mcq\": \"Which field focuses on population inferences from a sample, while the other finds generalizable predictive patterns?\",\n",
      "\"options\": {\n",
      "\"a\": \"Machine Learning\",\n",
      "\"b\": \"Data Mining\",\n",
      "\"c\": \"Statistics\",\n",
      "\"d\": \"Artificial Intelligence\"\n",
      "},\n",
      "\"correct\": \"c\"\n",
      "},\n",
      "\"4\": {\n",
      "\"mcq\": \"What is the main goal of machine learning in terms of generalization?\",\n",
      "\"options\": {\n",
      "\"a\": \"Minimize loss on training set\",\n",
      "\"b\": \"Minimize loss on unseen samples\",\n",
      "\"c\": \"Maximize loss on unseen samples\",\n",
      "\"d\": \"Maximize loss on training set\"\n",
      "},\n",
      "\"correct\": \"b\"\n",
      "},\n",
      "\"5\": {\n",
      "\"mcq\": \"What is the branch of theoretical computer science that deals with the computational analysis of machine learning algorithms?\",\n",
      "\"options\": {\n",
      "\"a\": \"Artificial Intelligence Theory\",\n",
      "\"b\": \"Data Mining Theory\",\n",
      "\"c\": \"Computational Learning Theory\",\n",
      "\"d\": \"Statistical Learning Theory\"\n",
      "},\n",
      "\"correct\": \"c\"\n",
      "}\n",
      "}\n",
      "\n",
      "Check from an expert English Writer of the above quiz:\n",
      "\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "#https://python.langchain.com/docs/modules/model_io/llms/token_usage_tracking\n",
    "\n",
    "#How to setup Token Usage Tracking in LangChain\n",
    "with get_openai_callback() as cb:\n",
    "    response=generate_evaluate_chain(\n",
    "        {\n",
    "            \"text\": TEXT,\n",
    "            \"number\": NUMBER,\n",
    "            \"subject\":SUBJECT,\n",
    "            \"tone\": TONE,\n",
    "            \"response_json\": json.dumps(RESPONSE_JSON)\n",
    "        }\n",
    "        )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total Tokens:3670\n",
      "Prompt Tokens:3257\n",
      "Completion Tokens:413\n",
      "Total Cost:0.005711500000000001\n"
     ]
    }
   ],
   "source": [
    "print(f\"Total Tokens:{cb.total_tokens}\")\n",
    "print(f\"Prompt Tokens:{cb.prompt_tokens}\")\n",
    "print(f\"Completion Tokens:{cb.completion_tokens}\")\n",
    "print(f\"Total Cost:{cb.total_cost}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'text': 'The term machine learning was coined in 1959 by Arthur Samuel, an IBM employee and pioneer in the field of computer gaming and artificial intelligence.[9][10] The synonym self-teaching computers was also used in this time period.[11][12]\\n\\nAlthough the earliest machine learning model was introduced in the 1950s when Arthur Samuel invented a program that calculated the winning chance in checkers for each side, the history of machine learning roots back to decades of human desire and effort to study human cognitive processes.[13] In 1949, Canadian psychologist Donald Hebb published the book The Organization of Behavior, in which he introduced a theoretical neural structure formed by certain interactions among nerve cells.[14] Hebb\\'s model of neurons interacting with one another set a groundwork for how AIs and machine learning algorithms work under nodes, or artificial neurons used by computers to communicate data.[13] Other researchers who have studied human cognitive systems contributed to the modern machine learning technologies as well, including logician Walter Pitts and Warren McCulloch, who proposed the early mathematical models of neural networks to come up with algorithms that mirror human thought processes.[13]\\n\\nBy the early 1960s an experimental \"learning machine\" with punched tape memory, called Cybertron, had been developed by Raytheon Company to analyze sonar signals, electrocardiograms, and speech patterns using rudimentary reinforcement learning. It was repetitively \"trained\" by a human operator/teacher to recognize patterns and equipped with a \"goof\" button to cause it to re-evaluate incorrect decisions.[15] A representative book on research into machine learning during the 1960s was Nilsson\\'s book on Learning Machines, dealing mostly with machine learning for pattern classification.[16] Interest related to pattern recognition continued into the 1970s, as described by Duda and Hart in 1973.[17] In 1981 a report was given on using teaching strategies so that an artificial neural network learns to recognize 40 characters (26 letters, 10 digits, and 4 special symbols) from a computer terminal.[18]\\n\\nTom M. Mitchell provided a widely quoted, more formal definition of the algorithms studied in the machine learning field: \"A computer program is said to learn from experience E with respect to some class of tasks T and performance measure P if its performance at tasks in T, as measured by P, improves with experience E.\"[19] This definition of the tasks in which machine learning is concerned offers a fundamentally operational definition rather than defining the field in cognitive terms. This follows Alan Turing\\'s proposal in his paper \"Computing Machinery and Intelligence\", in which the question \"Can machines think?\" is replaced with the question \"Can machines do what we (as thinking entities) can do?\".[20]\\n\\nModern-day machine learning has two objectives. One is to classify data based on models which have been developed; the other purpose is to make predictions for future outcomes based on these models. A hypothetical algorithm specific to classifying data may use computer vision of moles coupled with supervised learning in order to train it to classify the cancerous moles. A machine learning algorithm for stock trading may inform the trader of future potential predictions.[21]\\n\\nRelationships to other fields\\nArtificial intelligence\\n\\nMachine learning as subfield of AI[22]\\nAs a scientific endeavor, machine learning grew out of the quest for artificial intelligence (AI). In the early days of AI as an academic discipline, some researchers were interested in having machines learn from data. They attempted to approach the problem with various symbolic methods, as well as what were then termed \"neural networks\"; these were mostly perceptrons and other models that were later found to be reinventions of the generalized linear models of statistics.[23] Probabilistic reasoning was also employed, especially in automated medical diagnosis.[24]: 488 \\n\\nHowever, an increasing emphasis on the logical, knowledge-based approach caused a rift between AI and machine learning. Probabilistic systems were plagued by theoretical and practical problems of data acquisition and representation.[24]: 488  By 1980, expert systems had come to dominate AI, and statistics was out of favor.[25] Work on symbolic/knowledge-based learning did continue within AI, leading to inductive logic programming(ILP), but the more statistical line of research was now outside the field of AI proper, in pattern recognition and information retrieval.[24]: 708–710, 755  Neural networks research had been abandoned by AI and computer science around the same time. This line, too, was continued outside the AI/CS field, as \"connectionism\", by researchers from other disciplines including Hopfield, Rumelhart, and Hinton. Their main success came in the mid-1980s with the reinvention of backpropagation.[24]: 25 \\n\\nMachine learning (ML), reorganized and recognized as its own field, started to flourish in the 1990s. The field changed its goal from achieving artificial intelligence to tackling solvable problems of a practical nature. It shifted focus away from the symbolic approaches it had inherited from AI, and toward methods and models borrowed from statistics, fuzzy logic, and probability theory.[25]\\n\\nData compression\\nThis section is an excerpt from Data compression § Machine learning.[edit]\\nThere is a close connection between machine learning and compression. A system that predicts the posterior probabilities of a sequence given its entire history can be used for optimal data compression (by using arithmetic coding on the output distribution). Conversely, an optimal compressor can be used for prediction (by finding the symbol that compresses best, given the previous history). This equivalence has been used as a justification for using data compression as a benchmark for \"general intelligence\".[26][27][28]\\n\\nAn alternative view can show compression algorithms implicitly map strings into implicit feature space vectors, and compression-based similarity measures compute similarity within these feature spaces. For each compressor C(.) we define an associated vector space ℵ, such that C(.) maps an input string x, corresponding to the vector norm ||~x||. An exhaustive examination of the feature spaces underlying all compression algorithms is precluded by space; instead, feature vectors chooses to examine three representative lossless compression methods, LZW, LZ77, and PPM.[29]\\n\\nAccording to AIXI theory, a connection more directly explained in Hutter Prize, the best possible compression of x is the smallest possible software that generates x. For example, in that model, a zip file\\'s compressed size includes both the zip file and the unzipping software, since you can not unzip it without both, but there may be an even smaller combined form.\\n\\nExamples of AI-powered audio/video compression software include VP9, NVIDIA Maxine, AIVC, AccMPEG.[30] Examples of software that can perform AI-powered image compression include OpenCV, TensorFlow, MATLAB\\'s Image Processing Toolbox (IPT) and High-Fidelity Generative Image Compression.[31]\\n\\nIn unsupervised machine learning, k-means clustering can be utilized to compress data by grouping similar data points into clusters. This technique simplifies handling extensive datasets that lack predefined labels and finds widespread use in fields such as image compression.[32] Large language models are also capable of lossless data compression.[33]\\nData mining\\nMachine learning and data mining often employ the same methods and overlap significantly, but while machine learning focuses on prediction, based on known properties learned from the training data, data mining focuses on the discovery of (previously) unknown properties in the data (this is the analysis step of knowledge discovery in databases). Data mining uses many machine learning methods, but with different goals; on the other hand, machine learning also employs data mining methods as \"unsupervised learning\" or as a preprocessing step to improve learner accuracy. Much of the confusion between these two research communities (which do often have separate conferences and separate journals, ECML PKDD being a major exception) comes from the basic assumptions they work with: in machine learning, performance is usually evaluated with respect to the ability to reproduce known knowledge, while in knowledge discovery and data mining (KDD) the key task is the discovery of previously unknown knowledge. Evaluated with respect to known knowledge, an uninformed (unsupervised) method will easily be outperformed by other supervised methods, while in a typical KDD task, supervised methods cannot be used due to the unavailability of training data.\\n\\nMachine learning also has intimate ties to optimization: many learning problems are formulated as minimization of some loss function on a training set of examples. Loss functions express the discrepancy between the predictions of the model being trained and the actual problem instances (for example, in classification, one wants to assign a label to instances, and models are trained to correctly predict the pre-assigned labels of a set of examples).[34]\\n\\nGeneralization\\nThe difference between optimization and machine learning arises from the goal of generalization: while optimization algorithms can minimize the loss on a training set, machine learning is concerned with minimizing the loss on unseen samples. Characterizing the generalization of various learning algorithms is an active topic of current research, especially for deep learning algorithms.\\n\\nStatistics\\nMachine learning and statistics are closely related fields in terms of methods, but distinct in their principal goal: statistics draws population inferences from a sample, while machine learning finds generalizable predictive patterns.[35] According to Michael I. Jordan, the ideas of machine learning, from methodological principles to theoretical tools, have had a long pre-history in statistics.[36] He also suggested the term data science as a placeholder to call the overall field.[36]\\n\\nConventional statistical analyses require the a priori selection of a model most suitable for the study data set. In addition, only significant or theoretically relevant variables based on previous experience are included for analysis. In contrast, machine learning is not built on a pre-structured model; rather, the data shape the model by detecting underlying patterns. The more variables (input) used to train the model, the more accurate the ultimate model will be.[37]\\n\\nLeo Breiman distinguished two statistical modeling paradigms: data model and algorithmic model,[38] wherein \"algorithmic model\" means more or less the machine learning algorithms like Random Forest.\\n\\nSome statisticians have adopted methods from machine learning, leading to a combined field that they call statistical learning.[39]\\n\\nStatistical physics\\nAnalytical and computational techniques derived from deep-rooted physics of disordered systems can be extended to large-scale problems, including machine learning, e.g., to analyze the weight space of deep neural networks.[40] Statistical physics is thus finding applications in the area of medical diagnostics.[41]\\n\\nTheory\\nMain articles: Computational learning theory and Statistical learning theory\\nA core objective of a learner is to generalize from its experience.[6][42] Generalization in this context is the ability of a learning machine to perform accurately on new, unseen examples/tasks after having experienced a learning data set. The training examples come from some generally unknown probability distribution (considered representative of the space of occurrences) and the learner has to build a general model about this space that enables it to produce sufficiently accurate predictions in new cases.\\n\\nThe computational analysis of machine learning algorithms and their performance is a branch of theoretical computer science known as computational learning theory via the Probably Approximately Correct Learning (PAC) model. Because training sets are finite and the future is uncertain, learning theory usually does not yield guarantees of the performance of algorithms. Instead, probabilistic bounds on the performance are quite common. The bias–variance decomposition is one way to quantify generalization error.\\n\\nFor the best performance in the context of generalization, the complexity of the hypothesis should match the complexity of the function underlying the data. If the hypothesis is less complex than the function, then the model has under fitted the data. If the complexity of the model is increased in response, then the training error decreases. But if the hypothesis is too complex, then the model is subject to overfitting and generalization will be poorer.[43]\\n\\nIn addition to performance bounds, learning theorists study the time complexity and feasibility of learning. In computational learning theory, a computation is considered feasible if it can be done in polynomial time. There are two kinds of time complexity results: Positive results show that a certain class of functions can be learned in polynomial time. Negative results show that certain classes cannot be learned in polynomial time.\\n\\n',\n",
       " 'number': 5,\n",
       " 'subject': 'Machine learning',\n",
       " 'tone': 'simple',\n",
       " 'response_json': '{\"1\": {\"mcq\": \"multiple choice question\", \"options\": {\"a\": \"choice here\", \"b\": \"choice here\", \"c\": \"choice here\", \"d\": \"choice here\"}, \"correct\": \"correct answer\"}, \"2\": {\"mcq\": \"multiple choice question\", \"options\": {\"a\": \"choice here\", \"b\": \"choice here\", \"c\": \"choice here\", \"d\": \"choice here\"}, \"correct\": \"correct answer\"}, \"3\": {\"mcq\": \"multiple choice question\", \"options\": {\"a\": \"choice here\", \"b\": \"choice here\", \"c\": \"choice here\", \"d\": \"choice here\"}, \"correct\": \"correct answer\"}}',\n",
       " 'quiz': '{\\n\"1\": {\\n\"mcq\": \"What term was coined in 1959 by Arthur Samuel to describe the field of computer gaming and artificial intelligence?\",\\n\"options\": {\\n\"a\": \"Artificial Intelligence\",\\n\"b\": \"Machine Learning\",\\n\"c\": \"Data Mining\",\\n\"d\": \"Neural Networks\"\\n},\\n\"correct\": \"b\"\\n},\\n\"2\": {\\n\"mcq\": \"What was the name of the \\'learning machine\\' with punched tape memory developed by Raytheon Company in the early 1960s?\",\\n\"options\": {\\n\"a\": \"Cyberdyne\",\\n\"b\": \"Cybernet\",\\n\"c\": \"Cybertron\",\\n\"d\": \"Cyberlink\"\\n},\\n\"correct\": \"c\"\\n},\\n\"3\": {\\n\"mcq\": \"Which field focuses on population inferences from a sample, while the other finds generalizable predictive patterns?\",\\n\"options\": {\\n\"a\": \"Machine Learning\",\\n\"b\": \"Data Mining\",\\n\"c\": \"Statistics\",\\n\"d\": \"Artificial Intelligence\"\\n},\\n\"correct\": \"c\"\\n},\\n\"4\": {\\n\"mcq\": \"What is the main goal of machine learning in terms of generalization?\",\\n\"options\": {\\n\"a\": \"Minimize loss on training set\",\\n\"b\": \"Minimize loss on unseen samples\",\\n\"c\": \"Maximize loss on unseen samples\",\\n\"d\": \"Maximize loss on training set\"\\n},\\n\"correct\": \"b\"\\n},\\n\"5\": {\\n\"mcq\": \"What is the branch of theoretical computer science that deals with the computational analysis of machine learning algorithms?\",\\n\"options\": {\\n\"a\": \"Artificial Intelligence Theory\",\\n\"b\": \"Data Mining Theory\",\\n\"c\": \"Computational Learning Theory\",\\n\"d\": \"Statistical Learning Theory\"\\n},\\n\"correct\": \"c\"\\n}\\n}',\n",
       " 'review': 'The complexity of the quiz questions is suitable for machine learning students. The questions cover key concepts in the field and require a good understanding of the terminology and principles. No changes are needed as the tone and difficulty level are appropriate for the target audience.'}"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "response"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "quiz=response.get(\"quiz\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [],
   "source": [
    "quiz=json.loads(quiz)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "quiz_table_data = []\n",
    "for key, value in quiz.items():\n",
    "    mcq = value[\"mcq\"]\n",
    "    options = \" | \".join(\n",
    "        [\n",
    "            f\"{option}: {option_value}\"\n",
    "            for option, option_value in value[\"options\"].items()\n",
    "            ]\n",
    "        )\n",
    "    correct = value[\"correct\"]\n",
    "    quiz_table_data.append({\"MCQ\": mcq, \"Choices\": options, \"Correct\": correct})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'MCQ': 'What term was coined in 1959 by Arthur Samuel to describe the field of computer gaming and artificial intelligence?',\n",
       "  'Choices': 'a: Artificial Intelligence | b: Machine Learning | c: Data Mining | d: Neural Networks',\n",
       "  'Correct': 'b'},\n",
       " {'MCQ': \"What was the name of the 'learning machine' with punched tape memory developed by Raytheon Company in the early 1960s?\",\n",
       "  'Choices': 'a: Cyberdyne | b: Cybernet | c: Cybertron | d: Cyberlink',\n",
       "  'Correct': 'c'},\n",
       " {'MCQ': 'Which field focuses on population inferences from a sample, while the other finds generalizable predictive patterns?',\n",
       "  'Choices': 'a: Machine Learning | b: Data Mining | c: Statistics | d: Artificial Intelligence',\n",
       "  'Correct': 'c'},\n",
       " {'MCQ': 'What is the main goal of machine learning in terms of generalization?',\n",
       "  'Choices': 'a: Minimize loss on training set | b: Minimize loss on unseen samples | c: Maximize loss on unseen samples | d: Maximize loss on training set',\n",
       "  'Correct': 'b'},\n",
       " {'MCQ': 'What is the branch of theoretical computer science that deals with the computational analysis of machine learning algorithms?',\n",
       "  'Choices': 'a: Artificial Intelligence Theory | b: Data Mining Theory | c: Computational Learning Theory | d: Statistical Learning Theory',\n",
       "  'Correct': 'c'}]"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "quiz_table_data "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [],
   "source": [
    "quiz=pd.DataFrame(quiz_table_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "quiz.to_csv(\"machinelearning.csv\",index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'03_09_2024_11_59_14'"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from datetime import datetime\n",
    "datetime.now().strftime('%m_%d_%Y_%H_%M_%S')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'03_11_2024_09_28_03'"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from datetime import datetime\n",
    "datetime.now().strftime('%m_%d_%Y_%H_%M_%S')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}