Upload 27 files
- experiment/machinelearning.csv +6 -0
- experiment/mcq.ipynb +761 -0
- logs/03_11_2024_09_36_10.log +0 -0
- logs/03_11_2024_09_41_37.log +1 -0
- logs/03_11_2024_11_53_51.log +0 -0
- logs/03_11_2024_12_29_49.log +0 -0
- logs/03_11_2024_12_31_09.log +0 -0
- logs/03_11_2024_12_32_21.log +0 -0
- logs/03_11_2024_12_32_57.log +0 -0
- logs/03_11_2024_12_33_22.log +1 -0
- logs/03_11_2024_12_37_49.log +0 -0
- logs/03_11_2024_12_38_02.log +2 -0
- mcqgenrator.egg-info/PKG-INFO +10 -0
- mcqgenrator.egg-info/SOURCES.txt +12 -0
- mcqgenrator.egg-info/dependency_links.txt +1 -0
- mcqgenrator.egg-info/requires.txt +5 -0
- mcqgenrator.egg-info/top_level.txt +1 -0
- src/__init__.py +0 -0
- src/__pycache__/__init__.cpython-312.pyc +0 -0
- src/mcqgenerator/__init__.py +0 -0
- src/mcqgenerator/__pycache__/__init__.cpython-312.pyc +0 -0
- src/mcqgenerator/__pycache__/logger.cpython-312.pyc +0 -0
- src/mcqgenerator/__pycache__/mcqgenerator.cpython-312.pyc +0 -0
- src/mcqgenerator/__pycache__/utilis.cpython-312.pyc +0 -0
- src/mcqgenerator/logger.py +19 -0
- src/mcqgenerator/mcqgenerator.py +66 -0
- src/mcqgenerator/utilis.py +39 -0
experiment/machinelearning.csv
ADDED
@@ -0,0 +1,6 @@
MCQ,Choices,Correct
What term was coined in 1959 by Arthur Samuel to describe the field of computer gaming and artificial intelligence?,a: Artificial Intelligence | b: Machine Learning | c: Data Mining | d: Neural Networks,b
What was the name of the 'learning machine' with punched tape memory developed by Raytheon Company in the early 1960s?,a: Cyberdyne | b: Cybernet | c: Cybertron | d: Cyberlink,c
"Which field focuses on population inferences from a sample, while the other finds generalizable predictive patterns?",a: Machine Learning | b: Data Mining | c: Statistics | d: Artificial Intelligence,c
What is the main goal of machine learning in terms of generalization?,a: Minimize loss on training set | b: Minimize loss on unseen samples | c: Maximize loss on unseen samples | d: Maximize loss on training set,b
What is the branch of theoretical computer science that deals with the computational analysis of machine learning algorithms?,a: Artificial Intelligence Theory | b: Data Mining Theory | c: Computational Learning Theory | d: Statistical Learning Theory,c
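Note: each row above stores all four options in a single pipe-delimited Choices column. A minimal sketch of loading the file and splitting that column back out with pandas (the relative path assumes the repo root; nothing below is part of the commit itself):

import pandas as pd

# Load the generated quiz; column names follow the CSV header above.
df = pd.read_csv("experiment/machinelearning.csv")

# Choices holds "a: ... | b: ... | c: ... | d: ..." in one field.
for _, row in df.iterrows():
    print(row["MCQ"])
    for option in row["Choices"].split("|"):
        print("   " + option.strip())
    print("Answer:", row["Correct"])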
experiment/mcq.ipynb
ADDED
@@ -0,0 +1,761 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import json\n",
"import pandas as pd\n",
"import traceback"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"from langchain.chat_models import ChatOpenAI"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from dotenv import load_dotenv\n",
"\n",
"load_dotenv() "
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"KEY=os.getenv(\"OPENAI_API_KEY\")"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"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\n"
]
}
],
"source": [
"print(KEY)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"llm=ChatOpenAI(openai_api_key=KEY,model_name=\"gpt-3.5-turbo\", temperature=0.5)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x00000209CF21F920>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x00000209CF240FB0>, temperature=0.5, openai_api_key='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', openai_proxy='')"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"llm"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.prompts import PromptTemplate\n",
"from langchain.chains import LLMChain\n",
"from langchain.chains import SequentialChain\n",
"from langchain.callbacks import get_openai_callback\n",
"import PyPDF2"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"RESPONSE_JSON = {\n",
"    \"1\": {\n",
"        \"mcq\": \"multiple choice question\",\n",
"        \"options\": {\n",
"            \"a\": \"choice here\",\n",
"            \"b\": \"choice here\",\n",
"            \"c\": \"choice here\",\n",
"            \"d\": \"choice here\",\n",
"        },\n",
"        \"correct\": \"correct answer\",\n",
"    },\n",
"    \"2\": {\n",
"        \"mcq\": \"multiple choice question\",\n",
"        \"options\": {\n",
"            \"a\": \"choice here\",\n",
"            \"b\": \"choice here\",\n",
"            \"c\": \"choice here\",\n",
"            \"d\": \"choice here\",\n",
"        },\n",
"        \"correct\": \"correct answer\",\n",
"    },\n",
"    \"3\": {\n",
"        \"mcq\": \"multiple choice question\",\n",
"        \"options\": {\n",
"            \"a\": \"choice here\",\n",
"            \"b\": \"choice here\",\n",
"            \"c\": \"choice here\",\n",
"            \"d\": \"choice here\",\n",
"        },\n",
"        \"correct\": \"correct answer\",\n",
"    },\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"TEMPLATE=\"\"\"\n",
"Text:{text}\n",
"You are an expert MCQ maker. Given the above text, it is your job to \\\n",
"create a quiz of {number} multiple choice questions for {subject} students in {tone} tone. \n",
"Make sure the questions are not repeated and check all the questions to be conforming the text as well.\n",
"Make sure to format your response like RESPONSE_JSON below and use it as a guide. \\\n",
"Ensure to make {number} MCQs\n",
"### RESPONSE_JSON\n",
"{response_json}\n",
"\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"quiz_generation_prompt = PromptTemplate(\n",
"    input_variables=[\"text\", \"number\", \"subject\", \"tone\", \"response_json\"],\n",
"    template=TEMPLATE\n",
"    )"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"quiz_chain=LLMChain(llm=llm, prompt=quiz_generation_prompt, output_key=\"quiz\", verbose=True)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"TEMPLATE2=\"\"\"\n",
"You are an expert english grammarian and writer. Given a Multiple Choice Quiz for {subject} students.\\\n",
"You need to evaluate the complexity of the question and give a complete analysis of the quiz. Only use at max 50 words for complexity analysis. \n",
"if the quiz is not at per with the cognitive and analytical abilities of the students,\\\n",
"update the quiz questions which needs to be changed and change the tone such that it perfectly fits the student abilities\n",
"Quiz_MCQs:\n",
"{quiz}\n",
"\n",
"Check from an expert English Writer of the above quiz:\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"quiz_evaluation_prompt=PromptTemplate(input_variables=[\"subject\", \"quiz\"], template=TEMPLATE2)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"review_chain=LLMChain(llm=llm, prompt=quiz_evaluation_prompt, output_key=\"review\", verbose=True)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [],
"source": [
"generate_evaluate_chain=SequentialChain(chains=[quiz_chain, review_chain], input_variables=[\"text\", \"number\", \"subject\", \"tone\", \"response_json\"],\n",
"                                        output_variables=[\"quiz\", \"review\"], verbose=True,)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"file_path=r\"C:\\Users\\student\\Downloads\\mcqgenerator\\data.txt\""
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"with open(file_path, 'r') as file:\n",
"    TEXT = file.read()"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The term machine learning was coined in 1959 by Arthur Samuel, an IBM employee and pioneer in the field of computer gaming and artificial intelligence.[9][10] The synonym self-teaching computers was also used in this time period.[11][12]\n",
"\n",
"Although the earliest machine learning model was introduced in the 1950s when Arthur Samuel invented a program that calculated the winning chance in checkers for each side, the history of machine learning roots back to decades of human desire and effort to study human cognitive processes.[13] In 1949, Canadian psychologist Donald Hebb published the book The Organization of Behavior, in which he introduced a theoretical neural structure formed by certain interactions among nerve cells.[14] Hebb's model of neurons interacting with one another set a groundwork for how AIs and machine learning algorithms work under nodes, or artificial neurons used by computers to communicate data.[13] Other researchers who have studied human cognitive systems contributed to the modern machine learning technologies as well, including logician Walter Pitts and Warren McCulloch, who proposed the early mathematical models of neural networks to come up with algorithms that mirror human thought processes.[13]\n",
"\n",
"By the early 1960s an experimental \"learning machine\" with punched tape memory, called Cybertron, had been developed by Raytheon Company to analyze sonar signals, electrocardiograms, and speech patterns using rudimentary reinforcement learning. It was repetitively \"trained\" by a human operator/teacher to recognize patterns and equipped with a \"goof\" button to cause it to re-evaluate incorrect decisions.[15] A representative book on research into machine learning during the 1960s was Nilsson's book on Learning Machines, dealing mostly with machine learning for pattern classification.[16] Interest related to pattern recognition continued into the 1970s, as described by Duda and Hart in 1973.[17] In 1981 a report was given on using teaching strategies so that an artificial neural network learns to recognize 40 characters (26 letters, 10 digits, and 4 special symbols) from a computer terminal.[18]\n",
"\n",
"Tom M. Mitchell provided a widely quoted, more formal definition of the algorithms studied in the machine learning field: \"A computer program is said to learn from experience E with respect to some class of tasks T and performance measure P if its performance at tasks in T, as measured by P, improves with experience E.\"[19] This definition of the tasks in which machine learning is concerned offers a fundamentally operational definition rather than defining the field in cognitive terms. This follows Alan Turing's proposal in his paper \"Computing Machinery and Intelligence\", in which the question \"Can machines think?\" is replaced with the question \"Can machines do what we (as thinking entities) can do?\".[20]\n",
"\n",
"Modern-day machine learning has two objectives. One is to classify data based on models which have been developed; the other purpose is to make predictions for future outcomes based on these models. A hypothetical algorithm specific to classifying data may use computer vision of moles coupled with supervised learning in order to train it to classify the cancerous moles. A machine learning algorithm for stock trading may inform the trader of future potential predictions.[21]\n",
"\n",
"Relationships to other fields\n",
"Artificial intelligence\n",
"\n",
"Machine learning as subfield of AI[22]\n",
"As a scientific endeavor, machine learning grew out of the quest for artificial intelligence (AI). In the early days of AI as an academic discipline, some researchers were interested in having machines learn from data. They attempted to approach the problem with various symbolic methods, as well as what were then termed \"neural networks\"; these were mostly perceptrons and other models that were later found to be reinventions of the generalized linear models of statistics.[23] Probabilistic reasoning was also employed, especially in automated medical diagnosis.[24]: 488 \n",
"\n",
"However, an increasing emphasis on the logical, knowledge-based approach caused a rift between AI and machine learning. Probabilistic systems were plagued by theoretical and practical problems of data acquisition and representation.[24]: 488  By 1980, expert systems had come to dominate AI, and statistics was out of favor.[25] Work on symbolic/knowledge-based learning did continue within AI, leading to inductive logic programming(ILP), but the more statistical line of research was now outside the field of AI proper, in pattern recognition and information retrieval.[24]: 708–710, 755  Neural networks research had been abandoned by AI and computer science around the same time. This line, too, was continued outside the AI/CS field, as \"connectionism\", by researchers from other disciplines including Hopfield, Rumelhart, and Hinton. Their main success came in the mid-1980s with the reinvention of backpropagation.[24]: 25 \n",
"\n",
"Machine learning (ML), reorganized and recognized as its own field, started to flourish in the 1990s. The field changed its goal from achieving artificial intelligence to tackling solvable problems of a practical nature. It shifted focus away from the symbolic approaches it had inherited from AI, and toward methods and models borrowed from statistics, fuzzy logic, and probability theory.[25]\n",
"\n",
"Data compression\n",
"This section is an excerpt from Data compression § Machine learning.[edit]\n",
"There is a close connection between machine learning and compression. A system that predicts the posterior probabilities of a sequence given its entire history can be used for optimal data compression (by using arithmetic coding on the output distribution). Conversely, an optimal compressor can be used for prediction (by finding the symbol that compresses best, given the previous history). This equivalence has been used as a justification for using data compression as a benchmark for \"general intelligence\".[26][27][28]\n",
"\n",
"An alternative view can show compression algorithms implicitly map strings into implicit feature space vectors, and compression-based similarity measures compute similarity within these feature spaces. For each compressor C(.) we define an associated vector space ℵ, such that C(.) maps an input string x, corresponding to the vector norm ||~x||. An exhaustive examination of the feature spaces underlying all compression algorithms is precluded by space; instead, feature vectors chooses to examine three representative lossless compression methods, LZW, LZ77, and PPM.[29]\n",
"\n",
"According to AIXI theory, a connection more directly explained in Hutter Prize, the best possible compression of x is the smallest possible software that generates x. For example, in that model, a zip file's compressed size includes both the zip file and the unzipping software, since you can not unzip it without both, but there may be an even smaller combined form.\n",
"\n",
"Examples of AI-powered audio/video compression software include VP9, NVIDIA Maxine, AIVC, AccMPEG.[30] Examples of software that can perform AI-powered image compression include OpenCV, TensorFlow, MATLAB's Image Processing Toolbox (IPT) and High-Fidelity Generative Image Compression.[31]\n",
"\n",
"In unsupervised machine learning, k-means clustering can be utilized to compress data by grouping similar data points into clusters. This technique simplifies handling extensive datasets that lack predefined labels and finds widespread use in fields such as image compression.[32] Large language models are also capable of lossless data compression.[33]\n",
"Data mining\n",
"Machine learning and data mining often employ the same methods and overlap significantly, but while machine learning focuses on prediction, based on known properties learned from the training data, data mining focuses on the discovery of (previously) unknown properties in the data (this is the analysis step of knowledge discovery in databases). Data mining uses many machine learning methods, but with different goals; on the other hand, machine learning also employs data mining methods as \"unsupervised learning\" or as a preprocessing step to improve learner accuracy. Much of the confusion between these two research communities (which do often have separate conferences and separate journals, ECML PKDD being a major exception) comes from the basic assumptions they work with: in machine learning, performance is usually evaluated with respect to the ability to reproduce known knowledge, while in knowledge discovery and data mining (KDD) the key task is the discovery of previously unknown knowledge. Evaluated with respect to known knowledge, an uninformed (unsupervised) method will easily be outperformed by other supervised methods, while in a typical KDD task, supervised methods cannot be used due to the unavailability of training data.\n",
"\n",
"Machine learning also has intimate ties to optimization: many learning problems are formulated as minimization of some loss function on a training set of examples. Loss functions express the discrepancy between the predictions of the model being trained and the actual problem instances (for example, in classification, one wants to assign a label to instances, and models are trained to correctly predict the pre-assigned labels of a set of examples).[34]\n",
"\n",
"Generalization\n",
"The difference between optimization and machine learning arises from the goal of generalization: while optimization algorithms can minimize the loss on a training set, machine learning is concerned with minimizing the loss on unseen samples. Characterizing the generalization of various learning algorithms is an active topic of current research, especially for deep learning algorithms.\n",
"\n",
"Statistics\n",
"Machine learning and statistics are closely related fields in terms of methods, but distinct in their principal goal: statistics draws population inferences from a sample, while machine learning finds generalizable predictive patterns.[35] According to Michael I. Jordan, the ideas of machine learning, from methodological principles to theoretical tools, have had a long pre-history in statistics.[36] He also suggested the term data science as a placeholder to call the overall field.[36]\n",
"\n",
"Conventional statistical analyses require the a priori selection of a model most suitable for the study data set. In addition, only significant or theoretically relevant variables based on previous experience are included for analysis. In contrast, machine learning is not built on a pre-structured model; rather, the data shape the model by detecting underlying patterns. The more variables (input) used to train the model, the more accurate the ultimate model will be.[37]\n",
"\n",
"Leo Breiman distinguished two statistical modeling paradigms: data model and algorithmic model,[38] wherein \"algorithmic model\" means more or less the machine learning algorithms like Random Forest.\n",
"\n",
"Some statisticians have adopted methods from machine learning, leading to a combined field that they call statistical learning.[39]\n",
"\n",
"Statistical physics\n",
"Analytical and computational techniques derived from deep-rooted physics of disordered systems can be extended to large-scale problems, including machine learning, e.g., to analyze the weight space of deep neural networks.[40] Statistical physics is thus finding applications in the area of medical diagnostics.[41]\n",
"\n",
"Theory\n",
"Main articles: Computational learning theory and Statistical learning theory\n",
"A core objective of a learner is to generalize from its experience.[6][42] Generalization in this context is the ability of a learning machine to perform accurately on new, unseen examples/tasks after having experienced a learning data set. The training examples come from some generally unknown probability distribution (considered representative of the space of occurrences) and the learner has to build a general model about this space that enables it to produce sufficiently accurate predictions in new cases.\n",
"\n",
"The computational analysis of machine learning algorithms and their performance is a branch of theoretical computer science known as computational learning theory via the Probably Approximately Correct Learning (PAC) model. Because training sets are finite and the future is uncertain, learning theory usually does not yield guarantees of the performance of algorithms. Instead, probabilistic bounds on the performance are quite common. The bias–variance decomposition is one way to quantify generalization error.\n",
"\n",
"For the best performance in the context of generalization, the complexity of the hypothesis should match the complexity of the function underlying the data. If the hypothesis is less complex than the function, then the model has under fitted the data. If the complexity of the model is increased in response, then the training error decreases. But if the hypothesis is too complex, then the model is subject to overfitting and generalization will be poorer.[43]\n",
"\n",
"In addition to performance bounds, learning theorists study the time complexity and feasibility of learning. In computational learning theory, a computation is considered feasible if it can be done in polynomial time. There are two kinds of time complexity results: Positive results show that a certain class of functions can be learned in polynomial time. Negative results show that certain classes cannot be learned in polynomial time.\n",
"\n",
"\n"
]
}
],
"source": [
"print(TEXT)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'{\"1\": {\"mcq\": \"multiple choice question\", \"options\": {\"a\": \"choice here\", \"b\": \"choice here\", \"c\": \"choice here\", \"d\": \"choice here\"}, \"correct\": \"correct answer\"}, \"2\": {\"mcq\": \"multiple choice question\", \"options\": {\"a\": \"choice here\", \"b\": \"choice here\", \"c\": \"choice here\", \"d\": \"choice here\"}, \"correct\": \"correct answer\"}, \"3\": {\"mcq\": \"multiple choice question\", \"options\": {\"a\": \"choice here\", \"b\": \"choice here\", \"c\": \"choice here\", \"d\": \"choice here\"}, \"correct\": \"correct answer\"}}'"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Serialize the Python dictionary into a JSON-formatted string\n",
"json.dumps(RESPONSE_JSON)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"NUMBER=5 \n",
"SUBJECT=\"Machine learning\"\n",
"TONE=\"simple\""
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\student\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python312\\site-packages\\langchain_core\\_api\\deprecation.py:117: LangChainDeprecationWarning: The function `__call__` was deprecated in LangChain 0.1.0 and will be removed in 0.2.0. Use invoke instead.\n",
" warn_deprecated(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new SequentialChain chain...\u001b[0m\n",
"\n",
"\n",
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
"Prompt after formatting:\n",
"\u001b[32;1m\u001b[1;3m\n",
"Text:The term machine learning was coined in 1959 by Arthur Samuel, an IBM employee and pioneer in the field of computer gaming and artificial intelligence.[9][10] The synonym self-teaching computers was also used in this time period.[11][12]\n",
"\n",
"Although the earliest machine learning model was introduced in the 1950s when Arthur Samuel invented a program that calculated the winning chance in checkers for each side, the history of machine learning roots back to decades of human desire and effort to study human cognitive processes.[13] In 1949, Canadian psychologist Donald Hebb published the book The Organization of Behavior, in which he introduced a theoretical neural structure formed by certain interactions among nerve cells.[14] Hebb's model of neurons interacting with one another set a groundwork for how AIs and machine learning algorithms work under nodes, or artificial neurons used by computers to communicate data.[13] Other researchers who have studied human cognitive systems contributed to the modern machine learning technologies as well, including logician Walter Pitts and Warren McCulloch, who proposed the early mathematical models of neural networks to come up with algorithms that mirror human thought processes.[13]\n",
"\n",
"By the early 1960s an experimental \"learning machine\" with punched tape memory, called Cybertron, had been developed by Raytheon Company to analyze sonar signals, electrocardiograms, and speech patterns using rudimentary reinforcement learning. It was repetitively \"trained\" by a human operator/teacher to recognize patterns and equipped with a \"goof\" button to cause it to re-evaluate incorrect decisions.[15] A representative book on research into machine learning during the 1960s was Nilsson's book on Learning Machines, dealing mostly with machine learning for pattern classification.[16] Interest related to pattern recognition continued into the 1970s, as described by Duda and Hart in 1973.[17] In 1981 a report was given on using teaching strategies so that an artificial neural network learns to recognize 40 characters (26 letters, 10 digits, and 4 special symbols) from a computer terminal.[18]\n",
"\n",
"Tom M. Mitchell provided a widely quoted, more formal definition of the algorithms studied in the machine learning field: \"A computer program is said to learn from experience E with respect to some class of tasks T and performance measure P if its performance at tasks in T, as measured by P, improves with experience E.\"[19] This definition of the tasks in which machine learning is concerned offers a fundamentally operational definition rather than defining the field in cognitive terms. This follows Alan Turing's proposal in his paper \"Computing Machinery and Intelligence\", in which the question \"Can machines think?\" is replaced with the question \"Can machines do what we (as thinking entities) can do?\".[20]\n",
"\n",
"Modern-day machine learning has two objectives. One is to classify data based on models which have been developed; the other purpose is to make predictions for future outcomes based on these models. A hypothetical algorithm specific to classifying data may use computer vision of moles coupled with supervised learning in order to train it to classify the cancerous moles. A machine learning algorithm for stock trading may inform the trader of future potential predictions.[21]\n",
"\n",
"Relationships to other fields\n",
"Artificial intelligence\n",
"\n",
"Machine learning as subfield of AI[22]\n",
"As a scientific endeavor, machine learning grew out of the quest for artificial intelligence (AI). In the early days of AI as an academic discipline, some researchers were interested in having machines learn from data. They attempted to approach the problem with various symbolic methods, as well as what were then termed \"neural networks\"; these were mostly perceptrons and other models that were later found to be reinventions of the generalized linear models of statistics.[23] Probabilistic reasoning was also employed, especially in automated medical diagnosis.[24]: 488 \n",
"\n",
"However, an increasing emphasis on the logical, knowledge-based approach caused a rift between AI and machine learning. Probabilistic systems were plagued by theoretical and practical problems of data acquisition and representation.[24]: 488  By 1980, expert systems had come to dominate AI, and statistics was out of favor.[25] Work on symbolic/knowledge-based learning did continue within AI, leading to inductive logic programming(ILP), but the more statistical line of research was now outside the field of AI proper, in pattern recognition and information retrieval.[24]: 708–710, 755  Neural networks research had been abandoned by AI and computer science around the same time. This line, too, was continued outside the AI/CS field, as \"connectionism\", by researchers from other disciplines including Hopfield, Rumelhart, and Hinton. Their main success came in the mid-1980s with the reinvention of backpropagation.[24]: 25 \n",
"\n",
"Machine learning (ML), reorganized and recognized as its own field, started to flourish in the 1990s. The field changed its goal from achieving artificial intelligence to tackling solvable problems of a practical nature. It shifted focus away from the symbolic approaches it had inherited from AI, and toward methods and models borrowed from statistics, fuzzy logic, and probability theory.[25]\n",
"\n",
"Data compression\n",
"This section is an excerpt from Data compression § Machine learning.[edit]\n",
"There is a close connection between machine learning and compression. A system that predicts the posterior probabilities of a sequence given its entire history can be used for optimal data compression (by using arithmetic coding on the output distribution). Conversely, an optimal compressor can be used for prediction (by finding the symbol that compresses best, given the previous history). This equivalence has been used as a justification for using data compression as a benchmark for \"general intelligence\".[26][27][28]\n",
"\n",
"An alternative view can show compression algorithms implicitly map strings into implicit feature space vectors, and compression-based similarity measures compute similarity within these feature spaces. For each compressor C(.) we define an associated vector space ℵ, such that C(.) maps an input string x, corresponding to the vector norm ||~x||. An exhaustive examination of the feature spaces underlying all compression algorithms is precluded by space; instead, feature vectors chooses to examine three representative lossless compression methods, LZW, LZ77, and PPM.[29]\n",
"\n",
"According to AIXI theory, a connection more directly explained in Hutter Prize, the best possible compression of x is the smallest possible software that generates x. For example, in that model, a zip file's compressed size includes both the zip file and the unzipping software, since you can not unzip it without both, but there may be an even smaller combined form.\n",
"\n",
"Examples of AI-powered audio/video compression software include VP9, NVIDIA Maxine, AIVC, AccMPEG.[30] Examples of software that can perform AI-powered image compression include OpenCV, TensorFlow, MATLAB's Image Processing Toolbox (IPT) and High-Fidelity Generative Image Compression.[31]\n",
"\n",
"In unsupervised machine learning, k-means clustering can be utilized to compress data by grouping similar data points into clusters. This technique simplifies handling extensive datasets that lack predefined labels and finds widespread use in fields such as image compression.[32] Large language models are also capable of lossless data compression.[33]\n",
"Data mining\n",
"Machine learning and data mining often employ the same methods and overlap significantly, but while machine learning focuses on prediction, based on known properties learned from the training data, data mining focuses on the discovery of (previously) unknown properties in the data (this is the analysis step of knowledge discovery in databases). Data mining uses many machine learning methods, but with different goals; on the other hand, machine learning also employs data mining methods as \"unsupervised learning\" or as a preprocessing step to improve learner accuracy. Much of the confusion between these two research communities (which do often have separate conferences and separate journals, ECML PKDD being a major exception) comes from the basic assumptions they work with: in machine learning, performance is usually evaluated with respect to the ability to reproduce known knowledge, while in knowledge discovery and data mining (KDD) the key task is the discovery of previously unknown knowledge. Evaluated with respect to known knowledge, an uninformed (unsupervised) method will easily be outperformed by other supervised methods, while in a typical KDD task, supervised methods cannot be used due to the unavailability of training data.\n",
"\n",
"Machine learning also has intimate ties to optimization: many learning problems are formulated as minimization of some loss function on a training set of examples. Loss functions express the discrepancy between the predictions of the model being trained and the actual problem instances (for example, in classification, one wants to assign a label to instances, and models are trained to correctly predict the pre-assigned labels of a set of examples).[34]\n",
"\n",
"Generalization\n",
"The difference between optimization and machine learning arises from the goal of generalization: while optimization algorithms can minimize the loss on a training set, machine learning is concerned with minimizing the loss on unseen samples. Characterizing the generalization of various learning algorithms is an active topic of current research, especially for deep learning algorithms.\n",
"\n",
"Statistics\n",
"Machine learning and statistics are closely related fields in terms of methods, but distinct in their principal goal: statistics draws population inferences from a sample, while machine learning finds generalizable predictive patterns.[35] According to Michael I. Jordan, the ideas of machine learning, from methodological principles to theoretical tools, have had a long pre-history in statistics.[36] He also suggested the term data science as a placeholder to call the overall field.[36]\n",
"\n",
"Conventional statistical analyses require the a priori selection of a model most suitable for the study data set. In addition, only significant or theoretically relevant variables based on previous experience are included for analysis. In contrast, machine learning is not built on a pre-structured model; rather, the data shape the model by detecting underlying patterns. The more variables (input) used to train the model, the more accurate the ultimate model will be.[37]\n",
"\n",
"Leo Breiman distinguished two statistical modeling paradigms: data model and algorithmic model,[38] wherein \"algorithmic model\" means more or less the machine learning algorithms like Random Forest.\n",
"\n",
"Some statisticians have adopted methods from machine learning, leading to a combined field that they call statistical learning.[39]\n",
"\n",
"Statistical physics\n",
"Analytical and computational techniques derived from deep-rooted physics of disordered systems can be extended to large-scale problems, including machine learning, e.g., to analyze the weight space of deep neural networks.[40] Statistical physics is thus finding applications in the area of medical diagnostics.[41]\n",
"\n",
"Theory\n",
"Main articles: Computational learning theory and Statistical learning theory\n",
"A core objective of a learner is to generalize from its experience.[6][42] Generalization in this context is the ability of a learning machine to perform accurately on new, unseen examples/tasks after having experienced a learning data set. The training examples come from some generally unknown probability distribution (considered representative of the space of occurrences) and the learner has to build a general model about this space that enables it to produce sufficiently accurate predictions in new cases.\n",
"\n",
"The computational analysis of machine learning algorithms and their performance is a branch of theoretical computer science known as computational learning theory via the Probably Approximately Correct Learning (PAC) model. Because training sets are finite and the future is uncertain, learning theory usually does not yield guarantees of the performance of algorithms. Instead, probabilistic bounds on the performance are quite common. The bias–variance decomposition is one way to quantify generalization error.\n",
"\n",
"For the best performance in the context of generalization, the complexity of the hypothesis should match the complexity of the function underlying the data. If the hypothesis is less complex than the function, then the model has under fitted the data. If the complexity of the model is increased in response, then the training error decreases. But if the hypothesis is too complex, then the model is subject to overfitting and generalization will be poorer.[43]\n",
"\n",
"In addition to performance bounds, learning theorists study the time complexity and feasibility of learning. In computational learning theory, a computation is considered feasible if it can be done in polynomial time. There are two kinds of time complexity results: Positive results show that a certain class of functions can be learned in polynomial time. Negative results show that certain classes cannot be learned in polynomial time.\n",
"\n",
"\n",
"You are an expert MCQ maker. Given the above text, it is your job to create a quiz of 5 multiple choice questions for Machine learning students in simple tone. \n",
"Make sure the questions are not repeated and check all the questions to be conforming the text as well.\n",
"Make sure to format your response like RESPONSE_JSON below and use it as a guide. Ensure to make 5 MCQs\n",
"### RESPONSE_JSON\n",
"{\"1\": {\"mcq\": \"multiple choice question\", \"options\": {\"a\": \"choice here\", \"b\": \"choice here\", \"c\": \"choice here\", \"d\": \"choice here\"}, \"correct\": \"correct answer\"}, \"2\": {\"mcq\": \"multiple choice question\", \"options\": {\"a\": \"choice here\", \"b\": \"choice here\", \"c\": \"choice here\", \"d\": \"choice here\"}, \"correct\": \"correct answer\"}, \"3\": {\"mcq\": \"multiple choice question\", \"options\": {\"a\": \"choice here\", \"b\": \"choice here\", \"c\": \"choice here\", \"d\": \"choice here\"}, \"correct\": \"correct answer\"}}\n",
"\n",
"\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n",
"\n",
"\n",
"\u001b[1m> Entering new LLMChain chain...\u001b[0m\n",
"Prompt after formatting:\n",
"\u001b[32;1m\u001b[1;3m\n",
"You are an expert english grammarian and writer. Given a Multiple Choice Quiz for Machine learning students.You need to evaluate the complexity of the question and give a complete analysis of the quiz. Only use at max 50 words for complexity analysis. \n",
"if the quiz is not at per with the cognitive and analytical abilities of the students,update the quiz questions which needs to be changed and change the tone such that it perfectly fits the student abilities\n",
"Quiz_MCQs:\n",
"{\n",
"\"1\": {\n",
"\"mcq\": \"What term was coined in 1959 by Arthur Samuel to describe the field of computer gaming and artificial intelligence?\",\n",
"\"options\": {\n",
"\"a\": \"Artificial Intelligence\",\n",
"\"b\": \"Machine Learning\",\n",
"\"c\": \"Data Mining\",\n",
"\"d\": \"Neural Networks\"\n",
"},\n",
"\"correct\": \"b\"\n",
"},\n",
"\"2\": {\n",
"\"mcq\": \"What was the name of the 'learning machine' with punched tape memory developed by Raytheon Company in the early 1960s?\",\n",
"\"options\": {\n",
"\"a\": \"Cyberdyne\",\n",
"\"b\": \"Cybernet\",\n",
"\"c\": \"Cybertron\",\n",
"\"d\": \"Cyberlink\"\n",
"},\n",
"\"correct\": \"c\"\n",
"},\n",
"\"3\": {\n",
"\"mcq\": \"Which field focuses on population inferences from a sample, while the other finds generalizable predictive patterns?\",\n",
"\"options\": {\n",
"\"a\": \"Machine Learning\",\n",
"\"b\": \"Data Mining\",\n",
"\"c\": \"Statistics\",\n",
"\"d\": \"Artificial Intelligence\"\n",
"},\n",
"\"correct\": \"c\"\n",
"},\n",
"\"4\": {\n",
"\"mcq\": \"What is the main goal of machine learning in terms of generalization?\",\n",
"\"options\": {\n",
"\"a\": \"Minimize loss on training set\",\n",
"\"b\": \"Minimize loss on unseen samples\",\n",
"\"c\": \"Maximize loss on unseen samples\",\n",
"\"d\": \"Maximize loss on training set\"\n",
"},\n",
"\"correct\": \"b\"\n",
"},\n",
"\"5\": {\n",
"\"mcq\": \"What is the branch of theoretical computer science that deals with the computational analysis of machine learning algorithms?\",\n",
"\"options\": {\n",
"\"a\": \"Artificial Intelligence Theory\",\n",
"\"b\": \"Data Mining Theory\",\n",
"\"c\": \"Computational Learning Theory\",\n",
"\"d\": \"Statistical Learning Theory\"\n",
"},\n",
"\"correct\": \"c\"\n",
"}\n",
"}\n",
"\n",
"Check from an expert English Writer of the above quiz:\n",
"\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
}
],
"source": [
"#https://python.langchain.com/docs/modules/model_io/llms/token_usage_tracking\n",
"\n",
"#How to setup Token Usage Tracking in LangChain\n",
"with get_openai_callback() as cb:\n",
"    response=generate_evaluate_chain(\n",
"        {\n",
"            \"text\": TEXT,\n",
"            \"number\": NUMBER,\n",
"            \"subject\":SUBJECT,\n",
"            \"tone\": TONE,\n",
"            \"response_json\": json.dumps(RESPONSE_JSON)\n",
"        }\n",
"    )"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total Tokens:3670\n",
"Prompt Tokens:3257\n",
"Completion Tokens:413\n",
"Total Cost:0.005711500000000001\n"
]
}
],
"source": [
"print(f\"Total Tokens:{cb.total_tokens}\")\n",
"print(f\"Prompt Tokens:{cb.prompt_tokens}\")\n",
"print(f\"Completion Tokens:{cb.completion_tokens}\")\n",
"print(f\"Total Cost:{cb.total_cost}\")"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'text': 'The term machine learning was coined in 1959 by Arthur Samuel, an IBM employee and pioneer in the field of computer gaming and artificial intelligence.[9][10] The synonym self-teaching computers was also used in this time period.[11][12]\\n\\nAlthough the earliest machine learning model was introduced in the 1950s when Arthur Samuel invented a program that calculated the winning chance in checkers for each side, the history of machine learning roots back to decades of human desire and effort to study human cognitive processes.[13] In 1949, Canadian psychologist Donald Hebb published the book The Organization of Behavior, in which he introduced a theoretical neural structure formed by certain interactions among nerve cells.[14] Hebb\\'s model of neurons interacting with one another set a groundwork for how AIs and machine learning algorithms work under nodes, or artificial neurons used by computers to communicate data.[13] Other researchers who have studied human cognitive systems contributed to the modern machine learning technologies as well, including logician Walter Pitts and Warren McCulloch, who proposed the early mathematical models of neural networks to come up with algorithms that mirror human thought processes.[13]\\n\\nBy the early 1960s an experimental \"learning machine\" with punched tape memory, called Cybertron, had been developed by Raytheon Company to analyze sonar signals, electrocardiograms, and speech patterns using rudimentary reinforcement learning. It was repetitively \"trained\" by a human operator/teacher to recognize patterns and equipped with a \"goof\" button to cause it to re-evaluate incorrect decisions.[15] A representative book on research into machine learning during the 1960s was Nilsson\\'s book on Learning Machines, dealing mostly with machine learning for pattern classification.[16] Interest related to pattern recognition continued into the 1970s, as described by Duda and Hart in 1973.[17] In 1981 a report was given on using teaching strategies so that an artificial neural network learns to recognize 40 characters (26 letters, 10 digits, and 4 special symbols) from a computer terminal.[18]\\n\\nTom M. Mitchell provided a widely quoted, more formal definition of the algorithms studied in the machine learning field: \"A computer program is said to learn from experience E with respect to some class of tasks T and performance measure P if its performance at tasks in T, as measured by P, improves with experience E.\"[19] This definition of the tasks in which machine learning is concerned offers a fundamentally operational definition rather than defining the field in cognitive terms. This follows Alan Turing\\'s proposal in his paper \"Computing Machinery and Intelligence\", in which the question \"Can machines think?\" is replaced with the question \"Can machines do what we (as thinking entities) can do?\".[20]\\n\\nModern-day machine learning has two objectives. One is to classify data based on models which have been developed; the other purpose is to make predictions for future outcomes based on these models. A hypothetical algorithm specific to classifying data may use computer vision of moles coupled with supervised learning in order to train it to classify the cancerous moles. 
A machine learning algorithm for stock trading may inform the trader of future potential predictions.[21]\\n\\nRelationships to other fields\\nArtificial intelligence\\n\\nMachine learning as subfield of AI[22]\\nAs a scientific endeavor, machine learning grew out of the quest for artificial intelligence (AI). In the early days of AI as an academic discipline, some researchers were interested in having machines learn from data. They attempted to approach the problem with various symbolic methods, as well as what were then termed \"neural networks\"; these were mostly perceptrons and other models that were later found to be reinventions of the generalized linear models of statistics.[23] Probabilistic reasoning was also employed, especially in automated medical diagnosis.[24]: 488 \\n\\nHowever, an increasing emphasis on the logical, knowledge-based approach caused a rift between AI and machine learning. Probabilistic systems were plagued by theoretical and practical problems of data acquisition and representation.[24]: 488  By 1980, expert systems had come to dominate AI, and statistics was out of favor.[25] Work on symbolic/knowledge-based learning did continue within AI, leading to inductive logic programming(ILP), but the more statistical line of research was now outside the field of AI proper, in pattern recognition and information retrieval.[24]: 708–710, 755  Neural networks research had been abandoned by AI and computer science around the same time. This line, too, was continued outside the AI/CS field, as \"connectionism\", by researchers from other disciplines including Hopfield, Rumelhart, and Hinton. Their main success came in the mid-1980s with the reinvention of backpropagation.[24]: 25 \\n\\nMachine learning (ML), reorganized and recognized as its own field, started to flourish in the 1990s. The field changed its goal from achieving artificial intelligence to tackling solvable problems of a practical nature. It shifted focus away from the symbolic approaches it had inherited from AI, and toward methods and models borrowed from statistics, fuzzy logic, and probability theory.[25]\\n\\nData compression\\nThis section is an excerpt from Data compression § Machine learning.[edit]\\nThere is a close connection between machine learning and compression. A system that predicts the posterior probabilities of a sequence given its entire history can be used for optimal data compression (by using arithmetic coding on the output distribution). Conversely, an optimal compressor can be used for prediction (by finding the symbol that compresses best, given the previous history). This equivalence has been used as a justification for using data compression as a benchmark for \"general intelligence\".[26][27][28]\\n\\nAn alternative view can show compression algorithms implicitly map strings into implicit feature space vectors, and compression-based similarity measures compute similarity within these feature spaces. For each compressor C(.) we define an associated vector space ℵ, such that C(.) maps an input string x, corresponding to the vector norm ||~x||. An exhaustive examination of the feature spaces underlying all compression algorithms is precluded by space; instead, feature vectors chooses to examine three representative lossless compression methods, LZW, LZ77, and PPM.[29]\\n\\nAccording to AIXI theory, a connection more directly explained in Hutter Prize, the best possible compression of x is the smallest possible software that generates x. 
For example, in that model, a zip file\\'s compressed size includes both the zip file and the unzipping software, since you can not unzip it without both, but there may be an even smaller combined form.\\n\\nExamples of AI-powered audio/video compression software include VP9, NVIDIA Maxine, AIVC, AccMPEG.[30] Examples of software that can perform AI-powered image compression include OpenCV, TensorFlow, MATLAB\\'s Image Processing Toolbox (IPT) and High-Fidelity Generative Image Compression.[31]\\n\\nIn unsupervised machine learning, k-means clustering can be utilized to compress data by grouping similar data points into clusters. This technique simplifies handling extensive datasets that lack predefined labels and finds widespread use in fields such as image compression.[32] Large language models are also capable of lossless data compression.[33]\\nData mining\\nMachine learning and data mining often employ the same methods and overlap significantly, but while machine learning focuses on prediction, based on known properties learned from the training data, data mining focuses on the discovery of (previously) unknown properties in the data (this is the analysis step of knowledge discovery in databases). Data mining uses many machine learning methods, but with different goals; on the other hand, machine learning also employs data mining methods as \"unsupervised learning\" or as a preprocessing step to improve learner accuracy. Much of the confusion between these two research communities (which do often have separate conferences and separate journals, ECML PKDD being a major exception) comes from the basic assumptions they work with: in machine learning, performance is usually evaluated with respect to the ability to reproduce known knowledge, while in knowledge discovery and data mining (KDD) the key task is the discovery of previously unknown knowledge. Evaluated with respect to known knowledge, an uninformed (unsupervised) method will easily be outperformed by other supervised methods, while in a typical KDD task, supervised methods cannot be used due to the unavailability of training data.\\n\\nMachine learning also has intimate ties to optimization: many learning problems are formulated as minimization of some loss function on a training set of examples. Loss functions express the discrepancy between the predictions of the model being trained and the actual problem instances (for example, in classification, one wants to assign a label to instances, and models are trained to correctly predict the pre-assigned labels of a set of examples).[34]\\n\\nGeneralization\\nThe difference between optimization and machine learning arises from the goal of generalization: while optimization algorithms can minimize the loss on a training set, machine learning is concerned with minimizing the loss on unseen samples. Characterizing the generalization of various learning algorithms is an active topic of current research, especially for deep learning algorithms.\\n\\nStatistics\\nMachine learning and statistics are closely related fields in terms of methods, but distinct in their principal goal: statistics draws population inferences from a sample, while machine learning finds generalizable predictive patterns.[35] According to Michael I. 
Jordan, the ideas of machine learning, from methodological principles to theoretical tools, have had a long pre-history in statistics.[36] He also suggested the term data science as a placeholder to call the overall field.[36]\\n\\nConventional statistical analyses require the a priori selection of a model most suitable for the study data set. In addition, only significant or theoretically relevant variables based on previous experience are included for analysis. In contrast, machine learning is not built on a pre-structured model; rather, the data shape the model by detecting underlying patterns. The more variables (input) used to train the model, the more accurate the ultimate model will be.[37]\\n\\nLeo Breiman distinguished two statistical modeling paradigms: data model and algorithmic model,[38] wherein \"algorithmic model\" means more or less the machine learning algorithms like Random Forest.\\n\\nSome statisticians have adopted methods from machine learning, leading to a combined field that they call statistical learning.[39]\\n\\nStatistical physics\\nAnalytical and computational techniques derived from deep-rooted physics of disordered systems can be extended to large-scale problems, including machine learning, e.g., to analyze the weight space of deep neural networks.[40] Statistical physics is thus finding applications in the area of medical diagnostics.[41]\\n\\nTheory\\nMain articles: Computational learning theory and Statistical learning theory\\nA core objective of a learner is to generalize from its experience.[6][42] Generalization in this context is the ability of a learning machine to perform accurately on new, unseen examples/tasks after having experienced a learning data set. The training examples come from some generally unknown probability distribution (considered representative of the space of occurrences) and the learner has to build a general model about this space that enables it to produce sufficiently accurate predictions in new cases.\\n\\nThe computational analysis of machine learning algorithms and their performance is a branch of theoretical computer science known as computational learning theory via the Probably Approximately Correct Learning (PAC) model. Because training sets are finite and the future is uncertain, learning theory usually does not yield guarantees of the performance of algorithms. Instead, probabilistic bounds on the performance are quite common. The bias–variance decomposition is one way to quantify generalization error.\\n\\nFor the best performance in the context of generalization, the complexity of the hypothesis should match the complexity of the function underlying the data. If the hypothesis is less complex than the function, then the model has under fitted the data. If the complexity of the model is increased in response, then the training error decreases. But if the hypothesis is too complex, then the model is subject to overfitting and generalization will be poorer.[43]\\n\\nIn addition to performance bounds, learning theorists study the time complexity and feasibility of learning. In computational learning theory, a computation is considered feasible if it can be done in polynomial time. There are two kinds of time complexity results: Positive results show that a certain class of functions can be learned in polynomial time. Negative results show that certain classes cannot be learned in polynomial time.\\n\\n',\n",
" 'number': 5,\n",
" 'subject': 'Machine learning',\n",
" 'tone': 'simple',\n",
" 'response_json': '{\"1\": {\"mcq\": \"multiple choice question\", \"options\": {\"a\": \"choice here\", \"b\": \"choice here\", \"c\": \"choice here\", \"d\": \"choice here\"}, \"correct\": \"correct answer\"}, \"2\": {\"mcq\": \"multiple choice question\", \"options\": {\"a\": \"choice here\", \"b\": \"choice here\", \"c\": \"choice here\", \"d\": \"choice here\"}, \"correct\": \"correct answer\"}, \"3\": {\"mcq\": \"multiple choice question\", \"options\": {\"a\": \"choice here\", \"b\": \"choice here\", \"c\": \"choice here\", \"d\": \"choice here\"}, \"correct\": \"correct answer\"}}',\n",
" 'quiz': '{\\n\"1\": {\\n\"mcq\": \"What term was coined in 1959 by Arthur Samuel to describe the field of computer gaming and artificial intelligence?\",\\n\"options\": {\\n\"a\": \"Artificial Intelligence\",\\n\"b\": \"Machine Learning\",\\n\"c\": \"Data Mining\",\\n\"d\": \"Neural Networks\"\\n},\\n\"correct\": \"b\"\\n},\\n\"2\": {\\n\"mcq\": \"What was the name of the \\'learning machine\\' with punched tape memory developed by Raytheon Company in the early 1960s?\",\\n\"options\": {\\n\"a\": \"Cyberdyne\",\\n\"b\": \"Cybernet\",\\n\"c\": \"Cybertron\",\\n\"d\": \"Cyberlink\"\\n},\\n\"correct\": \"c\"\\n},\\n\"3\": {\\n\"mcq\": \"Which field focuses on population inferences from a sample, while the other finds generalizable predictive patterns?\",\\n\"options\": {\\n\"a\": \"Machine Learning\",\\n\"b\": \"Data Mining\",\\n\"c\": \"Statistics\",\\n\"d\": \"Artificial Intelligence\"\\n},\\n\"correct\": \"c\"\\n},\\n\"4\": {\\n\"mcq\": \"What is the main goal of machine learning in terms of generalization?\",\\n\"options\": {\\n\"a\": \"Minimize loss on training set\",\\n\"b\": \"Minimize loss on unseen samples\",\\n\"c\": \"Maximize loss on unseen samples\",\\n\"d\": \"Maximize loss on training set\"\\n},\\n\"correct\": \"b\"\\n},\\n\"5\": {\\n\"mcq\": \"What is the branch of theoretical computer science that deals with the computational analysis of machine learning algorithms?\",\\n\"options\": {\\n\"a\": \"Artificial Intelligence Theory\",\\n\"b\": \"Data Mining Theory\",\\n\"c\": \"Computational Learning Theory\",\\n\"d\": \"Statistical Learning Theory\"\\n},\\n\"correct\": \"c\"\\n}\\n}',\n",
" 'review': 'The complexity of the quiz questions is suitable for machine learning students. The questions cover key concepts in the field and require a good understanding of the terminology and principles. No changes are needed as the tone and difficulty level are appropriate for the target audience.'}"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"response"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"quiz=response.get(\"quiz\")"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [],
"source": [
"quiz=json.loads(quiz)"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [],
"source": [
"quiz_table_data = []\n",
"for key, value in quiz.items():\n",
"    mcq = value[\"mcq\"]\n",
"    options = \" | \".join(\n",
"        [\n",
"            f\"{option}: {option_value}\"\n",
"            for option, option_value in value[\"options\"].items()\n",
"        ]\n",
"    )\n",
"    correct = value[\"correct\"]\n",
"    quiz_table_data.append({\"MCQ\": mcq, \"Choices\": options, \"Correct\": correct})"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[{'MCQ': 'What term was coined in 1959 by Arthur Samuel to describe the field of computer gaming and artificial intelligence?',\n",
"  'Choices': 'a: Artificial Intelligence | b: Machine Learning | c: Data Mining | d: Neural Networks',\n",
"  'Correct': 'b'},\n",
" {'MCQ': \"What was the name of the 'learning machine' with punched tape memory developed by Raytheon Company in the early 1960s?\",\n",
"  'Choices': 'a: Cyberdyne | b: Cybernet | c: Cybertron | d: Cyberlink',\n",
"  'Correct': 'c'},\n",
" {'MCQ': 'Which field focuses on population inferences from a sample, while the other finds generalizable predictive patterns?',\n",
"  'Choices': 'a: Machine Learning | b: Data Mining | c: Statistics | d: Artificial Intelligence',\n",
"  'Correct': 'c'},\n",
" {'MCQ': 'What is the main goal of machine learning in terms of generalization?',\n",
"  'Choices': 'a: Minimize loss on training set | b: Minimize loss on unseen samples | c: Maximize loss on unseen samples | d: Maximize loss on training set',\n",
"  'Correct': 'b'},\n",
" {'MCQ': 'What is the branch of theoretical computer science that deals with the computational analysis of machine learning algorithms?',\n",
"  'Choices': 'a: Artificial Intelligence Theory | b: Data Mining Theory | c: Computational Learning Theory | d: Statistical Learning Theory',\n",
"  'Correct': 'c'}]"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"quiz_table_data "
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
"quiz=pd.DataFrame(quiz_table_data)"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
"quiz.to_csv(\"machinelearning.csv\",index=False)"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'03_09_2024_11_59_14'"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from datetime import datetime\n",
"datetime.now().strftime('%m_%d_%Y_%H_%M_%S')"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'03_11_2024_09_28_03'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from datetime import datetime\n",
"datetime.now().strftime('%m_%d_%Y_%H_%M_%S')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
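Note: the post-processing cells above (execution counts 45 through 54) condense into a single pipeline. A minimal sketch, assuming `response` is the SequentialChain output shown in the cell output above:

import json
import pandas as pd

# "quiz" holds the model's JSON string; parse it into a dict of questions
quiz = json.loads(response.get("quiz"))

# flatten each question into one row: question text, pipe-joined options, answer key
rows = [
    {
        "MCQ": q["mcq"],
        "Choices": " | ".join(f"{k}: {v}" for k, v in q["options"].items()),
        "Correct": q["correct"],
    }
    for q in quiz.values()
]
pd.DataFrame(rows).to_csv("machinelearning.csv", index=False)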
logs/03_11_2024_09_36_10.log
ADDED
File without changes
logs/03_11_2024_09_41_37.log
ADDED
@@ -0,0 +1 @@
[2024-03-11 09:41:37,455] 3 root - INFO - hi, i am going to start my excution...
logs/03_11_2024_11_53_51.log
ADDED
File without changes
logs/03_11_2024_12_29_49.log
ADDED
File without changes
logs/03_11_2024_12_31_09.log
ADDED
File without changes
logs/03_11_2024_12_32_21.log
ADDED
File without changes
logs/03_11_2024_12_32_57.log
ADDED
File without changes
logs/03_11_2024_12_33_22.log
ADDED
@@ -0,0 +1 @@
[2024-03-11 12:35:23,011] 1026 httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 401 Unauthorized"
logs/03_11_2024_12_37_49.log
ADDED
File without changes
logs/03_11_2024_12_38_02.log
ADDED
@@ -0,0 +1,2 @@
[2024-03-11 12:38:39,834] 1026 httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
[2024-03-11 12:38:41,387] 1026 httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
mcqgenrator.egg-info/PKG-INFO
ADDED
@@ -0,0 +1,10 @@
Metadata-Version: 2.1
Name: mcqgenrator
Version: 0.0.1
Author: Manikandan-Alagu
Author-email: [email protected]
Requires-Dist: openai
Requires-Dist: langchain
Requires-Dist: streamlit
Requires-Dist: python-dotenv
Requires-Dist: PyPDF2
mcqgenrator.egg-info/SOURCES.txt
ADDED
@@ -0,0 +1,12 @@
README.md
setup.py
mcqgenrator.egg-info/PKG-INFO
mcqgenrator.egg-info/SOURCES.txt
mcqgenrator.egg-info/dependency_links.txt
mcqgenrator.egg-info/requires.txt
mcqgenrator.egg-info/top_level.txt
src/__init__.py
src/mcqgenerator/__init__.py
src/mcqgenerator/logger.py
src/mcqgenerator/mcqgenerator.py
src/mcqgenerator/utilis.py
mcqgenrator.egg-info/dependency_links.txt
ADDED
@@ -0,0 +1 @@

mcqgenrator.egg-info/requires.txt
ADDED
@@ -0,0 +1,5 @@
openai
langchain
streamlit
python-dotenv
PyPDF2
mcqgenrator.egg-info/top_level.txt
ADDED
@@ -0,0 +1 @@
src
src/__init__.py
ADDED
File without changes
src/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (152 Bytes).
src/mcqgenerator/__init__.py
ADDED
File without changes
src/mcqgenerator/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (165 Bytes).
src/mcqgenerator/__pycache__/logger.cpython-312.pyc
ADDED
Binary file (911 Bytes).
src/mcqgenerator/__pycache__/mcqgenerator.cpython-312.pyc
ADDED
Binary file (2.36 kB).
src/mcqgenerator/__pycache__/utilis.cpython-312.pyc
ADDED
Binary file (2.13 kB).
src/mcqgenerator/logger.py
ADDED
@@ -0,0 +1,19 @@
import logging
import os
from datetime import datetime


# timestamped log file name, e.g. 03_11_2024_09_41_37.log
LOG_FILE = f"{datetime.now().strftime('%m_%d_%Y_%H_%M_%S')}.log"

# collect all log files under ./logs, creating the directory if needed
log_path = os.path.join(os.getcwd(), "logs")
os.makedirs(log_path, exist_ok=True)

LOG_FILEPATH = os.path.join(log_path, LOG_FILE)

# importing this module configures the root logger once for the whole app
logging.basicConfig(
    level=logging.INFO,
    filename=LOG_FILEPATH,
    format="[%(asctime)s] %(lineno)d %(name)s - %(levelname)s - %(message)s",
)
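A minimal usage sketch (hypothetical caller): importing the module runs logging.basicConfig once, so any later call on the root logger lands in the timestamped file under logs/:

from src.mcqgenerator.logger import logging

# appends to logs/<MM_DD_YYYY_HH_MM_SS>.log created at import time
logging.info("starting MCQ generation...")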
src/mcqgenerator/mcqgenerator.py
ADDED
@@ -0,0 +1,66 @@
import os
import json
import pandas as pd
import traceback
from dotenv import load_dotenv
from src.mcqgenerator.utilis import read_file, get_table_data
from src.mcqgenerator.logger import logging

from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.chains import SequentialChain

# pull OPENAI_API_KEY from a local .env file
load_dotenv()

key = os.getenv("OPENAI_API_KEY")

llm = ChatOpenAI(openai_api_key=key, model_name="gpt-3.5-turbo", temperature=0.3)

TEMPLATE = """
Text:{text}
You are an expert MCQ maker. Given the above text, it is your job to \
create a quiz of {number} multiple choice questions for {subject} students in {tone} tone.
Make sure the questions are not repeated, and check that all the questions conform to the text as well.
Make sure to format your response like RESPONSE_JSON below and use it as a guide. \
Ensure to make {number} MCQs.
### RESPONSE_JSON
{response_json}

"""

quiz_generation_prompt = PromptTemplate(
    input_variables=["text", "number", "subject", "tone", "response_json"],
    template=TEMPLATE
)

# first chain: generate the quiz JSON
quiz_chain = LLMChain(llm=llm, prompt=quiz_generation_prompt, output_key="quiz", verbose=True)

TEMPLATE2 = """
You are an expert English grammarian and writer. Given a multiple choice quiz for {subject} students, \
you need to evaluate the complexity of the questions and give a complete analysis of the quiz. Use at most 50 words for the complexity analysis.
If the quiz is not on par with the cognitive and analytical abilities of the students, \
update the quiz questions that need to be changed and adjust the tone so that it perfectly fits the students' abilities.
Quiz_MCQs:
{quiz}

Check from an expert English writer of the above quiz:
"""
quiz_evaluation_prompt = PromptTemplate(input_variables=["subject", "quiz"], template=TEMPLATE2)

# second chain: review the generated quiz
review_chain = LLMChain(llm=llm, prompt=quiz_evaluation_prompt, output_key="review", verbose=True)

# run generation then review in sequence, exposing both outputs
generate_evaluate_chain = SequentialChain(
    chains=[quiz_chain, review_chain],
    input_variables=["text", "number", "subject", "tone", "response_json"],
    output_variables=["quiz", "review"],
    verbose=True,
)
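A minimal driver sketch for the chain above; the file name data.txt and the single-entry RESPONSE_JSON template are assumptions for illustration (the notebook passes a three-entry template):

import json
from src.mcqgenerator.mcqgenerator import generate_evaluate_chain

# skeleton the model is asked to imitate
RESPONSE_JSON = {
    "1": {
        "mcq": "multiple choice question",
        "options": {"a": "choice here", "b": "choice here",
                    "c": "choice here", "d": "choice here"},
        "correct": "correct answer",
    }
}

with open("data.txt") as f:  # hypothetical source text
    text = f.read()

# the SequentialChain returns both declared output variables
response = generate_evaluate_chain({
    "text": text,
    "number": 5,
    "subject": "Machine learning",
    "tone": "simple",
    "response_json": json.dumps(RESPONSE_JSON),
})
quiz_str, review = response["quiz"], response["review"]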
src/mcqgenerator/utilis.py
ADDED
@@ -0,0 +1,39 @@
import json
import PyPDF2
import traceback

def read_file(file):
    if file.name.endswith(".pdf"):
        try:
            # PdfReader is the current PyPDF2 API; the old PdfFileReader
            # class is deprecated and lacks the .pages iterator used below
            pdf_reader = PyPDF2.PdfReader(file)
            text = ""
            for page in pdf_reader.pages:
                text += page.extract_text()
            return text

        except Exception as e:
            raise Exception("error reading the PDF file") from e

    elif file.name.endswith(".txt"):
        return file.read().decode("utf-8")

    else:
        raise Exception(
            "unsupported file format: only PDF and text files are supported"
        )

def get_table_data(quiz_str):
    try:
        quiz_dict = json.loads(quiz_str)
        quiz_table_data = []

        # flatten each question into one row: text, pipe-joined options, answer key
        for key, value in quiz_dict.items():
            mcq = value["mcq"]
            options = " | ".join(
                [f"{option}: {option_value}" for option, option_value in value["options"].items()]
            )
            correct = value["correct"]
            quiz_table_data.append({"MCQ": mcq, "Choices": options, "Correct": correct})

        return quiz_table_data

    except Exception as e:
        traceback.print_exception(type(e), e, e.__traceback__)
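Tying the two helpers together (a sketch; the input file name is hypothetical, and the file object only needs the .name and .read() attributes that Streamlit's uploader also provides):

import pandas as pd
from src.mcqgenerator.utilis import read_file, get_table_data

# read_file accepts .pdf or .txt file objects
with open("machinelearning.txt", "rb") as f:  # hypothetical input file
    text = read_file(f)

# get_table_data flattens the chain's quiz JSON string into CSV-ready rows
quiz_str = '{"1": {"mcq": "Sample question?", "options": {"a": "w", "b": "x", "c": "y", "d": "z"}, "correct": "a"}}'
pd.DataFrame(get_table_data(quiz_str)).to_csv("machinelearning.csv", index=False)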