Lim0011 committed on
Commit
8d20eca
·
1 Parent(s): 1b5f3f9

rm extract

Browse files
.history/app_20250404175511.py ADDED
@@ -0,0 +1,335 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from pathlib import Path
3
+ from reactagent.environment import Environment
4
+ from reactagent.agents.agent_research import ResearchAgent
5
+ from reactagent.runner import create_parser
6
+ from reactagent import llm
7
+ from reactagent.users.user import User
8
+ import os
9
+ import json
10
+
11
+
12
# Global variables to store session state.
# NOTE: these are module-level, so they are shared by every browser session
# served by this process.
env = None
agent = None
state_example = False   # set once an example has been clicked
state_extract = False   # set once research elements have been extracted
state_generate = False  # set once hypothesis/plan have been generated
state_agent = False     # set once the ExperimentAgent has been started
state_complete = False  # set once the scripted agent run has finished
index_ex = "1"          # key (as a string) of the selected example

# Placeholder captions; this name is reassigned further down the module with
# the full paper texts built from the example data.
example_text = [
    "Research Paper 1: Dataset and Baseline for Automatic Student Feedback Analysis",
    "Research Paper 2: An Empirical Study on the Impact of Code Review on Software Quality",
]
26
+
27
+
28
+ # Load example JSON file
29
def load_example_data(example_dir="example"):
    """Load the demo examples and inline the code files they reference.

    Reads ``example_data.json`` from *example_dir*, then replaces each
    example's ``code_init`` and ``code_final`` entries (file names relative to
    *example_dir*) with the text of the named files.

    Args:
        example_dir: Directory containing ``example_data.json`` and the code
            files. Defaults to ``"example"``, the location used so far.

    Returns:
        dict: The parsed JSON mapping. If a referenced code file is missing,
        a message is printed and that entry keeps its file name.
    """
    # Explicit UTF-8: without it the platform's locale encoding is used, which
    # can garble or reject non-ASCII text in the examples.
    json_path = os.path.join(example_dir, "example_data.json")
    with open(json_path, "r", encoding="utf-8") as json_file:
        example_data = json.load(json_file)

    for idx, example in example_data.items():
        for field in ("code_init", "code_final"):
            file = example[field]
            try:
                with open(os.path.join(example_dir, file), "r", encoding="utf-8") as f:
                    example[field] = f.read()
            except FileNotFoundError:
                print(f"File not found: {file}. Skipping key: {idx}")
    return example_data
47
+
48
example_data = load_example_data()


# Function to handle the selection of an example and populate the respective fields
def load_example(example_id):
    """Return the display fields of one predefined example.

    As a side effect, records the selection in the module-level ``index_ex``
    (always stored as a string).

    Args:
        example_id: Key of the example in ``example_data``; converted with
            ``str`` before the lookup.

    Returns:
        list: ``[paper_text, tasks, gaps, keywords, recent_works]`` where
        ``paper_text`` combines title and abstract and ``recent_works`` is the
        example's list joined with newlines.

    Raises:
        KeyError: If the example or one of its fields is missing.
    """
    global index_ex
    index_ex = str(example_id)
    example = example_data[index_ex]
    paper_text = 'Title:\t' + example['title'] + '\n\nAbstract:\t' + example['abstract']
    recent_works = "\n".join(example['recent_works'])
    return [
        paper_text,
        example['research_tasks'],
        example['research_gaps'],
        example['keywords'],
        recent_works,
    ]


# Paper texts of the two bundled examples. Building them through
# load_example() leaves index_ex at "2" once this line has run.
example_text = [load_example(1)[0], load_example(2)[0]]
63
+
64
+
65
+ # Function to handle example clicks
66
def load_example_and_set_index(paper_text_input):
    """Handle a click on one of the examples.

    Looks up which entry of ``example_text`` was clicked, loads that example
    and marks both the "example chosen" and "elements extracted" stages as
    done. The extraction flag is set here because this callback's return
    value already fills the tasks/gaps/keywords/recent-works fields; no other
    active callback sets it, yet the experiment agent refuses to start
    without it.

    Args:
        paper_text_input: The clicked example text; must be an element of
            ``example_text``.

    Returns:
        list: ``[paper_text, tasks, gaps, keywords, recent_works]`` as
        returned by ``load_example``.

    Raises:
        ValueError: If the text is not one of the known examples.
    """
    global index_ex, state_example, state_extract
    # Example keys are 1-based strings, list positions are 0-based.
    index_ex = str(example_text.index(paper_text_input) + 1)
    example = load_example(index_ex)
    # Only flip the flags once the lookup above has succeeded.
    state_example = True
    state_extract = True
    return example
73
+
74
+
75
+
76
+ ########## Phase 1 ##############
77
+
78
def extract_research_elements(paper_text):
    """Return the stored research elements for the selected example.

    Nothing is extracted from the text itself: the text is only checked
    against the title of the selected example, and the predefined values are
    returned.

    Args:
        paper_text: Text currently shown in the paper box.

    Returns:
        tuple: ``(tasks, gaps, keywords, recent_works)``; four empty strings
        if no example was chosen, the text is empty, or the text does not
        start with the selected example's title line.
    """
    global state_extract
    nothing = ("", "", "", "")
    if not state_example or paper_text == "":
        return nothing

    example = example_data[index_ex]
    if not paper_text.strip().startswith("Title:\t" + example["title"]):
        print("Mismatch detected.")
        print(paper_text)
        return nothing

    # Mark the stage as done only after the text has been validated, so a
    # mismatch does not unlock the later stages.
    state_extract = True
    recent_works = "\n".join(example['recent_works'])
    return example['research_tasks'], example['research_gaps'], example['keywords'], recent_works
93
+
94
+
95
+ # Step 2: Generate Research Hypothesis and Experiment Plan
96
def generate_and_store(paper_text, tasks, gaps, keywords, recent_works):
    """Return the predefined hypothesis and experiment plan of the example.

    The five arguments mirror the UI inputs wired to this callback; none of
    them is read, the values come from ``example_data[index_ex]``.

    Returns:
        tuple: ``(hypothesis, experiment_plan, hypothesis, experiment_plan)``
        (once for the visible boxes, once for the state holders), or four
        empty strings if no example has been chosen yet.
    """
    global state_generate
    if not state_example:
        return "", "", "", ""
    state_generate = True
    example = example_data[index_ex]
    hypothesis = example['hypothesis']
    experiment_plan = example['experiment_plan']
    return hypothesis, experiment_plan, hypothesis, experiment_plan
104
+
105
+ ########## Phase 2 & 3 ##############
106
def start_experiment_agent(hypothesis, plan):
    """Start (or reset) the scripted ExperimentAgent run.

    Args:
        hypothesis: Generated hypothesis (not read; kept for the UI wiring).
        plan: Generated experiment plan (not read; kept for the UI wiring).

    Returns:
        tuple: ``(code, log, response, feedback)`` - the example's initial
        code, the predefined action log and two empty strings. Four empty
        strings are returned when an earlier stage has not been completed.
    """
    global state_agent, step_index, state_complete
    if not (state_extract and state_generate and state_example):
        # This callback is wired to four output components, so the early
        # exit has to supply four values as well (it used to return three).
        return "", "", "", ""
    state_agent = True
    step_index = 0
    state_complete = False
    return example_data[index_ex]['code_init'], predefined_action_log, "", ""
115
+
116
def submit_feedback(user_feedback, history, previous_response):
    """Advance the scripted agent run by one step.

    Args:
        user_feedback: Text the user typed into the feedback box.
        history: Current content of the execution log.
        previous_response: The agent response currently displayed.

    Returns:
        tuple: ``(log, response, code, feedback)``. While steps remain, the
        log gains the previous response, the user feedback and the new
        response; afterwards the log is returned unchanged and the response
        is ``"Agent Finished."``. ``feedback`` is always ``""`` so the input
        box is cleared. Four empty strings are returned when an earlier
        stage has not been completed.
    """
    global step_index, state_complete
    if not (state_extract and state_generate and state_agent and state_example):
        # Four output components are wired to this callback, so four values
        # are needed here too (the early exit used to return three).
        return "", "", "", ""

    # Advance exactly once per submission; a second increment after building
    # the response used to make every other scripted step get skipped.
    step_index += 1
    msg = history
    if step_index < len(process_steps):
        msg += previous_response + "\nUser feedback:" + user_feedback + "\n\n"
        response = info_to_message(process_steps[step_index])  # Convert dictionary to formatted string
        response += "Please provide feedback based on the history, response entries, and observation, and questions: "
        msg += response
    else:
        state_complete = True
        response = "Agent Finished."

    # Show the final implementation once the run is complete and the initial
    # one until then (the two were swapped before).
    example = example_data[index_ex]
    code = example['code_final'] if state_complete else example['code_init']
    return msg, response, code, ""
134
+
135
def load_phase_2_inputs(hypothesis, plan):
    """Pass hypothesis and plan through and add a placeholder for the code view.

    Returns:
        tuple: ``(hypothesis, plan, placeholder_comment)``.
    """
    placeholder = "# Code implementation will be displayed here after Start ExperimentAgent."
    return hypothesis, plan, placeholder
137
+
138
+
139
+
140
# Log text shown when the ExperimentAgent is started.
predefined_action_log = """
[Reasoning]: To understand the initial structure and functionality of train.py for effective improvements.
[Action]: Inspect Script (train.py)
Input: {"script_name": "train.py", "start_line_number": "1", "end_line_number": "74"}
Objective: Understand the training script, including data processing, [...]
[Observation]: The train.py script imports [...]. Sets random seeds [...]. Defines [...] Placeholder functions [...] exist without implementation. [...]
[Feedback]: The script structure is clear, but key functions (train_model, predict) need proper implementation for proposed model training and prediction.\n
"""


# Canned training output used as the observation of the last scripted step.
predefined_observation = """
Epoch [1/10],
Train MSE: 0.543,
Test MSE: 0.688
Epoch [2/10],
Train MSE: 0.242,
Test MSE: 0.493\n
"""

# Scripted agent steps, indexed by the global step_index.
process_steps = [
    {
        "Action": "Inspect Script Lines (train.py)",
        "Observation": (
            "The train.py script imports necessary libraries (e.g., pandas, sklearn, torch). "
            "Sets random seeds for reproducibility. Defines compute_metrics_for_regression function "
            "to calculate RMSE for different dimensions. Placeholder functions train_model and "
            "predict exist without implementations."
        ),
    },
    {
        "Action": "Execute Script (train.py)",
        "Observation": (
            "The script executed successfully. Generated embeddings using the BERT model. Completed "
            "the training process without errors. Metrics calculation placeholders indicated areas needing implementation."
        ),
    },
    {
        "Action": "Edit Script (train.py)",
        "Observation": (
            "Edited train.py to separate data loading, model definition, training loop, and evaluation into distinct functions. "
            # Trailing space added: the adjacent literals are concatenated and
            # previously rendered as "functionsfor data loading".
            "The edited train.py now has clearly defined functions "
            "for data loading (load_data), model definition (build_model), "
            "training (train_model), and evaluation (evaluate_model). Similarly, eval.py is reorganized to load the model and perform predictions efficiently."
        ),
    },
    {
        "Action": "Retrieve Model",
        "Observation": "CNN and BiLSTM retrieved.",
    },
    {
        "Action": "Execute Script (train.py)",
        "Observation": (
            "The model trained over the specified number of epochs. Training and validation loss values are recorded for each epoch, "
            "the decrease in loss indicates improved model performance."
        ),
    },
    {
        "Action": "Evaluation",
        "Observation": predefined_observation,
    },
]
202
def info_to_message(info):
    """Format a step dictionary as a text block for display.

    Every key becomes a section made of a 64-dash rule, ``key:`` and the
    value on the following line. A value that is itself a dict is first
    flattened to ``key:``/value lines and passed through
    ``User.indent_text(..., 2)``.

    Args:
        info: Mapping of section title to a value or a nested dict.

    Returns:
        str: The concatenated sections; ``""`` for an empty mapping.
    """
    sections = []
    for k, v in info.items():
        if isinstance(v, dict):
            nested = "".join(f"{k2}:\n {v2}\n" for k2, v2 in v.items())
            # NOTE(review): indentation applied to nested dict values only -
            # confirm against the intended layout.
            v = User.indent_text(nested, 2)
        sections.append('-' * 64 + '\n' + f"{k}:\n{v}\n")
    # Join once instead of growing a string inside the loop.
    return "".join(sections)
215
+
216
+
217
def handle_example_click(example_index):
    """Select an example by index and return its display fields.

    Args:
        example_index: Key of the example; stored in ``index_ex`` as a
            string, like everywhere else in this module.

    Returns:
        list: The value returned by ``load_example`` for that example.
    """
    global index_ex
    # Keep index_ex a string so lookups in example_data stay consistent.
    index_ex = str(example_index)
    return load_example(index_ex)
221
+
222
+ # Gradio Interface
223
# UI definition. Every callback wired below must return one value per
# component listed in its ``outputs``.
with gr.Blocks(css=".gr-examples-label {display: none;}", theme=gr.themes.Default()) as app:
    gr.Markdown("# MLR- Copilot: Machine Learning Research based on LLM Agents")
    gr.Markdown("### ")
    gr.Markdown("## <span style='color:Orange;'> This UI is for predefined example workflow demo only.</span>")
    gr.Markdown("## <span style='color:Orange;'> To reproduce the results please use [Github](https://github.com/du-nlp-lab/MLR-Copilot/)</span>")

    gr.Markdown("MLR-Copilot is a framework where LLMs mimic researchers’ thought processes, designed to enhance the productivity of machine learning research by automating the generation and implementation of research ideas. It begins with a research paper, autonomously generating and validating these ideas, while incorporating human feedback to help reach executable research outcomes.")

    gr.Markdown("## ✅ Click an example at bottom to start ⬇️")

    # Use state variables to store generated hypothesis and experiment plan
    hypothesis_state = gr.State("")
    experiment_plan_state = gr.State("")

    ########## Phase 1: Research Idea Generation Tab ##############
    with gr.Tab("💡Stage 1: Research Idea Generation"):
        gr.Markdown("### Extract Research Elements and Generate Research Ideas")
        with gr.Row():
            with gr.Column():
                paper_text_input = gr.Textbox(value="", lines=10, label="📑 Research Paper Text", interactive=False)

                # extract_button = gr.Button("🔍 Extract Research Elements")
                with gr.Row():
                    tasks_output = gr.Textbox(placeholder="Research task definition", label="Research Tasks", lines=2, interactive=False)
                    gaps_output = gr.Textbox(placeholder="Research gaps of current works", label="Research Gaps", lines=2, interactive=False)
                    keywords_output = gr.Textbox(placeholder="Paper keywords", label="Keywords", lines=2, interactive=False)
                    recent_works_output = gr.Textbox(placeholder="Recent works extracted from Semantic Scholar", label="Recent Works", lines=2, interactive=False)
            with gr.Column():
                with gr.Row():  # Move the button to the top
                    generate_button = gr.Button("✍️ Generate Research Hypothesis & Experiment Plan")
                with gr.Group():
                    gr.Markdown("### 🌟 Research Idea")
                    with gr.Row():
                        hypothesis_output = gr.Textbox(label="Generated Hypothesis", lines=20, interactive=False)
                        experiment_plan_output = gr.Textbox(label="Generated Experiment Plan", lines=20, interactive=False)

        # Step 1: Extract Research Elements
        # extract_button.click(
        #     fn=extract_research_elements,
        #     inputs=paper_text_input,
        #     outputs=[tasks_output, gaps_output, keywords_output, recent_works_output]
        # )

        generate_button.click(
            fn=generate_and_store,
            inputs=[paper_text_input, tasks_output, gaps_output, keywords_output, recent_works_output],
            outputs=[hypothesis_output, experiment_plan_output, hypothesis_state, experiment_plan_state]
        )

        # Clicking an example runs the callback, which fills the paper text
        # and the four research-element boxes.
        gr.Examples(
            examples=example_text,
            inputs=[paper_text_input],
            outputs=[paper_text_input, tasks_output, gaps_output, keywords_output, recent_works_output],
            fn=load_example_and_set_index,
            run_on_click=True,
            # label="⬇️ Click an example to load"
        )

    ########## Phase 2 & 3: Experiment implementation and execution ##############
    with gr.Tab("🧪 Stage 2 & Stage 3: Experiment implementation and execution"):
        gr.Markdown("### Interact with the ExperimentAgent")

        with gr.Row():
            with gr.Column():
                with gr.Group():
                    gr.Markdown("### 🌟 Generated Research Idea")
                    with gr.Row():
                        idea_input = gr.Textbox(label="Generated Research Hypothesis", lines=30, interactive=False)
                        plan_input = gr.Textbox(label="Generated Experiment Plan", lines=30, interactive=False)

            with gr.Column():
                start_exp_agnet = gr.Button("⚙️ Start / Reset ExperimentAgent", elem_classes=["agent-btn"])
                with gr.Group():
                    gr.Markdown("### Implementation + Execution Log")
                    log = gr.Textbox(label="📖 Execution Log", lines=20, interactive=False)
                    code_display = gr.Code(label="🧑‍💻 Implementation", language="python", interactive=False)

            with gr.Column():
                response = gr.Textbox(label="🤖 ExperimentAgent Response", lines=30, interactive=False)
                feedback = gr.Textbox(placeholder="N/A", label="🧑‍🔬 User Feedback", lines=3, interactive=True)
                submit_button = gr.Button("Submit", elem_classes=["Submit-btn"])

        # Mirror a newly generated hypothesis/plan into this tab.
        hypothesis_state.change(
            fn=load_phase_2_inputs,
            inputs=[hypothesis_state, experiment_plan_state],
            outputs=[idea_input, plan_input, code_display]
        )

        # Start research agent
        start_exp_agnet.click(
            fn=start_experiment_agent,
            inputs=[hypothesis_state, experiment_plan_state],
            outputs=[code_display, log, response, feedback]
        )

        submit_button.click(
            fn=submit_feedback,
            inputs=[feedback, log, response],
            outputs=[log, response, code_display, feedback]
        )
331
+
332
+ # Test
333
if __name__ == "__main__":
    # Give the step counter a defined value before any callback runs;
    # start_experiment_agent resets it again on every start.
    step_index = 0
    # share=True asks Gradio for a public share link in addition to the
    # local server.
    app.launch(share=True)
.history/app_20250404175512.py ADDED
@@ -0,0 +1,335 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from pathlib import Path
3
+ from reactagent.environment import Environment
4
+ from reactagent.agents.agent_research import ResearchAgent
5
+ from reactagent.runner import create_parser
6
+ from reactagent import llm
7
+ from reactagent.users.user import User
8
+ import os
9
+ import json
10
+
11
+
12
# Global variables to store session state.
# NOTE: these are module-level, so they are shared by every browser session
# served by this process.
env = None
agent = None
state_example = False   # set once an example has been clicked
state_extract = False   # set once research elements have been extracted
state_generate = False  # set once hypothesis/plan have been generated
state_agent = False     # set once the ExperimentAgent has been started
state_complete = False  # set once the scripted agent run has finished
index_ex = "1"          # key (as a string) of the selected example

# Placeholder captions; this name is reassigned further down the module with
# the full paper texts built from the example data.
example_text = [
    "Research Paper 1: Dataset and Baseline for Automatic Student Feedback Analysis",
    "Research Paper 2: An Empirical Study on the Impact of Code Review on Software Quality",
]
26
+
27
+
28
+ # Load example JSON file
29
def load_example_data(example_dir="example"):
    """Load the demo examples and inline the code files they reference.

    Reads ``example_data.json`` from *example_dir*, then replaces each
    example's ``code_init`` and ``code_final`` entries (file names relative to
    *example_dir*) with the text of the named files.

    Args:
        example_dir: Directory containing ``example_data.json`` and the code
            files. Defaults to ``"example"``, the location used so far.

    Returns:
        dict: The parsed JSON mapping. If a referenced code file is missing,
        a message is printed and that entry keeps its file name.
    """
    # Explicit UTF-8: without it the platform's locale encoding is used, which
    # can garble or reject non-ASCII text in the examples.
    json_path = os.path.join(example_dir, "example_data.json")
    with open(json_path, "r", encoding="utf-8") as json_file:
        example_data = json.load(json_file)

    for idx, example in example_data.items():
        for field in ("code_init", "code_final"):
            file = example[field]
            try:
                with open(os.path.join(example_dir, file), "r", encoding="utf-8") as f:
                    example[field] = f.read()
            except FileNotFoundError:
                print(f"File not found: {file}. Skipping key: {idx}")
    return example_data
47
+
48
example_data = load_example_data()


# Function to handle the selection of an example and populate the respective fields
def load_example(example_id):
    """Return the display fields of one predefined example.

    As a side effect, records the selection in the module-level ``index_ex``
    (always stored as a string).

    Args:
        example_id: Key of the example in ``example_data``; converted with
            ``str`` before the lookup.

    Returns:
        list: ``[paper_text, tasks, gaps, keywords, recent_works]`` where
        ``paper_text`` combines title and abstract and ``recent_works`` is the
        example's list joined with newlines.

    Raises:
        KeyError: If the example or one of its fields is missing.
    """
    global index_ex
    index_ex = str(example_id)
    example = example_data[index_ex]
    paper_text = 'Title:\t' + example['title'] + '\n\nAbstract:\t' + example['abstract']
    recent_works = "\n".join(example['recent_works'])
    return [
        paper_text,
        example['research_tasks'],
        example['research_gaps'],
        example['keywords'],
        recent_works,
    ]


# Paper texts of the two bundled examples. Building them through
# load_example() leaves index_ex at "2" once this line has run.
example_text = [load_example(1)[0], load_example(2)[0]]
63
+
64
+
65
+ # Function to handle example clicks
66
def load_example_and_set_index(paper_text_input):
    """Handle a click on one of the examples.

    Looks up which entry of ``example_text`` was clicked, loads that example
    and marks both the "example chosen" and "elements extracted" stages as
    done. The extraction flag is set here because this callback's return
    value already fills the tasks/gaps/keywords/recent-works fields; no other
    active callback sets it, yet the experiment agent refuses to start
    without it.

    Args:
        paper_text_input: The clicked example text; must be an element of
            ``example_text``.

    Returns:
        list: ``[paper_text, tasks, gaps, keywords, recent_works]`` as
        returned by ``load_example``.

    Raises:
        ValueError: If the text is not one of the known examples.
    """
    global index_ex, state_example, state_extract
    # Example keys are 1-based strings, list positions are 0-based.
    index_ex = str(example_text.index(paper_text_input) + 1)
    example = load_example(index_ex)
    # Only flip the flags once the lookup above has succeeded.
    state_example = True
    state_extract = True
    return example
73
+
74
+
75
+
76
+ ########## Phase 1 ##############
77
+
78
def extract_research_elements(paper_text):
    """Return the stored research elements for the selected example.

    Nothing is extracted from the text itself: the text is only checked
    against the title of the selected example, and the predefined values are
    returned.

    Args:
        paper_text: Text currently shown in the paper box.

    Returns:
        tuple: ``(tasks, gaps, keywords, recent_works)``; four empty strings
        if no example was chosen, the text is empty, or the text does not
        start with the selected example's title line.
    """
    global state_extract
    nothing = ("", "", "", "")
    if not state_example or paper_text == "":
        return nothing

    example = example_data[index_ex]
    if not paper_text.strip().startswith("Title:\t" + example["title"]):
        print("Mismatch detected.")
        print(paper_text)
        return nothing

    # Mark the stage as done only after the text has been validated, so a
    # mismatch does not unlock the later stages.
    state_extract = True
    recent_works = "\n".join(example['recent_works'])
    return example['research_tasks'], example['research_gaps'], example['keywords'], recent_works
93
+
94
+
95
+ # Step 2: Generate Research Hypothesis and Experiment Plan
96
def generate_and_store(paper_text, tasks, gaps, keywords, recent_works):
    """Return the predefined hypothesis and experiment plan of the example.

    The five arguments mirror the UI inputs wired to this callback; none of
    them is read, the values come from ``example_data[index_ex]``.

    Returns:
        tuple: ``(hypothesis, experiment_plan, hypothesis, experiment_plan)``
        (once for the visible boxes, once for the state holders), or four
        empty strings if no example has been chosen yet.
    """
    global state_generate
    if not state_example:
        return "", "", "", ""
    state_generate = True
    example = example_data[index_ex]
    hypothesis = example['hypothesis']
    experiment_plan = example['experiment_plan']
    return hypothesis, experiment_plan, hypothesis, experiment_plan
104
+
105
+ ########## Phase 2 & 3 ##############
106
def start_experiment_agent(hypothesis, plan):
    """Start (or reset) the scripted ExperimentAgent run.

    Args:
        hypothesis: Generated hypothesis (not read; kept for the UI wiring).
        plan: Generated experiment plan (not read; kept for the UI wiring).

    Returns:
        tuple: ``(code, log, response, feedback)`` - the example's initial
        code, the predefined action log and two empty strings. Four empty
        strings are returned when an earlier stage has not been completed.
    """
    global state_agent, step_index, state_complete
    if not (state_extract and state_generate and state_example):
        # This callback is wired to four output components, so the early
        # exit has to supply four values as well (it used to return three).
        return "", "", "", ""
    state_agent = True
    step_index = 0
    state_complete = False
    return example_data[index_ex]['code_init'], predefined_action_log, "", ""
115
+
116
def submit_feedback(user_feedback, history, previous_response):
    """Advance the scripted agent run by one step.

    Args:
        user_feedback: Text the user typed into the feedback box.
        history: Current content of the execution log.
        previous_response: The agent response currently displayed.

    Returns:
        tuple: ``(log, response, code, feedback)``. While steps remain, the
        log gains the previous response, the user feedback and the new
        response; afterwards the log is returned unchanged and the response
        is ``"Agent Finished."``. ``feedback`` is always ``""`` so the input
        box is cleared. Four empty strings are returned when an earlier
        stage has not been completed.
    """
    global step_index, state_complete
    if not (state_extract and state_generate and state_agent and state_example):
        # Four output components are wired to this callback, so four values
        # are needed here too (the early exit used to return three).
        return "", "", "", ""

    # Advance exactly once per submission; a second increment after building
    # the response used to make every other scripted step get skipped.
    step_index += 1
    msg = history
    if step_index < len(process_steps):
        msg += previous_response + "\nUser feedback:" + user_feedback + "\n\n"
        response = info_to_message(process_steps[step_index])  # Convert dictionary to formatted string
        response += "Please provide feedback based on the history, response entries, and observation, and questions: "
        msg += response
    else:
        state_complete = True
        response = "Agent Finished."

    # Show the final implementation once the run is complete and the initial
    # one until then (the two were swapped before).
    example = example_data[index_ex]
    code = example['code_final'] if state_complete else example['code_init']
    return msg, response, code, ""
134
+
135
def load_phase_2_inputs(hypothesis, plan):
    """Pass hypothesis and plan through and add a placeholder for the code view.

    Returns:
        tuple: ``(hypothesis, plan, placeholder_comment)``.
    """
    placeholder = "# Code implementation will be displayed here after Start ExperimentAgent."
    return hypothesis, plan, placeholder
137
+
138
+
139
+
140
# Log text shown when the ExperimentAgent is started.
predefined_action_log = """
[Reasoning]: To understand the initial structure and functionality of train.py for effective improvements.
[Action]: Inspect Script (train.py)
Input: {"script_name": "train.py", "start_line_number": "1", "end_line_number": "74"}
Objective: Understand the training script, including data processing, [...]
[Observation]: The train.py script imports [...]. Sets random seeds [...]. Defines [...] Placeholder functions [...] exist without implementation. [...]
[Feedback]: The script structure is clear, but key functions (train_model, predict) need proper implementation for proposed model training and prediction.\n
"""


# Canned training output used as the observation of the last scripted step.
predefined_observation = """
Epoch [1/10],
Train MSE: 0.543,
Test MSE: 0.688
Epoch [2/10],
Train MSE: 0.242,
Test MSE: 0.493\n
"""

# Scripted agent steps, indexed by the global step_index.
process_steps = [
    {
        "Action": "Inspect Script Lines (train.py)",
        "Observation": (
            "The train.py script imports necessary libraries (e.g., pandas, sklearn, torch). "
            "Sets random seeds for reproducibility. Defines compute_metrics_for_regression function "
            "to calculate RMSE for different dimensions. Placeholder functions train_model and "
            "predict exist without implementations."
        ),
    },
    {
        "Action": "Execute Script (train.py)",
        "Observation": (
            "The script executed successfully. Generated embeddings using the BERT model. Completed "
            "the training process without errors. Metrics calculation placeholders indicated areas needing implementation."
        ),
    },
    {
        "Action": "Edit Script (train.py)",
        "Observation": (
            "Edited train.py to separate data loading, model definition, training loop, and evaluation into distinct functions. "
            # Trailing space added: the adjacent literals are concatenated and
            # previously rendered as "functionsfor data loading".
            "The edited train.py now has clearly defined functions "
            "for data loading (load_data), model definition (build_model), "
            "training (train_model), and evaluation (evaluate_model). Similarly, eval.py is reorganized to load the model and perform predictions efficiently."
        ),
    },
    {
        "Action": "Retrieve Model",
        "Observation": "CNN and BiLSTM retrieved.",
    },
    {
        "Action": "Execute Script (train.py)",
        "Observation": (
            "The model trained over the specified number of epochs. Training and validation loss values are recorded for each epoch, "
            "the decrease in loss indicates improved model performance."
        ),
    },
    {
        "Action": "Evaluation",
        "Observation": predefined_observation,
    },
]
202
def info_to_message(info):
    """Format a step dictionary as a text block for display.

    Every key becomes a section made of a 64-dash rule, ``key:`` and the
    value on the following line. A value that is itself a dict is first
    flattened to ``key:``/value lines and passed through
    ``User.indent_text(..., 2)``.

    Args:
        info: Mapping of section title to a value or a nested dict.

    Returns:
        str: The concatenated sections; ``""`` for an empty mapping.
    """
    sections = []
    for k, v in info.items():
        if isinstance(v, dict):
            nested = "".join(f"{k2}:\n {v2}\n" for k2, v2 in v.items())
            # NOTE(review): indentation applied to nested dict values only -
            # confirm against the intended layout.
            v = User.indent_text(nested, 2)
        sections.append('-' * 64 + '\n' + f"{k}:\n{v}\n")
    # Join once instead of growing a string inside the loop.
    return "".join(sections)
215
+
216
+
217
def handle_example_click(example_index):
    """Select an example by index and return its display fields.

    Args:
        example_index: Key of the example; stored in ``index_ex`` as a
            string, like everywhere else in this module.

    Returns:
        list: The value returned by ``load_example`` for that example.
    """
    global index_ex
    # Keep index_ex a string so lookups in example_data stay consistent.
    index_ex = str(example_index)
    return load_example(index_ex)
221
+
222
+ # Gradio Interface
223
# UI definition. Every callback wired below must return one value per
# component listed in its ``outputs``.
with gr.Blocks(css=".gr-examples-label {display: none;}", theme=gr.themes.Default()) as app:
    gr.Markdown("# MLR- Copilot: Machine Learning Research based on LLM Agents")
    gr.Markdown("### ")
    gr.Markdown("## <span style='color:Orange;'> This UI is for predefined example workflow demo only.</span>")
    gr.Markdown("## <span style='color:Orange;'> To reproduce the results please use [Github](https://github.com/du-nlp-lab/MLR-Copilot/)</span>")

    gr.Markdown("MLR-Copilot is a framework where LLMs mimic researchers’ thought processes, designed to enhance the productivity of machine learning research by automating the generation and implementation of research ideas. It begins with a research paper, autonomously generating and validating these ideas, while incorporating human feedback to help reach executable research outcomes.")

    gr.Markdown("## ✅ Click an example at bottom to start ⬇️")

    # Use state variables to store generated hypothesis and experiment plan
    hypothesis_state = gr.State("")
    experiment_plan_state = gr.State("")

    ########## Phase 1: Research Idea Generation Tab ##############
    with gr.Tab("💡Stage 1: Research Idea Generation"):
        gr.Markdown("### Extract Research Elements and Generate Research Ideas")
        with gr.Row():
            with gr.Column():
                paper_text_input = gr.Textbox(value="", lines=10, label="📑 Research Paper Text", interactive=False)

                # extract_button = gr.Button("🔍 Extract Research Elements")
                with gr.Row():
                    tasks_output = gr.Textbox(placeholder="Research task definition", label="Research Tasks", lines=2, interactive=False)
                    gaps_output = gr.Textbox(placeholder="Research gaps of current works", label="Research Gaps", lines=2, interactive=False)
                    keywords_output = gr.Textbox(placeholder="Paper keywords", label="Keywords", lines=2, interactive=False)
                    recent_works_output = gr.Textbox(placeholder="Recent works extracted from Semantic Scholar", label="Recent Works", lines=2, interactive=False)
            with gr.Column():
                with gr.Row():  # Move the button to the top
                    generate_button = gr.Button("✍️ Generate Research Hypothesis & Experiment Plan")
                with gr.Group():
                    gr.Markdown("### 🌟 Research Idea")
                    with gr.Row():
                        hypothesis_output = gr.Textbox(label="Generated Hypothesis", lines=20, interactive=False)
                        experiment_plan_output = gr.Textbox(label="Generated Experiment Plan", lines=20, interactive=False)

        # Step 1: Extract Research Elements
        # extract_button.click(
        #     fn=extract_research_elements,
        #     inputs=paper_text_input,
        #     outputs=[tasks_output, gaps_output, keywords_output, recent_works_output]
        # )

        generate_button.click(
            fn=generate_and_store,
            inputs=[paper_text_input, tasks_output, gaps_output, keywords_output, recent_works_output],
            outputs=[hypothesis_output, experiment_plan_output, hypothesis_state, experiment_plan_state]
        )

        # Clicking an example runs the callback, which fills the paper text
        # and the four research-element boxes.
        gr.Examples(
            examples=example_text,
            inputs=[paper_text_input],
            outputs=[paper_text_input, tasks_output, gaps_output, keywords_output, recent_works_output],
            fn=load_example_and_set_index,
            run_on_click=True,
            # label="⬇️ Click an example to load"
        )

    ########## Phase 2 & 3: Experiment implementation and execution ##############
    with gr.Tab("🧪 Stage 2 & Stage 3: Experiment implementation and execution"):
        gr.Markdown("### Interact with the ExperimentAgent")

        with gr.Row():
            with gr.Column():
                with gr.Group():
                    gr.Markdown("### 🌟 Generated Research Idea")
                    with gr.Row():
                        idea_input = gr.Textbox(label="Generated Research Hypothesis", lines=30, interactive=False)
                        plan_input = gr.Textbox(label="Generated Experiment Plan", lines=30, interactive=False)

            with gr.Column():
                start_exp_agnet = gr.Button("⚙️ Start / Reset ExperimentAgent", elem_classes=["agent-btn"])
                with gr.Group():
                    gr.Markdown("### Implementation + Execution Log")
                    log = gr.Textbox(label="📖 Execution Log", lines=20, interactive=False)
                    code_display = gr.Code(label="🧑‍💻 Implementation", language="python", interactive=False)

            with gr.Column():
                response = gr.Textbox(label="🤖 ExperimentAgent Response", lines=30, interactive=False)
                feedback = gr.Textbox(placeholder="N/A", label="🧑‍🔬 User Feedback", lines=3, interactive=True)
                submit_button = gr.Button("Submit", elem_classes=["Submit-btn"])

        # Mirror a newly generated hypothesis/plan into this tab.
        hypothesis_state.change(
            fn=load_phase_2_inputs,
            inputs=[hypothesis_state, experiment_plan_state],
            outputs=[idea_input, plan_input, code_display]
        )

        # Start research agent
        start_exp_agnet.click(
            fn=start_experiment_agent,
            inputs=[hypothesis_state, experiment_plan_state],
            outputs=[code_display, log, response, feedback]
        )

        submit_button.click(
            fn=submit_feedback,
            inputs=[feedback, log, response],
            outputs=[log, response, code_display, feedback]
        )
331
+
332
+ # Test
333
if __name__ == "__main__":
    # Give the step counter a defined value before any callback runs;
    # start_experiment_agent resets it again on every start.
    step_index = 0
    # share=True asks Gradio for a public share link in addition to the
    # local server.
    app.launch(share=True)
.history/app_20250404175513.py ADDED
@@ -0,0 +1,335 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from pathlib import Path
3
+ from reactagent.environment import Environment
4
+ from reactagent.agents.agent_research import ResearchAgent
5
+ from reactagent.runner import create_parser
6
+ from reactagent import llm
7
+ from reactagent.users.user import User
8
+ import os
9
+ import json
10
+
11
+
12
+ # Global variables to store session state
13
+ env = None
14
+ agent = None
15
+ state_example = False
16
+ state_extract = False
17
+ state_generate = False
18
+ state_agent = False
19
+ state_complete = False
20
+ index_ex = "1"
21
+
22
+ example_text = [
23
+ "Research Paper 1: Dataset and Baseline for Automatic Student Feedback Analysis",
24
+ "Research Paper 2: An Empirical Study on the Impact of Code Review on Software Quality"
25
+ ]
26
+
27
+
28
+ # Load example JSON file
29
def load_example_data():
    """Load the demo examples and inline their code files.

    Reads ``example/example_data.json`` (relative to the current working
    directory). For every example, the ``code_init`` and ``code_final``
    entries are file names under ``example/``; each is replaced by the text
    of that file.

    Returns:
        dict: The parsed JSON mapping with code file names replaced by file
        contents. When a code file is missing, a message is printed and the
        entry keeps the file name it had in the JSON.
    """
    # Explicit UTF-8 so the result does not depend on the platform default encoding.
    with open("example/example_data.json", "r", encoding="utf-8") as json_file:
        example_data = json.load(json_file)

    for idx, example in example_data.items():
        # Both code fields are treated identically, so loop over them
        # instead of repeating the same try/except twice.
        for field in ("code_init", "code_final"):
            file = example[field]
            try:
                with open(os.path.join("example", file), "r", encoding="utf-8") as f:
                    example[field] = f.read()
            except FileNotFoundError:
                print(f"File not found: {file}. Skipping key: {idx}")
    return example_data
47
+
48
+ example_data = load_example_data()
49
+
50
+ # Function to handle the selection of an example and populate the respective fields
51
def load_example(example_id):
    """Select an example and return the values for the Stage 1 fields.

    Stores ``str(example_id)`` in the global ``index_ex`` and looks that key
    up in ``example_data``.

    Returns:
        list: ``[paper_text, tasks, gaps, keywords, recent_works]`` where
        ``paper_text`` combines title and abstract and ``recent_works`` is
        the example's list joined with newlines.
    """
    global index_ex
    index_ex = str(example_id)
    record = example_data[index_ex]
    header = "".join(['Title:\t', record['title'], '\n\nAbstract:\t', record['abstract']])
    return [
        header,
        record['research_tasks'],
        record['research_gaps'],
        record['keywords'],
        "\n".join(record['recent_works']),
    ]
61
+
62
+ example_text = [load_example(1)[0], load_example(2)[0]]
63
+
64
+
65
+ # Function to handle example clicks
66
def load_example_and_set_index(paper_text_input):
    """Handle a click on one of the example entries.

    Marks that an example has been chosen, derives the 1-based example key
    from the position of ``paper_text_input`` in ``example_text`` and
    returns the field values produced by ``load_example`` for that key.
    """
    global index_ex, state_example
    state_example = True
    position = example_text.index(paper_text_input)
    index_ex = str(position + 1)
    return load_example(index_ex)
73
+
74
+
75
+
76
+ ########## Phase 1 ##############
77
+
78
def extract_research_elements(paper_text):
    """Return the stored research elements for the selected example.

    Args:
        paper_text: Text shown in the paper box; it must start with the
            title line of the currently selected example.

    Returns:
        tuple: ``(tasks, gaps, keywords, recent_works)``; four empty strings
        when no example has been selected, the text is empty, or the text
        does not match the selected example.
    """
    global state_extract
    empty = ("", "", "", "")
    if not state_example or not paper_text:
        return empty
    example = example_data[index_ex]
    if not paper_text.strip().startswith("Title:\t" + example["title"]):
        print("Mismatch detected.")
        print(paper_text)
        return empty
    # Record the extraction as done only after the text has been validated;
    # setting the flag earlier marked a failed extraction as successful.
    state_extract = True
    return (
        example['research_tasks'],
        example['research_gaps'],
        example['keywords'],
        "\n".join(example['recent_works']),
    )
93
+
94
+
95
+ # Step 2: Generate Research Hypothesis and Experiment Plan
96
def generate_and_store(paper_text, tasks, gaps, keywords, recent_works):
    """Return the stored hypothesis and experiment plan of the selected example.

    The arguments are the current Stage 1 field values; they are not used
    for the lookup. Each result is returned twice because the caller wires
    it to both a visible textbox and a state holder.

    Returns:
        tuple: ``(hypothesis, experiment_plan, hypothesis, experiment_plan)``,
        or four empty strings when no example has been selected.
    """
    global state_generate
    if not state_example:
        return "", "", "", ""
    state_generate = True
    selected = example_data[index_ex]
    hypothesis = selected['hypothesis']
    experiment_plan = selected['experiment_plan']
    return hypothesis, experiment_plan, hypothesis, experiment_plan
104
+
105
+ ########## Phase 2 & 3 ##############
106
def start_experiment_agent(hypothesis, plan):
    """Start (or reset) the scripted experiment agent.

    Args:
        hypothesis: Generated hypothesis (currently unused).
        plan: Generated experiment plan (currently unused).

    Returns:
        tuple: ``(initial code, initial action log, "", "")`` -- one value
        for each of the four outputs wired to the start button. Four empty
        strings are returned when no example has been selected or no idea
        has been generated yet.
    """
    global state_agent, step_index, state_complete
    # Do not gate on state_extract: the extract button is commented out in
    # this file, so that flag can never become True and the agent could
    # never be started.
    if not state_generate or not state_example:
        # Four values, matching the four wired outputs.
        return "", "", "", ""
    state_agent = True
    step_index = 0
    state_complete = False
    return example_data[index_ex]['code_init'], predefined_action_log, "", ""
115
+
116
def submit_feedback(user_feedback, history, previous_response):
    """Advance the scripted agent by one round of user feedback.

    Args:
        user_feedback: Text typed into the feedback box.
        history: Current content of the execution log.
        previous_response: The agent response currently displayed.

    Returns:
        tuple: ``(log, response, code, "")`` -- one value for each of the
        four outputs wired to the Submit button; the last one clears the
        feedback box. Four empty strings are returned when the workflow has
        not reached the agent stage.
    """
    global step_index, state_complete
    # state_extract is not checked: the extract button is commented out in
    # this file, so that flag can never become True.
    if not state_generate or not state_agent or not state_example:
        # Four values, matching the four wired outputs.
        return "", "", "", ""
    step_index += 1
    msg = history
    if step_index < len(process_steps):
        msg += previous_response + "\nUser feedback:" + user_feedback + "\n\n"
        response = info_to_message(process_steps[step_index])  # dict -> formatted string
        response += "Please provide feedback based on the history, response entries, and observation, and questions: "
        # NOTE(review): together with the increment above this advances two
        # steps per submission, so every other entry of process_steps is
        # never shown. Kept as-is; confirm whether that is intended.
        step_index += 1
        msg += response
    else:
        state_complete = True
        response = "Agent Finished."

    # NOTE(review): this shows code_final while running and code_init once
    # finished, which looks inverted. Behaviour preserved; confirm intent.
    example = example_data[index_ex]
    code = example['code_init'] if state_complete else example['code_final']
    return msg, response, code, ""
134
+
135
def load_phase_2_inputs(hypothesis, plan):
    """Pass the generated idea through and add a placeholder for the code view.

    Returns:
        tuple: ``(hypothesis, plan, placeholder_comment)``.
    """
    placeholder = "# Code implementation will be displayed here after Start ExperimentAgent."
    return hypothesis, plan, placeholder
137
+
138
+
139
+
140
+ predefined_action_log = """
141
+ [Reasoning]: To understand the initial structure and functionality of train.py for effective improvements.
142
+ [Action]: Inspect Script (train.py)
143
+ Input: {"script_name": "train.py", "start_line_number": "1", "end_line_number": "74"}
144
+ Objective: Understand the training script, including data processing, [...]
145
+ [Observation]: The train.py script imports [...]. Sets random seeds [...]. Defines [...] Placeholder functions [...] exist without implementation. [...]
146
+ [Feedback]: The script structure is clear, but key functions (train_model, predict) need proper implementation for proposed model training and prediction.\n
147
+ """
148
+
149
+
150
+ predefined_observation = """
151
+ Epoch [1/10],
152
+ Train MSE: 0.543,
153
+ Test MSE: 0.688
154
+ Epoch [2/10],
155
+ Train MSE: 0.242,
156
+ Test MSE: 0.493\n
157
+ """
158
+
159
+ # Initialize the global step_index and history
160
# Scripted agent steps for the demo; submit_feedback indexes into this list
# with the global step_index.
process_steps = [
    {
        "Action": "Inspect Script Lines (train.py)",
        "Observation": (
            "The train.py script imports necessary libraries (e.g., pandas, sklearn, torch). "
            "Sets random seeds for reproducibility. Defines compute_metrics_for_regression function "
            "to calculate RMSE for different dimensions. Placeholder functions train_model and "
            "predict exist without implementations."
        ),
    },
    {
        "Action": "Execute Script (train.py)",
        "Observation": (
            "The script executed successfully. Generated embeddings using the BERT model. Completed "
            "the training process without errors. Metrics calculation placeholders indicated areas needing implementation."
        ),
    },
    {
        "Action": "Edit Script (train.py)",
        "Observation": (
            "Edited train.py to separate data loading, model definition, training loop, and evaluation into distinct functions. "
            # Trailing space added: the adjacent literals used to join as "functionsfor".
            "The edited train.py now has clearly defined functions "
            "for data loading (load_data), model definition (build_model), "
            "training (train_model), and evaluation (evaluate_model). Similarly, eval.py is reorganized to load the model and perform predictions efficiently."
        ),
    },
    {
        "Action": "Retrieve Model",
        "Observation": "CNN and BiLSTM retrieved.",
    },
    {
        "Action": "Execute Script (train.py)",
        "Observation": (
            "The model trained over the specified number of epochs. Training and validation loss values are recorded for each epoch, "
            "the decrease in loss indicates improved model performance."
        ),
    },
    {
        "Action": "Evaluation",
        "Observation": predefined_observation,
    },
]
202
def info_to_message(info):
    """Format a step dictionary as display text.

    Every entry becomes a 64-dash separator line followed by ``key:`` and
    the value on the next line. A value that is itself a dict is first
    flattened to ``sub_key:`` / value lines and passed through
    ``User.indent_text``.

    Returns:
        str: The concatenated sections; an empty string for an empty dict.
    """
    separator = '-' * 64 + '\n'
    parts = []
    for key, value in info.items():
        if isinstance(value, dict):
            nested = ""
            for sub_key, sub_value in value.items():
                nested += f"{sub_key}:\n {sub_value}\n"
            # NOTE(review): assumed to apply to nested dict values only; the
            # original indentation was lost in this copy -- confirm.
            value = User.indent_text(nested, 2)
        parts.append(separator)
        parts.append(f"{key}:\n{value}\n")
    return "".join(parts)
215
+
216
+
217
def handle_example_click(example_index):
    """Remember the clicked example and return its field values.

    Stores ``example_index`` in the global ``index_ex`` and returns the
    result of ``load_example`` for it.
    """
    global index_ex
    index_ex = example_index
    return load_example(example_index)
221
+
222
+ # Gradio Interface
223
# Two-tab Gradio UI for the predefined example workflow.
# NOTE(review): the nesting of rows/columns is reconstructed from the
# statement order; confirm it against the intended layout.
with gr.Blocks(css=".gr-examples-label {display: none;}", theme=gr.themes.Default()) as app:
    gr.Markdown("# MLR- Copilot: Machine Learning Research based on LLM Agents")
    gr.Markdown("### ")
    gr.Markdown("## <span style='color:Orange;'> This UI is for predefined example workflow demo only.</span>")
    gr.Markdown("## <span style='color:Orange;'> To reproduce the results please use [Github](https://github.com/du-nlp-lab/MLR-Copilot/)</span>")

    gr.Markdown("MLR-Copilot is a framework where LLMs mimic researchers’ thought processes, designed to enhance the productivity of machine learning research by automating the generation and implementation of research ideas. It begins with a research paper, autonomously generating and validating these ideas, while incorporating human feedback to help reach executable research outcomes.")

    gr.Markdown("## ✅ Click an example at bottom to start ⬇️")

    # State holders for the generated hypothesis and experiment plan.
    hypothesis_state = gr.State("")
    experiment_plan_state = gr.State("")

    ########## Phase 1: Research Idea Generation Tab ##############
    with gr.Tab("💡Stage 1: Research Idea Generation"):
        gr.Markdown("### Extract Research Elements and Generate Research Ideas")
        with gr.Row():
            with gr.Column():
                paper_text_input = gr.Textbox(value="", lines=10, label="📑 Research Paper Text", interactive=False)

                # The "Extract Research Elements" button is disabled in this
                # demo; the fields below are filled when an example is clicked.
                with gr.Row():
                    tasks_output = gr.Textbox(placeholder="Research task definition", label="Research Tasks", lines=2, interactive=False)
                    gaps_output = gr.Textbox(placeholder="Research gaps of current works", label="Research Gaps", lines=2, interactive=False)
                    keywords_output = gr.Textbox(placeholder="Paper keywords", label="Keywords", lines=2, interactive=False)
                    recent_works_output = gr.Textbox(placeholder="Recent works extracted from Semantic Scholar", label="Recent Works", lines=2, interactive=False)
            with gr.Column():
                with gr.Row():  # Keep the button at the top of the column
                    generate_button = gr.Button("✍️ Generate Research Hypothesis & Experiment Plan")
                with gr.Group():
                    gr.Markdown("### 🌟 Research Idea")
                    with gr.Row():
                        hypothesis_output = gr.Textbox(label="Generated Hypothesis", lines=20, interactive=False)
                        experiment_plan_output = gr.Textbox(label="Generated Experiment Plan", lines=20, interactive=False)

        generate_button.click(
            fn=generate_and_store,
            inputs=[paper_text_input, tasks_output, gaps_output, keywords_output, recent_works_output],
            outputs=[hypothesis_output, experiment_plan_output, hypothesis_state, experiment_plan_state],
        )

        gr.Examples(
            examples=example_text,
            inputs=[paper_text_input],
            outputs=[paper_text_input, tasks_output, gaps_output, keywords_output, recent_works_output],
            fn=load_example_and_set_index,
            run_on_click=True,
        )

    ########## Phase 2 & 3: Experiment implementation and execution ##############
    with gr.Tab("🧪 Stage 2 & Stage 3: Experiment implementation and execution"):
        gr.Markdown("### Interact with the ExperimentAgent")

        with gr.Row():
            with gr.Column():
                with gr.Group():
                    gr.Markdown("### 🌟 Generated Research Idea")
                    with gr.Row():
                        idea_input = gr.Textbox(label="Generated Research Hypothesis", lines=30, interactive=False)
                        plan_input = gr.Textbox(label="Generated Experiment Plan", lines=30, interactive=False)

            with gr.Column():
                start_exp_agent = gr.Button("⚙️ Start / Reset ExperimentAgent", elem_classes=["agent-btn"])
                with gr.Group():
                    gr.Markdown("### Implementation + Execution Log")
                    log = gr.Textbox(label="📖 Execution Log", lines=20, interactive=False)
                    code_display = gr.Code(label="🧑‍💻 Implementation", language="python", interactive=False)

            with gr.Column():
                response = gr.Textbox(label="🤖 ExperimentAgent Response", lines=30, interactive=False)
                feedback = gr.Textbox(placeholder="N/A", label="🧑‍🔬 User Feedback", lines=3, interactive=True)
                submit_button = gr.Button("Submit", elem_classes=["Submit-btn"])

        # Mirror the generated idea into this tab whenever it changes.
        hypothesis_state.change(
            fn=load_phase_2_inputs,
            inputs=[hypothesis_state, experiment_plan_state],
            outputs=[idea_input, plan_input, code_display],
        )

        # Start / reset the experiment agent
        start_exp_agent.click(
            fn=start_experiment_agent,
            inputs=[hypothesis_state, experiment_plan_state],
            outputs=[code_display, log, response, feedback],
        )

        submit_button.click(
            fn=submit_feedback,
            inputs=[feedback, log, response],
            outputs=[log, response, code_display, feedback],
        )
331
+
332
+ # Test
333
+ if __name__ == "__main__":
334
+ step_index = 0
335
+ app.launch(share=True)
.history/app_20250404175624.py ADDED
@@ -0,0 +1,335 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from pathlib import Path
3
+ from reactagent.environment import Environment
4
+ from reactagent.agents.agent_research import ResearchAgent
5
+ from reactagent.runner import create_parser
6
+ from reactagent import llm
7
+ from reactagent.users.user import User
8
+ import os
9
+ import json
10
+
11
+
12
+ # Global variables to store session state
13
+ env = None
14
+ agent = None
15
+ state_example = False
16
+ state_extract = False
17
+ state_generate = False
18
+ state_agent = False
19
+ state_complete = False
20
+ index_ex = "1"
21
+
22
+ example_text = [
23
+ "Research Paper 1: Dataset and Baseline for Automatic Student Feedback Analysis",
24
+ "Research Paper 2: An Empirical Study on the Impact of Code Review on Software Quality"
25
+ ]
26
+
27
+
28
+ # Load example JSON file
29
def load_example_data():
    """Load the demo examples and inline their code files.

    Reads ``example/example_data.json`` (relative to the current working
    directory). For every example, the ``code_init`` and ``code_final``
    entries are file names under ``example/``; each is replaced by the text
    of that file.

    Returns:
        dict: The parsed JSON mapping with code file names replaced by file
        contents. When a code file is missing, a message is printed and the
        entry keeps the file name it had in the JSON.
    """
    # Explicit UTF-8 so the result does not depend on the platform default encoding.
    with open("example/example_data.json", "r", encoding="utf-8") as json_file:
        example_data = json.load(json_file)

    for idx, example in example_data.items():
        # Both code fields are treated identically, so loop over them
        # instead of repeating the same try/except twice.
        for field in ("code_init", "code_final"):
            file = example[field]
            try:
                with open(os.path.join("example", file), "r", encoding="utf-8") as f:
                    example[field] = f.read()
            except FileNotFoundError:
                print(f"File not found: {file}. Skipping key: {idx}")
    return example_data
47
+
48
+ example_data = load_example_data()
49
+
50
+ # Function to handle the selection of an example and populate the respective fields
51
def load_example(example_id):
    """Select an example and return the values for the Stage 1 fields.

    Stores ``str(example_id)`` in the global ``index_ex`` and looks that key
    up in ``example_data``.

    Returns:
        list: ``[paper_text, tasks, gaps, keywords, recent_works]`` where
        ``paper_text`` combines title and abstract and ``recent_works`` is
        the example's list joined with newlines.
    """
    global index_ex
    index_ex = str(example_id)
    record = example_data[index_ex]
    header = "".join(['Title:\t', record['title'], '\n\nAbstract:\t', record['abstract']])
    return [
        header,
        record['research_tasks'],
        record['research_gaps'],
        record['keywords'],
        "\n".join(record['recent_works']),
    ]
61
+
62
+ example_text = [load_example(1)[0], load_example(2)[0]]
63
+
64
+
65
+ # Function to handle example clicks
66
def load_example_and_set_index(paper_text_input):
    """Handle a click on one of the example entries.

    Marks that an example has been chosen, derives the 1-based example key
    from the position of ``paper_text_input`` in ``example_text`` and
    returns the field values produced by ``load_example`` for that key.
    """
    global index_ex, state_example
    state_example = True
    position = example_text.index(paper_text_input)
    index_ex = str(position + 1)
    return load_example(index_ex)
73
+
74
+
75
+
76
+ ########## Phase 1 ##############
77
+
78
def extract_research_elements(paper_text):
    """Return the stored research elements for the selected example.

    Args:
        paper_text: Text shown in the paper box; it must start with the
            title line of the currently selected example.

    Returns:
        tuple: ``(tasks, gaps, keywords, recent_works)``; four empty strings
        when no example has been selected, the text is empty, or the text
        does not match the selected example.
    """
    global state_extract
    empty = ("", "", "", "")
    if not state_example or not paper_text:
        return empty
    example = example_data[index_ex]
    if not paper_text.strip().startswith("Title:\t" + example["title"]):
        print("Mismatch detected.")
        print(paper_text)
        return empty
    # Record the extraction as done only after the text has been validated;
    # setting the flag earlier marked a failed extraction as successful.
    state_extract = True
    return (
        example['research_tasks'],
        example['research_gaps'],
        example['keywords'],
        "\n".join(example['recent_works']),
    )
93
+
94
+
95
+ # Step 2: Generate Research Hypothesis and Experiment Plan
96
def generate_and_store(paper_text, tasks, gaps, keywords, recent_works):
    """Return the stored hypothesis and experiment plan of the selected example.

    The arguments are the current Stage 1 field values; they are not used
    for the lookup. Each result is returned twice because the caller wires
    it to both a visible textbox and a state holder.

    Returns:
        tuple: ``(hypothesis, experiment_plan, hypothesis, experiment_plan)``,
        or four empty strings when no example has been selected.
    """
    global state_generate
    if not state_example:
        return "", "", "", ""
    state_generate = True
    selected = example_data[index_ex]
    hypothesis = selected['hypothesis']
    experiment_plan = selected['experiment_plan']
    return hypothesis, experiment_plan, hypothesis, experiment_plan
104
+
105
+ ########## Phase 2 & 3 ##############
106
def start_experiment_agent(hypothesis, plan):
    """Start (or reset) the scripted experiment agent.

    Both arguments are accepted for the click wiring but are not used.

    Returns:
        tuple: ``(initial code, initial action log, "", "")``, or four empty
        strings when no example has been selected or no idea generated yet.
    """
    global state_agent, step_index, state_complete
    ready = state_generate and state_example
    if not ready:
        return "", "", "", ""
    # Reset the scripted run to its first step.
    state_agent, step_index, state_complete = True, 0, False
    initial_code = example_data[index_ex]['code_init']
    return initial_code, predefined_action_log, "", ""
115
+
116
def submit_feedback(user_feedback, history, previous_response):
    """Advance the scripted agent by one round of user feedback.

    Args:
        user_feedback: Text typed into the feedback box.
        history: Current content of the execution log.
        previous_response: The agent response currently displayed.

    Returns:
        tuple: ``(log, response, code, "")`` -- one value for each of the
        four outputs wired to the Submit button; the last one clears the
        feedback box. Four empty strings are returned when the workflow has
        not reached the agent stage.
    """
    global step_index, state_complete
    # The condition previously read "( or not state_generate ...", which is
    # a syntax error left by a half-removed state_extract check.
    if not state_generate or not state_agent or not state_example:
        # Four values, matching the four wired outputs.
        return "", "", "", ""
    step_index += 1
    msg = history
    if step_index < len(process_steps):
        msg += previous_response + "\nUser feedback:" + user_feedback + "\n\n"
        response = info_to_message(process_steps[step_index])  # dict -> formatted string
        response += "Please provide feedback based on the history, response entries, and observation, and questions: "
        # NOTE(review): together with the increment above this advances two
        # steps per submission, so every other entry of process_steps is
        # never shown. Kept as-is; confirm whether that is intended.
        step_index += 1
        msg += response
    else:
        state_complete = True
        response = "Agent Finished."

    # NOTE(review): this shows code_final while running and code_init once
    # finished, which looks inverted. Behaviour preserved; confirm intent.
    example = example_data[index_ex]
    code = example['code_init'] if state_complete else example['code_final']
    return msg, response, code, ""
134
+
135
def load_phase_2_inputs(hypothesis, plan):
    """Pass the generated idea through and add a placeholder for the code view.

    Returns:
        tuple: ``(hypothesis, plan, placeholder_comment)``.
    """
    placeholder = "# Code implementation will be displayed here after Start ExperimentAgent."
    return hypothesis, plan, placeholder
137
+
138
+
139
+
140
+ predefined_action_log = """
141
+ [Reasoning]: To understand the initial structure and functionality of train.py for effective improvements.
142
+ [Action]: Inspect Script (train.py)
143
+ Input: {"script_name": "train.py", "start_line_number": "1", "end_line_number": "74"}
144
+ Objective: Understand the training script, including data processing, [...]
145
+ [Observation]: The train.py script imports [...]. Sets random seeds [...]. Defines [...] Placeholder functions [...] exist without implementation. [...]
146
+ [Feedback]: The script structure is clear, but key functions (train_model, predict) need proper implementation for proposed model training and prediction.\n
147
+ """
148
+
149
+
150
+ predefined_observation = """
151
+ Epoch [1/10],
152
+ Train MSE: 0.543,
153
+ Test MSE: 0.688
154
+ Epoch [2/10],
155
+ Train MSE: 0.242,
156
+ Test MSE: 0.493\n
157
+ """
158
+
159
+ # Initialize the global step_index and history
160
# Scripted agent steps for the demo; submit_feedback indexes into this list
# with the global step_index.
process_steps = [
    {
        "Action": "Inspect Script Lines (train.py)",
        "Observation": (
            "The train.py script imports necessary libraries (e.g., pandas, sklearn, torch). "
            "Sets random seeds for reproducibility. Defines compute_metrics_for_regression function "
            "to calculate RMSE for different dimensions. Placeholder functions train_model and "
            "predict exist without implementations."
        ),
    },
    {
        "Action": "Execute Script (train.py)",
        "Observation": (
            "The script executed successfully. Generated embeddings using the BERT model. Completed "
            "the training process without errors. Metrics calculation placeholders indicated areas needing implementation."
        ),
    },
    {
        "Action": "Edit Script (train.py)",
        "Observation": (
            "Edited train.py to separate data loading, model definition, training loop, and evaluation into distinct functions. "
            # Trailing space added: the adjacent literals used to join as "functionsfor".
            "The edited train.py now has clearly defined functions "
            "for data loading (load_data), model definition (build_model), "
            "training (train_model), and evaluation (evaluate_model). Similarly, eval.py is reorganized to load the model and perform predictions efficiently."
        ),
    },
    {
        "Action": "Retrieve Model",
        "Observation": "CNN and BiLSTM retrieved.",
    },
    {
        "Action": "Execute Script (train.py)",
        "Observation": (
            "The model trained over the specified number of epochs. Training and validation loss values are recorded for each epoch, "
            "the decrease in loss indicates improved model performance."
        ),
    },
    {
        "Action": "Evaluation",
        "Observation": predefined_observation,
    },
]
202
def info_to_message(info):
    """Format a step dictionary as display text.

    Every entry becomes a 64-dash separator line followed by ``key:`` and
    the value on the next line. A value that is itself a dict is first
    flattened to ``sub_key:`` / value lines and passed through
    ``User.indent_text``.

    Returns:
        str: The concatenated sections; an empty string for an empty dict.
    """
    separator = '-' * 64 + '\n'
    parts = []
    for key, value in info.items():
        if isinstance(value, dict):
            nested = ""
            for sub_key, sub_value in value.items():
                nested += f"{sub_key}:\n {sub_value}\n"
            # NOTE(review): assumed to apply to nested dict values only; the
            # original indentation was lost in this copy -- confirm.
            value = User.indent_text(nested, 2)
        parts.append(separator)
        parts.append(f"{key}:\n{value}\n")
    return "".join(parts)
215
+
216
+
217
def handle_example_click(example_index):
    """Remember the clicked example and return its field values.

    Stores ``example_index`` in the global ``index_ex`` and returns the
    result of ``load_example`` for it.
    """
    global index_ex
    index_ex = example_index
    return load_example(example_index)
221
+
222
+ # Gradio Interface
223
# Two-tab Gradio UI for the predefined example workflow.
# NOTE(review): the nesting of rows/columns is reconstructed from the
# statement order; confirm it against the intended layout.
with gr.Blocks(css=".gr-examples-label {display: none;}", theme=gr.themes.Default()) as app:
    gr.Markdown("# MLR- Copilot: Machine Learning Research based on LLM Agents")
    gr.Markdown("### ")
    gr.Markdown("## <span style='color:Orange;'> This UI is for predefined example workflow demo only.</span>")
    gr.Markdown("## <span style='color:Orange;'> To reproduce the results please use [Github](https://github.com/du-nlp-lab/MLR-Copilot/)</span>")

    gr.Markdown("MLR-Copilot is a framework where LLMs mimic researchers’ thought processes, designed to enhance the productivity of machine learning research by automating the generation and implementation of research ideas. It begins with a research paper, autonomously generating and validating these ideas, while incorporating human feedback to help reach executable research outcomes.")

    gr.Markdown("## ✅ Click an example at bottom to start ⬇️")

    # State holders for the generated hypothesis and experiment plan.
    hypothesis_state = gr.State("")
    experiment_plan_state = gr.State("")

    ########## Phase 1: Research Idea Generation Tab ##############
    with gr.Tab("💡Stage 1: Research Idea Generation"):
        gr.Markdown("### Extract Research Elements and Generate Research Ideas")
        with gr.Row():
            with gr.Column():
                paper_text_input = gr.Textbox(value="", lines=10, label="📑 Research Paper Text", interactive=False)

                # The "Extract Research Elements" button is disabled in this
                # demo; the fields below are filled when an example is clicked.
                with gr.Row():
                    tasks_output = gr.Textbox(placeholder="Research task definition", label="Research Tasks", lines=2, interactive=False)
                    gaps_output = gr.Textbox(placeholder="Research gaps of current works", label="Research Gaps", lines=2, interactive=False)
                    keywords_output = gr.Textbox(placeholder="Paper keywords", label="Keywords", lines=2, interactive=False)
                    recent_works_output = gr.Textbox(placeholder="Recent works extracted from Semantic Scholar", label="Recent Works", lines=2, interactive=False)
            with gr.Column():
                with gr.Row():  # Keep the button at the top of the column
                    generate_button = gr.Button("✍️ Generate Research Hypothesis & Experiment Plan")
                with gr.Group():
                    gr.Markdown("### 🌟 Research Idea")
                    with gr.Row():
                        hypothesis_output = gr.Textbox(label="Generated Hypothesis", lines=20, interactive=False)
                        experiment_plan_output = gr.Textbox(label="Generated Experiment Plan", lines=20, interactive=False)

        generate_button.click(
            fn=generate_and_store,
            inputs=[paper_text_input, tasks_output, gaps_output, keywords_output, recent_works_output],
            outputs=[hypothesis_output, experiment_plan_output, hypothesis_state, experiment_plan_state],
        )

        gr.Examples(
            examples=example_text,
            inputs=[paper_text_input],
            outputs=[paper_text_input, tasks_output, gaps_output, keywords_output, recent_works_output],
            fn=load_example_and_set_index,
            run_on_click=True,
        )

    ########## Phase 2 & 3: Experiment implementation and execution ##############
    with gr.Tab("🧪 Stage 2 & Stage 3: Experiment implementation and execution"):
        gr.Markdown("### Interact with the ExperimentAgent")

        with gr.Row():
            with gr.Column():
                with gr.Group():
                    gr.Markdown("### 🌟 Generated Research Idea")
                    with gr.Row():
                        idea_input = gr.Textbox(label="Generated Research Hypothesis", lines=30, interactive=False)
                        plan_input = gr.Textbox(label="Generated Experiment Plan", lines=30, interactive=False)

            with gr.Column():
                start_exp_agent = gr.Button("⚙️ Start / Reset ExperimentAgent", elem_classes=["agent-btn"])
                with gr.Group():
                    gr.Markdown("### Implementation + Execution Log")
                    log = gr.Textbox(label="📖 Execution Log", lines=20, interactive=False)
                    code_display = gr.Code(label="🧑‍💻 Implementation", language="python", interactive=False)

            with gr.Column():
                response = gr.Textbox(label="🤖 ExperimentAgent Response", lines=30, interactive=False)
                feedback = gr.Textbox(placeholder="N/A", label="🧑‍🔬 User Feedback", lines=3, interactive=True)
                submit_button = gr.Button("Submit", elem_classes=["Submit-btn"])

        # Mirror the generated idea into this tab whenever it changes.
        hypothesis_state.change(
            fn=load_phase_2_inputs,
            inputs=[hypothesis_state, experiment_plan_state],
            outputs=[idea_input, plan_input, code_display],
        )

        # Start / reset the experiment agent
        start_exp_agent.click(
            fn=start_experiment_agent,
            inputs=[hypothesis_state, experiment_plan_state],
            outputs=[code_display, log, response, feedback],
        )

        submit_button.click(
            fn=submit_feedback,
            inputs=[feedback, log, response],
            outputs=[log, response, code_display, feedback],
        )
331
+
332
+ # Test
333
+ if __name__ == "__main__":
334
+ step_index = 0
335
+ app.launch(share=True)
.history/app_20250404175627.py ADDED
@@ -0,0 +1,335 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from pathlib import Path
3
+ from reactagent.environment import Environment
4
+ from reactagent.agents.agent_research import ResearchAgent
5
+ from reactagent.runner import create_parser
6
+ from reactagent import llm
7
+ from reactagent.users.user import User
8
+ import os
9
+ import json
10
+
11
+
12
+ # Global variables to store session state
13
+ env = None
14
+ agent = None
15
+ state_example = False
16
+ state_extract = False
17
+ state_generate = False
18
+ state_agent = False
19
+ state_complete = False
20
+ index_ex = "1"
21
+
22
+ example_text = [
23
+ "Research Paper 1: Dataset and Baseline for Automatic Student Feedback Analysis",
24
+ "Research Paper 2: An Empirical Study on the Impact of Code Review on Software Quality"
25
+ ]
26
+
27
+
28
+ # Load example JSON file
29
def load_example_data():
    """Load the demo examples and inline their code files.

    Reads ``example/example_data.json`` (relative to the current working
    directory). For every example, the ``code_init`` and ``code_final``
    entries are file names under ``example/``; each is replaced by the text
    of that file.

    Returns:
        dict: The parsed JSON mapping with code file names replaced by file
        contents. When a code file is missing, a message is printed and the
        entry keeps the file name it had in the JSON.
    """
    # Explicit UTF-8 so the result does not depend on the platform default encoding.
    with open("example/example_data.json", "r", encoding="utf-8") as json_file:
        example_data = json.load(json_file)

    for idx, example in example_data.items():
        # Both code fields are treated identically, so loop over them
        # instead of repeating the same try/except twice.
        for field in ("code_init", "code_final"):
            file = example[field]
            try:
                with open(os.path.join("example", file), "r", encoding="utf-8") as f:
                    example[field] = f.read()
            except FileNotFoundError:
                print(f"File not found: {file}. Skipping key: {idx}")
    return example_data
47
+
48
+ example_data = load_example_data()
49
+
50
+ # Function to handle the selection of an example and populate the respective fields
51
def load_example(example_id):
    """Select an example and return the values for the Stage 1 fields.

    Stores ``str(example_id)`` in the global ``index_ex`` and looks that key
    up in ``example_data``.

    Returns:
        list: ``[paper_text, tasks, gaps, keywords, recent_works]`` where
        ``paper_text`` combines title and abstract and ``recent_works`` is
        the example's list joined with newlines.
    """
    global index_ex
    index_ex = str(example_id)
    record = example_data[index_ex]
    header = "".join(['Title:\t', record['title'], '\n\nAbstract:\t', record['abstract']])
    return [
        header,
        record['research_tasks'],
        record['research_gaps'],
        record['keywords'],
        "\n".join(record['recent_works']),
    ]
61
+
62
+ example_text = [load_example(1)[0], load_example(2)[0]]
63
+
64
+
65
+ # Function to handle example clicks
66
def load_example_and_set_index(paper_text_input):
    """Handle a click on one of the example entries.

    Marks that an example has been chosen, derives the 1-based example key
    from the position of ``paper_text_input`` in ``example_text`` and
    returns the field values produced by ``load_example`` for that key.
    """
    global index_ex, state_example
    state_example = True
    position = example_text.index(paper_text_input)
    index_ex = str(position + 1)
    return load_example(index_ex)
73
+
74
+
75
+
76
+ ########## Phase 1 ##############
77
+
78
def extract_research_elements(paper_text):
    """Return the stored research elements for the selected example.

    Args:
        paper_text: Paper text currently shown in the UI.

    Returns:
        tuple: ``(tasks, gaps, keywords, recent_works)``; four empty strings
        when no example was selected, the text is empty, or the text does not
        start with the selected example's title line.
    """
    global state_extract, index_ex, state_example
    nothing = ("", "", "", "")
    if paper_text == "" or not state_example:
        return nothing

    state_extract = True
    stripped = paper_text.strip()
    expected_prefix = "Title:\t" + example_data[index_ex]["title"]
    if not stripped.startswith(expected_prefix):
        print("Mismatch detected.")
        print(paper_text)
        return nothing

    entry = example_data[index_ex]
    return (
        entry['research_tasks'],
        entry['research_gaps'],
        entry['keywords'],
        "\n".join(entry['recent_works']),
    )
93
+
94
+
95
+ # Step 2: Generate Research Hypothesis and Experiment Plan
96
def generate_and_store(paper_text, tasks, gaps, keywords, recent_works):
    """Return the stored hypothesis and experiment plan for the example.

    The five arguments are accepted to match the Gradio inputs but are not
    read; the values come from ``example_data[index_ex]``.

    Returns:
        tuple: ``(hypothesis, plan, hypothesis, plan)`` - once for the visible
        text boxes and once for the state holders - or four empty strings
        when no example has been selected.
    """
    global state_generate, index_ex
    if not state_example:
        return "", "", "", ""

    state_generate = True
    entry = example_data[index_ex]
    hypothesis, experiment_plan = entry['hypothesis'], entry['experiment_plan']
    return hypothesis, experiment_plan, hypothesis, experiment_plan
104
+
105
+ ########## Phase 2 & 3 ##############
106
def start_experiment_agent(hypothesis, plan):
    """Start (or reset) the scripted ExperimentAgent walkthrough.

    Both arguments are accepted to match the Gradio inputs but are not read.

    Returns:
        tuple: ``(code, log, response, feedback)`` - the example's initial
        code, the predefined action log and two empty strings - or four empty
        strings when Stage 1 has not been completed.
    """
    global state_agent, step_index, state_complete
    ready = state_generate and state_example
    if not ready:
        return "", "", "", ""

    # Reset the walkthrough to its first step.
    state_agent, step_index, state_complete = True, 0, False
    initial_code = example_data[index_ex]['code_init']
    return initial_code, predefined_action_log, "", ""
115
+
116
def submit_feedback(user_feedback, history, previous_response):
    """Advance the scripted agent run by one feedback round.

    Args:
        user_feedback: Text typed into the feedback box.
        history: Current contents of the execution log.
        previous_response: The agent response currently displayed.

    Returns:
        tuple: ``(log, response, code, feedback)`` - one value per output
        component wired to the submit button. ``feedback`` is always ``""``
        so the input box is cleared.
    """
    global step_index, state_complete
    if not (state_generate and state_agent and state_example):
        # The handler feeds four output components, so the guard must return
        # four values as well (it previously returned only three).
        return "", "", "", ""

    step_index += 1
    msg = history
    if step_index < len(process_steps):
        msg += previous_response + "\nUser feedback:" + user_feedback + "\n\n"
        response = info_to_message(process_steps[step_index])
        response += "Please provide feedback based on the history, response entries, and observation, and questions: "
        # NOTE(review): together with the increment above, this advances two
        # steps per submission, so every other entry of process_steps is never
        # shown - confirm this is intended.
        step_index += 1
        msg += response
    else:
        state_complete = True
        response = "Agent Finished."

    # Show the final implementation only once the run has finished; the
    # original conditional had the two branches swapped.
    code_key = 'code_final' if state_complete else 'code_init'
    return msg, response, example_data[index_ex][code_key], ""
134
+
135
def load_phase_2_inputs(hypothesis, plan):
    """Pass the hypothesis and plan through, plus placeholder code text.

    Returns:
        tuple: ``(hypothesis, plan, placeholder)`` where ``placeholder`` is
        the comment shown in the code panel before the agent is started.
    """
    placeholder = "# Code implementation will be displayed here after Start ExperimentAgent."
    return hypothesis, plan, placeholder
137
+
138
+
139
+
140
+ predefined_action_log = """
141
+ [Reasoning]: To understand the initial structure and functionality of train.py for effective improvements.
142
+ [Action]: Inspect Script (train.py)
143
+ Input: {"script_name": "train.py", "start_line_number": "1", "end_line_number": "74"}
144
+ Objective: Understand the training script, including data processing, [...]
145
+ [Observation]: The train.py script imports [...]. Sets random seeds [...]. Defines [...] Placeholder functions [...] exist without implementation. [...]
146
+ [Feedback]: The script structure is clear, but key functions (train_model, predict) need proper implementation for proposed model training and prediction.\n
147
+ """
148
+
149
+
150
+ predefined_observation = """
151
+ Epoch [1/10],
152
+ Train MSE: 0.543,
153
+ Test MSE: 0.688
154
+ Epoch [2/10],
155
+ Train MSE: 0.242,
156
+ Test MSE: 0.493\n
157
+ """
158
+
159
+ # Initialize the global step_index and history
160
+ process_steps = [
161
+ {
162
+ "Action": "Inspect Script Lines (train.py)",
163
+ "Observation": (
164
+ "The train.py script imports necessary libraries (e.g., pandas, sklearn, torch). "
165
+ "Sets random seeds for reproducibility. Defines compute_metrics_for_regression function "
166
+ "to calculate RMSE for different dimensions. Placeholder functions train_model and "
167
+ "predict exist without implementations."
168
+ ),
169
+ },
170
+ {
171
+ "Action": "Execute Script (train.py)",
172
+ "Observation": (
173
+ "The script executed successfully. Generated embeddings using the BERT model. Completed "
174
+ "the training process without errors. Metrics calculation placeholders indicated areas needing implementation."
175
+ ),
176
+ },
177
+ {
178
+ "Action": "Edit Script (train.py)",
179
+ "Observation": (
180
+ "Edited train.py to separate data loading, model definition, training loop, and evaluation into distinct functions. "
181
+ "The edited train.py now has clearly defined functions"
182
+ "for data loading (load_data), model definition (build_model), "
183
+ "training (train_model), and evaluation (evaluate_model). Similarly, eval.py is reorganized to load the model and perform predictions efficiently."
184
+ ),
185
+ },
186
+ {
187
+ "Action": "Retrieve Model",
188
+ "Observation": "CNN and BiLSTM retrieved.",
189
+ },
190
+ {
191
+ "Action": "Execute Script (train.py)",
192
+ "Observation": (
193
+ "The model trained over the specified number of epochs. Training and validation loss values are recorded for each epoch, "
194
+ "the decrease in loss indicates improved model performance."
195
+ )
196
+ },
197
+ {
198
+ "Action": "Evaluation",
199
+ "Observation": predefined_observation,
200
+ }
201
+ ]
202
def info_to_message(info):
    """Format a step dictionary as a human-readable message.

    Each key/value pair becomes a section: a 64-dash rule, the key followed
    by a colon, then the value. A value that is itself a dict is first
    flattened to ``key:`` / value lines and passed through
    ``User.indent_text(..., 2)``.

    Args:
        info: Mapping of section name to value.

    Returns:
        str: The concatenated sections; ``""`` for an empty mapping.
    """
    separator = '-' * 64
    sections = []
    for key, value in info.items():
        if isinstance(value, dict):
            nested = "".join(f"{k2}:\n {v2}\n" for k2, v2 in value.items())
            value = User.indent_text(nested, 2)
        sections.append(f"{separator}\n{key}:\n{value}\n")
    return "".join(sections)
215
+
216
+
217
def handle_example_click(example_index):
    """Record the clicked example and return its Stage 1 field values.

    Stores ``example_index`` in the module-level ``index_ex`` and delegates
    to ``load_example``, whose result is returned unchanged.
    """
    global index_ex
    index_ex = example_index
    fields = load_example(example_index)
    return fields
221
+
222
+ # Gradio Interface
223
+ with gr.Blocks(css=".gr-examples-label {display: none;}", theme=gr.themes.Default()) as app:
224
+ gr.Markdown("# MLR- Copilot: Machine Learning Research based on LLM Agents")
225
+ gr.Markdown("### ")
226
+ gr.Markdown("## <span style='color:Orange;'> This UI is for predefined example workflow demo only.</span>")
227
+ gr.Markdown("## <span style='color:Orange;'> To reproduce the results please use [Github](https://github.com/du-nlp-lab/MLR-Copilot/)</span>")
228
+
229
+
230
+
231
+ gr.Markdown("MLR-Copilot is a framework where LLMs mimic researchersโ€™ thought processes, designed to enhance the productivity of machine learning research by automating the generation and implementation of research ideas. It begins with a research paper, autonomously generating and validating these ideas, while incorporating human feedback to help reach executable research outcomes.")
232
+
233
+
234
+ gr.Markdown("## โœ… Click an example at bottom to start โฌ‡๏ธ")
235
+
236
+
237
+ # Use state variables to store generated hypothesis and experiment plan
238
+ hypothesis_state = gr.State("")
239
+ experiment_plan_state = gr.State("")
240
+
241
+ ########## Phase 1: Research Idea Generation Tab ##############
242
+ with gr.Tab("๐Ÿ’กStage 1: Research Idea Generation"):
243
+ gr.Markdown("### Extract Research Elements and Generate Research Ideas")
244
+ with gr.Row():
245
+ with gr.Column():
246
+ paper_text_input = gr.Textbox(value="", lines=10, label="๐Ÿ“‘ Research Paper Text", interactive=False)
247
+
248
+ # extract_button = gr.Button("๐Ÿ” Extract Research Elements")
249
+ with gr.Row():
250
+ tasks_output = gr.Textbox(placeholder="Research task definition", label="Research Tasks", lines=2, interactive=False)
251
+ gaps_output = gr.Textbox(placeholder="Research gaps of current works", label="Research Gaps", lines=2, interactive=False)
252
+ keywords_output = gr.Textbox(placeholder="Paper keywords", label="Keywords", lines=2, interactive=False)
253
+ recent_works_output = gr.Textbox(placeholder="Recent works extracted from Semantic Scholar", label="Recent Works", lines=2, interactive=False)
254
+ with gr.Column():
255
+ with gr.Row(): # Move the button to the top
256
+ generate_button = gr.Button("โœ๏ธ Generate Research Hypothesis & Experiment Plan")
257
+ with gr.Group():
258
+ gr.Markdown("### ๐ŸŒŸ Research Idea")
259
+ with gr.Row():
260
+ hypothesis_output = gr.Textbox(label="Generated Hypothesis", lines=20, interactive=False)
261
+ experiment_plan_output = gr.Textbox(label="Generated Experiment Plan", lines=20, interactive=False)
262
+
263
+
264
+ # Step 1: Extract Research Elements
265
+ # extract_button.click(
266
+ # fn=extract_research_elements,
267
+ # inputs=paper_text_input,
268
+ # outputs=[tasks_output, gaps_output, keywords_output, recent_works_output]
269
+ # )
270
+
271
+ generate_button.click(
272
+ fn=generate_and_store,
273
+ inputs=[paper_text_input, tasks_output, gaps_output, keywords_output, recent_works_output],
274
+ outputs=[hypothesis_output, experiment_plan_output, hypothesis_state, experiment_plan_state]
275
+ )
276
+
277
+ gr.Examples(
278
+ examples=example_text,
279
+ inputs=[paper_text_input],
280
+ outputs=[paper_text_input, tasks_output, gaps_output, keywords_output, recent_works_output],
281
+ fn=load_example_and_set_index,
282
+ run_on_click = True,
283
+ # label="โฌ‡๏ธ Click an example to load"
284
+ )
285
+
286
+
287
+
288
+
289
+ ########## Phase 2 & 3: Experiment implementation and execution ##############
290
+ with gr.Tab("๐Ÿงช Stage 2 & Stage 3: Experiment implementation and execution"):
291
+ gr.Markdown("### Interact with the ExperimentAgent")
292
+
293
+ with gr.Row():
294
+ with gr.Column():
295
+ with gr.Group():
296
+ gr.Markdown("### ๐ŸŒŸ Generated Research Idea")
297
+ with gr.Row():
298
+ idea_input = gr.Textbox(label="Generated Research Hypothesis", lines=30, interactive=False)
299
+ plan_input = gr.Textbox(label="Generated Experiment Plan", lines=30, interactive=False)
300
+
301
+ with gr.Column():
302
+ start_exp_agnet = gr.Button("โš™๏ธ Start / Reset ExperimentAgent", elem_classes=["agent-btn"])
303
+ with gr.Group():
304
+ gr.Markdown("### Implementation + Execution Log")
305
+ log = gr.Textbox(label="๐Ÿ“– Execution Log", lines=20, interactive=False)
306
+ code_display = gr.Code(label="๐Ÿง‘โ€๐Ÿ’ป Implementation", language="python", interactive=False)
307
+
308
+ with gr.Column():
309
+ response = gr.Textbox(label="๐Ÿค– ExperimentAgent Response", lines=30, interactive=False)
310
+ feedback = gr.Textbox(placeholder="N/A", label="๐Ÿง‘โ€๐Ÿ”ฌ User Feedback", lines=3, interactive=True)
311
+ submit_button = gr.Button("Submit", elem_classes=["Submit-btn"])
312
+
313
+ hypothesis_state.change(
314
+ fn=load_phase_2_inputs,
315
+ inputs=[hypothesis_state, experiment_plan_state],
316
+ outputs=[idea_input, plan_input, code_display]
317
+ )
318
+
319
+ # Start research agent
320
+ start_exp_agnet.click(
321
+ fn=start_experiment_agent,
322
+ inputs=[hypothesis_state, experiment_plan_state],
323
+ outputs=[code_display, log, response, feedback]
324
+ )
325
+
326
+ submit_button.click(
327
+ fn=submit_feedback,
328
+ inputs=[feedback, log, response],
329
+ outputs=[log, response, code_display, feedback]
330
+ )
331
+
332
+ # Test
333
+ if __name__ == "__main__":
334
+ step_index = 0
335
+ app.launch(share=True)
.history/app_20250404175628.py ADDED
@@ -0,0 +1,335 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from pathlib import Path
3
+ from reactagent.environment import Environment
4
+ from reactagent.agents.agent_research import ResearchAgent
5
+ from reactagent.runner import create_parser
6
+ from reactagent import llm
7
+ from reactagent.users.user import User
8
+ import os
9
+ import json
10
+
11
+
12
+ # Global variables to store session state
13
+ env = None
14
+ agent = None
15
+ state_example = False
16
+ state_extract = False
17
+ state_generate = False
18
+ state_agent = False
19
+ state_complete = False
20
+ index_ex = "1"
21
+
22
+ example_text = [
23
+ "Research Paper 1: Dataset and Baseline for Automatic Student Feedback Analysis",
24
+ "Research Paper 2: An Empirical Study on the Impact of Code Review on Software Quality"
25
+ ]
26
+
27
+
28
+ # Load example JSON file
29
def load_example_data():
    """Load the demo examples and inline their source-code snippets.

    Reads ``example/example_data.json`` (relative to the current working
    directory). For every entry, the ``code_init`` and ``code_final`` values
    are treated as file names inside ``example/`` and are replaced by the
    contents of those files.

    Returns:
        dict: The parsed JSON mapping, with ``code_init`` / ``code_final``
        replaced by file contents wherever the referenced file exists.

    Raises:
        FileNotFoundError: If ``example/example_data.json`` itself is missing.
        KeyError: If an entry lacks a ``code_init`` or ``code_final`` key.
    """
    # Decode explicitly as UTF-8 so loading does not depend on the platform's
    # default locale encoding.
    with open("example/example_data.json", "r", encoding="utf-8") as json_file:
        example_data = json.load(json_file)

    for idx, entry in example_data.items():
        for field in ("code_init", "code_final"):
            file = entry[field]
            try:
                with open(os.path.join("example", file), "r", encoding="utf-8") as f:
                    entry[field] = f.read()
            except FileNotFoundError:
                # Best effort: leave the file name in place and carry on.
                print(f"File not found: {file}. Skipping key: {idx}")
    return example_data
47
+
48
+ example_data = load_example_data()
49
+
50
+ # Function to handle the selection of an example and populate the respective fields
51
def load_example(example_id):
    """Select an example and return the values for the Stage 1 fields.

    Side effect: stores ``str(example_id)`` in the module-level ``index_ex``.

    Args:
        example_id: Key of the example in ``example_data``; converted with
            ``str`` before the lookup.

    Returns:
        list: ``[paper_text, tasks, gaps, keywords, recent_works]`` where
        ``paper_text`` combines title and abstract and ``recent_works`` is
        joined with newlines.
    """
    global index_ex
    index_ex = str(example_id)
    entry = example_data[index_ex]
    title_part = 'Title:\t' + entry['title']
    abstract_part = '\n\nAbstract:\t' + entry['abstract']
    return [
        title_part + abstract_part,
        entry['research_tasks'],
        entry['research_gaps'],
        entry['keywords'],
        "\n".join(entry['recent_works']),
    ]
61
+
62
+ example_text = [load_example(1)[0], load_example(2)[0]]
63
+
64
+
65
+ # Function to handle example clicks
66
def load_example_and_set_index(paper_text_input):
    """Handle a click on one of the example rows.

    Marks that an example has been chosen, derives the 1-based example key
    from the position of ``paper_text_input`` in ``example_text``, and
    returns the field values produced by ``load_example``.

    Raises:
        ValueError: If ``paper_text_input`` is not one of ``example_text``.
    """
    global index_ex, state_example
    state_example = True
    position = example_text.index(paper_text_input)
    index_ex = str(position + 1)
    return load_example(index_ex)
73
+
74
+
75
+
76
+ ########## Phase 1 ##############
77
+
78
def extract_research_elements(paper_text):
    """Return the stored research elements for the selected example.

    Args:
        paper_text: Paper text currently shown in the UI.

    Returns:
        tuple: ``(tasks, gaps, keywords, recent_works)``; four empty strings
        when no example was selected, the text is empty, or the text does not
        start with the selected example's title line.
    """
    global state_extract, index_ex, state_example
    nothing = ("", "", "", "")
    if paper_text == "" or not state_example:
        return nothing

    state_extract = True
    stripped = paper_text.strip()
    expected_prefix = "Title:\t" + example_data[index_ex]["title"]
    if not stripped.startswith(expected_prefix):
        print("Mismatch detected.")
        print(paper_text)
        return nothing

    entry = example_data[index_ex]
    return (
        entry['research_tasks'],
        entry['research_gaps'],
        entry['keywords'],
        "\n".join(entry['recent_works']),
    )
93
+
94
+
95
+ # Step 2: Generate Research Hypothesis and Experiment Plan
96
def generate_and_store(paper_text, tasks, gaps, keywords, recent_works):
    """Return the stored hypothesis and experiment plan for the example.

    The five arguments are accepted to match the Gradio inputs but are not
    read; the values come from ``example_data[index_ex]``.

    Returns:
        tuple: ``(hypothesis, plan, hypothesis, plan)`` - once for the visible
        text boxes and once for the state holders - or four empty strings
        when no example has been selected.
    """
    global state_generate, index_ex
    if not state_example:
        return "", "", "", ""

    state_generate = True
    entry = example_data[index_ex]
    hypothesis, experiment_plan = entry['hypothesis'], entry['experiment_plan']
    return hypothesis, experiment_plan, hypothesis, experiment_plan
104
+
105
+ ########## Phase 2 & 3 ##############
106
def start_experiment_agent(hypothesis, plan):
    """Start (or reset) the scripted ExperimentAgent walkthrough.

    Both arguments are accepted to match the Gradio inputs but are not read.

    Returns:
        tuple: ``(code, log, response, feedback)`` - the example's initial
        code, the predefined action log and two empty strings - or four empty
        strings when Stage 1 has not been completed.
    """
    global state_agent, step_index, state_complete
    ready = state_generate and state_example
    if not ready:
        return "", "", "", ""

    # Reset the walkthrough to its first step.
    state_agent, step_index, state_complete = True, 0, False
    initial_code = example_data[index_ex]['code_init']
    return initial_code, predefined_action_log, "", ""
115
+
116
def submit_feedback(user_feedback, history, previous_response):
    """Advance the scripted agent run by one feedback round.

    Args:
        user_feedback: Text typed into the feedback box.
        history: Current contents of the execution log.
        previous_response: The agent response currently displayed.

    Returns:
        tuple: ``(log, response, code, feedback)`` - one value per output
        component wired to the submit button. ``feedback`` is always ``""``
        so the input box is cleared.
    """
    global step_index, state_complete
    if not (state_generate and state_agent and state_example):
        # The handler feeds four output components, so the guard must return
        # four values as well (it previously returned only three).
        return "", "", "", ""

    step_index += 1
    msg = history
    if step_index < len(process_steps):
        msg += previous_response + "\nUser feedback:" + user_feedback + "\n\n"
        response = info_to_message(process_steps[step_index])
        response += "Please provide feedback based on the history, response entries, and observation, and questions: "
        # NOTE(review): together with the increment above, this advances two
        # steps per submission, so every other entry of process_steps is never
        # shown - confirm this is intended.
        step_index += 1
        msg += response
    else:
        state_complete = True
        response = "Agent Finished."

    # Show the final implementation only once the run has finished; the
    # original conditional had the two branches swapped.
    code_key = 'code_final' if state_complete else 'code_init'
    return msg, response, example_data[index_ex][code_key], ""
134
+
135
def load_phase_2_inputs(hypothesis, plan):
    """Pass the hypothesis and plan through, plus placeholder code text.

    Returns:
        tuple: ``(hypothesis, plan, placeholder)`` where ``placeholder`` is
        the comment shown in the code panel before the agent is started.
    """
    placeholder = "# Code implementation will be displayed here after Start ExperimentAgent."
    return hypothesis, plan, placeholder
137
+
138
+
139
+
140
+ predefined_action_log = """
141
+ [Reasoning]: To understand the initial structure and functionality of train.py for effective improvements.
142
+ [Action]: Inspect Script (train.py)
143
+ Input: {"script_name": "train.py", "start_line_number": "1", "end_line_number": "74"}
144
+ Objective: Understand the training script, including data processing, [...]
145
+ [Observation]: The train.py script imports [...]. Sets random seeds [...]. Defines [...] Placeholder functions [...] exist without implementation. [...]
146
+ [Feedback]: The script structure is clear, but key functions (train_model, predict) need proper implementation for proposed model training and prediction.\n
147
+ """
148
+
149
+
150
+ predefined_observation = """
151
+ Epoch [1/10],
152
+ Train MSE: 0.543,
153
+ Test MSE: 0.688
154
+ Epoch [2/10],
155
+ Train MSE: 0.242,
156
+ Test MSE: 0.493\n
157
+ """
158
+
159
+ # Initialize the global step_index and history
160
+ process_steps = [
161
+ {
162
+ "Action": "Inspect Script Lines (train.py)",
163
+ "Observation": (
164
+ "The train.py script imports necessary libraries (e.g., pandas, sklearn, torch). "
165
+ "Sets random seeds for reproducibility. Defines compute_metrics_for_regression function "
166
+ "to calculate RMSE for different dimensions. Placeholder functions train_model and "
167
+ "predict exist without implementations."
168
+ ),
169
+ },
170
+ {
171
+ "Action": "Execute Script (train.py)",
172
+ "Observation": (
173
+ "The script executed successfully. Generated embeddings using the BERT model. Completed "
174
+ "the training process without errors. Metrics calculation placeholders indicated areas needing implementation."
175
+ ),
176
+ },
177
+ {
178
+ "Action": "Edit Script (train.py)",
179
+ "Observation": (
180
+ "Edited train.py to separate data loading, model definition, training loop, and evaluation into distinct functions. "
181
+ "The edited train.py now has clearly defined functions"
182
+ "for data loading (load_data), model definition (build_model), "
183
+ "training (train_model), and evaluation (evaluate_model). Similarly, eval.py is reorganized to load the model and perform predictions efficiently."
184
+ ),
185
+ },
186
+ {
187
+ "Action": "Retrieve Model",
188
+ "Observation": "CNN and BiLSTM retrieved.",
189
+ },
190
+ {
191
+ "Action": "Execute Script (train.py)",
192
+ "Observation": (
193
+ "The model trained over the specified number of epochs. Training and validation loss values are recorded for each epoch, "
194
+ "the decrease in loss indicates improved model performance."
195
+ )
196
+ },
197
+ {
198
+ "Action": "Evaluation",
199
+ "Observation": predefined_observation,
200
+ }
201
+ ]
202
def info_to_message(info):
    """Format a step dictionary as a human-readable message.

    Each key/value pair becomes a section: a 64-dash rule, the key followed
    by a colon, then the value. A value that is itself a dict is first
    flattened to ``key:`` / value lines and passed through
    ``User.indent_text(..., 2)``.

    Args:
        info: Mapping of section name to value.

    Returns:
        str: The concatenated sections; ``""`` for an empty mapping.
    """
    separator = '-' * 64
    sections = []
    for key, value in info.items():
        if isinstance(value, dict):
            nested = "".join(f"{k2}:\n {v2}\n" for k2, v2 in value.items())
            value = User.indent_text(nested, 2)
        sections.append(f"{separator}\n{key}:\n{value}\n")
    return "".join(sections)
215
+
216
+
217
def handle_example_click(example_index):
    """Record the clicked example and return its Stage 1 field values.

    Stores ``example_index`` in the module-level ``index_ex`` and delegates
    to ``load_example``, whose result is returned unchanged.
    """
    global index_ex
    index_ex = example_index
    fields = load_example(example_index)
    return fields
221
+
222
+ # Gradio Interface
223
+ with gr.Blocks(css=".gr-examples-label {display: none;}", theme=gr.themes.Default()) as app:
224
+ gr.Markdown("# MLR- Copilot: Machine Learning Research based on LLM Agents")
225
+ gr.Markdown("### ")
226
+ gr.Markdown("## <span style='color:Orange;'> This UI is for predefined example workflow demo only.</span>")
227
+ gr.Markdown("## <span style='color:Orange;'> To reproduce the results please use [Github](https://github.com/du-nlp-lab/MLR-Copilot/)</span>")
228
+
229
+
230
+
231
+ gr.Markdown("MLR-Copilot is a framework where LLMs mimic researchersโ€™ thought processes, designed to enhance the productivity of machine learning research by automating the generation and implementation of research ideas. It begins with a research paper, autonomously generating and validating these ideas, while incorporating human feedback to help reach executable research outcomes.")
232
+
233
+
234
+ gr.Markdown("## โœ… Click an example at bottom to start โฌ‡๏ธ")
235
+
236
+
237
+ # Use state variables to store generated hypothesis and experiment plan
238
+ hypothesis_state = gr.State("")
239
+ experiment_plan_state = gr.State("")
240
+
241
+ ########## Phase 1: Research Idea Generation Tab ##############
242
+ with gr.Tab("๐Ÿ’กStage 1: Research Idea Generation"):
243
+ gr.Markdown("### Extract Research Elements and Generate Research Ideas")
244
+ with gr.Row():
245
+ with gr.Column():
246
+ paper_text_input = gr.Textbox(value="", lines=10, label="๐Ÿ“‘ Research Paper Text", interactive=False)
247
+
248
+ # extract_button = gr.Button("๐Ÿ” Extract Research Elements")
249
+ with gr.Row():
250
+ tasks_output = gr.Textbox(placeholder="Research task definition", label="Research Tasks", lines=2, interactive=False)
251
+ gaps_output = gr.Textbox(placeholder="Research gaps of current works", label="Research Gaps", lines=2, interactive=False)
252
+ keywords_output = gr.Textbox(placeholder="Paper keywords", label="Keywords", lines=2, interactive=False)
253
+ recent_works_output = gr.Textbox(placeholder="Recent works extracted from Semantic Scholar", label="Recent Works", lines=2, interactive=False)
254
+ with gr.Column():
255
+ with gr.Row(): # Move the button to the top
256
+ generate_button = gr.Button("โœ๏ธ Generate Research Hypothesis & Experiment Plan")
257
+ with gr.Group():
258
+ gr.Markdown("### ๐ŸŒŸ Research Idea")
259
+ with gr.Row():
260
+ hypothesis_output = gr.Textbox(label="Generated Hypothesis", lines=20, interactive=False)
261
+ experiment_plan_output = gr.Textbox(label="Generated Experiment Plan", lines=20, interactive=False)
262
+
263
+
264
+ # Step 1: Extract Research Elements
265
+ # extract_button.click(
266
+ # fn=extract_research_elements,
267
+ # inputs=paper_text_input,
268
+ # outputs=[tasks_output, gaps_output, keywords_output, recent_works_output]
269
+ # )
270
+
271
+ generate_button.click(
272
+ fn=generate_and_store,
273
+ inputs=[paper_text_input, tasks_output, gaps_output, keywords_output, recent_works_output],
274
+ outputs=[hypothesis_output, experiment_plan_output, hypothesis_state, experiment_plan_state]
275
+ )
276
+
277
+ gr.Examples(
278
+ examples=example_text,
279
+ inputs=[paper_text_input],
280
+ outputs=[paper_text_input, tasks_output, gaps_output, keywords_output, recent_works_output],
281
+ fn=load_example_and_set_index,
282
+ run_on_click = True,
283
+ # label="โฌ‡๏ธ Click an example to load"
284
+ )
285
+
286
+
287
+
288
+
289
+ ########## Phase 2 & 3: Experiment implementation and execution ##############
290
+ with gr.Tab("๐Ÿงช Stage 2 & Stage 3: Experiment implementation and execution"):
291
+ gr.Markdown("### Interact with the ExperimentAgent")
292
+
293
+ with gr.Row():
294
+ with gr.Column():
295
+ with gr.Group():
296
+ gr.Markdown("### ๐ŸŒŸ Generated Research Idea")
297
+ with gr.Row():
298
+ idea_input = gr.Textbox(label="Generated Research Hypothesis", lines=30, interactive=False)
299
+ plan_input = gr.Textbox(label="Generated Experiment Plan", lines=30, interactive=False)
300
+
301
+ with gr.Column():
302
+ start_exp_agnet = gr.Button("โš™๏ธ Start / Reset ExperimentAgent", elem_classes=["agent-btn"])
303
+ with gr.Group():
304
+ gr.Markdown("### Implementation + Execution Log")
305
+ log = gr.Textbox(label="๐Ÿ“– Execution Log", lines=20, interactive=False)
306
+ code_display = gr.Code(label="๐Ÿง‘โ€๐Ÿ’ป Implementation", language="python", interactive=False)
307
+
308
+ with gr.Column():
309
+ response = gr.Textbox(label="๐Ÿค– ExperimentAgent Response", lines=30, interactive=False)
310
+ feedback = gr.Textbox(placeholder="N/A", label="๐Ÿง‘โ€๐Ÿ”ฌ User Feedback", lines=3, interactive=True)
311
+ submit_button = gr.Button("Submit", elem_classes=["Submit-btn"])
312
+
313
+ hypothesis_state.change(
314
+ fn=load_phase_2_inputs,
315
+ inputs=[hypothesis_state, experiment_plan_state],
316
+ outputs=[idea_input, plan_input, code_display]
317
+ )
318
+
319
+ # Start research agent
320
+ start_exp_agnet.click(
321
+ fn=start_experiment_agent,
322
+ inputs=[hypothesis_state, experiment_plan_state],
323
+ outputs=[code_display, log, response, feedback]
324
+ )
325
+
326
+ submit_button.click(
327
+ fn=submit_feedback,
328
+ inputs=[feedback, log, response],
329
+ outputs=[log, response, code_display, feedback]
330
+ )
331
+
332
+ # Test
333
+ if __name__ == "__main__":
334
+ step_index = 0
335
+ app.launch(share=True)
.history/app_20250404175636.py ADDED
@@ -0,0 +1,335 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from pathlib import Path
3
+ from reactagent.environment import Environment
4
+ from reactagent.agents.agent_research import ResearchAgent
5
+ from reactagent.runner import create_parser
6
+ from reactagent import llm
7
+ from reactagent.users.user import User
8
+ import os
9
+ import json
10
+
11
+
12
+ # Global variables to store session state
13
+ env = None
14
+ agent = None
15
+ state_example = False
16
+ state_extract = False
17
+ state_generate = False
18
+ state_agent = False
19
+ state_complete = False
20
+ index_ex = "1"
21
+
22
+ example_text = [
23
+ "Research Paper 1: Dataset and Baseline for Automatic Student Feedback Analysis",
24
+ "Research Paper 2: An Empirical Study on the Impact of Code Review on Software Quality"
25
+ ]
26
+
27
+
28
+ # Load example JSON file
29
def load_example_data():
    """Load the demo examples and inline their source-code snippets.

    Reads ``example/example_data.json`` (relative to the current working
    directory). For every entry, the ``code_init`` and ``code_final`` values
    are treated as file names inside ``example/`` and are replaced by the
    contents of those files.

    Returns:
        dict: The parsed JSON mapping, with ``code_init`` / ``code_final``
        replaced by file contents wherever the referenced file exists.

    Raises:
        FileNotFoundError: If ``example/example_data.json`` itself is missing.
        KeyError: If an entry lacks a ``code_init`` or ``code_final`` key.
    """
    # Decode explicitly as UTF-8 so loading does not depend on the platform's
    # default locale encoding.
    with open("example/example_data.json", "r", encoding="utf-8") as json_file:
        example_data = json.load(json_file)

    for idx, entry in example_data.items():
        for field in ("code_init", "code_final"):
            file = entry[field]
            try:
                with open(os.path.join("example", file), "r", encoding="utf-8") as f:
                    entry[field] = f.read()
            except FileNotFoundError:
                # Best effort: leave the file name in place and carry on.
                print(f"File not found: {file}. Skipping key: {idx}")
    return example_data
47
+
48
+ example_data = load_example_data()
49
+
50
+ # Function to handle the selection of an example and populate the respective fields
51
def load_example(example_id):
    """Select an example and return the values for the Stage 1 fields.

    Side effect: stores ``str(example_id)`` in the module-level ``index_ex``.

    Args:
        example_id: Key of the example in ``example_data``; converted with
            ``str`` before the lookup.

    Returns:
        list: ``[paper_text, tasks, gaps, keywords, recent_works]`` where
        ``paper_text`` combines title and abstract and ``recent_works`` is
        joined with newlines.
    """
    global index_ex
    index_ex = str(example_id)
    entry = example_data[index_ex]
    title_part = 'Title:\t' + entry['title']
    abstract_part = '\n\nAbstract:\t' + entry['abstract']
    return [
        title_part + abstract_part,
        entry['research_tasks'],
        entry['research_gaps'],
        entry['keywords'],
        "\n".join(entry['recent_works']),
    ]
61
+
62
+ example_text = [load_example(1)[0], load_example(2)[0]]
63
+
64
+
65
+ # Function to handle example clicks
66
def load_example_and_set_index(paper_text_input):
    """Handle a click on one of the example rows.

    Marks that an example has been chosen, derives the 1-based example key
    from the position of ``paper_text_input`` in ``example_text``, and
    returns the field values produced by ``load_example``.

    Raises:
        ValueError: If ``paper_text_input`` is not one of ``example_text``.
    """
    global index_ex, state_example
    state_example = True
    position = example_text.index(paper_text_input)
    index_ex = str(position + 1)
    return load_example(index_ex)
73
+
74
+
75
+
76
+ ########## Phase 1 ##############
77
+
78
def extract_research_elements(paper_text):
    """Return the stored research elements for the selected example.

    Args:
        paper_text: Paper text currently shown in the UI.

    Returns:
        tuple: ``(tasks, gaps, keywords, recent_works)``; four empty strings
        when no example was selected, the text is empty, or the text does not
        start with the selected example's title line.
    """
    global state_extract, index_ex, state_example
    nothing = ("", "", "", "")
    if paper_text == "" or not state_example:
        return nothing

    state_extract = True
    stripped = paper_text.strip()
    expected_prefix = "Title:\t" + example_data[index_ex]["title"]
    if not stripped.startswith(expected_prefix):
        print("Mismatch detected.")
        print(paper_text)
        return nothing

    entry = example_data[index_ex]
    return (
        entry['research_tasks'],
        entry['research_gaps'],
        entry['keywords'],
        "\n".join(entry['recent_works']),
    )
93
+
94
+
95
+ # Step 2: Generate Research Hypothesis and Experiment Plan
96
def generate_and_store(paper_text, tasks, gaps, keywords, recent_works):
    """Return the stored hypothesis and experiment plan for the example.

    The five arguments are accepted to match the Gradio inputs but are not
    read; the values come from ``example_data[index_ex]``.

    Returns:
        tuple: ``(hypothesis, plan, hypothesis, plan)`` - once for the visible
        text boxes and once for the state holders - or four empty strings
        when no example has been selected.
    """
    global state_generate, index_ex
    if not state_example:
        return "", "", "", ""

    state_generate = True
    entry = example_data[index_ex]
    hypothesis, experiment_plan = entry['hypothesis'], entry['experiment_plan']
    return hypothesis, experiment_plan, hypothesis, experiment_plan
104
+
105
+ ########## Phase 2 & 3 ##############
106
def start_experiment_agent(hypothesis, plan):
    """Start (or reset) the scripted ExperimentAgent walkthrough.

    Both arguments are accepted to match the Gradio inputs but are not read.

    Returns:
        tuple: ``(code, log, response, feedback)`` - the example's initial
        code, the predefined action log and two empty strings - or four empty
        strings when Stage 1 has not been completed.
    """
    global state_agent, step_index, state_complete
    ready = state_generate and state_example
    if not ready:
        return "", "", "", ""

    # Reset the walkthrough to its first step.
    state_agent, step_index, state_complete = True, 0, False
    initial_code = example_data[index_ex]['code_init']
    return initial_code, predefined_action_log, "", ""
115
+
116
def submit_feedback(user_feedback, history, previous_response):
    """Advance the scripted agent run by one feedback round.

    Args:
        user_feedback: Text typed into the feedback box.
        history: Current contents of the execution log.
        previous_response: The agent response currently displayed.

    Returns:
        tuple: ``(log, response, code, feedback)`` - one value per output
        component wired to the submit button. ``feedback`` is always ``""``
        so the input box is cleared.
    """
    global step_index, state_complete
    if not (state_generate and state_agent and state_example):
        # The handler feeds four output components, so the guard must return
        # four values as well (it previously returned only three).
        return "", "", "", ""

    step_index += 1
    msg = history
    if step_index < len(process_steps):
        msg += previous_response + "\nUser feedback:" + user_feedback + "\n\n"
        response = info_to_message(process_steps[step_index])
        response += "Please provide feedback based on the history, response entries, and observation, and questions: "
        # NOTE(review): together with the increment above, this advances two
        # steps per submission, so every other entry of process_steps is never
        # shown - confirm this is intended.
        step_index += 1
        msg += response
    else:
        state_complete = True
        response = "Agent Finished."

    # Show the final implementation only once the run has finished; the
    # original conditional had the two branches swapped.
    code_key = 'code_final' if state_complete else 'code_init'
    return msg, response, example_data[index_ex][code_key], ""
134
+
135
def load_phase_2_inputs(hypothesis, plan):
    """Pass the hypothesis and plan through, plus placeholder code text.

    Returns:
        tuple: ``(hypothesis, plan, placeholder)`` where ``placeholder`` is
        the comment shown in the code panel before the agent is started.
    """
    placeholder = "# Code implementation will be displayed here after Start ExperimentAgent."
    return hypothesis, plan, placeholder
137
+
138
+
139
+
140
+ predefined_action_log = """
141
+ [Reasoning]: To understand the initial structure and functionality of train.py for effective improvements.
142
+ [Action]: Inspect Script (train.py)
143
+ Input: {"script_name": "train.py", "start_line_number": "1", "end_line_number": "74"}
144
+ Objective: Understand the training script, including data processing, [...]
145
+ [Observation]: The train.py script imports [...]. Sets random seeds [...]. Defines [...] Placeholder functions [...] exist without implementation. [...]
146
+ [Feedback]: The script structure is clear, but key functions (train_model, predict) need proper implementation for proposed model training and prediction.\n
147
+ """
148
+
149
+
150
+ predefined_observation = """
151
+ Epoch [1/10],
152
+ Train MSE: 0.543,
153
+ Test MSE: 0.688
154
+ Epoch [2/10],
155
+ Train MSE: 0.242,
156
+ Test MSE: 0.493\n
157
+ """
158
+
159
# Scripted agent trace replayed by submit_feedback. Each entry is a dict with
# an "Action" and an "Observation" that info_to_message renders as text.
process_steps = [
    {
        "Action": "Inspect Script Lines (train.py)",
        "Observation": (
            "The train.py script imports necessary libraries (e.g., pandas, sklearn, torch). "
            "Sets random seeds for reproducibility. Defines compute_metrics_for_regression function "
            "to calculate RMSE for different dimensions. Placeholder functions train_model and "
            "predict exist without implementations."
        ),
    },
    {
        "Action": "Execute Script (train.py)",
        "Observation": (
            "The script executed successfully. Generated embeddings using the BERT model. Completed "
            "the training process without errors. Metrics calculation placeholders indicated areas needing implementation."
        ),
    },
    {
        "Action": "Edit Script (train.py)",
        "Observation": (
            "Edited train.py to separate data loading, model definition, training loop, and evaluation into distinct functions. "
            # Trailing space added: adjacent literals are concatenated verbatim,
            # and without it the text read "functionsfor data loading".
            "The edited train.py now has clearly defined functions "
            "for data loading (load_data), model definition (build_model), "
            "training (train_model), and evaluation (evaluate_model). Similarly, eval.py is reorganized to load the model and perform predictions efficiently."
        ),
    },
    {
        "Action": "Retrieve Model",
        "Observation": "CNN and BiLSTM retrieved.",
    },
    {
        "Action": "Execute Script (train.py)",
        "Observation": (
            "The model trained over the specified number of epochs. Training and validation loss values are recorded for each epoch, "
            "the decrease in loss indicates improved model performance."
        ),
    },
    {
        "Action": "Evaluation",
        "Observation": predefined_observation,
    },
]
202
def info_to_message(info):
    """Render a step dictionary as a plain-text message.

    Every top-level entry becomes one section: a rule of 64 dashes on its own
    line, then the key followed by a colon, then the value on the next line.
    A value that is itself a dict is first flattened into one "key: / value"
    pair per entry and passed through ``User.indent_text(..., 2)``.

    Returns:
        The concatenated sections, or ``""`` for an empty mapping.
    """
    separator = '-' * 64 + '\n'
    sections = []
    for key, value in info.items():
        if isinstance(value, dict):
            flattened = "".join(
                f"{sub_key}:\n {sub_value}\n" for sub_key, sub_value in value.items()
            )
            value = User.indent_text(flattened, 2)
        sections.append(f"{separator}{key}:\n{value}\n")
    return "".join(sections)
215
+
216
+
217
def handle_example_click(example_index):
    """Record the clicked example as the active one and load it.

    Stores ``example_index`` in the module-level ``index_ex`` and returns
    whatever ``load_example`` produces for that index, so the caller can
    display it.
    """
    global index_ex
    index_ex = example_index
    return load_example(example_index)
221
+
222
+ # Gradio Interface
223
+ with gr.Blocks(css=".gr-examples-label {display: none;}", theme=gr.themes.Default()) as app:
224
+ gr.Markdown("# MLR- Copilot: Machine Learning Research based on LLM Agents")
225
+ gr.Markdown("### ")
226
+ gr.Markdown("## <span style='color:Orange;'> This UI is for predefined example workflow demo only.</span>")
227
+ gr.Markdown("## <span style='color:Orange;'> To reproduce the results please use [Github](https://github.com/du-nlp-lab/MLR-Copilot/)</span>")
228
+
229
+
230
+
231
+ gr.Markdown("MLR-Copilot is a framework where LLMs mimic researchersโ€™ thought processes, designed to enhance the productivity of machine learning research by automating the generation and implementation of research ideas. It begins with a research paper, autonomously generating and validating these ideas, while incorporating human feedback to help reach executable research outcomes.")
232
+
233
+
234
+ gr.Markdown("## โœ… Click an example at bottom to start โฌ‡๏ธ")
235
+
236
+
237
+ # Use state variables to store generated hypothesis and experiment plan
238
+ hypothesis_state = gr.State("")
239
+ experiment_plan_state = gr.State("")
240
+
241
+ ########## Phase 1: Research Idea Generation Tab ##############
242
+ with gr.Tab("๐Ÿ’กStage 1: Research Idea Generation"):
243
+ gr.Markdown("### Extract Research Elements and Generate Research Ideas")
244
+ with gr.Row():
245
+ with gr.Column():
246
+ paper_text_input = gr.Textbox(value="", lines=10, label="๐Ÿ“‘ Research Paper Text", interactive=False)
247
+
248
+ # extract_button = gr.Button("๐Ÿ” Extract Research Elements")
249
+ with gr.Row():
250
+ tasks_output = gr.Textbox(placeholder="Research task definition", label="Research Tasks", lines=2, interactive=False)
251
+ gaps_output = gr.Textbox(placeholder="Research gaps of current works", label="Research Gaps", lines=2, interactive=False)
252
+ keywords_output = gr.Textbox(placeholder="Paper keywords", label="Keywords", lines=2, interactive=False)
253
+ recent_works_output = gr.Textbox(placeholder="Recent works extracted from Semantic Scholar", label="Recent Works", lines=2, interactive=False)
254
+ with gr.Column():
255
+ with gr.Row(): # Move the button to the top
256
+ generate_button = gr.Button("โœ๏ธ Generate Research Hypothesis & Experiment Plan")
257
+ with gr.Group():
258
+ gr.Markdown("### ๐ŸŒŸ Research Idea")
259
+ with gr.Row():
260
+ hypothesis_output = gr.Textbox(label="Generated Hypothesis", lines=20, interactive=False)
261
+ experiment_plan_output = gr.Textbox(label="Generated Experiment Plan", lines=20, interactive=False)
262
+
263
+
264
+ # Step 1: Extract Research Elements
265
+ # extract_button.click(
266
+ # fn=extract_research_elements,
267
+ # inputs=paper_text_input,
268
+ # outputs=[tasks_output, gaps_output, keywords_output, recent_works_output]
269
+ # )
270
+
271
+ generate_button.click(
272
+ fn=generate_and_store,
273
+ inputs=[paper_text_input, tasks_output, gaps_output, keywords_output, recent_works_output],
274
+ outputs=[hypothesis_output, experiment_plan_output, hypothesis_state, experiment_plan_state]
275
+ )
276
+
277
+ gr.Examples(
278
+ examples=example_text,
279
+ inputs=[paper_text_input],
280
+ outputs=[paper_text_input, tasks_output, gaps_output, keywords_output, recent_works_output],
281
+ fn=load_example_and_set_index,
282
+ run_on_click = True,
283
+ # label="โฌ‡๏ธ Click an example to load"
284
+ )
285
+
286
+
287
+
288
+
289
+ ########## Phase 2 & 3: Experiment implementation and execution ##############
290
+ with gr.Tab("๐Ÿงช Stage 2 & Stage 3: Experiment implementation and execution"):
291
+ gr.Markdown("### Interact with the ExperimentAgent")
292
+
293
+ with gr.Row():
294
+ with gr.Column():
295
+ with gr.Group():
296
+ gr.Markdown("### ๐ŸŒŸ Generated Research Idea")
297
+ with gr.Row():
298
+ idea_input = gr.Textbox(label="Generated Research Hypothesis", lines=30, interactive=False)
299
+ plan_input = gr.Textbox(label="Generated Experiment Plan", lines=30, interactive=False)
300
+
301
+ with gr.Column():
302
+ start_exp_agnet = gr.Button("โš™๏ธ Start / Reset ExperimentAgent", elem_classes=["agent-btn"])
303
+ with gr.Group():
304
+ gr.Markdown("### Implementation + Execution Log")
305
+ log = gr.Textbox(label="๐Ÿ“– Execution Log", lines=20, interactive=False)
306
+ code_display = gr.Code(label="๐Ÿง‘โ€๐Ÿ’ป Implementation", language="python", interactive=False)
307
+
308
+ with gr.Column():
309
+ response = gr.Textbox(label="๐Ÿค– ExperimentAgent Response", lines=30, interactive=False)
310
+ feedback = gr.Textbox(placeholder="N/A", label="๐Ÿง‘โ€๐Ÿ”ฌ User Feedback", lines=3, interactive=True)
311
+ submit_button = gr.Button("Submit", elem_classes=["Submit-btn"])
312
+
313
+ hypothesis_state.change(
314
+ fn=load_phase_2_inputs,
315
+ inputs=[hypothesis_state, experiment_plan_state],
316
+ outputs=[idea_input, plan_input, code_display]
317
+ )
318
+
319
+ # Start research agent
320
+ start_exp_agnet.click(
321
+ fn=start_experiment_agent,
322
+ inputs=[hypothesis_state, experiment_plan_state],
323
+ outputs=[code_display, log, response, feedback]
324
+ )
325
+
326
+ submit_button.click(
327
+ fn=submit_feedback,
328
+ inputs=[feedback, log, response],
329
+ outputs=[log, response, code_display, feedback]
330
+ )
331
+
332
+ # Test
333
+ if __name__ == "__main__":
334
+ step_index = 0
335
+ app.launch(share=True)
app.py CHANGED
@@ -104,8 +104,8 @@ def generate_and_store(paper_text, tasks, gaps, keywords, recent_works):
104
 
105
  ########## Phase 2 & 3 ##############
106
  def start_experiment_agent(hypothesis, plan):
107
- if (not state_extract or not state_generate or not state_example):
108
- return "", "", ""
109
  global state_agent, step_index, state_complete
110
  state_agent = True
111
  step_index = 0
@@ -114,7 +114,7 @@ def start_experiment_agent(hypothesis, plan):
114
  return example_data[index_ex]['code_init'], predefined_action_log, "", ""
115
 
116
  def submit_feedback(user_feedback, history, previous_response):
117
- if (not state_extract or not state_generate or not state_agent or not state_example):
118
  return "", "", ""
119
  global step_index, state_complete
120
  step_index += 1
@@ -223,7 +223,7 @@ def handle_example_click(example_index):
223
  with gr.Blocks(css=".gr-examples-label {display: none;}", theme=gr.themes.Default()) as app:
224
  gr.Markdown("# MLR- Copilot: Machine Learning Research based on LLM Agents")
225
  gr.Markdown("### ")
226
- gr.Markdown("## <span style='color:Orange;'> This UI is for predefined example demo only.</span>")
227
  gr.Markdown("## <span style='color:Orange;'> To reproduce the results please use [Github](https://github.com/du-nlp-lab/MLR-Copilot/)</span>")
228
 
229
 
 
104
 
105
  ########## Phase 2 & 3 ##############
106
  def start_experiment_agent(hypothesis, plan):
107
+ if (not state_generate or not state_example):
108
+ return "", "", "", ""
109
  global state_agent, step_index, state_complete
110
  state_agent = True
111
  step_index = 0
 
114
  return example_data[index_ex]['code_init'], predefined_action_log, "", ""
115
 
116
  def submit_feedback(user_feedback, history, previous_response):
117
+ if (not state_generate or not state_agent or not state_example):
118
  return "", "", ""
119
  global step_index, state_complete
120
  step_index += 1
 
223
  with gr.Blocks(css=".gr-examples-label {display: none;}", theme=gr.themes.Default()) as app:
224
  gr.Markdown("# MLR- Copilot: Machine Learning Research based on LLM Agents")
225
  gr.Markdown("### ")
226
+ gr.Markdown("## <span style='color:Orange;'> This UI is for predefined example workflow demo only.</span>")
227
  gr.Markdown("## <span style='color:Orange;'> To reproduce the results please use [Github](https://github.com/du-nlp-lab/MLR-Copilot/)</span>")
228
 
229