Spaces:
Running
Running
Adding task solver box and example
Browse files
examples/LynxScribe Data Cleaning.lynxkite.json
ADDED
@@ -0,0 +1,345 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"edges": [
|
3 |
+
{
|
4 |
+
"id": "LynxScribe Task Solver 1 View DataFrame 1",
|
5 |
+
"source": "LynxScribe Task Solver 1",
|
6 |
+
"sourceHandle": "output",
|
7 |
+
"target": "View DataFrame 1",
|
8 |
+
"targetHandle": "input"
|
9 |
+
},
|
10 |
+
{
|
11 |
+
"id": "Read Excel 1 LynxScribe Task Solver 1",
|
12 |
+
"source": "Read Excel 1",
|
13 |
+
"sourceHandle": "output",
|
14 |
+
"target": "LynxScribe Task Solver 1",
|
15 |
+
"targetHandle": "dataframe"
|
16 |
+
},
|
17 |
+
{
|
18 |
+
"id": "LynxScribe Message 3 LynxScribe Task Solver 1",
|
19 |
+
"source": "LynxScribe Message 3",
|
20 |
+
"sourceHandle": "output",
|
21 |
+
"target": "LynxScribe Task Solver 1",
|
22 |
+
"targetHandle": "system_prompt"
|
23 |
+
},
|
24 |
+
{
|
25 |
+
"id": "LynxScribe Message 1 LynxScribe Task Solver 1",
|
26 |
+
"source": "LynxScribe Message 1",
|
27 |
+
"sourceHandle": "output",
|
28 |
+
"target": "LynxScribe Task Solver 1",
|
29 |
+
"targetHandle": "instruction_prompt"
|
30 |
+
}
|
31 |
+
],
|
32 |
+
"env": "LynxScribe",
|
33 |
+
"nodes": [
|
34 |
+
{
|
35 |
+
"data": {
|
36 |
+
"__execution_delay": 0.0,
|
37 |
+
"collapsed": null,
|
38 |
+
"display": null,
|
39 |
+
"error": null,
|
40 |
+
"input_metadata": null,
|
41 |
+
"meta": {
|
42 |
+
"inputs": {
|
43 |
+
"dataframe": {
|
44 |
+
"name": "dataframe",
|
45 |
+
"position": "left",
|
46 |
+
"type": {
|
47 |
+
"type": "<class 'inspect._empty'>"
|
48 |
+
}
|
49 |
+
},
|
50 |
+
"instruction_prompt": {
|
51 |
+
"name": "instruction_prompt",
|
52 |
+
"position": "bottom",
|
53 |
+
"type": {
|
54 |
+
"type": "<class 'inspect._empty'>"
|
55 |
+
}
|
56 |
+
},
|
57 |
+
"system_prompt": {
|
58 |
+
"name": "system_prompt",
|
59 |
+
"position": "bottom",
|
60 |
+
"type": {
|
61 |
+
"type": "<class 'inspect._empty'>"
|
62 |
+
}
|
63 |
+
}
|
64 |
+
},
|
65 |
+
"name": "LynxScribe Task Solver",
|
66 |
+
"outputs": {
|
67 |
+
"output": {
|
68 |
+
"name": "output",
|
69 |
+
"position": "right",
|
70 |
+
"type": {
|
71 |
+
"type": "None"
|
72 |
+
}
|
73 |
+
}
|
74 |
+
},
|
75 |
+
"params": {
|
76 |
+
"llm_interface": {
|
77 |
+
"default": "openai",
|
78 |
+
"name": "llm_interface",
|
79 |
+
"type": {
|
80 |
+
"type": "<class 'str'>"
|
81 |
+
}
|
82 |
+
},
|
83 |
+
"llm_model_name": {
|
84 |
+
"default": "gpt-4o",
|
85 |
+
"name": "llm_model_name",
|
86 |
+
"type": {
|
87 |
+
"type": "<class 'str'>"
|
88 |
+
}
|
89 |
+
},
|
90 |
+
"new_column_names": {
|
91 |
+
"default": "processed_field",
|
92 |
+
"name": "new_column_names",
|
93 |
+
"type": {
|
94 |
+
"type": "<class 'str'>"
|
95 |
+
}
|
96 |
+
}
|
97 |
+
},
|
98 |
+
"type": "basic"
|
99 |
+
},
|
100 |
+
"params": {
|
101 |
+
"llm_interface": "openai",
|
102 |
+
"llm_model_name": "gpt-4o",
|
103 |
+
"new_column_names": "zip_code, country, state_or_county, city, district, street_name, house_number, floor, flat_number, additional_info"
|
104 |
+
},
|
105 |
+
"status": "done",
|
106 |
+
"title": "LynxScribe Task Solver"
|
107 |
+
},
|
108 |
+
"dragHandle": ".bg-primary",
|
109 |
+
"height": 292.0,
|
110 |
+
"id": "LynxScribe Task Solver 1",
|
111 |
+
"position": {
|
112 |
+
"x": 576.0,
|
113 |
+
"y": 209.0
|
114 |
+
},
|
115 |
+
"type": "basic",
|
116 |
+
"width": 432.0
|
117 |
+
},
|
118 |
+
{
|
119 |
+
"data": {
|
120 |
+
"display": null,
|
121 |
+
"error": null,
|
122 |
+
"input_metadata": null,
|
123 |
+
"meta": {
|
124 |
+
"inputs": {
|
125 |
+
"input": {
|
126 |
+
"name": "input",
|
127 |
+
"position": "left",
|
128 |
+
"type": {
|
129 |
+
"type": "<class 'inspect._empty'>"
|
130 |
+
}
|
131 |
+
}
|
132 |
+
},
|
133 |
+
"name": "View DataFrame",
|
134 |
+
"outputs": {},
|
135 |
+
"params": {},
|
136 |
+
"position": {
|
137 |
+
"x": 2162.0,
|
138 |
+
"y": 266.0
|
139 |
+
},
|
140 |
+
"type": "table_view"
|
141 |
+
},
|
142 |
+
"params": {},
|
143 |
+
"status": "done",
|
144 |
+
"title": "View DataFrame"
|
145 |
+
},
|
146 |
+
"dragHandle": ".bg-primary",
|
147 |
+
"height": 309.0,
|
148 |
+
"id": "View DataFrame 1",
|
149 |
+
"position": {
|
150 |
+
"x": 1350.0,
|
151 |
+
"y": 210.0
|
152 |
+
},
|
153 |
+
"type": "table_view",
|
154 |
+
"width": 662.0
|
155 |
+
},
|
156 |
+
{
|
157 |
+
"data": {
|
158 |
+
"__execution_delay": 0.0,
|
159 |
+
"collapsed": null,
|
160 |
+
"display": null,
|
161 |
+
"error": null,
|
162 |
+
"input_metadata": null,
|
163 |
+
"meta": {
|
164 |
+
"inputs": {},
|
165 |
+
"name": "Read Excel",
|
166 |
+
"outputs": {
|
167 |
+
"output": {
|
168 |
+
"name": "output",
|
169 |
+
"position": "right",
|
170 |
+
"type": {
|
171 |
+
"type": "None"
|
172 |
+
}
|
173 |
+
}
|
174 |
+
},
|
175 |
+
"params": {
|
176 |
+
"columns": {
|
177 |
+
"default": "",
|
178 |
+
"name": "columns",
|
179 |
+
"type": {
|
180 |
+
"type": "<class 'str'>"
|
181 |
+
}
|
182 |
+
},
|
183 |
+
"file_path": {
|
184 |
+
"default": null,
|
185 |
+
"name": "file_path",
|
186 |
+
"type": {
|
187 |
+
"type": "<class 'str'>"
|
188 |
+
}
|
189 |
+
},
|
190 |
+
"sheet_name": {
|
191 |
+
"default": "Sheet1",
|
192 |
+
"name": "sheet_name",
|
193 |
+
"type": {
|
194 |
+
"type": "<class 'str'>"
|
195 |
+
}
|
196 |
+
}
|
197 |
+
},
|
198 |
+
"position": {
|
199 |
+
"x": 429.0,
|
200 |
+
"y": 234.0
|
201 |
+
},
|
202 |
+
"type": "basic"
|
203 |
+
},
|
204 |
+
"params": {
|
205 |
+
"columns": "",
|
206 |
+
"file_path": "uploads/task_solver_examples.xlsx",
|
207 |
+
"sheet_name": "address_example"
|
208 |
+
},
|
209 |
+
"status": "done",
|
210 |
+
"title": "Read Excel"
|
211 |
+
},
|
212 |
+
"dragHandle": ".bg-primary",
|
213 |
+
"height": 296.0,
|
214 |
+
"id": "Read Excel 1",
|
215 |
+
"position": {
|
216 |
+
"x": 11.0,
|
217 |
+
"y": 207.0
|
218 |
+
},
|
219 |
+
"type": "basic",
|
220 |
+
"width": 400.0
|
221 |
+
},
|
222 |
+
{
|
223 |
+
"data": {
|
224 |
+
"__execution_delay": 0.0,
|
225 |
+
"collapsed": null,
|
226 |
+
"display": null,
|
227 |
+
"error": null,
|
228 |
+
"input_metadata": null,
|
229 |
+
"meta": {
|
230 |
+
"inputs": {},
|
231 |
+
"name": "LynxScribe Message",
|
232 |
+
"outputs": {
|
233 |
+
"output": {
|
234 |
+
"name": "output",
|
235 |
+
"position": "top",
|
236 |
+
"type": {
|
237 |
+
"type": "None"
|
238 |
+
}
|
239 |
+
}
|
240 |
+
},
|
241 |
+
"params": {
|
242 |
+
"prompt_content": {
|
243 |
+
"default": null,
|
244 |
+
"name": "prompt_content",
|
245 |
+
"type": {
|
246 |
+
"format": "textarea"
|
247 |
+
}
|
248 |
+
},
|
249 |
+
"prompt_role": {
|
250 |
+
"default": null,
|
251 |
+
"name": "prompt_role",
|
252 |
+
"type": {
|
253 |
+
"enum": [
|
254 |
+
"SYSTEM",
|
255 |
+
"USER"
|
256 |
+
]
|
257 |
+
}
|
258 |
+
}
|
259 |
+
},
|
260 |
+
"position": {
|
261 |
+
"x": 653.0,
|
262 |
+
"y": 954.0
|
263 |
+
},
|
264 |
+
"type": "basic"
|
265 |
+
},
|
266 |
+
"params": {
|
267 |
+
"prompt_content": "You are an AI assistant designed to clean and extract structured address information from raw text.\nYour goal is to identify and extract relevant address components while ignoring any unrelated information.\nThe output must be formatted as a structured dictionary.\n\nYour task is to parse an address from raw text and return a dictionary with the following keys:\n - zip_code: The postal or ZIP code.\n - country: The country name.\n - state_or_county: The state, province, or county (if applicable).\n - city: The city or town name.\n - district: The district or borough name (if mentioned).\n - street_type: The type of public space (e.g., street, avenue, boulevard, square).\n - street_name: The name of the public space (e.g., Main, Baker, Champs-\u00c9lys\u00e9es).\n - house_number: The house or building number.\n - floor: The floor number (if mentioned).\n - flat_number: The apartment or unit number (if mentioned).\n - additional_info: Any other useful details, such as building names, or known landmarks.\n\nIf any information is missing from the input, leave the corresponding key as an empty string.\n\nYou must return only a python dictionary with the following keys:\n`zip_code`, `country`, `state_or_county`, `city`, `district`, `street_name`, \n`house_number`, `floor`, `flat_number`, `additional_info`.\n\nDo not include any extra text, comments, or explanations\u2014only return the dictionary.",
|
268 |
+
"prompt_role": null
|
269 |
+
},
|
270 |
+
"status": "done",
|
271 |
+
"title": "LynxScribe Message"
|
272 |
+
},
|
273 |
+
"dragHandle": ".bg-primary",
|
274 |
+
"height": 354.0,
|
275 |
+
"id": "LynxScribe Message 3",
|
276 |
+
"position": {
|
277 |
+
"x": 36.0,
|
278 |
+
"y": 561.0
|
279 |
+
},
|
280 |
+
"type": "basic",
|
281 |
+
"width": 740.0
|
282 |
+
},
|
283 |
+
{
|
284 |
+
"data": {
|
285 |
+
"__execution_delay": 0.0,
|
286 |
+
"collapsed": null,
|
287 |
+
"display": null,
|
288 |
+
"error": null,
|
289 |
+
"input_metadata": null,
|
290 |
+
"meta": {
|
291 |
+
"inputs": {},
|
292 |
+
"name": "LynxScribe Message",
|
293 |
+
"outputs": {
|
294 |
+
"output": {
|
295 |
+
"name": "output",
|
296 |
+
"position": "top",
|
297 |
+
"type": {
|
298 |
+
"type": "None"
|
299 |
+
}
|
300 |
+
}
|
301 |
+
},
|
302 |
+
"params": {
|
303 |
+
"prompt_content": {
|
304 |
+
"default": null,
|
305 |
+
"name": "prompt_content",
|
306 |
+
"type": {
|
307 |
+
"format": "textarea"
|
308 |
+
}
|
309 |
+
},
|
310 |
+
"prompt_role": {
|
311 |
+
"default": null,
|
312 |
+
"name": "prompt_role",
|
313 |
+
"type": {
|
314 |
+
"enum": [
|
315 |
+
"SYSTEM",
|
316 |
+
"USER"
|
317 |
+
]
|
318 |
+
}
|
319 |
+
}
|
320 |
+
},
|
321 |
+
"position": {
|
322 |
+
"x": 1498.0,
|
323 |
+
"y": 660.0
|
324 |
+
},
|
325 |
+
"type": "basic"
|
326 |
+
},
|
327 |
+
"params": {
|
328 |
+
"prompt_content": "Extract structured address information from the following text: {message_parts}",
|
329 |
+
"prompt_role": "USER"
|
330 |
+
},
|
331 |
+
"status": "done",
|
332 |
+
"title": "LynxScribe Message"
|
333 |
+
},
|
334 |
+
"dragHandle": ".bg-primary",
|
335 |
+
"height": 347.0,
|
336 |
+
"id": "LynxScribe Message 1",
|
337 |
+
"position": {
|
338 |
+
"x": 817.0,
|
339 |
+
"y": 566.0
|
340 |
+
},
|
341 |
+
"type": "basic",
|
342 |
+
"width": 498.0
|
343 |
+
}
|
344 |
+
]
|
345 |
+
}
|
examples/LynxScribe Image Search.lynxkite.json
CHANGED
@@ -292,14 +292,10 @@
|
|
292 |
}
|
293 |
}
|
294 |
},
|
295 |
-
"position": {
|
296 |
-
"x": 1260.0,
|
297 |
-
"y": 166.0
|
298 |
-
},
|
299 |
"type": "basic"
|
300 |
},
|
301 |
"params": {
|
302 |
-
"chat": "Show me a picture about
|
303 |
},
|
304 |
"status": "done",
|
305 |
"title": "Input chat"
|
@@ -355,10 +351,6 @@
|
|
355 |
}
|
356 |
}
|
357 |
},
|
358 |
-
"position": {
|
359 |
-
"x": 1987.0,
|
360 |
-
"y": 365.0
|
361 |
-
},
|
362 |
"type": "basic"
|
363 |
},
|
364 |
"params": {
|
@@ -379,7 +371,7 @@
|
|
379 |
},
|
380 |
{
|
381 |
"data": {
|
382 |
-
"display": "https://storage.googleapis.com/lynxkite_public_data/lynxscribe-images/image-rag-test/
|
383 |
"error": null,
|
384 |
"input_metadata": null,
|
385 |
"meta": {
|
@@ -395,10 +387,6 @@
|
|
395 |
"name": "LynxScribe Image Result Viewer",
|
396 |
"outputs": {},
|
397 |
"params": {},
|
398 |
-
"position": {
|
399 |
-
"x": 2326.0,
|
400 |
-
"y": 319.0
|
401 |
-
},
|
402 |
"type": "image"
|
403 |
},
|
404 |
"params": {},
|
|
|
292 |
}
|
293 |
}
|
294 |
},
|
|
|
|
|
|
|
|
|
295 |
"type": "basic"
|
296 |
},
|
297 |
"params": {
|
298 |
+
"chat": "Show me a picture about fruits"
|
299 |
},
|
300 |
"status": "done",
|
301 |
"title": "Input chat"
|
|
|
351 |
}
|
352 |
}
|
353 |
},
|
|
|
|
|
|
|
|
|
354 |
"type": "basic"
|
355 |
},
|
356 |
"params": {
|
|
|
371 |
},
|
372 |
{
|
373 |
"data": {
|
374 |
+
"display": "https://storage.googleapis.com/lynxkite_public_data/lynxscribe-images/image-rag-test/food-405521_1280.jpg",
|
375 |
"error": null,
|
376 |
"input_metadata": null,
|
377 |
"meta": {
|
|
|
387 |
"name": "LynxScribe Image Result Viewer",
|
388 |
"outputs": {},
|
389 |
"params": {},
|
|
|
|
|
|
|
|
|
390 |
"type": "image"
|
391 |
},
|
392 |
"params": {},
|
examples/uploads/task_solver_examples.xlsx
ADDED
Binary file (11.3 kB). View file
|
|
lynxkite-lynxscribe/src/lynxkite_lynxscribe/lynxscribe_ops.py
CHANGED
@@ -28,7 +28,7 @@ from lynxscribe.components.chat.processors import (
|
|
28 |
TruncateHistory,
|
29 |
)
|
30 |
from lynxscribe.components.chat.api import ChatAPI
|
31 |
-
from lynxscribe.core.models.prompts import ChatCompletionPrompt
|
32 |
from lynxscribe.components.rag.loaders import FAQTemplateLoader
|
33 |
|
34 |
from lynxkite.core import ops
|
@@ -56,6 +56,11 @@ class RAGVersion(Enum):
|
|
56 |
V2 = "v2"
|
57 |
|
58 |
|
|
|
|
|
|
|
|
|
|
|
59 |
class RAGTemplate(BaseModel):
|
60 |
"""
|
61 |
Model for RAG templates consisting of three tables: they are connected via scenario names.
|
@@ -672,6 +677,113 @@ def chat_processor(processor, *, _ctx: one_by_one.Context):
|
|
672 |
return {"chat_processor": chat_processor, **cfg}
|
673 |
|
674 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
675 |
@output_on_top
|
676 |
@op("Truncate history")
|
677 |
def truncate_history(*, max_tokens=10000):
|
@@ -718,6 +830,28 @@ def input_chat(*, chat: str):
|
|
718 |
return {"text": chat}
|
719 |
|
720 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
721 |
@op("View", view="table_view")
|
722 |
def view(input):
|
723 |
columns = [str(c) for c in input.keys() if not str(c).startswith("_")]
|
|
|
28 |
TruncateHistory,
|
29 |
)
|
30 |
from lynxscribe.components.chat.api import ChatAPI
|
31 |
+
from lynxscribe.core.models.prompts import ChatCompletionPrompt, Message
|
32 |
from lynxscribe.components.rag.loaders import FAQTemplateLoader
|
33 |
|
34 |
from lynxkite.core import ops
|
|
|
56 |
V2 = "v2"
|
57 |
|
58 |
|
59 |
+
class MessageRole(Enum):
    """Role that a LynxScribe prompt message can carry.

    The member values are the lowercase role strings handed to the message
    object when a prompt is built.
    """

    # Instructions that frame the assistant's behaviour.
    SYSTEM = "system"
    # Content supplied on behalf of the end user.
    USER = "user"
|
62 |
+
|
63 |
+
|
64 |
class RAGTemplate(BaseModel):
|
65 |
"""
|
66 |
Model for RAG templates consisting of three tables: they are connected via scenario names.
|
|
|
677 |
return {"chat_processor": chat_processor, **cfg}
|
678 |
|
679 |
|
680 |
+
@output_on_top
@op("LynxScribe Message")
def lynxscribe_message(*, prompt_role: MessageRole, prompt_content: ops.LongStr):
    """
    Build a single prompt message from a role and a block of text.

    The role's string value and the whitespace-stripped text are passed to
    ``Message``; the resulting object is returned under the
    ``"prompt_message"`` key.
    """
    role = prompt_role.value
    text = prompt_content.strip()
    return {"prompt_message": Message(role=role, content=text)}
|
685 |
+
|
686 |
+
|
687 |
+
@op("Read Excel")
def read_excel(*, file_path: str, sheet_name: str = "Sheet1", columns: str = ""):
    """
    Load one sheet of an Excel file into a data frame.

    ``columns`` is an optional comma-separated list of column names to keep.
    When it is empty, every column of the sheet is returned. Requested names
    that are not present in the sheet are ignored; if none of the requested
    names is present, a ``ValueError`` is raised.

    TODO: more general: several input/output versions.
    """
    frame = pd.read_excel(file_path, sheet_name=sheet_name)

    # No selection requested: hand back the sheet as it was read.
    if not columns:
        return frame  # {"dataframe": frame}

    requested = [name.strip() for name in columns.split(",") if name.strip()]
    selected = [name for name in requested if name in frame.columns]
    if not selected:
        raise ValueError("No valid columns specified.")

    # Copy so the returned frame is independent of the full sheet.
    return frame[selected].copy()  # {"dataframe": frame}
|
704 |
+
|
705 |
+
|
706 |
+
@ops.input_position(system_prompt="bottom", instruction_prompt="bottom", df="left")
@op("LynxScribe Task Solver")
@mem.cache
async def ls_task_solver(
    system_prompt,
    instruction_prompt,
    df,
    *,
    llm_interface: str = "openai",
    llm_model_name: str = "gpt-4o",
    new_column_names: str = "processed_field",
):
    """
    Solve the described task for every row of a data frame and store the answers in new columns.

    One chat completion is requested per row. The instruction message is used as a
    template: every ``{column_name}`` placeholder that matches a column of ``df`` is
    replaced by that row's value (converted with ``str``).

    Args:
        system_prompt: sequence whose first element is a mapping holding the system
            message under the ``"prompt_message"`` key.
        instruction_prompt: same structure as ``system_prompt``; its message must
            expose a ``content`` string that serves as the per-row template.
        df: the input data frame; it is not modified.
        llm_interface: name passed to ``get_llm_engine``.
        llm_model_name: model name passed to every ``ChatCompletionPrompt``.
        new_column_names: comma-separated names of the output column(s). With one
            name, the raw answer text is stored in that column. With several names,
            each answer is passed through ``dictionary_corrector`` and split into
            one column per name.

    Returns:
        A copy of ``df`` extended with the new column(s).

    Raises:
        ValueError: if ``new_column_names`` contains no usable name.
    """

    # Validate the requested output columns first, so that a bad configuration
    # fails before any LLM request is sent.
    col_list = [_c.strip() for _c in new_column_names.split(",") if _c.strip()]
    if not col_list:
        raise ValueError("No valid column names specified.")

    # handling inputs
    system_message = system_prompt[0]["prompt_message"]
    instruction_message = instruction_prompt[0]["prompt_message"]

    # preparing output
    out_df = df.copy()

    # connecting to the LLM
    # TODO: optionally pass an API key looked up from an environment variable.
    llm_params = {"name": llm_interface}
    llm = get_llm_engine(**llm_params)

    # Collect the columns referenced in the instruction template. Labels are
    # converted with str() so that non-string headers (e.g. numeric Excel
    # headers) do not break the placeholder construction.
    template = instruction_message.content
    placeholders = []
    for column in df.columns:
        placeholder = "{" + str(column) + "}"
        if placeholder in template:
            placeholders.append((column, placeholder))

    # Generate one instruction message per row of the data frame.
    # TODO: make it fast for large dataframes
    instruction_messages = []
    for row_idx in range(len(df)):
        row = df.iloc[row_idx]  # fetched once per row, not once per field
        row_message = deepcopy(instruction_message)
        for column, placeholder in placeholders:
            row_message.content = row_message.content.replace(placeholder, str(row[column]))
        instruction_messages.append(row_message)

    # generate completion prompts
    completion_prompts = [
        ChatCompletionPrompt(
            model=llm_model_name,
            messages=[system_message, row_message],
        )
        for row_message in instruction_messages
    ]

    # get the answers (all requests are awaited together)
    tasks = [llm.acreate_completion(completion_prompt=_prompt) for _prompt in completion_prompts]
    out_completions = await asyncio.gather(*tasks)
    contents = [result.choices[0].message.content for result in out_completions]

    # answer post-processing: 1 vs more columns
    if len(col_list) == 1:
        out_df[col_list[0]] = contents
    else:
        # NOTE(review): dictionary_corrector is defined elsewhere; this assumes it
        # returns a mapping that contains every expected key - confirm.
        answers = [dictionary_corrector(content, expected_keys=col_list) for content in contents]
        for col in col_list:
            out_df[col] = [answer[col] for answer in answers]

    return out_df  # {"dataframe": out_df}
|
785 |
+
|
786 |
+
|
787 |
@output_on_top
|
788 |
@op("Truncate history")
|
789 |
def truncate_history(*, max_tokens=10000):
|
|
|
830 |
return {"text": chat}
|
831 |
|
832 |
|
833 |
+
@op("View DataFrame", view="table_view")
def view_df(input, *, _ctx: one_by_one.Context):
    """
    Accumulate inputs into a table structure for the table view.

    On the first call (no previous result in the context) a new table is created
    whose columns are the input's keys that do not start with an underscore.
    On later calls one more row is appended to the previous result.

    TODO: This part is not working
    """
    table = _ctx.last_result

    if not table:
        # First call: derive the column list from the input itself.
        columns = [str(key) for key in input.keys() if not str(key).startswith("_")]
        first_row = [input[name] for name in columns]
        return {
            "dataframes": {
                "df": {
                    "columns": columns,
                    "data": [first_row],
                }
            }
        }

    # Later calls: extend the table kept from the previous call in place.
    columns = table["dataframes"]["df"]["columns"]
    table["dataframes"]["df"]["data"].append([input[name] for name in columns])
    return table
|
853 |
+
|
854 |
+
|
855 |
@op("View", view="table_view")
|
856 |
def view(input):
|
857 |
columns = [str(c) for c in input.keys() if not str(c).startswith("_")]
|