Spaces:
Sleeping
Sleeping
wuyiqunLu
committed on
feat: integrate vision agent v3 for logs and code (#51)
Browse files
What it looks like for V2:
https://github.com/landing-ai/vision-agent/assets/132986242/95eb4605-0d37-402c-8401-b843bfd769ad
V3:
https://github.com/landing-ai/vision-agent/assets/132986242/4728641e-a9a7-49eb-81e7-2a5eff155224
- app/api/vision-agent/route.ts +65 -4
- components/chat/ChatMessage.tsx +76 -100
- lib/messageUtils.ts +34 -38
app/api/vision-agent/route.ts
CHANGED
@@ -57,7 +57,7 @@ export const POST = withLogging(
|
|
57 |
|
58 |
const fetchResponse = await fetch(
|
59 |
`https://api.dev.landing.ai/v1/agent/chat?agent_class=vision_agent&visualize_output=true&self_reflection=${enableSelfReflection}`,
|
60 |
-
// `http://localhost:5001/v1/agent/chat?agent_class=vision_agent&
|
61 |
{
|
62 |
method: 'POST',
|
63 |
headers: {
|
@@ -114,9 +114,70 @@ export const POST = withLogging(
|
|
114 |
const stream = fetchResponse.body.pipeThrough(
|
115 |
new TransformStream({
|
116 |
transform: async (chunk, controller) => {
|
117 |
-
const
|
118 |
-
|
119 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
120 |
},
|
121 |
}),
|
122 |
);
|
|
|
57 |
|
58 |
const fetchResponse = await fetch(
|
59 |
`https://api.dev.landing.ai/v1/agent/chat?agent_class=vision_agent&visualize_output=true&self_reflection=${enableSelfReflection}`,
|
60 |
+
// `http://localhost:5001/v1/agent/chat?agent_class=vision_agent&self_reflection=${enableSelfReflection}`,
|
61 |
{
|
62 |
method: 'POST',
|
63 |
headers: {
|
|
|
114 |
const stream = fetchResponse.body.pipeThrough(
|
115 |
new TransformStream({
|
116 |
transform: async (chunk, controller) => {
|
117 |
+
const data = decoder.decode(chunk);
|
118 |
+
data.split('\n').forEach(line => {
|
119 |
+
if (!line.trim()) {
|
120 |
+
return;
|
121 |
+
}
|
122 |
+
try {
|
123 |
+
const json = JSON.parse(line);
|
124 |
+
let message = (json.log ?? '') + '\n';
|
125 |
+
if (json.task || json.plan || json.reflection) {
|
126 |
+
const arr = json.plan
|
127 |
+
? json.plan
|
128 |
+
: json.task
|
129 |
+
? [json.task]
|
130 |
+
: [json.reflection];
|
131 |
+
const keys = Object.keys(arr[0]);
|
132 |
+
message += '\n';
|
133 |
+
message += '| ' + keys.join(' | ') + ' |' + '\n';
|
134 |
+
message +=
|
135 |
+
new Array(keys.length + 1).fill('|').join(' :- ') + '\n';
|
136 |
+
arr.forEach((obj: any) => {
|
137 |
+
message +=
|
138 |
+
'| ' +
|
139 |
+
keys.map(key => obj[key]).join(' | ') +
|
140 |
+
' |' +
|
141 |
+
'\n';
|
142 |
+
});
|
143 |
+
message += '\n';
|
144 |
+
}
|
145 |
+
if (json.tools) {
|
146 |
+
message += '\n';
|
147 |
+
message += '| ' + 'Descriptions' + ' |' + '\n';
|
148 |
+
message += '| ' + ':-' + ' |' + '\n';
|
149 |
+
json.tools.forEach((tool: string) => {
|
150 |
+
message += '| ' + tool + ' |' + '\n';
|
151 |
+
});
|
152 |
+
message += '\n';
|
153 |
+
}
|
154 |
+
if (json.code) {
|
155 |
+
message += `\`\`\`python\n${json.code}\n\`\`\`\n`;
|
156 |
+
}
|
157 |
+
if (json.result) {
|
158 |
+
message += `\`\`\`\n${json.result}\n\`\`\`\n`;
|
159 |
+
}
|
160 |
+
|
161 |
+
logger.info(
|
162 |
+
session,
|
163 |
+
{
|
164 |
+
message,
|
165 |
+
},
|
166 |
+
request,
|
167 |
+
'__AGENT_RESPONSE',
|
168 |
+
);
|
169 |
+
controller.enqueue(encoder.encode(message));
|
170 |
+
} catch (e) {
|
171 |
+
console.log(data);
|
172 |
+
logger.error(
|
173 |
+
session,
|
174 |
+
{ message: (e as Error).message, data },
|
175 |
+
request,
|
176 |
+
);
|
177 |
+
controller.error(e);
|
178 |
+
controller.terminate();
|
179 |
+
}
|
180 |
+
});
|
181 |
},
|
182 |
}),
|
183 |
);
|
components/chat/ChatMessage.tsx
CHANGED
@@ -48,119 +48,95 @@ export function ChatMessage({
|
|
48 |
{message.role === 'user' ? <IconUser /> : <IconOpenAI />}
|
49 |
</div>
|
50 |
<div className="flex-1 px-1 ml-4 space-y-2 overflow-hidden">
|
51 |
-
{logs &&
|
52 |
-
<
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
|
|
|
|
|
|
|
|
59 |
return (
|
60 |
-
<p className="
|
61 |
{children}
|
62 |
</p>
|
63 |
);
|
64 |
-
}
|
65 |
-
code({ children, className, ...props }) {
|
66 |
-
const match = /language-(\w+)/.exec(className || '');
|
67 |
-
return (
|
68 |
-
<CodeBlock
|
69 |
-
key={Math.random()}
|
70 |
-
language={(match && match[1]) || ''}
|
71 |
-
value={String(children).replace(/\n$/, '')}
|
72 |
-
{...props}
|
73 |
-
/>
|
74 |
-
);
|
75 |
-
},
|
76 |
-
}}
|
77 |
-
>
|
78 |
-
{logs}
|
79 |
-
</MemoizedReactMarkdown>
|
80 |
-
</div>
|
81 |
-
)}
|
82 |
-
<MemoizedReactMarkdown
|
83 |
-
className="break-words"
|
84 |
-
remarkPlugins={[remarkGfm, remarkMath]}
|
85 |
-
components={{
|
86 |
-
p({ children, ...props }) {
|
87 |
-
if (
|
88 |
-
props.node.children.some(
|
89 |
-
child => child.type === 'element' && child.tagName === 'img',
|
90 |
-
)
|
91 |
-
) {
|
92 |
-
return (
|
93 |
-
<p className="flex flex-wrap gap-2 items-start">{children}</p>
|
94 |
-
);
|
95 |
-
}
|
96 |
-
return (
|
97 |
-
<p className="my-2 last:mb-0 whitespace-pre-line">{children}</p>
|
98 |
-
);
|
99 |
-
},
|
100 |
-
img(props) {
|
101 |
-
if (props.src?.endsWith('.mp4')) {
|
102 |
return (
|
103 |
-
<
|
|
|
|
|
104 |
);
|
105 |
-
}
|
106 |
-
|
107 |
-
|
108 |
-
<TooltipTrigger asChild>
|
109 |
-
<Img
|
110 |
-
src={props.src ?? '/landing.png'}
|
111 |
-
alt={props.alt ?? 'answer-image'}
|
112 |
-
quality={100}
|
113 |
-
className="cursor-zoom-in"
|
114 |
-
sizes="(min-width: 66em) 25vw,
|
115 |
-
(min-width: 44em) 40vw,
|
116 |
-
100vw"
|
117 |
-
/>
|
118 |
-
</TooltipTrigger>
|
119 |
-
<TooltipContent>
|
120 |
-
<Img
|
121 |
-
className="m-2"
|
122 |
-
src={props.src ?? '/landing.png'}
|
123 |
-
alt={props.alt ?? 'answer-image'}
|
124 |
-
quality={100}
|
125 |
-
width={500}
|
126 |
-
/>
|
127 |
-
</TooltipContent>
|
128 |
-
</Tooltip>
|
129 |
-
);
|
130 |
-
},
|
131 |
-
code({ node, inline, className, children, ...props }) {
|
132 |
-
if (children.length) {
|
133 |
-
if (children[0] == 'β') {
|
134 |
return (
|
135 |
-
<
|
136 |
);
|
137 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
|
139 |
-
children[0] = (children[0] as string).replace('`β`', 'β');
|
140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
141 |
|
142 |
-
const match = /language-(\w+)/.exec(className || '');
|
143 |
-
if (inline) {
|
144 |
return (
|
145 |
-
<
|
146 |
-
{
|
147 |
-
|
|
|
|
|
|
|
148 |
);
|
149 |
-
}
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
value={String(children).replace(/\n$/, '')}
|
156 |
-
{...props}
|
157 |
-
/>
|
158 |
-
);
|
159 |
-
},
|
160 |
-
}}
|
161 |
-
>
|
162 |
-
{content}
|
163 |
-
</MemoizedReactMarkdown>
|
164 |
{/* <ChatMessageActions message={message} /> */}
|
165 |
{isLoading && <Loading />}
|
166 |
</div>
|
|
|
48 |
{message.role === 'user' ? <IconUser /> : <IconOpenAI />}
|
49 |
</div>
|
50 |
<div className="flex-1 px-1 ml-4 space-y-2 overflow-hidden">
|
51 |
+
{logs && (
|
52 |
+
<MemoizedReactMarkdown
|
53 |
+
className="break-words"
|
54 |
+
remarkPlugins={[remarkGfm, remarkMath]}
|
55 |
+
components={{
|
56 |
+
p({ children, ...props }) {
|
57 |
+
if (
|
58 |
+
props.node.children.some(
|
59 |
+
child =>
|
60 |
+
child.type === 'element' && child.tagName === 'img',
|
61 |
+
)
|
62 |
+
) {
|
63 |
return (
|
64 |
+
<p className="flex flex-wrap gap-2 items-start">
|
65 |
{children}
|
66 |
</p>
|
67 |
);
|
68 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
return (
|
70 |
+
<p className="my-2 last:mb-0 whitespace-pre-line">
|
71 |
+
{children}
|
72 |
+
</p>
|
73 |
);
|
74 |
+
},
|
75 |
+
img(props) {
|
76 |
+
if (props.src?.endsWith('.mp4')) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
return (
|
78 |
+
<video src={props.src} controls width={500} height={500} />
|
79 |
);
|
80 |
}
|
81 |
+
return (
|
82 |
+
<Tooltip>
|
83 |
+
<TooltipTrigger asChild>
|
84 |
+
<Img
|
85 |
+
src={props.src ?? '/landing.png'}
|
86 |
+
alt={props.alt ?? 'answer-image'}
|
87 |
+
quality={100}
|
88 |
+
className="cursor-zoom-in"
|
89 |
+
sizes="(min-width: 66em) 25vw,
|
90 |
+
(min-width: 44em) 40vw,
|
91 |
+
100vw"
|
92 |
+
/>
|
93 |
+
</TooltipTrigger>
|
94 |
+
<TooltipContent>
|
95 |
+
<Img
|
96 |
+
className="m-2"
|
97 |
+
src={props.src ?? '/landing.png'}
|
98 |
+
alt={props.alt ?? 'answer-image'}
|
99 |
+
quality={100}
|
100 |
+
width={500}
|
101 |
+
/>
|
102 |
+
</TooltipContent>
|
103 |
+
</Tooltip>
|
104 |
+
);
|
105 |
+
},
|
106 |
+
code({ node, inline, className, children, ...props }) {
|
107 |
+
// if (children.length) {
|
108 |
+
// if (children[0] == 'β') {
|
109 |
+
// return (
|
110 |
+
// <span className="mt-1 cursor-default animate-pulse">β</span>
|
111 |
+
// );
|
112 |
+
// }
|
113 |
|
114 |
+
// children[0] = (children[0] as string).replace('`β`', 'β');
|
115 |
+
// }
|
116 |
+
|
117 |
+
const match = /language-(\w+)/.exec(className || '');
|
118 |
+
// if (inline) {
|
119 |
+
// return (
|
120 |
+
// <code className={className} {...props}>
|
121 |
+
// {children}
|
122 |
+
// </code>
|
123 |
+
// );
|
124 |
+
// }
|
125 |
|
|
|
|
|
126 |
return (
|
127 |
+
<CodeBlock
|
128 |
+
key={Math.random()}
|
129 |
+
language={(match && match[1]) || ''}
|
130 |
+
value={String(children).replace(/\n$/, '')}
|
131 |
+
{...props}
|
132 |
+
/>
|
133 |
);
|
134 |
+
},
|
135 |
+
}}
|
136 |
+
>
|
137 |
+
{logs}
|
138 |
+
</MemoizedReactMarkdown>
|
139 |
+
)}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
140 |
{/* <ChatMessageActions message={message} /> */}
|
141 |
{isLoading && <Loading />}
|
142 |
</div>
|
lib/messageUtils.ts
CHANGED
@@ -42,11 +42,6 @@ export const getCleanedUpMessages = ({
|
|
42 |
content,
|
43 |
role,
|
44 |
}: Pick<MessageBase, 'role' | 'content'>) => {
|
45 |
-
if (role === 'user') {
|
46 |
-
return {
|
47 |
-
content,
|
48 |
-
};
|
49 |
-
}
|
50 |
if (content.split(CLEANED_SEPARATOR).length === 2) {
|
51 |
return {
|
52 |
logs: content.split(CLEANED_SEPARATOR)[0],
|
@@ -54,38 +49,39 @@ export const getCleanedUpMessages = ({
|
|
54 |
};
|
55 |
}
|
56 |
const [logs = '', answer = ''] = content.split('<ANSWER>');
|
57 |
-
|
58 |
-
|
59 |
-
let
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
}
|
88 |
-
|
|
|
89 |
const [answerText, imagesStr = ''] = answer.split('<VIZ>');
|
90 |
const [imagesArrayStr, ...rest] = imagesStr.split('</VIZ>');
|
91 |
const images = imagesArrayStr
|
@@ -93,7 +89,7 @@ export const getCleanedUpMessages = ({
|
|
93 |
.map(str => str.replace('<IMG>', ''))
|
94 |
.slice(0, -1);
|
95 |
return {
|
96 |
-
logs:
|
97 |
content:
|
98 |
answerText.replace('</</ANSWER>', '').replace('</ANSWER>', '') +
|
99 |
'\n\n' +
|
|
|
42 |
content,
|
43 |
role,
|
44 |
}: Pick<MessageBase, 'role' | 'content'>) => {
|
|
|
|
|
|
|
|
|
|
|
45 |
if (content.split(CLEANED_SEPARATOR).length === 2) {
|
46 |
return {
|
47 |
logs: content.split(CLEANED_SEPARATOR)[0],
|
|
|
49 |
};
|
50 |
}
|
51 |
const [logs = '', answer = ''] = content.split('<ANSWER>');
|
52 |
+
// console.log(logs);
|
53 |
+
// const cleanedLogs = [];
|
54 |
+
// let left = 0;
|
55 |
+
// let right = 0;
|
56 |
+
// while (right < logs.length) {
|
57 |
+
// if (Object.keys(PAIRS).includes(content[right])) {
|
58 |
+
// cleanedLogs.push(content.substring(left, right));
|
59 |
+
// left = right++;
|
60 |
+
// while (
|
61 |
+
// right < content.length &&
|
62 |
+
// PAIRS[content[left]] !== content[right]
|
63 |
+
// ) {
|
64 |
+
// right++;
|
65 |
+
// }
|
66 |
+
// if (content[left] === MIDDLE_STARTER) {
|
67 |
+
// // add the text alignment so it can be shown as a table
|
68 |
+
// const separators = logs
|
69 |
+
// .substring(left, right)
|
70 |
+
// .split(MIDDLE_SEPARATOR).length;
|
71 |
+
// if (separators > 0) {
|
72 |
+
// cleanedLogs.push(
|
73 |
+
// Array(separators + 1)
|
74 |
+
// .fill('|')
|
75 |
+
// .join(' :- '),
|
76 |
+
// );
|
77 |
+
// }
|
78 |
+
// }
|
79 |
+
// left = ++right;
|
80 |
+
// } else {
|
81 |
+
// right++;
|
82 |
+
// }
|
83 |
+
// }
|
84 |
+
// cleanedLogs.push(content.substring(left, right));
|
85 |
const [answerText, imagesStr = ''] = answer.split('<VIZ>');
|
86 |
const [imagesArrayStr, ...rest] = imagesStr.split('</VIZ>');
|
87 |
const images = imagesArrayStr
|
|
|
89 |
.map(str => str.replace('<IMG>', ''))
|
90 |
.slice(0, -1);
|
91 |
return {
|
92 |
+
logs: logs,
|
93 |
content:
|
94 |
answerText.replace('</</ANSWER>', '').replace('</ANSWER>', '') +
|
95 |
'\n\n' +
|