zhengr commited on
Commit
0bcc252
·
1 Parent(s): cd76fd8
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. Dockerfile +45 -0
  2. LICENSE +193 -0
  3. README2.md +328 -0
  4. config.json +63 -0
  5. docker-compose.yml +14 -0
  6. jest.config.js +6 -0
  7. jest.setup.js +1 -0
  8. jina-ai/.dockerignore +1 -0
  9. jina-ai/Dockerfile +50 -0
  10. jina-ai/config.json +69 -0
  11. jina-ai/package-lock.json +0 -0
  12. jina-ai/package.json +39 -0
  13. jina-ai/src/dto/jina-embeddings-auth.ts +347 -0
  14. jina-ai/src/lib/async-context.ts +9 -0
  15. jina-ai/src/lib/billing.ts +102 -0
  16. jina-ai/src/lib/env-config.ts +59 -0
  17. jina-ai/src/lib/errors.ts +70 -0
  18. jina-ai/src/lib/firestore.ts +223 -0
  19. jina-ai/src/lib/logger.ts +56 -0
  20. jina-ai/src/lib/registry.ts +4 -0
  21. jina-ai/src/patch-express.ts +162 -0
  22. jina-ai/src/rate-limit.ts +278 -0
  23. jina-ai/src/server.ts +56 -0
  24. jina-ai/tsconfig.json +17 -0
  25. package-lock.json +0 -0
  26. package.json +63 -0
  27. src/__tests__/agent.test.ts +51 -0
  28. src/__tests__/docker.test.ts +41 -0
  29. src/__tests__/server.test.ts +300 -0
  30. src/agent.ts +774 -0
  31. src/app.ts +646 -0
  32. src/cli.ts +48 -0
  33. src/config.ts +158 -0
  34. src/evals/batch-evals.ts +209 -0
  35. src/evals/ego-questions.json +82 -0
  36. src/server.ts +15 -0
  37. src/tools/__tests__/error-analyzer.test.ts +31 -0
  38. src/tools/__tests__/evaluator.test.ts +62 -0
  39. src/tools/__tests__/read.test.ts +21 -0
  40. src/tools/__tests__/search.test.ts +24 -0
  41. src/tools/brave-search.ts +22 -0
  42. src/tools/dedup.ts +89 -0
  43. src/tools/error-analyzer.ts +134 -0
  44. src/tools/evaluator.ts +553 -0
  45. src/tools/grounding.ts +38 -0
  46. src/tools/jina-dedup.ts +182 -0
  47. src/tools/jina-search.ts +88 -0
  48. src/tools/query-rewriter.ts +112 -0
  49. src/tools/read.ts +102 -0
  50. src/types.ts +230 -0
Dockerfile ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ---- BUILD STAGE ----
2
+ FROM node:20-slim AS builder
3
+
4
+ # Set working directory
5
+ WORKDIR /app
6
+
7
+ # Copy package.json and package-lock.json
8
+ COPY package*.json ./
9
+
10
+ # Install dependencies
11
+ RUN npm install --ignore-scripts
12
+
13
+ # Copy application code
14
+ COPY . .
15
+
16
+ # Build the application
17
+ RUN npm run build --ignore-scripts
18
+
19
+ # ---- PRODUCTION STAGE ----
20
+ FROM node:20-slim AS production
21
+
22
+ # Set working directory
23
+ WORKDIR /app
24
+
25
+ # Copy package.json and package-lock.json
26
+ COPY package*.json ./
27
+
28
+ # Install production dependencies only
29
+ RUN npm install --production --ignore-scripts
30
+
31
+ # Copy config.json and built files from builder
32
+ COPY --from=builder /app/config.json ./
33
+ COPY --from=builder /app/dist ./dist
34
+
35
+ # Set environment variables (Recommended to set at runtime, avoid hardcoding)
36
+ ENV GEMINI_API_KEY=${GEMINI_API_KEY}
37
+ ENV OPENAI_API_KEY=${OPENAI_API_KEY}
38
+ ENV JINA_API_KEY=${JINA_API_KEY}
39
+ ENV BRAVE_API_KEY=${BRAVE_API_KEY}
40
+
41
+ # Expose the port the app runs on
42
+ EXPOSE 3000
43
+
44
+ # Set startup command
45
+ CMD ["node", "./dist/server.js"]
LICENSE ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Copyright 2020-2025 Jina AI Limited. All rights reserved.
2
+
3
+
4
+ Apache License
5
+ Version 2.0, January 2004
6
+ http://www.apache.org/licenses/
7
+
8
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
9
+
10
+ 1. Definitions.
11
+
12
+ "License" shall mean the terms and conditions for use, reproduction,
13
+ and distribution as defined by Sections 1 through 9 of this document.
14
+
15
+ "Licensor" shall mean the copyright owner or entity authorized by
16
+ the copyright owner that is granting the License.
17
+
18
+ "Legal Entity" shall mean the union of the acting entity and all
19
+ other entities that control, are controlled by, or are under common
20
+ control with that entity. For the purposes of this definition,
21
+ "control" means (i) the power, direct or indirect, to cause the
22
+ direction or management of such entity, whether by contract or
23
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
24
+ outstanding shares, or (iii) beneficial ownership of such entity.
25
+
26
+ "You" (or "Your") shall mean an individual or Legal Entity
27
+ exercising permissions granted by this License.
28
+
29
+ "Source" form shall mean the preferred form for making modifications,
30
+ including but not limited to software source code, documentation
31
+ source, and configuration files.
32
+
33
+ "Object" form shall mean any form resulting from mechanical
34
+ transformation or translation of a Source form, including but
35
+ not limited to compiled object code, generated documentation,
36
+ and conversions to other media types.
37
+
38
+ "Work" shall mean the work of authorship, whether in Source or
39
+ Object form, made available under the License, as indicated by a
40
+ copyright notice that is included in or attached to the work
41
+ (an example is provided in the Appendix below).
42
+
43
+ "Derivative Works" shall mean any work, whether in Source or Object
44
+ form, that is based on (or derived from) the Work and for which the
45
+ editorial revisions, annotations, elaborations, or other modifications
46
+ represent, as a whole, an original work of authorship. For the purposes
47
+ of this License, Derivative Works shall not include works that remain
48
+ separable from, or merely link (or bind by name) to the interfaces of,
49
+ the Work and Derivative Works thereof.
50
+
51
+ "Contribution" shall mean any work of authorship, including
52
+ the original version of the Work and any modifications or additions
53
+ to that Work or Derivative Works thereof, that is intentionally
54
+ submitted to Licensor for inclusion in the Work by the copyright owner
55
+ or by an individual or Legal Entity authorized to submit on behalf of
56
+ the copyright owner. For the purposes of this definition, "submitted"
57
+ means any form of electronic, verbal, or written communication sent
58
+ to the Licensor or its representatives, including but not limited to
59
+ communication on electronic mailing lists, source code control systems,
60
+ and issue tracking systems that are managed by, or on behalf of, the
61
+ Licensor for the purpose of discussing and improving the Work, but
62
+ excluding communication that is conspicuously marked or otherwise
63
+ designated in writing by the copyright owner as "Not a Contribution."
64
+
65
+ "Contributor" shall mean Licensor and any individual or Legal Entity
66
+ on behalf of whom a Contribution has been received by Licensor and
67
+ subsequently incorporated within the Work.
68
+
69
+ 2. Grant of Copyright License. Subject to the terms and conditions of
70
+ this License, each Contributor hereby grants to You a perpetual,
71
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
72
+ copyright license to reproduce, prepare Derivative Works of,
73
+ publicly display, publicly perform, sublicense, and distribute the
74
+ Work and such Derivative Works in Source or Object form.
75
+
76
+ 3. Grant of Patent License. Subject to the terms and conditions of
77
+ this License, each Contributor hereby grants to You a perpetual,
78
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
79
+ (except as stated in this section) patent license to make, have made,
80
+ use, offer to sell, sell, import, and otherwise transfer the Work,
81
+ where such license applies only to those patent claims licensable
82
+ by such Contributor that are necessarily infringed by their
83
+ Contribution(s) alone or by combination of their Contribution(s)
84
+ with the Work to which such Contribution(s) was submitted. If You
85
+ institute patent litigation against any entity (including a
86
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
87
+ or a Contribution incorporated within the Work constitutes direct
88
+ or contributory patent infringement, then any patent licenses
89
+ granted to You under this License for that Work shall terminate
90
+ as of the date such litigation is filed.
91
+
92
+ 4. Redistribution. You may reproduce and distribute copies of the
93
+ Work or Derivative Works thereof in any medium, with or without
94
+ modifications, and in Source or Object form, provided that You
95
+ meet the following conditions:
96
+
97
+ (a) You must give any other recipients of the Work or
98
+ Derivative Works a copy of this License; and
99
+
100
+ (b) You must cause any modified files to carry prominent notices
101
+ stating that You changed the files; and
102
+
103
+ (c) You must retain, in the Source form of any Derivative Works
104
+ that You distribute, all copyright, patent, trademark, and
105
+ attribution notices from the Source form of the Work,
106
+ excluding those notices that do not pertain to any part of
107
+ the Derivative Works; and
108
+
109
+ (d) If the Work includes a "NOTICE" text file as part of its
110
+ distribution, then any Derivative Works that You distribute must
111
+ include a readable copy of the attribution notices contained
112
+ within such NOTICE file, excluding those notices that do not
113
+ pertain to any part of the Derivative Works, in at least one
114
+ of the following places: within a NOTICE text file distributed
115
+ as part of the Derivative Works; within the Source form or
116
+ documentation, if provided along with the Derivative Works; or,
117
+ within a display generated by the Derivative Works, if and
118
+ wherever such third-party notices normally appear. The contents
119
+ of the NOTICE file are for informational purposes only and
120
+ do not modify the License. You may add Your own attribution
121
+ notices within Derivative Works that You distribute, alongside
122
+ or as an addendum to the NOTICE text from the Work, provided
123
+ that such additional attribution notices cannot be construed
124
+ as modifying the License.
125
+
126
+ You may add Your own copyright statement to Your modifications and
127
+ may provide additional or different license terms and conditions
128
+ for use, reproduction, or distribution of Your modifications, or
129
+ for any such Derivative Works as a whole, provided Your use,
130
+ reproduction, and distribution of the Work otherwise complies with
131
+ the conditions stated in this License.
132
+
133
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
134
+ any Contribution intentionally submitted for inclusion in the Work
135
+ by You to the Licensor shall be under the terms and conditions of
136
+ this License, without any additional terms or conditions.
137
+ Notwithstanding the above, nothing herein shall supersede or modify
138
+ the terms of any separate license agreement you may have executed
139
+ with Licensor regarding such Contributions.
140
+
141
+ 6. Trademarks. This License does not grant permission to use the trade
142
+ names, trademarks, service marks, or product names of the Licensor,
143
+ except as required for reasonable and customary use in describing the
144
+ origin of the Work and reproducing the content of the NOTICE file.
145
+
146
+ 7. Disclaimer of Warranty. Unless required by applicable law or
147
+ agreed to in writing, Licensor provides the Work (and each
148
+ Contributor provides its Contributions) on an "AS IS" BASIS,
149
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
150
+ implied, including, without limitation, any warranties or conditions
151
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
152
+ PARTICULAR PURPOSE. You are solely responsible for determining the
153
+ appropriateness of using or redistributing the Work and assume any
154
+ risks associated with Your exercise of permissions under this License.
155
+
156
+ 8. Limitation of Liability. In no event and under no legal theory,
157
+ whether in tort (including negligence), contract, or otherwise,
158
+ unless required by applicable law (such as deliberate and grossly
159
+ negligent acts) or agreed to in writing, shall any Contributor be
160
+ liable to You for damages, including any direct, indirect, special,
161
+ incidental, or consequential damages of any character arising as a
162
+ result of this License or out of the use or inability to use the
163
+ Work (including but not limited to damages for loss of goodwill,
164
+ work stoppage, computer failure or malfunction, or any and all
165
+ other commercial damages or losses), even if such Contributor
166
+ has been advised of the possibility of such damages.
167
+
168
+ 9. Accepting Warranty or Additional Liability. While redistributing
169
+ the Work or Derivative Works thereof, You may choose to offer,
170
+ and charge a fee for, acceptance of support, warranty, indemnity,
171
+ or other liability obligations and/or rights consistent with this
172
+ License. However, in accepting such obligations, You may act only
173
+ on Your own behalf and on Your sole responsibility, not on behalf
174
+ of any other Contributor, and only if You agree to indemnify,
175
+ defend, and hold each Contributor harmless for any liability
176
+ incurred by, or claims asserted against, such Contributor by reason
177
+ of your accepting any such warranty or additional liability.
178
+
179
+ END OF TERMS AND CONDITIONS
180
+
181
+ Copyright 2020-2021 Jina AI Limited
182
+
183
+ Licensed under the Apache License, Version 2.0 (the "License");
184
+ you may not use this file except in compliance with the License.
185
+ You may obtain a copy of the License at
186
+
187
+ http://www.apache.org/licenses/LICENSE-2.0
188
+
189
+ Unless required by applicable law or agreed to in writing, software
190
+ distributed under the License is distributed on an "AS IS" BASIS,
191
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
192
+ See the License for the specific language governing permissions and
193
+ limitations under the License.
README2.md ADDED
@@ -0,0 +1,328 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # DeepResearch
2
+
3
+ [Demo](https://jina.ai/deepsearch#demo) | [API](#official-api) | [Evaluation](#evaluation)
4
+
5
+ Keep searching, reading webpages, reasoning until an answer is found (or the token budget is exceeded). Useful for deeply investigating a query.
6
+
7
+ ```mermaid
8
+ ---
9
+ config:
10
+ theme: mc
11
+ look: handDrawn
12
+ ---
13
+ flowchart LR
14
+ subgraph Loop["until budget exceed"]
15
+ direction LR
16
+ Search["Search"]
17
+ Read["Read"]
18
+ Reason["Reason"]
19
+ end
20
+ Query(["Query"]) --> Loop
21
+ Search --> Read
22
+ Read --> Reason
23
+ Reason --> Search
24
+ Loop --> Answer(["Answer"])
25
+
26
+ ```
27
+
28
+ Unlike OpenAI and Gemini's Deep Research capabilities, we focus solely on **delivering accurate answers through our iterative process**. We don't optimize for long-form articles – if you need quick, precise answers from deep search, you're in the right place. If you're looking for AI-generated reports like OpenAI/Gemini do, this isn't for you.
29
+
30
+ ## Install
31
+
32
+ ```bash
33
+ git clone https://github.com/jina-ai/node-DeepResearch.git
34
+ cd node-DeepResearch
35
+ npm install
36
+ ```
37
+
38
+ [安装部署视频教程 on Youtube](https://youtu.be/vrpraFiPUyA)
39
+
40
+ It is also available on npm but not recommended for now, as the code is still under active development.
41
+
42
+
43
+ ## Usage
44
+
45
+ We use Gemini (latest `gemini-2.0-flash`) / OpenAI / [LocalLLM](#use-local-llm) for reasoning, [Jina Reader](https://jina.ai/reader) for searching and reading webpages, you can get a free API key with 1M tokens from jina.ai.
46
+
47
+ ```bash
48
+ export GEMINI_API_KEY=... # for gemini
49
+ # export OPENAI_API_KEY=... # for openai
50
+ # export LLM_PROVIDER=openai # for openai
51
+ export JINA_API_KEY=jina_... # free jina api key, get from https://jina.ai/reader
52
+
53
+ npm run dev $QUERY
54
+ ```
55
+
56
+ ### Official API
57
+
58
+ You can also use our official DeepSearch API, hosted and optimized by Jina AI:
59
+
60
+ ```
61
+ https://deepsearch.jina.ai/v1/chat/completions
62
+ ```
63
+
64
+ You can use it with any OpenAI-compatible client. For the Bearer authentication token, get your Jina API key from https://jina.ai
65
+
66
+ #### Client integration guidelines
67
+
68
+ If you are building a web/local/mobile client that uses `Jina DeepSearch API`, here are some design guidelines:
69
+ - Our API is fully compatible with [OpenAI API schema](https://platform.openai.com/docs/api-reference/chat/create), this should greatly simplify the integration process. The model name is `jina-deepsearch-v1`.
70
+ - Our DeepSearch API is a reasoning+search grounding LLM, so it's best for questions that require deep reasoning and search.
71
+ - Two special tokens are introduced `<think>...</think>`, `<references>...</references>`. Please render them with care.
72
+ - Guide the user to get a Jina API key from https://jina.ai, with 1M free tokens for new API key.
73
+ - There are rate limits, [between 10RPM to 30RPM depending on the API key tier](https://jina.ai/contact-sales#rate-limit).
74
+ - [Download Jina AI logo here](https://jina.ai/logo-Jina-1024.zip)
75
+
76
+ ## Demo
77
+ > was recorded with `gemini-1.5-flash`, the latest `gemini-2.0-flash` leads to much better results!
78
+
79
+ Query: `"what is the latest blog post's title from jina ai?"`
80
+ 3 steps; answer is correct!
81
+ ![demo1](.github/visuals/demo.gif)
82
+
83
+ Query: `"what is the context length of readerlm-v2?"`
84
+ 2 steps; answer is correct!
85
+ ![demo1](.github/visuals/demo3.gif)
86
+
87
+ Query: `"list all employees from jina ai that u can find, as many as possible"`
88
+ 11 steps; partially correct! but im not in the list :(
89
+ ![demo1](.github/visuals/demo2.gif)
90
+
91
+ Query: `"who will be the biggest competitor of Jina AI"`
92
+ 42 steps; future prediction kind, so it's arguably correct! atm Im not seeing `weaviate` as a competitor, but im open for the future "i told you so" moment.
93
+ ![demo1](.github/visuals/demo4.gif)
94
+
95
+ More examples:
96
+
97
+ ```
98
+ # example: no tool calling
99
+ npm run dev "1+1="
100
+ npm run dev "what is the capital of France?"
101
+
102
+ # example: 2-step
103
+ npm run dev "what is the latest news from Jina AI?"
104
+
105
+ # example: 3-step
106
+ npm run dev "what is the twitter account of jina ai's founder"
107
+
108
+ # example: 13-step, ambiguous question (no def of "big")
109
+ npm run dev "who is bigger? cohere, jina ai, voyage?"
110
+
111
+ # example: open question, research-like, long chain of thoughts
112
+ npm run dev "who will be president of US in 2028?"
113
+ npm run dev "what should be jina ai strategy for 2025?"
114
+ ```
115
+
116
+ ## Use Local LLM
117
+
118
+ > Note, not every LLM works with our reasoning flow; we need those that support structured output (sometimes called JSON Schema output, object output) well. Feel free to propose a PR to add more open-source LLMs to the working list.
119
+
120
+ If you use Ollama or LMStudio, you can redirect the reasoning request to your local LLM by setting the following environment variables:
121
+
122
+ ```bash
123
+ export LLM_PROVIDER=openai # yes, that's right - for local llm we still use openai client
124
+ export OPENAI_BASE_URL=http://127.0.0.1:1234/v1 # your local llm endpoint
125
+ export OPENAI_API_KEY=whatever # random string would do, as we don't use it (unless your local LLM has authentication)
126
+ export DEFAULT_MODEL_NAME=qwen2.5-7b # your local llm model name
127
+ ```
128
+
129
+
130
+ ## OpenAI-Compatible Server API
131
+
132
+ If you have a GUI client that supports OpenAI API (e.g. [CherryStudio](https://docs.cherry-ai.com/), [Chatbox](https://github.com/Bin-Huang/chatbox)), you can simply configure it to use this server.
133
+
134
+ ![demo1](.github/visuals/demo6.gif)
135
+
136
+ Start the server:
137
+ ```bash
138
+ # Without authentication
139
+ npm run serve
140
+
141
+ # With authentication (clients must provide this secret as Bearer token)
142
+ npm run serve --secret=your_secret_token
143
+ ```
144
+
145
+ The server will start on http://localhost:3000 with the following endpoint:
146
+
147
+ ### POST /v1/chat/completions
148
+ ```bash
149
+ # Without authentication
150
+ curl http://localhost:3000/v1/chat/completions \
151
+ -H "Content-Type: application/json" \
152
+ -d '{
153
+ "model": "jina-deepsearch-v1",
154
+ "messages": [
155
+ {
156
+ "role": "user",
157
+ "content": "Hello!"
158
+ }
159
+ ]
160
+ }'
161
+
162
+ # With authentication (when server is started with --secret)
163
+ curl http://localhost:3000/v1/chat/completions \
164
+ -H "Content-Type: application/json" \
165
+ -H "Authorization: Bearer your_secret_token" \
166
+ -d '{
167
+ "model": "jina-deepsearch-v1",
168
+ "messages": [
169
+ {
170
+ "role": "user",
171
+ "content": "Hello!"
172
+ }
173
+ ],
174
+ "stream": true
175
+ }'
176
+ ```
177
+
178
+ Response format:
179
+ ```json
180
+ {
181
+ "id": "chatcmpl-123",
182
+ "object": "chat.completion",
183
+ "created": 1677652288,
184
+ "model": "jina-deepsearch-v1",
185
+ "system_fingerprint": "fp_44709d6fcb",
186
+ "choices": [{
187
+ "index": 0,
188
+ "message": {
189
+ "role": "assistant",
190
+ "content": "YOUR FINAL ANSWER"
191
+ },
192
+ "logprobs": null,
193
+ "finish_reason": "stop"
194
+ }],
195
+ "usage": {
196
+ "prompt_tokens": 9,
197
+ "completion_tokens": 12,
198
+ "total_tokens": 21
199
+ }
200
+ }
201
+ ```
202
+
203
+ For streaming responses (stream: true), the server sends chunks in this format:
204
+ ```json
205
+ {
206
+ "id": "chatcmpl-123",
207
+ "object": "chat.completion.chunk",
208
+ "created": 1694268190,
209
+ "model": "jina-deepsearch-v1",
210
+ "system_fingerprint": "fp_44709d6fcb",
211
+ "choices": [{
212
+ "index": 0,
213
+ "delta": {
214
+ "content": "..."
215
+ },
216
+ "logprobs": null,
217
+ "finish_reason": null
218
+ }]
219
+ }
220
+ ```
221
+
222
+ Note: The think content in streaming responses is wrapped in XML tags:
223
+ ```
224
+ <think>
225
+ [thinking steps...]
226
+ </think>
227
+ [final answer]
228
+ ```
229
+
230
+
231
+ ## Docker Setup
232
+
233
+ ### Build Docker Image
234
+ To build the Docker image for the application, run the following command:
235
+ ```bash
236
+ docker build -t deepresearch:latest .
237
+ ```
238
+
239
+ ### Run Docker Container
240
+ To run the Docker container, use the following command:
241
+ ```bash
242
+ docker run -p 3000:3000 --env GEMINI_API_KEY=your_gemini_api_key --env JINA_API_KEY=your_jina_api_key deepresearch:latest
243
+ ```
244
+
245
+ ### Docker Compose
246
+ You can also use Docker Compose to manage multi-container applications. To start the application with Docker Compose, run:
247
+ ```bash
248
+ docker-compose up
249
+ ```
250
+
251
+ ## How Does it Work?
252
+
253
+ Not sure a flowchart helps, but here it is:
254
+
255
+ ```mermaid
256
+ flowchart TD
257
+ Start([Start]) --> Init[Initialize context & variables]
258
+ Init --> CheckBudget{Token budget<br/>exceeded?}
259
+ CheckBudget -->|No| GetQuestion[Get current question<br/>from gaps]
260
+ CheckBudget -->|Yes| BeastMode[Enter Beast Mode]
261
+
262
+ GetQuestion --> GenPrompt[Generate prompt]
263
+ GenPrompt --> ModelGen[Generate response<br/>using Gemini]
264
+ ModelGen --> ActionCheck{Check action<br/>type}
265
+
266
+ ActionCheck -->|answer| AnswerCheck{Is original<br/>question?}
267
+ AnswerCheck -->|Yes| EvalAnswer[Evaluate answer]
268
+ EvalAnswer --> IsGoodAnswer{Is answer<br/>definitive?}
269
+ IsGoodAnswer -->|Yes| HasRefs{Has<br/>references?}
270
+ HasRefs -->|Yes| End([End])
271
+ HasRefs -->|No| GetQuestion
272
+ IsGoodAnswer -->|No| StoreBad[Store bad attempt<br/>Reset context]
273
+ StoreBad --> GetQuestion
274
+
275
+ AnswerCheck -->|No| StoreKnowledge[Store as intermediate<br/>knowledge]
276
+ StoreKnowledge --> GetQuestion
277
+
278
+ ActionCheck -->|reflect| ProcessQuestions[Process new<br/>sub-questions]
279
+ ProcessQuestions --> DedupQuestions{New unique<br/>questions?}
280
+ DedupQuestions -->|Yes| AddGaps[Add to gaps queue]
281
+ DedupQuestions -->|No| DisableReflect[Disable reflect<br/>for next step]
282
+ AddGaps --> GetQuestion
283
+ DisableReflect --> GetQuestion
284
+
285
+ ActionCheck -->|search| SearchQuery[Execute search]
286
+ SearchQuery --> NewURLs{New URLs<br/>found?}
287
+ NewURLs -->|Yes| StoreURLs[Store URLs for<br/>future visits]
288
+ NewURLs -->|No| DisableSearch[Disable search<br/>for next step]
289
+ StoreURLs --> GetQuestion
290
+ DisableSearch --> GetQuestion
291
+
292
+ ActionCheck -->|visit| VisitURLs[Visit URLs]
293
+ VisitURLs --> NewContent{New content<br/>found?}
294
+ NewContent -->|Yes| StoreContent[Store content as<br/>knowledge]
295
+ NewContent -->|No| DisableVisit[Disable visit<br/>for next step]
296
+ StoreContent --> GetQuestion
297
+ DisableVisit --> GetQuestion
298
+
299
+ BeastMode --> FinalAnswer[Generate final answer] --> End
300
+ ```
301
+
302
+ ## Evaluation
303
+
304
+ I kept the evaluation simple, LLM-as-a-judge and collect some [ego questions](./src/evals/ego-questions.json) for evaluation. These are the questions about Jina AI that I know 100% the answer but LLMs do not.
305
+
306
+ I mainly look at 3 things: total steps, total tokens, and the correctness of the final answer.
307
+
308
+ ```bash
309
+ npm run eval ./src/evals/ego-questions.json
310
+ ```
311
+
312
+ Here's the table comparing plain `gemini-2.0-flash` and `gemini-2.0-flash + node-deepresearch` on the ego set.
313
+
314
+ Plain `gemini-2.0-flash` can be run by setting `tokenBudget` to zero, skipping the while-loop and directly answering the question.
315
+
316
+ It should not be surprising that plain `gemini-2.0-flash` has a 0% pass rate, as I intentionally filtered out the questions that LLMs can answer.
317
+
318
+ | Metric | gemini-2.0-flash | #188f1bb |
319
+ |--------|------------------|----------|
320
+ | Pass Rate | 0% | 75% |
321
+ | Average Steps | 1 | 4 |
322
+ | Maximum Steps | 1 | 13 |
323
+ | Minimum Steps | 1 | 2 |
324
+ | Median Steps | 1 | 3 |
325
+ | Average Tokens | 428 | 68,574 |
326
+ | Median Tokens | 434 | 31,541 |
327
+ | Maximum Tokens | 463 | 363,655 |
328
+ | Minimum Tokens | 374 | 7,963 |
config.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "env": {
3
+ "https_proxy": "",
4
+ "OPENAI_BASE_URL": "",
5
+ "GEMINI_API_KEY": "",
6
+ "OPENAI_API_KEY": "",
7
+ "JINA_API_KEY": "",
8
+ "BRAVE_API_KEY": "",
9
+ "DEFAULT_MODEL_NAME": ""
10
+ },
11
+ "defaults": {
12
+ "search_provider": "jina",
13
+ "llm_provider": "gemini",
14
+ "step_sleep": 0
15
+ },
16
+ "providers": {
17
+ "gemini": {
18
+ "createClient": "createGoogleGenerativeAI"
19
+ },
20
+ "openai": {
21
+ "createClient": "createOpenAI",
22
+ "clientConfig": {
23
+ "compatibility": "strict"
24
+ }
25
+ }
26
+ },
27
+ "models": {
28
+ "gemini": {
29
+ "default": {
30
+ "model": "gemini-2.0-flash",
31
+ "temperature": 0,
32
+ "maxTokens": 8000
33
+ },
34
+ "tools": {
35
+ "searchGrounding": { "temperature": 0 },
36
+ "dedup": { "temperature": 0.1 },
37
+ "evaluator": {},
38
+ "errorAnalyzer": {},
39
+ "queryRewriter": { "temperature": 0.1 },
40
+ "agent": { "temperature": 0.7 },
41
+ "agentBeastMode": { "temperature": 0.7 },
42
+ "fallback": { "temperature": 0 }
43
+ }
44
+ },
45
+ "openai": {
46
+ "default": {
47
+ "model": "gpt-4o-mini",
48
+ "temperature": 0,
49
+ "maxTokens": 8000
50
+ },
51
+ "tools": {
52
+ "searchGrounding": { "temperature": 0 },
53
+ "dedup": { "temperature": 0.1 },
54
+ "evaluator": {},
55
+ "errorAnalyzer": {},
56
+ "queryRewriter": { "temperature": 0.1 },
57
+ "agent": { "temperature": 0.7 },
58
+ "agentBeastMode": { "temperature": 0.7 },
59
+ "fallback": { "temperature": 0 }
60
+ }
61
+ }
62
+ }
63
+ }
docker-compose.yml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: '3.8'
2
+
3
+ services:
4
+ app:
5
+ build:
6
+ context: .
7
+ dockerfile: Dockerfile
8
+ environment:
9
+ - GEMINI_API_KEY=${GEMINI_API_KEY}
10
+ - OPENAI_API_KEY=${OPENAI_API_KEY}
11
+ - JINA_API_KEY=${JINA_API_KEY}
12
+ - BRAVE_API_KEY=${BRAVE_API_KEY}
13
+ ports:
14
+ - "3000:3000"
jest.config.js ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ module.exports = {
2
+ preset: 'ts-jest',
3
+ testEnvironment: 'node',
4
+ testMatch: ['**/__tests__/**/*.test.ts'],
5
+ setupFiles: ['<rootDir>/jest.setup.js'],
6
+ };
jest.setup.js ADDED
@@ -0,0 +1 @@
 
 
1
+ require('dotenv').config();
jina-ai/.dockerignore ADDED
@@ -0,0 +1 @@
 
 
1
+ node_modules
jina-ai/Dockerfile ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ---- BUILD STAGE ----
2
+ FROM node:20-slim AS builder
3
+
4
+ # Set working directory
5
+ WORKDIR /app
6
+
7
+ # Copy package.json and package-lock.json
8
+ COPY ./package*.json ./
9
+ COPY ./jina-ai/package*.json ./jina-ai/
10
+
11
+ # Install dependencies
12
+ RUN npm ci
13
+ WORKDIR /app/jina-ai
14
+ RUN npm ci
15
+
16
+ WORKDIR /app
17
+
18
+ # Copy application code
19
+ COPY ./src ./src
20
+ COPY ./tsconfig.json ./tsconfig.json
21
+ COPY ./jina-ai/config.json ./
22
+ RUN npm run build
23
+
24
+ COPY ./jina-ai/src ./jina-ai/src
25
+ COPY ./jina-ai/tsconfig.json ./jina-ai/tsconfig.json
26
+ WORKDIR /app/jina-ai
27
+ RUN npm run build
28
+
29
+ # ---- PRODUCTION STAGE ----
30
+ FROM node:20 AS production
31
+
32
+ # Set working directory
33
+ WORKDIR /app
34
+
35
+ COPY --from=builder /app ./
36
+ # Copy config.json and built files from builder
37
+
38
+ WORKDIR /app/jina-ai
39
+
40
+ # Set environment variables (Recommended to set at runtime, avoid hardcoding)
41
+ ENV GEMINI_API_KEY=${GEMINI_API_KEY}
42
+ ENV OPENAI_API_KEY=${OPENAI_API_KEY}
43
+ ENV JINA_API_KEY=${JINA_API_KEY}
44
+ ENV BRAVE_API_KEY=${BRAVE_API_KEY}
45
+
46
+ # Expose the port the app runs on
47
+ EXPOSE 3000
48
+
49
+ # Set startup command
50
+ CMD ["node", "./dist/server.js"]
jina-ai/config.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "env": {
3
+ "https_proxy": "",
4
+ "OPENAI_BASE_URL": "",
5
+ "GEMINI_API_KEY": "",
6
+ "OPENAI_API_KEY": "",
7
+ "JINA_API_KEY": "",
8
+ "BRAVE_API_KEY": "",
9
+ "DEFAULT_MODEL_NAME": ""
10
+ },
11
+ "defaults": {
12
+ "search_provider": "brave",
13
+ "llm_provider": "vertex",
14
+ "step_sleep": 0
15
+ },
16
+ "providers": {
17
+ "vertex": {
18
+ "createClient": "createGoogleVertex",
19
+ "clientConfig": {
20
+ "location": "us-central1"
21
+ }
22
+ },
23
+ "gemini": {
24
+ "createClient": "createGoogleGenerativeAI"
25
+ },
26
+ "openai": {
27
+ "createClient": "createOpenAI",
28
+ "clientConfig": {
29
+ "compatibility": "strict"
30
+ }
31
+ }
32
+ },
33
+ "models": {
34
+ "gemini": {
35
+ "default": {
36
+ "model": "gemini-2.0-flash",
37
+ "temperature": 0,
38
+ "maxTokens": 8000
39
+ },
40
+ "tools": {
41
+ "searchGrounding": { "temperature": 0 },
42
+ "dedup": { "temperature": 0.1 },
43
+ "evaluator": {},
44
+ "errorAnalyzer": {},
45
+ "queryRewriter": { "temperature": 0.1 },
46
+ "agent": { "temperature": 0.7 },
47
+ "agentBeastMode": { "temperature": 0.7 },
48
+ "fallback": { "temperature": 0 }
49
+ }
50
+ },
51
+ "openai": {
52
+ "default": {
53
+ "model": "gpt-4o-mini",
54
+ "temperature": 0,
55
+ "maxTokens": 8000
56
+ },
57
+ "tools": {
58
+ "searchGrounding": { "temperature": 0 },
59
+ "dedup": { "temperature": 0.1 },
60
+ "evaluator": {},
61
+ "errorAnalyzer": {},
62
+ "queryRewriter": { "temperature": 0.1 },
63
+ "agent": { "temperature": 0.7 },
64
+ "agentBeastMode": { "temperature": 0.7 },
65
+ "fallback": { "temperature": 0 }
66
+ }
67
+ }
68
+ }
69
+ }
jina-ai/package-lock.json ADDED
The diff for this file is too large to render. See raw diff
 
jina-ai/package.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "@jina-ai/node-deepresearch",
3
+ "version": "1.0.0",
4
+ "main": "dist/app.js",
5
+ "files": [
6
+ "dist",
7
+ "README.md",
8
+ "LICENSE"
9
+ ],
10
+ "scripts": {
11
+ "build": "tsc",
12
+ "dev": "npx ts-node src/agent.ts",
13
+ "search": "npx ts-node src/test-duck.ts",
14
+ "rewrite": "npx ts-node src/tools/query-rewriter.ts",
15
+ "lint": "eslint . --ext .ts",
16
+ "lint:fix": "eslint . --ext .ts --fix",
17
+ "serve": "ts-node src/server.ts",
18
+ "eval": "ts-node src/evals/batch-evals.ts",
19
+ "test": "jest --testTimeout=30000",
20
+ "test:watch": "jest --watch"
21
+ },
22
+ "keywords": [],
23
+ "author": "Jina AI",
24
+ "license": "Apache-2.0",
25
+ "description": "",
26
+ "dependencies": {
27
+ "@ai-sdk/google-vertex": "^2.1.12",
28
+ "@google-cloud/firestore": "^7.11.0",
29
+ "civkit": "^0.8.3-15926cb",
30
+ "dayjs": "^1.11.13",
31
+ "lodash": "^4.17.21",
32
+ "reflect-metadata": "^0.2.2",
33
+ "tsyringe": "^4.8.0"
34
+ },
35
+ "devDependencies": {
36
+ "@types/lodash": "^4.17.15",
37
+ "pino-pretty": "^13.0.0"
38
+ }
39
+ }
jina-ai/src/dto/jina-embeddings-auth.ts ADDED
@@ -0,0 +1,347 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import {
2
+ Also, AuthenticationFailedError, AuthenticationRequiredError,
3
+ DownstreamServiceFailureError, RPC_CALL_ENVIRONMENT,
4
+ ArrayOf, AutoCastable, Prop
5
+ } from 'civkit/civ-rpc';
6
+ import { parseJSONText } from 'civkit/vectorize';
7
+ import { htmlEscape } from 'civkit/escape';
8
+ import { marshalErrorLike } from 'civkit/lang';
9
+
10
+ import type express from 'express';
11
+
12
+ import logger from '../lib/logger';
13
+ import { AsyncLocalContext } from '../lib/async-context';
14
+ import { InjectProperty } from '../lib/registry';
15
+ import { JinaEmbeddingsDashboardHTTP } from '../lib/billing';
16
+ import envConfig from '../lib/env-config';
17
+
18
+ import { FirestoreRecord } from '../lib/firestore';
19
+ import _ from 'lodash';
20
+ import { RateLimitDesc } from '../rate-limit';
21
+
22
// Snapshot of a user's Jina billing wallet as returned by the dashboard API.
// Defaults keep the record castable even when the upstream payload omits fields.
export class JinaWallet extends AutoCastable {
    @Prop({
        default: ''
    })
    user_id!: string;

    // Remaining trial balance (presumably a token count — TODO confirm units).
    @Prop({
        default: 0
    })
    trial_balance!: number;

    // Trial validity window; unset when the user has no trial.
    @Prop()
    trial_start?: Date;

    @Prop()
    trial_end?: Date;

    // Paid (non-trial) balance.
    @Prop({
        default: 0
    })
    regular_balance!: number;

    // Balance the billing middleware checks against and reportUsage() decrements.
    @Prop({
        default: 0
    })
    total_balance!: number;
}
49
+
50
// Firestore-cached copy of a Jina dashboard user account.
// The Firestore document id (_id) is the bearer token itself, so lookups by
// API key are a single doc read (see JinaEmbeddingsAuthDTO.getBrief).
export class JinaEmbeddingsTokenAccount extends FirestoreRecord {
    static override collectionName = 'embeddingsTokenAccounts';

    override _id!: string;

    @Prop({
        required: true
    })
    user_id!: string;

    @Prop({
        nullable: true,
        type: String,
    })
    email?: string;

    @Prop({
        nullable: true,
        type: String,
    })
    full_name?: string;

    @Prop({
        nullable: true,
        type: String,
    })
    customer_id?: string;

    @Prop({
        nullable: true,
        type: String,
    })
    avatar_url?: string;

    // Not keeping sensitive info for now
    // @Prop()
    // billing_address?: object;

    // @Prop()
    // payment_method?: object;

    // Balance snapshot used for pre-flight billing checks.
    @Prop({
        required: true
    })
    wallet!: JinaWallet;

    @Prop({
        type: Object
    })
    metadata?: { [k: string]: any; };

    // When this cache entry was last refreshed from the dashboard API;
    // getBrief() treats entries younger than 180s as fresh.
    @Prop({
        defaultFactory: () => new Date()
    })
    lastSyncedAt!: Date;

    // Optional per-user rate-limit overrides, keyed by app tag.
    @Prop({
        dictOf: [ArrayOf(RateLimitDesc)]
    })
    customRateLimits?: { [k: string]: RateLimitDesc[]; };

    // Fields stored as JSON strings in Firestore: parsed on read (from) and
    // stringified on write (degradeForFireStore). Currently empty.
    static patchedFields = [
    ];

    // Deserialize a raw record, JSON-parsing any patched (stringified) fields first.
    static override from(input: any) {
        for (const field of this.patchedFields) {
            if (typeof input[field] === 'string') {
                input[field] = parseJSONText(input[field]);
            }
        }

        return super.from(input) as JinaEmbeddingsTokenAccount;
    }

    // Produce a plain-object copy Firestore will accept (it rejects values with
    // custom prototypes), re-stringifying patched fields.
    override degradeForFireStore() {
        const copy: any = {
            ...this,
            wallet: { ...this.wallet },
            // Firestore cannot store class instances; flatten to plain objects.
            customRateLimits: _.mapValues(this.customRateLimits, (v) => v.map((x) => ({ ...x }))),
        };

        for (const field of (this.constructor as typeof JinaEmbeddingsTokenAccount).patchedFields) {
            if (typeof copy[field] === 'object') {
                copy[field] = JSON.stringify(copy[field]) as any;
            }
        }

        return copy;
    }

    // Upstream may return extra fields; keep them rather than dropping.
    [k: string]: any;
}
143
+
144
+
145
const authDtoLogger = logger.child({ service: 'JinaAuthDTO' });

// Minimal express request/response pair passed through the RPC call environment.
export interface FireBaseHTTPCtx {
    req: express.Request,
    res: express.Response,
}

// Single shared dashboard HTTP client, reused across all requests/DTO instances.
const THE_VERY_SAME_JINA_EMBEDDINGS_CLIENT = new JinaEmbeddingsDashboardHTTP(envConfig.JINA_EMBEDDINGS_DASHBOARD_API_KEY);
153
+
154
@Also({
    openapi: {
        operation: {
            parameters: {
                'Authorization': {
                    description: htmlEscape`Jina Token for authentication.\n\n` +
                        htmlEscape`- Member of <JinaEmbeddingsAuthDTO>\n\n` +
                        `- Authorization: Bearer {YOUR_JINA_TOKEN}`
                    ,
                    in: 'header',
                    schema: {
                        anyOf: [
                            { type: 'string', format: 'token' }
                        ]
                    }
                }
            }
        }
    }
})
// Per-request authentication DTO: extracts the bearer token, resolves the
// account (Firestore cache first, dashboard API second) and reports usage.
export class JinaEmbeddingsAuthDTO extends AutoCastable {
    uid?: string;
    bearerToken?: string;
    user?: JinaEmbeddingsTokenAccount;

    @InjectProperty(AsyncLocalContext)
    ctxMgr!: AsyncLocalContext;

    jinaEmbeddingsDashboard = THE_VERY_SAME_JINA_EMBEDDINGS_CLIENT;

    // Build the DTO from the RPC environment, pulling the token out of the
    // Authorization header ("Bearer <token>" or a bare token), with an
    // `_token` input field as fallback.
    static override from(input: any) {
        const instance = super.from(input) as JinaEmbeddingsAuthDTO;

        const ctx = input[RPC_CALL_ENVIRONMENT];

        const req = (ctx.rawRequest || ctx.req) as express.Request | undefined;

        if (req) {
            const authorization = req.get('authorization');

            if (authorization) {
                const authToken = authorization.split(' ')[1] || authorization;
                instance.bearerToken = authToken;
            }

        }

        if (!instance.bearerToken && input._token) {
            instance.bearerToken = input._token;
        }

        return instance;
    }

    // Resolve the account behind bearerToken.
    // Cache policy: a Firestore record younger than 180s is served directly
    // (unless ignoreCache). Otherwise the dashboard API is consulted and the
    // merged result written back. On upstream failure other than 401, a stale
    // cached record is served as a degraded fallback.
    async getBrief(ignoreCache?: boolean | string) {
        if (!this.bearerToken) {
            throw new AuthenticationRequiredError({
                message: 'Jina API key is required to authenticate. Please get one from https://jina.ai'
            });
        }

        let account;
        try {
            account = await JinaEmbeddingsTokenAccount.fromFirestore(this.bearerToken);
        } catch (err) {
            // FireStore would not accept any string as input and may throw if not happy with it
            void 0;
        }


        const age = account?.lastSyncedAt ? Date.now() - account.lastSyncedAt.getTime() : Infinity;

        if (account && !ignoreCache) {
            if (account && age < 180_000) {
                this.user = account;
                this.uid = this.user?.user_id;

                return account;
            }
        }

        try {
            const r = await this.jinaEmbeddingsDashboard.validateToken(this.bearerToken);
            const brief = r.data;
            // Merge the fresh brief over any cached fields and persist.
            const draftAccount = JinaEmbeddingsTokenAccount.from({
                ...account, ...brief, _id: this.bearerToken,
                lastSyncedAt: new Date()
            });
            await JinaEmbeddingsTokenAccount.save(draftAccount.degradeForFireStore(), undefined, { merge: true });

            this.user = draftAccount;
            this.uid = this.user?.user_id;

            return draftAccount;
        } catch (err: any) {
            authDtoLogger.warn(`Failed to get user brief: ${err}`, { err: marshalErrorLike(err) });

            // 401 means the key itself is bad — never fall back to cache.
            if (err?.status === 401) {
                throw new AuthenticationFailedError({
                    message: 'Invalid API key, please get a new one from https://jina.ai'
                });
            }

            if (account) {
                this.user = account;
                this.uid = this.user?.user_id;

                return account;
            }


            throw new DownstreamServiceFailureError(`Failed to authenticate: ${err}`);
        }
    }

    // Report consumed tokens to the dashboard. The in-memory balance is
    // decremented optimistically and restored if the report fails; the
    // Firestore cache is decremented best-effort after a successful report.
    async reportUsage(tokenCount: number, mdl: string, endpoint: string = '/encode') {
        const user = await this.assertUser();
        const uid = user.user_id;
        user.wallet.total_balance -= tokenCount;

        return this.jinaEmbeddingsDashboard.reportUsage(this.bearerToken!, {
            model_name: mdl,
            api_endpoint: endpoint,
            consumer: {
                id: uid,
                user_id: uid,
            },
            usage: {
                total_tokens: tokenCount
            },
            labels: {
                model_name: mdl
            }
        }).then((r) => {
            JinaEmbeddingsTokenAccount.COLLECTION.doc(this.bearerToken!)
                .update({ 'wallet.total_balance': JinaEmbeddingsTokenAccount.OPS.increment(-tokenCount) })
                .catch((err) => {
                    authDtoLogger.warn(`Failed to update cache for ${uid}: ${err}`, { err: marshalErrorLike(err) });
                });

            return r;
        }).catch((err) => {
            // Roll back the optimistic decrement; usage report is best-effort.
            user.wallet.total_balance += tokenCount;
            authDtoLogger.warn(`Failed to report usage for ${uid}: ${err}`, { err: marshalErrorLike(err) });
        });
    }

    // Resolve the user id (authenticating if necessary) and stash it in the
    // async-local context for downstream consumers.
    async solveUID() {
        if (this.uid) {
            this.ctxMgr.set('uid', this.uid);

            return this.uid;
        }

        if (this.bearerToken) {
            await this.getBrief();
            this.ctxMgr.set('uid', this.uid);

            return this.uid;
        }

        return undefined;
    }

    // Like solveUID, but authentication is mandatory.
    async assertUID() {
        const uid = await this.solveUID();

        if (!uid) {
            throw new AuthenticationRequiredError('Authentication failed');
        }

        return uid;
    }

    // Return the resolved account, authenticating on first use.
    async assertUser() {
        if (this.user) {
            return this.user;
        }

        await this.getBrief();

        return this.user!;
    }

    // Collect the user's effective custom rate limits for the given tags,
    // or undefined so callers fall back to default policy.
    getRateLimits(...tags: string[]) {
        const descs = tags.map((x) => this.user?.customRateLimits?.[x] || []).flat().filter((x) => x.isEffective());

        if (descs.length) {
            return descs;
        }

        return undefined;
    }
}
+ }
jina-ai/src/lib/async-context.ts ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
import { GlobalAsyncContext } from 'civkit/async-context';
import { container, singleton } from 'tsyringe';

// Process-wide async-local storage for request-scoped context (trace id, uid, ...).
@singleton()
export class AsyncLocalContext extends GlobalAsyncContext {}

const instance = container.resolve(AsyncLocalContext);
// Exposed on `process` so code without DI access can still reach the context.
Reflect.set(process, 'asyncLocalContext', instance);
export default instance;
jina-ai/src/lib/billing.ts ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { HTTPService } from 'civkit';
2
+ import _ from 'lodash';
3
+
4
+
5
// Wallet payload as delivered by the dashboard API (wire format).
export interface JinaWallet {
    trial_balance: number;
    trial_start: Date;
    trial_end: Date;
    regular_balance: number;
    total_balance: number;
}


// User brief returned by /v1/authorization and /v1/api_key/user.
export interface JinaUserBrief {
    user_id: string;
    email: string | null;
    full_name: string | null;
    customer_id: string | null;
    avatar_url?: string;
    billing_address: Partial<{
        address: string;
        city: string;
        state: string;
        country: string;
        postal_code: string;
    }>;
    payment_method: Partial<{
        brand: string;
        last4: string;
        exp_month: number;
        exp_year: number;
    }>;
    wallet: JinaWallet;
    metadata: {
        [k: string]: any;
    };
}

// Usage-report payload posted to /v1/usage.
export interface JinaUsageReport {
    model_name: string;
    api_endpoint: string;
    consumer: {
        user_id: string;
        customer_plan?: string;
        [k: string]: any;
    };
    usage: {
        total_tokens: number;
    };
    labels: {
        user_type?: string;
        model_name?: string;
        [k: string]: any;
    };
}
56
+
57
// Thin HTTP client for the Jina embeddings dashboard (auth + billing) API.
export class JinaEmbeddingsDashboardHTTP extends HTTPService {
    name = 'JinaEmbeddingsDashboardHTTP';

    constructor(
        // Service API key sent as `x-api-key` on privileged calls (usage reports).
        public apiKey: string,
        public baseUri: string = 'https://embeddings-dashboard-api.jina.ai/api'
    ) {
        super(baseUri);

        this.baseOptions.timeout = 30_000; // 30 sec
    }

    // Fetch the brief of the user owning `token` (token sent as Bearer auth).
    async authorization(token: string) {
        const r = await this.get<JinaUserBrief>('/v1/authorization', {
            headers: {
                Authorization: `Bearer ${token}`
            },
            responseType: 'json',
        });

        return r;
    }

    // Validate an API key and fetch its owner's brief (key sent as query param).
    async validateToken(token: string) {
        const r = await this.getWithSearchParams<JinaUserBrief>('/v1/api_key/user', {
            api_key: token,
        }, {
            responseType: 'json',
        });

        return r;
    }

    // Post a usage report on behalf of the token's owner; authenticated with
    // both the user token and this service's apiKey.
    async reportUsage(token: string, query: JinaUsageReport) {
        const r = await this.postJson('/v1/usage', query, {
            headers: {
                Authorization: `Bearer ${token}`,
                'x-api-key': this.apiKey,
            },
            responseType: 'text',
        });

        return r;
    }

}
jina-ai/src/lib/env-config.ts ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { container, singleton } from 'tsyringe';
2
+
3
+ export const SPECIAL_COMBINED_ENV_KEY = 'ENV_COMBINED';
4
+ const CONF_ENV = [
5
+ 'OPENAI_API_KEY',
6
+
7
+ 'ANTHROPIC_API_KEY',
8
+
9
+ 'REPLICATE_API_KEY',
10
+
11
+ 'GOOGLE_AI_STUDIO_API_KEY',
12
+
13
+ 'JINA_EMBEDDINGS_API_KEY',
14
+
15
+ 'JINA_EMBEDDINGS_DASHBOARD_API_KEY',
16
+
17
+ 'BRAVE_SEARCH_API_KEY',
18
+
19
+ ] as const;
20
+
21
+
22
+ @singleton()
23
+ export class EnvConfig {
24
+ dynamic!: Record<string, string>;
25
+
26
+ combined: Record<string, string> = {};
27
+ originalEnv: Record<string, string | undefined> = { ...process.env };
28
+
29
+ constructor() {
30
+ if (process.env[SPECIAL_COMBINED_ENV_KEY]) {
31
+ Object.assign(this.combined, JSON.parse(
32
+ Buffer.from(process.env[SPECIAL_COMBINED_ENV_KEY]!, 'base64').toString('utf-8')
33
+ ));
34
+ delete process.env[SPECIAL_COMBINED_ENV_KEY];
35
+ }
36
+
37
+ // Static config
38
+ for (const x of CONF_ENV) {
39
+ const s = this.combined[x] || process.env[x] || '';
40
+ Reflect.set(this, x, s);
41
+ if (x in process.env) {
42
+ delete process.env[x];
43
+ }
44
+ }
45
+
46
+ // Dynamic config
47
+ this.dynamic = new Proxy({
48
+ get: (_target: any, prop: string) => {
49
+ return this.combined[prop] || process.env[prop] || '';
50
+ }
51
+ }, {}) as any;
52
+ }
53
+ }
54
+
55
+ // eslint-disable-next-line @typescript-eslint/no-empty-interface
56
+ export interface EnvConfig extends Record<typeof CONF_ENV[number], string> { }
57
+
58
+ const instance = container.resolve(EnvConfig);
59
+ export default instance;
jina-ai/src/lib/errors.ts ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import { ApplicationError, Prop, RPC_TRANSFER_PROTOCOL_META_SYMBOL, StatusCode } from 'civkit';
import _ from 'lodash';
import dayjs from 'dayjs';
import utc from 'dayjs/plugin/utc';

// Needed for formatting Retry-After dates in GMT (see RateLimitTriggeredError).
dayjs.extend(utc);

// Application error types. The 5-digit @StatusCode maps to an HTTP status by
// its first three digits (e.g. 50301 -> 503, 42903 -> 429).

@StatusCode(50301)
export class ServiceDisabledError extends ApplicationError { }

@StatusCode(50302)
export class ServiceCrashedError extends ApplicationError { }

@StatusCode(50303)
export class ServiceNodeResourceDrainError extends ApplicationError { }

@StatusCode(40104)
export class EmailUnverifiedError extends ApplicationError { }

@StatusCode(40201)
export class InsufficientCreditsError extends ApplicationError { }

@StatusCode(40202)
export class FreeFeatureLimitError extends ApplicationError { }

@StatusCode(40203)
export class InsufficientBalanceError extends ApplicationError { }

@StatusCode(40903)
export class LockConflictError extends ApplicationError { }

@StatusCode(40904)
export class BudgetExceededError extends ApplicationError { }

@StatusCode(45101)
export class HarmfulContentError extends ApplicationError { }

@StatusCode(45102)
export class SecurityCompromiseError extends ApplicationError { }

@StatusCode(41201)
export class BatchSizeTooLargeError extends ApplicationError { }


// Rate-limit error that augments the transfer metadata with a Retry-After
// header when either retryAfter (seconds) or retryAfterDate is set.
@StatusCode(42903)
export class RateLimitTriggeredError extends ApplicationError {

    @Prop({
        desc: 'Retry after seconds',
    })
    retryAfter?: number;

    @Prop({
        desc: 'Retry after date',
    })
    retryAfterDate?: Date;

    protected override get [RPC_TRANSFER_PROTOCOL_META_SYMBOL]() {
        // Seconds take precedence over the absolute date.
        const retryAfter = this.retryAfter || this.retryAfterDate;
        if (!retryAfter) {
            return super[RPC_TRANSFER_PROTOCOL_META_SYMBOL];
        }

        // Dates are rendered in the RFC 7231 HTTP-date format (GMT).
        return _.merge(_.cloneDeep(super[RPC_TRANSFER_PROTOCOL_META_SYMBOL]), {
            headers: {
                'Retry-After': `${retryAfter instanceof Date ? dayjs(retryAfter).utc().format('ddd, DD MMM YYYY HH:mm:ss [GMT]') : retryAfter}`,
            }
        });
    }
}
jina-ai/src/lib/firestore.ts ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import _ from 'lodash';
2
+ import { AutoCastable, Prop, RPC_MARSHAL } from 'civkit/civ-rpc';
3
+ import {
4
+ Firestore, FieldValue, DocumentReference,
5
+ Query, Timestamp, SetOptions, DocumentSnapshot,
6
+ } from '@google-cloud/firestore';
7
+
8
// Firestore doesn't support JavaScript objects with custom prototypes (i.e. objects that were created via the "new" operator).
// Workaround: while the wrapped Firestore function runs, make the global
// Object.getPrototypeOf report Object.prototype for everything, so Firestore's
// prototype check accepts our AutoCastable instances. The original function is
// restored in `finally`; this is only safe because the wrapped call is
// synchronous for the duration of the patch.
function patchFireStoreArrogance(func: Function) {
    return function (this: unknown) {
        const origObjectGetPrototype = Object.getPrototypeOf;
        Object.getPrototypeOf = function (x) {
            const r = origObjectGetPrototype.call(this, x);
            if (!r) {
                // Preserve null-prototype results (e.g. Object.create(null)).
                return r;
            }
            return Object.prototype;
        };
        try {
            return func.call(this, ...arguments);
        } finally {
            Object.getPrototypeOf = origObjectGetPrototype;
        }
    };
}

// Patch the two entry points where Firestore validates object prototypes.
Reflect.set(DocumentReference.prototype, 'set', patchFireStoreArrogance(Reflect.get(DocumentReference.prototype, 'set')));
Reflect.set(DocumentSnapshot, 'fromObject', patchFireStoreArrogance(Reflect.get(DocumentSnapshot, 'fromObject')));
29
+
30
+ function mapValuesDeep(v: any, fn: (i: any) => any): any {
31
+ if (_.isPlainObject(v)) {
32
+ return _.mapValues(v, (i) => mapValuesDeep(i, fn));
33
+ } else if (_.isArray(v)) {
34
+ return v.map((i) => mapValuesDeep(i, fn));
35
+ } else {
36
+ return fn(v);
37
+ }
38
+ }
39
+
40
export type Constructor<T> = { new(...args: any[]): T; };
export type Constructed<T> = T extends Partial<infer U> ? U : T extends object ? T : object;

// Load a single document by id into an instance of the calling record class.
// Returns undefined when the document does not exist. The Firestore ref and id
// are attached as non-enumerable `_ref` / enumerable `_id`.
export function fromFirestore<T extends FirestoreRecord>(
    this: Constructor<T>, id: string, overrideCollection?: string
): Promise<T | undefined>;
export async function fromFirestore(
    this: any, id: string, overrideCollection?: string
) {
    const collection = overrideCollection || this.collectionName;
    if (!collection) {
        throw new Error(`Missing collection name to construct ${this.name}`);
    }

    const ref = this.DB.collection(overrideCollection || this.collectionName).doc(id);

    const ptr = await ref.get();

    if (!ptr.exists) {
        return undefined;
    }

    const doc = this.from(
        // Fixes non-native firebase types
        mapValuesDeep(ptr.data(), (i: any) => {
            if (i instanceof Timestamp) {
                return i.toDate();
            }

            return i;
        })
    );

    Object.defineProperty(doc, '_ref', { value: ref, enumerable: false });
    Object.defineProperty(doc, '_id', { value: ptr.id, enumerable: true });

    return doc;
}
78
+
79
// Run an arbitrary Firestore query and materialize each hit as an instance of
// the calling record class (Timestamps converted to Dates, _ref/_id attached).
// Returns [] when there are no matches.
export function fromFirestoreQuery<T extends FirestoreRecord>(
    this: Constructor<T>, query: Query
): Promise<T[]>;
export async function fromFirestoreQuery(this: any, query: Query) {
    const ptr = await query.get();

    if (ptr.docs.length) {
        return ptr.docs.map(doc => {
            const r = this.from(
                mapValuesDeep(doc.data(), (i: any) => {
                    if (i instanceof Timestamp) {
                        return i.toDate();
                    }

                    return i;
                })
            );
            Object.defineProperty(r, '_ref', { value: doc.ref, enumerable: false });
            Object.defineProperty(r, '_id', { value: doc.id, enumerable: true });

            return r;
        });
    }

    return [];
}
105
+
106
// Persist a record. Reuses the document's existing _ref when present;
// otherwise allocates one (honoring a pre-set _id, or letting Firestore
// generate one). Writes with merge:true by default; setOptions can override.
export function setToFirestore<T extends FirestoreRecord>(
    this: Constructor<T>, doc: T, overrideCollection?: string, setOptions?: SetOptions
): Promise<T>;
export async function setToFirestore(
    this: any, doc: any, overrideCollection?: string, setOptions?: SetOptions
) {
    let ref: DocumentReference<any> = doc._ref;
    if (!ref) {
        const collection = overrideCollection || this.collectionName;
        if (!collection) {
            throw new Error(`Missing collection name to construct ${this.name}`);
        }

        const predefinedId = doc._id || undefined;
        const hdl = this.DB.collection(overrideCollection || this.collectionName);
        ref = predefinedId ? hdl.doc(predefinedId) : hdl.doc();

        Object.defineProperty(doc, '_ref', { value: ref, enumerable: false });
        Object.defineProperty(doc, '_id', { value: ref.id, enumerable: true });
    }

    await ref.set(doc, { merge: true, ...setOptions });

    return doc;
}
131
+
132
// Delete everything matched by `query`, one batch per query page, rescheduling
// itself via process.nextTick until a page comes back empty.
// NOTE(review): declared to resolve T but actually resolves undefined, and the
// nextTick continuation is fire-and-forget (its errors are unobserved) —
// confirm callers rely only on the kickoff, not on completion.
export function deleteQueryBatch<T extends FirestoreRecord>(
    this: Constructor<T>, query: Query
): Promise<T>;
export async function deleteQueryBatch(this: any, query: Query) {
    const snapshot = await query.get();

    const batchSize = snapshot.size;
    if (batchSize === 0) {
        return;
    }

    // Delete documents in a batch
    const batch = this.DB.batch();
    snapshot.docs.forEach((doc) => {
        batch.delete(doc.ref);
    });
    await batch.commit();

    // Recurse asynchronously for the next page.
    process.nextTick(() => {
        this.deleteQueryBatch(query);
    });
};
154
+
155
// Materialize an already-fetched DocumentSnapshot as an instance of the
// calling record class (Timestamps converted to Dates, _ref/_id attached).
export function fromFirestoreDoc<T extends FirestoreRecord>(
    this: Constructor<T>, snapshot: DocumentSnapshot,
): T | undefined;
export function fromFirestoreDoc(
    this: any, snapshot: DocumentSnapshot,
) {
    const doc = this.from(
        // Fixes non-native firebase types
        mapValuesDeep(snapshot.data(), (i: any) => {
            if (i instanceof Timestamp) {
                return i.toDate();
            }

            return i;
        })
    );

    Object.defineProperty(doc, '_ref', { value: snapshot.ref, enumerable: false });
    Object.defineProperty(doc, '_id', { value: snapshot.id, enumerable: true });

    return doc;
}
177
// Shared Firestore client; project id comes from the GCLOUD_PROJECT env var.
const defaultFireStore = new Firestore({
    projectId: process.env.GCLOUD_PROJECT,
});
// Base class for Firestore-backed records. Subclasses set `collectionName`
// and inherit the static load/save helpers defined above.
export class FirestoreRecord extends AutoCastable {
    static collectionName?: string;
    // FieldValue operators (increment, arrayUnion, ...) for partial updates.
    static OPS = FieldValue;
    static DB = defaultFireStore;
    static get COLLECTION() {
        if (!this.collectionName) {
            throw new Error('Not implemented');
        }

        return this.DB.collection(this.collectionName);
    }

    @Prop()
    _id?: string;
    // Backing document reference; attached non-enumerably by the loaders.
    _ref?: DocumentReference<Partial<Omit<this, '_ref' | '_id'>>>;

    static fromFirestore = fromFirestore;
    static fromFirestoreDoc = fromFirestoreDoc;
    static fromFirestoreQuery = fromFirestoreQuery;

    static save = setToFirestore;
    static deleteQueryBatch = deleteQueryBatch;

    // RPC serialization: include _id and render _ref as its document path.
    [RPC_MARSHAL]() {
        return {
            ...this,
            _id: this._id,
            _ref: this._ref?.path
        };
    }

    // Deep-convert to plain JSON-able data Firestore will accept, letting
    // nested records apply their own degradeForFireStore.
    degradeForFireStore(): this {
        return JSON.parse(JSON.stringify(this, function (k, v) {
            if (k === '') {
                return v;
            }
            if (typeof v === 'object' && v && (typeof v.degradeForFireStore === 'function')) {
                return v.degradeForFireStore();
            }

            return v;
        }));
    }
}
jina-ai/src/lib/logger.ts ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import { AbstractPinoLogger } from 'civkit/pino-logger';
import { singleton, container } from 'tsyringe';
import { threadId } from 'node:worker_threads';
import { getTraceCtx } from 'civkit/async-context';


// Map pino level names to Google Cloud Logging severity strings.
const levelToSeverityMap: { [k: string]: string | undefined; } = {
    trace: 'DEFAULT',
    debug: 'DEBUG',
    info: 'INFO',
    warn: 'WARNING',
    error: 'ERROR',
    fatal: 'CRITICAL',
};

// Process-wide logger. In production emits raw JSON to stdout (for Cloud
// Logging ingestion); in development uses pino-pretty for readable output.
@singleton()
export class GlobalLogger extends AbstractPinoLogger {
    loggerOptions = {
        level: 'debug',
        base: {
            tid: threadId,
        }
    };

    override init(): void {
        if (process.env['NODE_ENV']?.startsWith('prod')) {
            super.init(process.stdout);
        } else {
            // Lazy require keeps pino-pretty a dev-only dependency.
            const PinoPretty = require('pino-pretty').PinoPretty;
            super.init(PinoPretty({
                singleLine: true,
                colorize: true,
                messageFormat(log: any, messageKey: any) {
                    return `${log['tid'] ? `[${log['tid']}]` : ''}[${log['service'] || 'ROOT'}] ${log[messageKey]}`;
                },
            }));
        }


        this.emit('ready');
    }

    // Enrich every record with a GCP severity and, when a trace context and
    // GCLOUD_PROJECT are available, the Cloud Trace correlation field.
    override log(...args: any[]) {
        const [levelObj, ...rest] = args;
        const severity = levelToSeverityMap[levelObj?.level];
        const traceCtx = getTraceCtx();
        const patched: any= { ...levelObj, severity };
        if (traceCtx?.traceId && process.env['GCLOUD_PROJECT']) {
            patched['logging.googleapis.com/trace'] = `projects/${process.env['GCLOUD_PROJECT']}/traces/${traceCtx.traceId}`;
        }
        return super.log(patched, ...rest);
    }
}

const instance = container.resolve(GlobalLogger);
export default instance;
+ export default instance;
jina-ai/src/lib/registry.ts ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
import { container } from 'tsyringe';
import { propertyInjectorFactory } from 'civkit/property-injector';

// Property-injection decorator bound to the app's global tsyringe container.
export const InjectProperty = propertyInjectorFactory(container);
jina-ai/src/patch-express.ts ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { ApplicationError, Prop, RPC_CALL_ENVIRONMENT } from "civkit/civ-rpc";
2
+ import { marshalErrorLike } from "civkit/lang";
3
+ import { randomUUID } from "crypto";
4
+ import { once } from "events";
5
+ import type { NextFunction, Request, Response } from "express";
6
+
7
+ import { JinaEmbeddingsAuthDTO } from "./dto/jina-embeddings-auth";
8
+ import rateLimitControl, { API_CALL_STATUS, RateLimitDesc } from "./rate-limit";
9
+ import asyncLocalContext from "./lib/async-context";
10
+ import globalLogger from "./lib/logger";
11
+ import { InsufficientBalanceError } from "./lib/errors";
12
+ import { FirestoreRecord } from "./lib/firestore";
13
+ import cors from "cors";
14
+
15
globalLogger.serviceReady();
const logger = globalLogger.child({ service: 'JinaAISaaSMiddleware' });
// App tag used for rate-limit records and usage reports.
const appName = 'DEEPRESEARCH';

// Persisted knowledge snippet collected during a request, keyed back to its
// trace and owner; saved fire-and-forget by the middleware after response close.
export class KnowledgeItem extends FirestoreRecord {
    static override collectionName = 'knowledgeItems';

    @Prop({
        required: true
    })
    traceId!: string;

    @Prop({
        required: true
    })
    uid!: string;

    @Prop({
        default: ''
    })
    question!: string;

    @Prop({
        default: ''
    })
    answer!: string;

    @Prop({
        default: ''
    })
    type!: string;

    @Prop({
        arrayOf: Object,
        default: []
    })
    references!: any[];

    @Prop({
        defaultFactory: () => new Date()
    })
    createdAt!: Date;

    @Prop({
        defaultFactory: () => new Date()
    })
    updatedAt!: Date;
}
63
const corsMiddleware = cors();
// SaaS gatekeeper middleware: authenticates the Jina token, checks balance,
// enforces rate limits, then — after the response closes — reports usage,
// finalizes the rate-limit record and persists any collected knowledge.
export const jinaAiMiddleware = (req: Request, res: Response, next: NextFunction) => {
    // Health probe short-circuit.
    // NOTE(review): 'pone' looks like a typo of 'pong' — confirm no probe
    // matches the body before changing it.
    if (req.path === '/ping') {
        res.status(200).end('pone');
        return;
    }
    // Model listing is public; skip auth/billing.
    if (req.path.startsWith('/v1/models')) {
        next();
        return;
    }
    // Only gate POST/GET; other verbs pass through untouched.
    if (req.method !== 'POST' && req.method !== 'GET') {
        next();
        return;
    }
    asyncLocalContext.run(async () => {
        // Establish the request-scoped context (trace id, start time, ip).
        const googleTraceId = req.get('x-cloud-trace-context')?.split('/')?.[0];
        const ctx = asyncLocalContext.ctx;
        ctx.traceId = req.get('x-request-id') || req.get('request-id') || googleTraceId || randomUUID();
        ctx.traceT0 = new Date();
        ctx.ip = req?.ip;

        try {
            const authDto = JinaEmbeddingsAuthDTO.from({
                [RPC_CALL_ENVIRONMENT]: { req, res }
            });

            const user = await authDto.assertUser();
            const uid = await authDto.assertUID();
            // Written as a negated comparison so NaN/undefined balances also fail.
            if (!(user.wallet.total_balance > 0)) {
                throw new InsufficientBalanceError(`Account balance not enough to run this query, please recharge.`);
            }
            await rateLimitControl.serviceReady();
            // Custom per-user limits win; otherwise 30 req/min for speed_level >= 2,
            // 10 req/min for everyone else.
            const rateLimitPolicy = authDto.getRateLimits(appName) || [
                parseInt(user.metadata?.speed_level) >= 2 ?
                    RateLimitDesc.from({
                        occurrence: 30,
                        periodSeconds: 60
                    }) :
                    RateLimitDesc.from({
                        occurrence: 10,
                        periodSeconds: 60
                    })
            ];
            const criterions = rateLimitPolicy.map((c) => rateLimitControl.rateLimitDescToCriterion(c));
            await Promise.all(criterions.map(([pointInTime, n]) => rateLimitControl.assertUidPeriodicLimit(uid, pointInTime, n, appName)));

            // Open a pending API-call record; saved best-effort.
            const apiRoll = rateLimitControl.record({ uid, tags: [appName] })
            apiRoll.save().catch((err) => logger.warn(`Failed to save rate limit record`, { err: marshalErrorLike(err) }));

            const pResClose = once(res, 'close');

            // Hand off to the actual route handler.
            next();

            // Post-response accounting starts once the connection closes.
            await pResClose;
            // ctx.chargeAmount is expected to be set by the handler — TODO confirm.
            const chargeAmount = ctx.chargeAmount;
            if (chargeAmount) {
                authDto.reportUsage(chargeAmount, `reader-${appName}`).catch((err) => {
                    logger.warn(`Unable to report usage for ${uid}`, { err: marshalErrorLike(err) });
                });
                apiRoll.chargeAmount = chargeAmount;
            }
            apiRoll.status = res.statusCode === 200 ? API_CALL_STATUS.SUCCESS : API_CALL_STATUS.ERROR;
            apiRoll.save().catch((err) => logger.warn(`Failed to save rate limit record`, { err: marshalErrorLike(err) }));
            logger.info(`HTTP ${res.statusCode} for request ${ctx.traceId} after ${Date.now() - ctx.traceT0.valueOf()}ms`, {
                uid,
                chargeAmount,
            });

            // Persist knowledge gathered during the request, fire-and-forget.
            if (ctx.promptContext?.knowledge?.length) {
                Promise.all(ctx.promptContext.knowledge.map((x: any) => KnowledgeItem.save(
                    KnowledgeItem.from({
                        ...x,
                        uid,
                        traceId: ctx.traceId,
                    })
                ))).catch((err: any) => {
                    logger.warn(`Failed to save knowledge`, { err: marshalErrorLike(err) });
                });
            }

        } catch (err: any) {
            if (!res.headersSent) {
                // Ensure CORS headers are present on error responses too.
                corsMiddleware(req, res, () => 'noop');
                if (err instanceof ApplicationError) {
                    res.status(parseInt(err.code as string) || 500).json({ error: err.message });

                    return;
                }

                res.status(500).json({ error: 'Internal' });
            }

            logger.error(`Error in billing middleware`, { err: marshalErrorLike(err) });
            if (err.stack) {
                logger.error(err.stack);
            }
        }

    });
}
jina-ai/src/rate-limit.ts ADDED
@@ -0,0 +1,278 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { AutoCastable, ResourcePolicyDenyError, Also, Prop } from 'civkit/civ-rpc';
2
+ import { AsyncService } from 'civkit/async-service';
3
+ import { getTraceId } from 'civkit/async-context';
4
+ import { singleton, container } from 'tsyringe';
5
+
6
+ import { RateLimitTriggeredError } from './lib/errors';
7
+ import { FirestoreRecord } from './lib/firestore';
8
+ import { GlobalLogger } from './lib/logger';
9
+
10
/** Lifecycle states of a recorded API call. */
export enum API_CALL_STATUS {
    SUCCESS = 'success',
    ERROR = 'error',
    PENDING = 'pending',
}

/**
 * Firestore-backed record of a single API call, stored in the `apiRoll`
 * collection and used as the unit of rate-limit accounting.
 * The dictOf/index signature allows arbitrary extra fields (e.g. chargeAmount)
 * to ride along on the document.
 */
@Also({ dictOf: Object })
export class APICall extends FirestoreRecord {
    static override collectionName = 'apiRoll';

    // Trace id of the originating request; defaults to the async-context trace id.
    @Prop({
        required: true,
        defaultFactory: () => getTraceId()
    })
    traceId!: string;

    // User the call is attributed to (drives per-UID limits).
    @Prop()
    uid?: string;

    // Client IP the call is attributed to (drives per-IP limits).
    @Prop()
    ip?: string;

    // Free-form labels (e.g. app name) used to scope rate-limit queries.
    @Prop({
        arrayOf: String,
        default: [],
    })
    tags!: string[];

    @Prop({
        required: true,
        defaultFactory: () => new Date(),
    })
    createdAt!: Date;

    @Prop()
    completedAt?: Date;

    // Rate-limit accounting counts records in SUCCESS or PENDING state.
    @Prop({
        required: true,
        default: API_CALL_STATUS.PENDING,
    })
    status!: API_CALL_STATUS;

    // TTL marker: records expire 90 days after creation.
    @Prop({
        required: true,
        defaultFactory: () => new Date(Date.now() + 1000 * 60 * 60 * 24 * 90),
    })
    expireAt!: Date;

    // Arbitrary extra payload fields are permitted (see @Also above).
    [k: string]: any;

    // Append the given tags, skipping any already present.
    tag(...tags: string[]) {
        for (const t of tags) {
            if (!this.tags.includes(t)) {
                this.tags.push(t);
            }
        }
    }

    // Instance-level convenience wrapper around the static save().
    save() {
        return (this.constructor as typeof APICall).save(this);
    }
}
73
+
74
+
75
+ export class RateLimitDesc extends AutoCastable {
76
+ @Prop({
77
+ default: 1000
78
+ })
79
+ occurrence!: number;
80
+
81
+ @Prop({
82
+ default: 3600
83
+ })
84
+ periodSeconds!: number;
85
+
86
+ @Prop()
87
+ notBefore?: Date;
88
+
89
+ @Prop()
90
+ notAfter?: Date;
91
+
92
+ isEffective() {
93
+ const now = new Date();
94
+ if (this.notBefore && this.notBefore > now) {
95
+ return false;
96
+ }
97
+ if (this.notAfter && this.notAfter < now) {
98
+ return false;
99
+ }
100
+
101
+ return true;
102
+ }
103
+ }
104
+
105
+
106
+ @singleton()
107
+ export class RateLimitControl extends AsyncService {
108
+
109
+ logger = this.globalLogger.child({ service: this.constructor.name });
110
+
111
+ constructor(
112
+ protected globalLogger: GlobalLogger,
113
+ ) {
114
+ super(...arguments);
115
+ }
116
+
117
+ override async init() {
118
+ await this.dependencyReady();
119
+
120
+ this.emit('ready');
121
+ }
122
+
123
+ async queryByUid(uid: string, pointInTime: Date, ...tags: string[]) {
124
+ let q = APICall.COLLECTION
125
+ .orderBy('createdAt', 'asc')
126
+ .where('createdAt', '>=', pointInTime)
127
+ .where('status', 'in', [API_CALL_STATUS.SUCCESS, API_CALL_STATUS.PENDING])
128
+ .where('uid', '==', uid);
129
+ if (tags.length) {
130
+ q = q.where('tags', 'array-contains-any', tags);
131
+ }
132
+
133
+ return APICall.fromFirestoreQuery(q);
134
+ }
135
+
136
+ async queryByIp(ip: string, pointInTime: Date, ...tags: string[]) {
137
+ let q = APICall.COLLECTION
138
+ .orderBy('createdAt', 'asc')
139
+ .where('createdAt', '>=', pointInTime)
140
+ .where('status', 'in', [API_CALL_STATUS.SUCCESS, API_CALL_STATUS.PENDING])
141
+ .where('ip', '==', ip);
142
+ if (tags.length) {
143
+ q = q.where('tags', 'array-contains-any', tags);
144
+ }
145
+
146
+ return APICall.fromFirestoreQuery(q);
147
+ }
148
+
149
+ async assertUidPeriodicLimit(uid: string, pointInTime: Date, limit: number, ...tags: string[]) {
150
+ if (limit <= 0) {
151
+ throw new ResourcePolicyDenyError(`This UID(${uid}) is not allowed to call this endpoint (rate limit quota is 0).`);
152
+ }
153
+
154
+ let q = APICall.COLLECTION
155
+ .orderBy('createdAt', 'asc')
156
+ .where('createdAt', '>=', pointInTime)
157
+ .where('status', 'in', [API_CALL_STATUS.SUCCESS, API_CALL_STATUS.PENDING])
158
+ .where('uid', '==', uid);
159
+ if (tags.length) {
160
+ q = q.where('tags', 'array-contains-any', tags);
161
+ }
162
+ const count = (await q.count().get()).data().count;
163
+
164
+ if (count >= limit) {
165
+ const r = await APICall.fromFirestoreQuery(q.limit(1));
166
+ const [r1] = r;
167
+
168
+ const dtMs = Math.abs(r1.createdAt?.valueOf() - pointInTime.valueOf());
169
+ const dtSec = Math.ceil(dtMs / 1000);
170
+
171
+ throw RateLimitTriggeredError.from({
172
+ message: `Per UID rate limit exceeded (${tags.join(',') || 'called'} ${limit} times since ${pointInTime})`,
173
+ retryAfter: dtSec,
174
+ });
175
+ }
176
+
177
+ return count + 1;
178
+ }
179
+
180
+ async assertIPPeriodicLimit(ip: string, pointInTime: Date, limit: number, ...tags: string[]) {
181
+ let q = APICall.COLLECTION
182
+ .orderBy('createdAt', 'asc')
183
+ .where('createdAt', '>=', pointInTime)
184
+ .where('status', 'in', [API_CALL_STATUS.SUCCESS, API_CALL_STATUS.PENDING])
185
+ .where('ip', '==', ip);
186
+ if (tags.length) {
187
+ q = q.where('tags', 'array-contains-any', tags);
188
+ }
189
+
190
+ const count = (await q.count().get()).data().count;
191
+
192
+ if (count >= limit) {
193
+ const r = await APICall.fromFirestoreQuery(q.limit(1));
194
+ const [r1] = r;
195
+
196
+ const dtMs = Math.abs(r1.createdAt?.valueOf() - pointInTime.valueOf());
197
+ const dtSec = Math.ceil(dtMs / 1000);
198
+
199
+ throw RateLimitTriggeredError.from({
200
+ message: `Per IP rate limit exceeded (${tags.join(',') || 'called'} ${limit} times since ${pointInTime})`,
201
+ retryAfter: dtSec,
202
+ });
203
+ }
204
+
205
+ return count + 1;
206
+ }
207
+
208
+ record(partialRecord: Partial<APICall>) {
209
+ const record = APICall.from(partialRecord);
210
+ const newId = APICall.COLLECTION.doc().id;
211
+ record._id = newId;
212
+
213
+ return record;
214
+ }
215
+
216
+ // async simpleRPCUidBasedLimit(rpcReflect: RPCReflection, uid: string, tags: string[] = [],
217
+ // ...inputCriterion: RateLimitDesc[] | [Date, number][]) {
218
+ // const criterion = inputCriterion.map((c) => { return Array.isArray(c) ? c : this.rateLimitDescToCriterion(c); });
219
+
220
+ // await Promise.all(criterion.map(([pointInTime, n]) =>
221
+ // this.assertUidPeriodicLimit(uid, pointInTime, n, ...tags)));
222
+
223
+ // const r = this.record({
224
+ // uid,
225
+ // tags,
226
+ // });
227
+
228
+ // r.save().catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
229
+ // rpcReflect.then(() => {
230
+ // r.status = API_CALL_STATUS.SUCCESS;
231
+ // r.save()
232
+ // .catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
233
+ // });
234
+ // rpcReflect.catch((err) => {
235
+ // r.status = API_CALL_STATUS.ERROR;
236
+ // r.error = err.toString();
237
+ // r.save()
238
+ // .catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
239
+ // });
240
+
241
+ // return r;
242
+ // }
243
+
244
+ rateLimitDescToCriterion(rateLimitDesc: RateLimitDesc) {
245
+ return [new Date(Date.now() - rateLimitDesc.periodSeconds * 1000), rateLimitDesc.occurrence] as [Date, number];
246
+ }
247
+
248
+ // async simpleRpcIPBasedLimit(rpcReflect: RPCReflection, ip: string, tags: string[] = [],
249
+ // ...inputCriterion: RateLimitDesc[] | [Date, number][]) {
250
+ // const criterion = inputCriterion.map((c) => { return Array.isArray(c) ? c : this.rateLimitDescToCriterion(c); });
251
+ // await Promise.all(criterion.map(([pointInTime, n]) =>
252
+ // this.assertIPPeriodicLimit(ip, pointInTime, n, ...tags)));
253
+
254
+ // const r = this.record({
255
+ // ip,
256
+ // tags,
257
+ // });
258
+
259
+ // r.save().catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
260
+ // rpcReflect.then(() => {
261
+ // r.status = API_CALL_STATUS.SUCCESS;
262
+ // r.save()
263
+ // .catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
264
+ // });
265
+ // rpcReflect.catch((err) => {
266
+ // r.status = API_CALL_STATUS.ERROR;
267
+ // r.error = err.toString();
268
+ // r.save()
269
+ // .catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
270
+ // });
271
+
272
+ // return r;
273
+ // }
274
+ }
275
+
276
// Eagerly resolve a shared singleton through the DI container at module load.
const instance = container.resolve(RateLimitControl);

export default instance;
jina-ai/src/server.ts ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import 'reflect-metadata'
2
+ import express from 'express';
3
+ import { jinaAiMiddleware } from "./patch-express";
4
+ import { Server } from 'http';
5
+
6
+ const app = require('../..').default;
7
+
8
+ const rootApp = express();
9
+ rootApp.use(jinaAiMiddleware, app);
10
+
11
+
12
+ const port = process.env.PORT || 3000;
13
+
14
+ let server: Server | undefined;
15
+ // Export server startup function for better testing
16
+ export function startServer() {
17
+ return rootApp.listen(port, () => {
18
+ console.log(`Server running at http://localhost:${port}`);
19
+ });
20
+ }
21
+
22
+ // Start server if running directly
23
+ if (process.env.NODE_ENV !== 'test') {
24
+ server = startServer();
25
+ }
26
+
27
+ process.on('unhandledRejection', (_err) => `Is false alarm`);
28
+
29
+ process.on('uncaughtException', (err) => {
30
+ console.log('Uncaught exception', err);
31
+
32
+ // Looks like Firebase runtime does not handle error properly.
33
+ // Make sure to quit the process.
34
+ process.nextTick(() => process.exit(1));
35
+ console.error('Uncaught exception, process quit.');
36
+ throw err;
37
+ });
38
+
39
+ const sigHandler = (signal: string) => {
40
+ console.log(`Received ${signal}, exiting...`);
41
+ if (server && server.listening) {
42
+ console.log(`Shutting down gracefully...`);
43
+ console.log(`Waiting for the server to drain and close...`);
44
+ server.close((err) => {
45
+ if (err) {
46
+ console.error('Error while closing server', err);
47
+ return;
48
+ }
49
+ process.exit(0);
50
+ });
51
+ server.closeIdleConnections();
52
+ }
53
+
54
+ }
55
+ process.on('SIGTERM', sigHandler);
56
+ process.on('SIGINT', sigHandler);
jina-ai/tsconfig.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "compilerOptions": {
3
+ "target": "ES2020",
4
+ "module": "node16",
5
+ "outDir": "./dist",
6
+ "rootDir": "./src",
7
+ "sourceMap": true,
8
+ "esModuleInterop": true,
9
+ "skipLibCheck": true,
10
+ "forceConsistentCasingInFileNames": true,
11
+ "strict": true,
12
+ "experimentalDecorators": true,
13
+ "emitDecoratorMetadata": true,
14
+ "resolveJsonModule": true
15
+ }
16
+ }
17
+
package-lock.json ADDED
The diff for this file is too large to render. See raw diff
 
package.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "node-deepresearch",
3
+ "version": "1.0.0",
4
+ "main": "dist/app.js",
5
+ "files": [
6
+ "dist",
7
+ "README.md",
8
+ "LICENSE"
9
+ ],
10
+ "scripts": {
11
+ "build": "tsc",
12
+ "dev": "npx ts-node src/agent.ts",
13
+ "search": "npx ts-node src/test-duck.ts",
14
+ "rewrite": "npx ts-node src/tools/query-rewriter.ts",
15
+ "lint": "eslint . --ext .ts",
16
+ "lint:fix": "eslint . --ext .ts --fix",
17
+ "serve": "ts-node src/server.ts",
18
+ "start": "ts-node src/server.ts",
19
+ "eval": "ts-node src/evals/batch-evals.ts",
20
+ "test": "jest --testTimeout=30000",
21
+ "test:watch": "jest --watch",
22
+ "test:docker": "jest src/__tests__/docker.test.ts --testTimeout=300000"
23
+ },
24
+ "keywords": [],
25
+ "author": "Jina AI",
26
+ "license": "Apache-2.0",
27
+ "description": "",
28
+ "dependencies": {
29
+ "@ai-sdk/google": "^1.0.0",
30
+ "@ai-sdk/openai": "^1.1.9",
31
+ "ai": "^4.1.26",
32
+ "axios": "^1.7.9",
33
+ "commander": "^13.1.0",
34
+ "cors": "^2.8.5",
35
+ "dotenv": "^16.4.7",
36
+ "duck-duck-scrape": "^2.2.7",
37
+ "express": "^4.21.2",
38
+ "node-fetch": "^3.3.2",
39
+ "undici": "^7.3.0",
40
+ "zod": "^3.22.4",
41
+ "zod-to-json-schema": "^3.24.1"
42
+ },
43
+ "devDependencies": {
44
+ "@types/commander": "^2.12.0",
45
+ "@types/cors": "^2.8.17",
46
+ "@types/express": "^5.0.0",
47
+ "@types/jest": "^29.5.14",
48
+ "@types/node": "^22.10.10",
49
+ "@types/node-fetch": "^2.6.12",
50
+ "@types/supertest": "^6.0.2",
51
+ "@typescript-eslint/eslint-plugin": "^7.0.1",
52
+ "@typescript-eslint/parser": "^7.0.1",
53
+ "eslint": "^8.56.0",
54
+ "jest": "^29.7.0",
55
+ "supertest": "^7.0.0",
56
+ "ts-jest": "^29.2.5",
57
+ "ts-node": "^10.9.2",
58
+ "typescript": "^5.7.3"
59
+ },
60
+ "optionalDependencies": {
61
+ "@ai-sdk/google-vertex": "^2.1.12"
62
+ }
63
+ }
src/__tests__/agent.test.ts ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import { getResponse } from '../agent';
import { generateObject } from 'ai';
import { search } from '../tools/jina-search';
import { readUrl } from '../tools/read';

// Mock external dependencies
jest.mock('ai', () => ({
  generateObject: jest.fn()
}));

jest.mock('../tools/jina-search', () => ({
  search: jest.fn()
}));

jest.mock('../tools/read', () => ({
  readUrl: jest.fn()
}));

// Smoke-tests the agent loop with all external LLM/search/read calls mocked out,
// so the loop terminates quickly on the mocked 'answer' action.
describe('getResponse', () => {
  beforeEach(() => {
    // Mock generateObject to return a valid response
    (generateObject as jest.Mock).mockResolvedValue({
      object: { action: 'answer', answer: 'mocked response', references: [], think: 'mocked thought' },
      usage: { totalTokens: 100 }
    });

    // Mock search to return empty results
    (search as jest.Mock).mockResolvedValue({
      response: { data: [] }
    });

    // Mock readUrl to return empty content
    (readUrl as jest.Mock).mockResolvedValue({
      response: { data: { content: '', url: 'test-url' } },
      tokens: 0
    });
  });

  afterEach(() => {
    jest.useRealTimers();
    jest.clearAllMocks();
  });

  // Only asserts the shape of the result/context; the mocked 'answer' action
  // should end the loop on the first step.
  it('should handle search action', async () => {
    const result = await getResponse('What is TypeScript?', 50000); // Increased token budget to handle real-world usage
    expect(result.result.action).toBeDefined();
    expect(result.context).toBeDefined();
    expect(result.context.tokenTracker).toBeDefined();
    expect(result.context.actionTracker).toBeDefined();
  }, 30000);
});
src/__tests__/docker.test.ts ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import { exec } from 'child_process';
import { promisify } from 'util';

const execAsync = promisify(exec);

// Integration test: builds the project's Docker image and checks the container
// boots and serves /health. Requires a local Docker daemon; run via test:docker.
describe('Docker build', () => {
  jest.setTimeout(300000); // 5 minutes for build

  it('should build Docker image successfully', async () => {
    const { stderr } = await execAsync('docker build -t node-deepresearch-test .');
    expect(stderr).not.toContain('error');
  });

  it('should start container and respond to health check', async () => {
    // Start container with mock API keys
    await execAsync(
      'docker run -d --name test-container -p 3001:3000 ' +
      '-e GEMINI_API_KEY=mock_key ' +
      '-e JINA_API_KEY=mock_key ' +
      'node-deepresearch-test'
    );

    // Wait for container to start
    await new Promise(resolve => setTimeout(resolve, 5000));

    try {
      // Check if server responds
      const { stdout } = await execAsync('curl -s http://localhost:3001/health');
      expect(stdout).toContain('ok');
    } finally {
      // Cleanup
      await execAsync('docker rm -f test-container').catch(console.error);
    }
  });

  afterAll(async () => {
    // Clean up any leftover containers
    await execAsync('docker rm -f test-container').catch(() => {});
    await execAsync('docker rmi node-deepresearch-test').catch(() => {});
  });
});
src/__tests__/server.test.ts ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import request from 'supertest';
2
+ import { EventEmitter } from 'events';
3
+ import type { Express } from 'express';
4
+
5
+ const TEST_SECRET = 'test-secret';
6
+ let app: Express;
7
+
8
+ describe('/v1/chat/completions', () => {
9
+ jest.setTimeout(120000); // Increase timeout for all tests in this suite
10
+
11
+ beforeEach(async () => {
12
+ // Set up test environment
13
+ process.env.NODE_ENV = 'test';
14
+ process.env.LLM_PROVIDER = 'openai'; // Use OpenAI provider for tests
15
+ process.env.OPENAI_API_KEY = 'test-key';
16
+ process.env.JINA_API_KEY = 'test-key';
17
+
18
+ // Clean up any existing secret
19
+ const existingSecretIndex = process.argv.findIndex(arg => arg.startsWith('--secret='));
20
+ if (existingSecretIndex !== -1) {
21
+ process.argv.splice(existingSecretIndex, 1);
22
+ }
23
+
24
+ // Set up test secret and import server module
25
+ process.argv.push(`--secret=${TEST_SECRET}`);
26
+
27
+ // Import server module (jest.resetModules() is called automatically before each test)
28
+ const { default: serverModule } = await require('../app');
29
+ app = serverModule;
30
+ });
31
+
32
+ afterEach(async () => {
33
+ // Clean up environment variables
34
+ delete process.env.OPENAI_API_KEY;
35
+ delete process.env.JINA_API_KEY;
36
+
37
+ // Clean up any remaining event listeners
38
+ const emitter = EventEmitter.prototype;
39
+ emitter.removeAllListeners();
40
+ emitter.setMaxListeners(emitter.getMaxListeners() + 1);
41
+
42
+ // Clean up test secret
43
+ const secretIndex = process.argv.findIndex(arg => arg.startsWith('--secret='));
44
+ if (secretIndex !== -1) {
45
+ process.argv.splice(secretIndex, 1);
46
+ }
47
+
48
+ // Wait for any pending promises to settle
49
+ await new Promise(resolve => setTimeout(resolve, 500));
50
+
51
+ // Reset module cache to ensure clean state
52
+ jest.resetModules();
53
+ });
54
+ it('should require authentication when secret is set', async () => {
55
+ // Note: secret is already set in beforeEach
56
+
57
+ const response = await request(app)
58
+ .post('/v1/chat/completions')
59
+ .send({
60
+ model: 'test-model',
61
+ messages: [{ role: 'user', content: 'test' }]
62
+ });
63
+ expect(response.status).toBe(401);
64
+ });
65
+
66
+ it('should allow requests without auth when no secret is set', async () => {
67
+ // Remove secret for this test
68
+ const secretIndex = process.argv.findIndex(arg => arg.startsWith('--secret='));
69
+ if (secretIndex !== -1) {
70
+ process.argv.splice(secretIndex, 1);
71
+ }
72
+
73
+ // Reset module cache to ensure clean state
74
+ jest.resetModules();
75
+
76
+ // Reload server module without secret
77
+ const { default: serverModule } = await require('../app');
78
+ app = serverModule;
79
+
80
+ const response = await request(app)
81
+ .post('/v1/chat/completions')
82
+ .send({
83
+ model: 'test-model',
84
+ messages: [{ role: 'user', content: 'test' }]
85
+ });
86
+ expect(response.status).toBe(200);
87
+ });
88
+
89
+ it('should reject requests without user message', async () => {
90
+ const response = await request(app)
91
+ .post('/v1/chat/completions')
92
+ .set('Authorization', `Bearer ${TEST_SECRET}`)
93
+ .send({
94
+ model: 'test-model',
95
+ messages: [{ role: 'developer', content: 'test' }]
96
+ });
97
+ expect(response.status).toBe(400);
98
+ expect(response.body.error).toBe('Last message must be from user');
99
+ });
100
+
101
+ it('should handle non-streaming request', async () => {
102
+ const response = await request(app)
103
+ .post('/v1/chat/completions')
104
+ .set('Authorization', `Bearer ${TEST_SECRET}`)
105
+ .send({
106
+ model: 'test-model',
107
+ messages: [{ role: 'user', content: 'test' }]
108
+ });
109
+ expect(response.status).toBe(200);
110
+ expect(response.body).toMatchObject({
111
+ object: 'chat.completion',
112
+ choices: [{
113
+ message: {
114
+ role: 'assistant'
115
+ }
116
+ }]
117
+ });
118
+ });
119
+
120
+ it('should handle streaming request and track tokens correctly', async () => {
121
+ return new Promise<void>((resolve, reject) => {
122
+ let isDone = false;
123
+ let totalCompletionTokens = 0;
124
+
125
+ const cleanup = () => {
126
+ clearTimeout(timeoutHandle);
127
+ isDone = true;
128
+ resolve();
129
+ };
130
+
131
+ const timeoutHandle = setTimeout(() => {
132
+ if (!isDone) {
133
+ cleanup();
134
+ reject(new Error('Test timed out'));
135
+ }
136
+ }, 30000);
137
+
138
+ request(app)
139
+ .post('/v1/chat/completions')
140
+ .set('Authorization', `Bearer ${TEST_SECRET}`)
141
+ .send({
142
+ model: 'test-model',
143
+ messages: [{ role: 'user', content: 'test' }],
144
+ stream: true
145
+ })
146
+ .buffer(true)
147
+ .parse((res, callback) => {
148
+ const response = res as unknown as {
149
+ on(event: 'data', listener: (chunk: Buffer) => void): void;
150
+ on(event: 'end', listener: () => void): void;
151
+ on(event: 'error', listener: (err: Error) => void): void;
152
+ };
153
+ let responseData = '';
154
+
155
+ response.on('error', (err) => {
156
+ cleanup();
157
+ callback(err, null);
158
+ });
159
+
160
+ response.on('data', (chunk) => {
161
+ responseData += chunk.toString();
162
+ });
163
+
164
+ response.on('end', () => {
165
+ try {
166
+ callback(null, responseData);
167
+ } catch (err) {
168
+ cleanup();
169
+ callback(err instanceof Error ? err : new Error(String(err)), null);
170
+ }
171
+ });
172
+ })
173
+ .end((err, res) => {
174
+ if (err) return reject(err);
175
+
176
+ expect(res.status).toBe(200);
177
+ expect(res.headers['content-type']).toBe('text/event-stream');
178
+
179
+ // Verify stream format and content
180
+ if (isDone) return; // Prevent multiple resolves
181
+
182
+ const responseText = res.body as string;
183
+ const chunks = responseText
184
+ .split('\n\n')
185
+ .filter((line: string) => line.startsWith('data: '))
186
+ .map((line: string) => JSON.parse(line.replace('data: ', '')));
187
+
188
+ // Process all chunks
189
+ expect(chunks.length).toBeGreaterThan(0);
190
+
191
+ // Verify initial chunk format
192
+ expect(chunks[0]).toMatchObject({
193
+ id: expect.any(String),
194
+ object: 'chat.completion.chunk',
195
+ choices: [{
196
+ index: 0,
197
+ delta: { role: 'assistant' },
198
+ logprobs: null,
199
+ finish_reason: null
200
+ }]
201
+ });
202
+
203
+ // Verify content chunks have content
204
+ chunks.slice(1).forEach(chunk => {
205
+ const content = chunk.choices[0].delta.content;
206
+ if (content && content.trim()) {
207
+ totalCompletionTokens += 1; // Count 1 token per chunk as per Vercel convention
208
+ }
209
+ expect(chunk).toMatchObject({
210
+ object: 'chat.completion.chunk',
211
+ choices: [{
212
+ delta: expect.objectContaining({
213
+ content: expect.any(String)
214
+ })
215
+ }]
216
+ });
217
+ });
218
+
219
+ // Verify final chunk format if present
220
+ const lastChunk = chunks[chunks.length - 1];
221
+ if (lastChunk?.choices?.[0]?.finish_reason === 'stop') {
222
+ expect(lastChunk).toMatchObject({
223
+ object: 'chat.completion.chunk',
224
+ choices: [{
225
+ delta: {},
226
+ finish_reason: 'stop'
227
+ }]
228
+ });
229
+ }
230
+
231
+ // Verify we tracked some completion tokens
232
+ expect(totalCompletionTokens).toBeGreaterThan(0);
233
+
234
+ // Clean up and resolve
235
+ if (!isDone) {
236
+ cleanup();
237
+ }
238
+ });
239
+ });
240
+ });
241
+
242
+ it('should track tokens correctly in error response', async () => {
243
+ const response = await request(app)
244
+ .post('/v1/chat/completions')
245
+ .set('Authorization', `Bearer ${TEST_SECRET}`)
246
+ .send({
247
+ model: 'test-model',
248
+ messages: [] // Invalid messages array
249
+ });
250
+
251
+ expect(response.status).toBe(400);
252
+ expect(response.body).toHaveProperty('error');
253
+ expect(response.body.error).toBe('Messages array is required and must not be empty');
254
+
255
+ // Make another request to verify token tracking after error
256
+ const validResponse = await request(app)
257
+ .post('/v1/chat/completions')
258
+ .set('Authorization', `Bearer ${TEST_SECRET}`)
259
+ .send({
260
+ model: 'test-model',
261
+ messages: [{ role: 'user', content: 'test' }]
262
+ });
263
+
264
+ // Verify token tracking still works after error
265
+ expect(validResponse.body.usage).toMatchObject({
266
+ prompt_tokens: expect.any(Number),
267
+ completion_tokens: expect.any(Number),
268
+ total_tokens: expect.any(Number)
269
+ });
270
+
271
+ // Basic token tracking structure should be present
272
+ expect(validResponse.body.usage.total_tokens).toBe(
273
+ validResponse.body.usage.prompt_tokens + validResponse.body.usage.completion_tokens
274
+ );
275
+ });
276
+
277
+ it('should provide token usage in Vercel AI SDK format', async () => {
278
+ const response = await request(app)
279
+ .post('/v1/chat/completions')
280
+ .set('Authorization', `Bearer ${TEST_SECRET}`)
281
+ .send({
282
+ model: 'test-model',
283
+ messages: [{ role: 'user', content: 'test' }]
284
+ });
285
+
286
+ expect(response.status).toBe(200);
287
+ const usage = response.body.usage;
288
+
289
+ expect(usage).toMatchObject({
290
+ prompt_tokens: expect.any(Number),
291
+ completion_tokens: expect.any(Number),
292
+ total_tokens: expect.any(Number)
293
+ });
294
+
295
+ // Basic token tracking structure should be present
296
+ expect(usage.total_tokens).toBe(
297
+ usage.prompt_tokens + usage.completion_tokens
298
+ );
299
+ });
300
+ });
src/agent.ts ADDED
@@ -0,0 +1,774 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import {z, ZodObject} from 'zod';
2
+ import {CoreAssistantMessage, CoreUserMessage} from 'ai';
3
+ import {SEARCH_PROVIDER, STEP_SLEEP} from "./config";
4
+ import {readUrl, removeAllLineBreaks} from "./tools/read";
5
+ import fs from 'fs/promises';
6
+ import {SafeSearchType, search as duckSearch} from "duck-duck-scrape";
7
+ import {braveSearch} from "./tools/brave-search";
8
+ import {rewriteQuery} from "./tools/query-rewriter";
9
+ import {dedupQueries} from "./tools/jina-dedup";
10
+ import {evaluateAnswer, evaluateQuestion} from "./tools/evaluator";
11
+ import {analyzeSteps} from "./tools/error-analyzer";
12
+ import {TokenTracker} from "./utils/token-tracker";
13
+ import {ActionTracker} from "./utils/action-tracker";
14
+ import {StepAction, AnswerAction, KnowledgeItem, EvaluationCriteria} from "./types";
15
+ import {TrackerContext} from "./types";
16
+ import {search} from "./tools/jina-search";
17
+ // import {grounding} from "./tools/grounding";
18
+ import {zodToJsonSchema} from "zod-to-json-schema";
19
+ import {ObjectGeneratorSafe} from "./utils/safe-generator";
20
+
21
+ async function sleep(ms: number) {
22
+ const seconds = Math.ceil(ms / 1000);
23
+ console.log(`Waiting ${seconds}s...`);
24
+ return new Promise(resolve => setTimeout(resolve, ms));
25
+ }
26
+
27
/**
 * Build the zod schema constraining the agent's next step.
 * Only the permitted action types (search/answer/reflect/visit) are included
 * in the `action` enum, and each action's payload field is added as an
 * optional property whose description states when it is required.
 * @param languageStyle target language/style hint embedded in the answer field's description
 */
function getSchema(allowReflect: boolean, allowRead: boolean, allowAnswer: boolean, allowSearch: boolean, languageStyle: string = 'same language as the question') {
    const actions: string[] = [];
    const properties: Record<string, z.ZodTypeAny> = {
        action: z.enum(['placeholder']), // Will update later with actual actions
        think: z.string().describe("Explain why choose this action, what's the thought process behind choosing this action")
    };

    if (allowSearch) {
        actions.push("search");
        properties.searchQuery = z.string().max(30)
            .describe("Required when action='search'. Must be a short, keyword-based query that BM25, tf-idf based search engines can understand. Existing queries must be avoided").optional();
    }

    if (allowAnswer) {
        actions.push("answer");
        properties.answer = z.string()
            .describe(`Required when action='answer'. Must in ${languageStyle}`).optional();
        properties.references = z.array(
            z.object({
                exactQuote: z.string().describe("Exact relevant quote from the document"),
                url: z.string().describe("source URL; must be directly from the context")
            }).required()
        ).describe("Required when action='answer'. Must be an array of references that support the answer, each reference must contain an exact quote and the URL of the document").optional();
    }

    if (allowReflect) {
        actions.push("reflect");
        properties.questionsToAnswer = z.array(
            z.string().describe("each question must be a single line, concise and clear. not composite or compound, less than 20 words.")
        ).max(2)
            .describe("Required when action='reflect'. List of most important questions to fill the knowledge gaps of finding the answer to the original question").optional();
    }

    if (allowRead) {
        actions.push("visit");
        properties.URLTargets = z.array(z.string())
            .max(2)
            .describe("Required when action='visit'. Must be an array of URLs, choose up the most relevant 2 URLs to visit").optional();
    }

    // Update the enum values after collecting all actions
    properties.action = z.enum(actions as [string, ...string[]])
        .describe("Must match exactly one action type");

    return z.object(properties);

}
74
+
75
+
76
/**
 * Build the per-step prompt for the agent LLM.
 *
 * Assembles, in order: a header with the current date and the question, the
 * action diary (`context`), gathered knowledge, failed attempts plus the
 * learned strategy, the list of currently-permitted actions, and a strict
 * JSON-only output instruction. Consecutive blank lines are collapsed via
 * removeExtraLineBreaks before returning.
 *
 * NOTE(review): `allQuestions` is accepted but never referenced in this body —
 * confirm whether callers can drop it or it was meant to appear in the prompt.
 *
 * @param question      the question the agent must answer at this step
 * @param context       diary entries describing the actions taken so far
 * @param allKeywords   search queries already tried (rendered as <bad-queries>)
 * @param badContext    rejected answer attempts with evaluator feedback
 * @param knowledge     Q/A pairs gathered so far, rendered as <knowledge-*>
 * @param allURLs       url -> title map of candidate pages for <action-visit>
 * @param beastMode     when true, appends a permissive <action-answer> block;
 *                      the beast-mode caller passes allowAnswer=false so only
 *                      this permissive block is present
 * @param languageStyle style/language instruction injected into answer rules
 * @returns the fully assembled prompt string
 */
function getPrompt(
  question: string,
  context?: string[],
  allQuestions?: string[],
  allKeywords?: string[],
  allowReflect: boolean = true,
  allowAnswer: boolean = true,
  allowRead: boolean = true,
  allowSearch: boolean = true,
  badContext?: { question: string, answer: string, evaluation: string, recap: string; blame: string; improvement: string; }[],
  knowledge?: KnowledgeItem[],
  allURLs?: Record<string, string>,
  beastMode?: boolean,
  languageStyle?: string
): string {
  const sections: string[] = [];
  const actionSections: string[] = [];

  // Add header section
  sections.push(`Current date: ${new Date().toUTCString()}

You are an advanced AI research agent from Jina AI. You are specialized in multistep reasoning. Using your training data and prior lessons learned, answer the following question with absolute certainty:

<question>
${question}
</question>
`);

  // Add context section if exists
  if (context?.length) {
    sections.push(`
You have conducted the following actions:
<context>
${context.join('\n')}

</context>
`);
  }

  // Add knowledge section if exists
  if (knowledge?.length) {
    const knowledgeItems = knowledge
      .map((k, i) => `
<knowledge-${i + 1}>
<question>
${k.question}
</question>
<answer>
${k.answer}
</answer>
${k.references ? `
<references>
${JSON.stringify(k.references)}
</references>
` : ''}
</knowledge-${i + 1}>
`)
      .join('\n\n');

    sections.push(`
You have successfully gathered some knowledge which might be useful for answering the original question. Here is the knowledge you have gathered so far:
<knowledge>

${knowledgeItems}

</knowledge>
`);
  }

  // Add bad context section if exists
  if (badContext?.length) {
    const attempts = badContext
      .map((c, i) => `
<attempt-${i + 1}>
- Question: ${c.question}
- Answer: ${c.answer}
- Reject Reason: ${c.evaluation}
- Actions Recap: ${c.recap}
- Actions Blame: ${c.blame}
</attempt-${i + 1}>
`)
      .join('\n\n');

    const learnedStrategy = badContext.map(c => c.improvement).join('\n');

    sections.push(`
Your have tried the following actions but failed to find the answer to the question:
<bad-attempts>

${attempts}

</bad-attempts>

Based on the failed attempts, you have learned the following strategy:
<learned-strategy>
${learnedStrategy}
</learned-strategy>
`);
  }

  // Build actions section
  // Each allow* flag gates whether the corresponding <action-*> block is
  // offered to the model; the same flags drive getSchema so the prompt and
  // the structured-output schema stay in sync.

  if (allowRead) {
    let urlList = '';
    if (allURLs && Object.keys(allURLs).length > 0) {
      urlList = Object.entries(allURLs)
        .map(([url, desc]) => ` + "${url}": "${desc}"`)
        .join('\n');
    }

    actionSections.push(`
<action-visit>
- This allows you to access the full content behind any URLs.
- If the <question> contains a URL, you must visit the URL to gather more information.
${urlList ? `
- Visit any URLs from below to gather external knowledge, choose the most relevant URLs that might contain the answer
<url-list>
${urlList}
</url-list>
`.trim() : ''}
</action-visit>
`);
  }

  if (allowSearch) {

    actionSections.push(`
<action-search>
${allKeywords?.length ? `
- Avoid the searched queries below as they do not give any useful information, you need to think out of the box and propose queries from a completely different angle:
<bad-queries>
${allKeywords.join('\n')}
</bad-queries>
`.trim() : ''}
- Propose some unique new queries that might help you find the answer to the question
- Focus on solving one specific aspect of the original question
- Only use keywords, not full sentences
</action-search>
`);
  }

  if (allowAnswer) {
    actionSections.push(`
<action-answer>
- If <question> is a simple greeting, chit-chat, or general knowledge, provide the answer directly;
- Must provide "references" and each must specify "exactQuote" and "url";
- In the answer, use markdown footnote syntax like [^1], [^2] to refer to the references;
- Responses must be definitive (no ambiguity, uncertainty, or disclaimers) and in the style of ${languageStyle};
- Provide final response only when 100% certain;${allowReflect ? '\n- If doubts remain, use <action-reflect> instead' : ''}
</action-answer>
`);
  }

  // Beast mode appends a second, much more permissive <action-answer> block
  // that encourages an educated guess; the beast-mode caller disables the
  // strict allowAnswer block above, so the two do not appear together.
  if (beastMode) {
    actionSections.push(`
<action-answer>
- Any answer is better than no answer
- Partial answers are allowed, but make sure they are based on the context and knowledge you have gathered
- When uncertain, educated guess based on the context and knowledge is allowed and encouraged.
- Responses must be definitive (no ambiguity, uncertainty, or disclaimers)
</action-answer>
`);
  }

  if (allowReflect) {
    actionSections.push(`
<action-reflect>
- Perform critical analysis through hypothetical scenarios or systematic breakdowns
- Identify knowledge gaps and formulate essential clarifying questions
- Questions must be:
- Original (not variations of existing questions)
- Focused on single concepts
- Under 20 words
- Non-compound/non-complex
</action-reflect>
`);
  }

  sections.push(`
Based on the current context, you must choose one of the following actions:
<actions>
${actionSections.join('\n\n')}
</actions>
`);

  // Add footer
  sections.push(`Respond exclusively in valid JSON format matching exact JSON schema.

Critical Requirements:
- Include ONLY ONE action type
- Never add unsupported keys
- Exclude all non-JSON text, markdown, or explanations
- Maintain strict JSON syntax`);

  return removeExtraLineBreaks(sections.join('\n\n'));
}
272
+
273
+ const removeExtraLineBreaks = (text: string) => {
274
+ return text.replace(/\n{2,}/gm, '\n\n');
275
+ }
276
+
277
// Session-wide trace: every step taken in the current session, including the
// ones that led to wrong results. Written by updateContext() during the agent
// loop and persisted by storeContext().
const allContext: StepAction[] = []; // all steps in the current session, including those leads to wrong results

// Append a finished step (spread together with its outcome fields, hence the
// loose `any` type) to the session-wide trace above.
function updateContext(step: any) {
  allContext.push(step)
}
282
+
283
+
284
+ function removeHTMLtags(text: string) {
285
+ return text.replace(/<[^>]*>?/gm, '');
286
+ }
287
+
288
+
289
/**
 * Run the multistep research agent loop for `question` until it produces a
 * final answer (or exhausts its budget and falls back to "beast mode").
 *
 * Loop invariants:
 * - runs while total token usage stays under `tokenBudget` AND
 *   `badAttempts <= maxBadAttempts`;
 * - `gaps` is the work queue of open questions (the original question is
 *   re-appended whenever new sub-questions are queued);
 * - each iteration builds a prompt + schema restricted to the currently
 *   allowed actions, asks the model for one structured action, and executes it.
 *
 * If the loop exits without a final answer, a last "beast mode" generation is
 * forced where only a permissive answer action is allowed.
 *
 * @param question        the user question to research
 * @param tokenBudget     max total tokens the whole session may consume
 * @param maxBadAttempts  rejected-answer attempts tolerated before giving up
 * @param existingContext optional trackers to reuse (e.g. across server calls)
 * @param historyMessages prior chat turns; user/assistant pairs are imported
 *                        as 'chat-history' knowledge
 * @returns the final step (answer) plus the tracker context
 */
export async function getResponse(question: string,
  tokenBudget: number = 1_000_000,
  maxBadAttempts: number = 3,
  existingContext?: Partial<TrackerContext>,
  historyMessages?: Array<CoreAssistantMessage | CoreUserMessage>
): Promise<{ result: StepAction; context: TrackerContext }> {
  const context: TrackerContext = {
    tokenTracker: existingContext?.tokenTracker || new TokenTracker(tokenBudget),
    actionTracker: existingContext?.actionTracker || new ActionTracker()
  };
  let step = 0;       // steps since the last bad attempt (reset on rejection)
  let totalStep = 0;  // monotonically increasing step counter for the session
  let badAttempts = 0;
  let schema: ZodObject<any> = getSchema(true, true, true, true)
  question = question.trim()
  const gaps: string[] = [question]; // All questions to be answered including the original question
  const allQuestions = [question];
  const allKeywords = [];
  const allKnowledge: KnowledgeItem[] = []; // knowledge are intermediate questions that are answered
  // Import chat history: each user message followed by an assistant message
  // becomes a Q/A knowledge item and the question joins allQuestions.
  historyMessages?.forEach((message, i) => {
    if (message.role === 'user' && message.content && historyMessages[i + 1]?.role === 'assistant') {
      allQuestions.push(message.content as string)
      allKnowledge.push({
        question: message.content as string,
        answer: (historyMessages[i + 1]?.content || '') as string,
        type: 'chat-history',
        updated: new Date().toISOString()
      });
    }
  })

  const badContext = [];
  let diaryContext = [];
  let allowAnswer = true;
  let allowSearch = true;
  let allowRead = true;
  let allowReflect = true;
  let prompt = '';
  let thisStep: StepAction = {action: 'answer', answer: '', references: [], think: '', isFinal: false};

  const allURLs: Record<string, string> = {};       // url -> title, candidates for 'visit'
  const visitedURLs: string[] = [];                 // urls already read, never revisited
  const evaluationMetrics: Record<string, EvaluationCriteria> = {};
  while (context.tokenTracker.getTotalUsage().totalTokens < tokenBudget && badAttempts <= maxBadAttempts) {
    // add 1s delay to avoid rate limiting
    await sleep(STEP_SLEEP);
    step++;
    totalStep++;
    const budgetPercentage = (context.tokenTracker.getTotalUsage().totalTokens / tokenBudget * 100).toFixed(2);
    console.log(`Step ${totalStep} / Budget used ${budgetPercentage}%`);
    console.log('Gaps:', gaps);
    // Only allow further reflection when the work queue is nearly empty,
    // so the agent doesn't keep spawning sub-questions.
    allowReflect = allowReflect && (gaps.length <= 1);
    const currentQuestion = gaps.length > 0 ? gaps.shift()! : question
    // Evaluation criteria are computed once per distinct question and cached.
    if (!evaluationMetrics[currentQuestion]) {
      evaluationMetrics[currentQuestion] = await evaluateQuestion(currentQuestion, context.tokenTracker)
    }

    // update all urls with buildURLMap
    // allowRead = allowRead && (Object.keys(allURLs).length > 0);
    allowSearch = allowSearch && (Object.keys(allURLs).length < 50); // disable search when too many urls already

    // generate prompt for this step
    prompt = getPrompt(
      currentQuestion,
      diaryContext,
      allQuestions,
      allKeywords,
      allowReflect,
      allowAnswer,
      allowRead,
      allowSearch,
      badContext,
      allKnowledge,
      allURLs,
      false,
      evaluationMetrics[currentQuestion].languageStyle
    );
    schema = getSchema(allowReflect, allowRead, allowAnswer, allowSearch,
      evaluationMetrics[currentQuestion].languageStyle)
    const generator = new ObjectGeneratorSafe(context.tokenTracker);
    const result = await generator.generateObject({
      model: 'agent',
      schema,
      prompt,
    });
    thisStep = result.object as StepAction;
    // print allowed and chose action
    const actionsStr = [allowSearch, allowRead, allowAnswer, allowReflect].map((a, i) => a ? ['search', 'read', 'answer', 'reflect'][i] : null).filter(a => a).join(', ');
    console.log(`${thisStep.action} <- [${actionsStr}]`);
    console.log(thisStep)

    context.actionTracker.trackAction({totalStep, thisStep, gaps, badAttempts});

    // reset allowAnswer to true
    // (all permissions re-enable by default; the branches below selectively
    // disable one for the *next* step when the current action was fruitless)
    allowAnswer = true;
    allowReflect = true;
    allowRead = true;
    allowSearch = true;

    // execute the step and action
    if (thisStep.action === 'answer') {
      if (step === 1) {
        // LLM is so confident and answer immediately, skip all evaluations
        thisStep.isFinal = true;
        break
      }

      updateContext({
        totalStep,
        question: currentQuestion,
        ...thisStep,
      });

      const {response: evaluation} = await evaluateAnswer(currentQuestion, thisStep,
        evaluationMetrics[currentQuestion], context.tokenTracker);

      if (currentQuestion.trim() === question) {
        // Answering the ORIGINAL question: pass ends the session, fail counts
        // as a bad attempt (and resets the diary + per-attempt step counter).
        if (evaluation.pass) {
          diaryContext.push(`
At step ${step}, you took **answer** action and finally found the answer to the original question:

Original question:
${currentQuestion}

Your answer:
${thisStep.answer}

The evaluator thinks your answer is good because:
${evaluation.think}

Your journey ends here. You have successfully answered the original question. Congratulations! 🎉
`);
          thisStep.isFinal = true;
          break
        } else {
          if (badAttempts >= maxBadAttempts) {
            thisStep.isFinal = false;
            break
          } else {
            diaryContext.push(`
At step ${step}, you took **answer** action but evaluator thinks it is not a good answer:

Original question:
${currentQuestion}

Your answer:
${thisStep.answer}

The evaluator thinks your answer is bad because:
${evaluation.think}
`);
            // store the bad context and reset the diary context
            const {response: errorAnalysis} = await analyzeSteps(diaryContext, context.tokenTracker);

            allKnowledge.push({
              question: currentQuestion,
              answer: thisStep.answer,
              references: thisStep.references,
              type: 'qa',
              updated: new Date().toISOString()
            });

            badContext.push({
              question: currentQuestion,
              answer: thisStep.answer,
              evaluation: evaluation.think,
              ...errorAnalysis
            });

            if (errorAnalysis.questionsToAnswer) {
              // reranker? maybe
              gaps.push(...errorAnalysis.questionsToAnswer.slice(0, 2));
              allQuestions.push(...errorAnalysis.questionsToAnswer.slice(0, 2));
              gaps.push(question); // always keep the original question in the gaps
            }

            badAttempts++;
            allowAnswer = false; // disable answer action in the immediate next step
            diaryContext = [];
            step = 0;
          }
        }
      } else if (evaluation.pass) {
        // Answered a SUB-question successfully: record it as knowledge and
        // keep looping toward the original question.
        diaryContext.push(`
At step ${step}, you took **answer** action. You found a good answer to the sub-question:

Sub-question:
${currentQuestion}

Your answer:
${thisStep.answer}

The evaluator thinks your answer is good because:
${evaluation.think}

Although you solved a sub-question, you still need to find the answer to the original question. You need to keep going.
`);
        allKnowledge.push({
          question: currentQuestion,
          answer: thisStep.answer,
          references: thisStep.references,
          type: 'qa',
          updated: new Date().toISOString()
        });
      }
    } else if (thisStep.action === 'reflect' && thisStep.questionsToAnswer) {
      let newGapQuestions = thisStep.questionsToAnswer
      const oldQuestions = newGapQuestions;
      // Drop proposed sub-questions that duplicate anything already asked.
      newGapQuestions = (await dedupQueries(newGapQuestions, allQuestions, context.tokenTracker)).unique_queries;
      if (newGapQuestions.length > 0) {
        // found new gap questions
        diaryContext.push(`
At step ${step}, you took **reflect** and think about the knowledge gaps. You found some sub-questions are important to the question: "${currentQuestion}"
You realize you need to know the answers to the following sub-questions:
${newGapQuestions.map((q: string) => `- ${q}`).join('\n')}

You will now figure out the answers to these sub-questions and see if they can help you find the answer to the original question.
`);
        gaps.push(...newGapQuestions.slice(0, 2));
        allQuestions.push(...newGapQuestions.slice(0, 2));
        gaps.push(question); // always keep the original question in the gaps
      } else {
        diaryContext.push(`
At step ${step}, you took **reflect** and think about the knowledge gaps. You tried to break down the question "${currentQuestion}" into gap-questions like this: ${oldQuestions.join(', ')}
But then you realized you have asked them before. You decided to to think out of the box or cut from a completely different angle.
`);
        updateContext({
          totalStep,
          ...thisStep,
          result: 'You have tried all possible questions and found no useful information. You must think out of the box or different angle!!!'
        });

        allowReflect = false;
      }
    } else if (thisStep.action === 'search' && thisStep.searchQuery) {
      // rewrite queries
      let {queries: keywordsQueries} = await rewriteQuery(thisStep, context.tokenTracker);

      const oldKeywords = keywordsQueries;
      // avoid existing searched queries
      const {unique_queries: dedupedQueries} = await dedupQueries(keywordsQueries, allKeywords, context.tokenTracker);
      keywordsQueries = dedupedQueries;

      if (keywordsQueries.length > 0) {
        // let googleGrounded = '';
        const searchResults = [];
        context.actionTracker.trackThink(`Let me search for "${keywordsQueries.join(', ')}" to gather more information.`)
        for (const query of keywordsQueries) {
          console.log(`Search query: ${query}`);

          let results;

          // Dispatch on the configured provider; every branch normalizes to
          // an object with a `results` array of {title, url, description}.
          switch (SEARCH_PROVIDER) {
            case 'jina':
              // use jinaSearch
              results = {results: (await search(query, context.tokenTracker)).response?.data || []};
              // if (LLM_PROVIDER === 'gemini') {
              //   googleGrounded = await grounding(query, context.tokenTracker);
              // }
              break;
            case 'duck':
              results = await duckSearch(query, {safeSearch: SafeSearchType.STRICT});
              break;
            case 'brave':
              try {
                const {response} = await braveSearch(query);
                results = {
                  results: response.web?.results?.map(r => ({
                    title: r.title,
                    url: r.url,
                    description: r.description
                  })) || []
                };
              } catch (error) {
                // Brave failures degrade to an empty result set rather than
                // aborting the whole agent loop.
                console.error('Brave search failed:', error);
                results = {results: []};
              }
              await sleep(STEP_SLEEP)
              break;
            default:
              results = {results: []};
          }
          const minResults = results.results.map(r => ({
            title: r.title,
            url: r.url,
            description: r.description
          }));

          // Merge discovered URLs into the visit-candidate map (url -> title).
          Object.assign(allURLs, Object.fromEntries(
            minResults.map(r => [r.url, r.title])
          ));
          searchResults.push({query, results: minResults});
          allKeywords.push(query);
        }

        allKnowledge.push({
          question: `What do Internet say about ${thisStep.searchQuery}?`,
          answer: removeHTMLtags(searchResults.map(r => r.results.map(r => r.description).join('; ')).join('; ')),
          // answer: googleGrounded + removeHTMLtags(searchResults.map(r => r.results.map(r => r.description).join('; ')).join('; ')),
          type: 'side-info',
          updated: new Date().toISOString()
        });

        diaryContext.push(`
At step ${step}, you took the **search** action and look for external information for the question: "${currentQuestion}".
In particular, you tried to search for the following keywords: "${keywordsQueries.join(', ')}".
You found quite some information and add them to your URL list and **visit** them later when needed.
`);

        updateContext({
          totalStep,
          question: currentQuestion,
          ...thisStep,
          result: searchResults
        });
      } else {
        diaryContext.push(`
At step ${step}, you took the **search** action and look for external information for the question: "${currentQuestion}".
In particular, you tried to search for the following keywords: ${oldKeywords.join(', ')}.
But then you realized you have already searched for these keywords before.
You decided to think out of the box or cut from a completely different angle.
`);


        updateContext({
          totalStep,
          ...thisStep,
          result: 'You have tried all possible queries and found no new information. You must think out of the box or different angle!!!'
        });

        allowSearch = false;
      }
    } else if (thisStep.action === 'visit' && thisStep.URLTargets?.length) {

      let uniqueURLs = thisStep.URLTargets;
      if (visitedURLs.length > 0) {
        // check duplicate urls
        uniqueURLs = uniqueURLs.filter((url: string) => !visitedURLs.includes(url));
      }

      if (uniqueURLs.length > 0) {
        context.actionTracker.trackThink(`Let me read ${uniqueURLs.join(', ')} to gather more information.`);
        // Read all target URLs concurrently; a failed read logs and yields
        // undefined in urlResults (hence the r?.url below).
        const urlResults = await Promise.all(
          uniqueURLs.map(async (url: string) => {
            try {
              const {response} = await readUrl(url, context.tokenTracker);
              allKnowledge.push({
                question: `What is in ${response.data?.url || 'the URL'}?`,
                answer: removeAllLineBreaks(response.data?.content || 'No content available'),
                references: [response.data?.url],
                type: 'url',
                updated: new Date().toISOString()
              });
              visitedURLs.push(url);
              delete allURLs[url]; // no longer a visit candidate
              return {url, result: response};
            } catch (error) {
              console.error('Error reading URL:', error);
            }
          })
        );
        diaryContext.push(`
At step ${step}, you took the **visit** action and deep dive into the following URLs:
${urlResults.map(r => r?.url).join('\n')}
You found some useful information on the web and add them to your knowledge for future reference.
`);
        updateContext({
          totalStep,
          question: currentQuestion,
          ...thisStep,
          result: urlResults
        });
      } else {

        diaryContext.push(`
At step ${step}, you took the **visit** action and try to visit the following URLs:
${thisStep.URLTargets.join('\n')}
But then you realized you have already visited these URLs and you already know very well about their contents.

You decided to think out of the box or cut from a completely different angle.`);

        updateContext({
          totalStep,
          ...thisStep,
          result: 'You have visited all possible URLs and found no new information. You must think out of the box or different angle!!!'
        });

        allowRead = false;
      }
    }

    await storeContext(prompt, schema, [allContext, allKeywords, allQuestions, allKnowledge], totalStep);
  }

  await storeContext(prompt, schema, [allContext, allKeywords, allQuestions, allKnowledge], totalStep);
  if (!(thisStep as AnswerAction).isFinal) {
    console.log('Enter Beast mode!!!')
    // any answer is better than no answer, humanity last resort
    // All regular actions are disabled; only the permissive beast-mode
    // <action-answer> block is offered, and the result is forced final.
    step++;
    totalStep++;
    const prompt = getPrompt(
      question,
      diaryContext,
      allQuestions,
      allKeywords,
      false,
      false,
      false,
      false,
      badContext,
      allKnowledge,
      allURLs,
      true,
      evaluationMetrics[question]?.languageStyle || 'same language as the question'
    );

    schema = getSchema(false, false, true, false,
      evaluationMetrics[question]?.languageStyle || 'same language as the question');
    const generator = new ObjectGeneratorSafe(context.tokenTracker);
    const result = await generator.generateObject({
      model: 'agentBeastMode',
      schema,
      prompt,
    });
    thisStep = result.object as AnswerAction;
    (thisStep as AnswerAction).isFinal = true;
    context.actionTracker.trackAction({totalStep, thisStep, gaps, badAttempts});
  }
  console.log(thisStep)

  await storeContext(prompt, schema, [allContext, allKeywords, allQuestions, allKnowledge], totalStep);
  return {result: thisStep, context};

}
726
+
727
+ async function storeContext(prompt: string, schema: any, memory: any[][], step: number) {
728
+ if ((process as any).asyncLocalContext?.available?.()) {
729
+ const [context, keywords, questions, knowledge] = memory;
730
+ (process as any).asyncLocalContext.ctx.promptContext = {
731
+ prompt,
732
+ schema,
733
+ context,
734
+ keywords,
735
+ questions,
736
+ knowledge,
737
+ step
738
+ };
739
+ return;
740
+ }
741
+
742
+ try {
743
+ await fs.writeFile(`prompt-${step}.txt`, `
744
+ Prompt:
745
+ ${prompt}
746
+
747
+ JSONSchema:
748
+ ${JSON.stringify(zodToJsonSchema(schema), null, 2)}
749
+ `);
750
+ const [context, keywords, questions, knowledge] = memory;
751
+ await fs.writeFile('context.json', JSON.stringify(context, null, 2));
752
+ await fs.writeFile('queries.json', JSON.stringify(keywords, null, 2));
753
+ await fs.writeFile('questions.json', JSON.stringify(questions, null, 2));
754
+ await fs.writeFile('knowledge.json', JSON.stringify(knowledge, null, 2));
755
+ } catch (error) {
756
+ console.error('Context storage failed:', error);
757
+ }
758
+ }
759
+
760
+
761
+ export async function main() {
762
+ const question = process.argv[2] || "";
763
+ const {
764
+ result: finalStep,
765
+ context: tracker
766
+ } = await getResponse(question) as { result: AnswerAction; context: TrackerContext };
767
+ console.log('Final Answer:', finalStep.answer);
768
+
769
+ tracker.tokenTracker.printSummary();
770
+ }
771
+
772
// Allow running this module directly from the CLI (e.g. `node agent.js "question"`);
// when imported as a library, main() is not invoked.
if (require.main === module) {
  main().catch(console.error);
}
src/app.ts ADDED
@@ -0,0 +1,646 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import express, {Request, Response, RequestHandler} from 'express';
2
+ import cors from 'cors';
3
+ import {getResponse} from './agent';
4
+ import {
5
+ TrackerContext,
6
+ ChatCompletionRequest,
7
+ ChatCompletionResponse,
8
+ ChatCompletionChunk,
9
+ AnswerAction,
10
+ Model, StepAction
11
+ } from './types';
12
+ import {TokenTracker} from "./utils/token-tracker";
13
+ import {ActionTracker} from "./utils/action-tracker";
14
+
15
+ const app = express();
16
+
17
+ // Get secret from command line args for optional authentication
18
+ const secret = process.argv.find(arg => arg.startsWith('--secret='))?.split('=')[1];
19
+
20
+ app.use(cors());
21
+ app.use(express.json({
22
+ limit: '10mb'
23
+ }));
24
+
25
+ // Add health check endpoint for Docker container verification
26
+ app.get('/health', (req, res) => {
27
+ res.json({status: 'ok'});
28
+ });
29
+
30
+ function buildMdFromAnswer(answer: AnswerAction) {
31
+ if (!answer.references?.length || !answer.references.some(ref => ref.url.startsWith('http'))) {
32
+ return answer.answer;
33
+ }
34
+
35
+ const references = answer.references.map((ref, i) => {
36
+ const escapedQuote = ref.exactQuote
37
+ .replace(/([[\]_*`])/g, '\\$1')
38
+ .replace(/\n/g, ' ')
39
+ .trim();
40
+
41
+ return `[^${i + 1}]: [${escapedQuote}](${ref.url})`;
42
+ }).join('\n\n');
43
+
44
+ return `
45
+ ${answer.answer.replace(/\(REF_(\d+)\)/g, (_, num) => `[^${num}]`)}
46
+
47
+
48
+ ${references}
49
+
50
+ `.trim();
51
+ }
52
+
53
+ async function* streamTextNaturally(text: string, streamingState: StreamingState) {
54
+ // Split text into chunks that preserve CJK characters, URLs, and regular words
55
+ const chunks = splitTextIntoChunks(text);
56
+ let burstMode = false;
57
+ let consecutiveShortItems = 0;
58
+
59
+ for (const chunk of chunks) {
60
+ if (!streamingState.currentlyStreaming) {
61
+ yield chunks.slice(chunks.indexOf(chunk)).join('');
62
+ return;
63
+ }
64
+
65
+ const delay = calculateDelay(chunk, burstMode);
66
+
67
+ // Handle consecutive short items
68
+ if (getEffectiveLength(chunk) <= 3 && chunk.trim().length > 0) {
69
+ consecutiveShortItems++;
70
+ if (consecutiveShortItems >= 3) {
71
+ burstMode = true;
72
+ }
73
+ } else {
74
+ consecutiveShortItems = 0;
75
+ burstMode = false;
76
+ }
77
+
78
+ await new Promise(resolve => setTimeout(resolve, delay));
79
+ yield chunk;
80
+ }
81
+ }
82
+
83
+ function splitTextIntoChunks(text: string): string[] {
84
+ const chunks: string[] = [];
85
+ let currentChunk = '';
86
+ let inURL = false;
87
+
88
+ const pushCurrentChunk = () => {
89
+ if (currentChunk) {
90
+ chunks.push(currentChunk);
91
+ currentChunk = '';
92
+ }
93
+ };
94
+
95
+ for (let i = 0; i < text.length; i++) {
96
+ const char = text[i];
97
+ const nextChar = text[i + 1] || '';
98
+
99
+ // URL detection
100
+ if (char === 'h' && text.slice(i, i + 8).match(/https?:\/\//)) {
101
+ pushCurrentChunk();
102
+ inURL = true;
103
+ }
104
+
105
+ if (inURL) {
106
+ currentChunk += char;
107
+ // End of URL detection (whitespace or certain punctuation)
108
+ if (/[\s\])}"']/.test(nextChar) || i === text.length - 1) {
109
+ pushCurrentChunk();
110
+ inURL = false;
111
+ }
112
+ continue;
113
+ }
114
+
115
+ // CJK character detection (including kana and hangul)
116
+ if (/[\u4e00-\u9fff\u3040-\u30ff\uac00-\ud7af]/.test(char)) {
117
+ pushCurrentChunk();
118
+ chunks.push(char);
119
+ continue;
120
+ }
121
+
122
+ // Whitespace handling
123
+ if (/\s/.test(char)) {
124
+ pushCurrentChunk();
125
+ chunks.push(char);
126
+ continue;
127
+ }
128
+
129
+ // Regular word building
130
+ currentChunk += char;
131
+
132
+ // Break on punctuation
133
+ if (/[.!?,;:]/.test(nextChar)) {
134
+ pushCurrentChunk();
135
+ }
136
+ }
137
+
138
+ pushCurrentChunk();
139
+ return chunks.filter(chunk => chunk !== '');
140
+ }
141
+
142
+ function calculateDelay(chunk: string, burstMode: boolean): number {
143
+ const trimmedChunk = chunk.trim();
144
+
145
+ // Handle whitespace
146
+ if (trimmedChunk.length === 0) {
147
+ return Math.random() * 20 + 10;
148
+ }
149
+
150
+ // Special handling for URLs
151
+ if (chunk.match(/^https?:\/\//)) {
152
+ return Math.random() * 50 + 100; // Slower typing for URLs
153
+ }
154
+
155
+ // Special handling for CJK characters
156
+ if (/^[\u4e00-\u9fff\u3040-\u30ff\uac00-\ud7af]$/.test(chunk)) {
157
+ return Math.random() * 100 + 150; // Longer delay for individual CJK characters
158
+ }
159
+
160
+ // Base delay calculation
161
+ let baseDelay;
162
+ if (burstMode) {
163
+ baseDelay = Math.random() * 30 + 20;
164
+ } else {
165
+ const effectiveLength = getEffectiveLength(chunk);
166
+ const perCharacterDelay = Math.max(10, 40 - effectiveLength * 2);
167
+ baseDelay = Math.random() * perCharacterDelay + perCharacterDelay;
168
+ }
169
+
170
+ // Add variance based on chunk characteristics
171
+ if (/[A-Z]/.test(chunk[0])) {
172
+ baseDelay += Math.random() * 20 + 10;
173
+ }
174
+
175
+ if (/[^a-zA-Z\s]/.test(chunk)) {
176
+ baseDelay += Math.random() * 30 + 15;
177
+ }
178
+
179
+ // Add pauses for punctuation
180
+ if (/[.!?]$/.test(chunk)) {
181
+ baseDelay += Math.random() * 350 + 200;
182
+ } else if (/[,;:]$/.test(chunk)) {
183
+ baseDelay += Math.random() * 150 + 100;
184
+ }
185
+
186
+ return baseDelay;
187
+ }
188
+
189
+ function getEffectiveLength(chunk: string): number {
190
+ // Count CJK characters as 2 units
191
+ const cjkCount = (chunk.match(/[\u4e00-\u9fff\u3040-\u30ff\uac00-\ud7af]/g) || []).length;
192
+ const regularCount = chunk.length - cjkCount;
193
+ return regularCount + (cjkCount * 2);
194
+ }
195
+
196
+ // Helper function to emit remaining content immediately
197
+ async function emitRemainingContent(
198
+ res: Response,
199
+ requestId: string,
200
+ created: number,
201
+ model: string,
202
+ content: string,
203
+ ) {
204
+ if (!content) return;
205
+
206
+ const chunk: ChatCompletionChunk = {
207
+ id: requestId,
208
+ object: 'chat.completion.chunk',
209
+ created,
210
+ model: model,
211
+ system_fingerprint: 'fp_' + requestId,
212
+ choices: [{
213
+ index: 0,
214
+ delta: {content},
215
+ logprobs: null,
216
+ finish_reason: null
217
+ }],
218
+ };
219
+ res.write(`data: ${JSON.stringify(chunk)}\n\n`);
220
+ }
221
+
222
// Mutable state shared between the natural-typing generator and the request
// handler driving it.
interface StreamingState {
  // Setting this false aborts streamTextNaturally, which flushes the rest at once.
  currentlyStreaming: boolean;
  // The generator currently emitting text, if any.
  currentGenerator: AsyncGenerator<string> | null;
  // Text not yet flushed to the client; completeCurrentStreaming emits it in one chunk.
  remainingContent: string;
  // Presumably a guard so only one emit loop runs at a time — usage not visible here; verify against the handler.
  isEmitting: boolean;
  // Presumably pending content waiting to be streamed, with resolve() called once emitted — verify against the handler.
  queue: { content: string; resolve: () => void }[];
  // Presumably whether the queue-drain loop is active — verify against the handler.
  processingQueue: boolean;
}
230
+
231
+ function getTokenBudgetAndMaxAttempts(
232
+ reasoningEffort: 'low' | 'medium' | 'high' | null = 'medium',
233
+ maxCompletionTokens: number | null = null
234
+ ): { tokenBudget: number, maxBadAttempts: number } {
235
+ if (maxCompletionTokens !== null) {
236
+ return {
237
+ tokenBudget: maxCompletionTokens,
238
+ maxBadAttempts: 3 // Default to medium setting for max attempts
239
+ };
240
+ }
241
+
242
+ switch (reasoningEffort) {
243
+ case 'low':
244
+ return {tokenBudget: 100000, maxBadAttempts: 1};
245
+ case 'high':
246
+ return {tokenBudget: 1000000, maxBadAttempts: 3};
247
+ case 'medium':
248
+ default:
249
+ return {tokenBudget: 500000, maxBadAttempts: 2};
250
+ }
251
+ }
252
+
253
+
254
+ async function completeCurrentStreaming(
255
+ streamingState: StreamingState,
256
+ res: Response,
257
+ requestId: string,
258
+ created: number,
259
+ model: string
260
+ ) {
261
+ if (streamingState.currentlyStreaming && streamingState.remainingContent) {
262
+ // Force completion of current streaming
263
+ await emitRemainingContent(
264
+ res,
265
+ requestId,
266
+ created,
267
+ model,
268
+ streamingState.remainingContent
269
+ );
270
+ // Reset streaming state
271
+ streamingState.currentlyStreaming = false;
272
+ streamingState.remainingContent = '';
273
+ streamingState.currentGenerator = null;
274
+ }
275
+ }
276
+
277
+ // OpenAI-compatible chat completions endpoint
278
+ // Models API endpoints
279
+ app.get('/v1/models', (async (_req: Request, res: Response) => {
280
+ const models: Model[] = [{
281
+ id: 'jina-deepsearch-v1',
282
+ object: 'model',
283
+ created: 1686935002,
284
+ owned_by: 'jina-ai'
285
+ }];
286
+
287
+ res.json({
288
+ object: 'list',
289
+ data: models
290
+ });
291
+ }) as RequestHandler);
292
+
293
+ app.get('/v1/models/:model', (async (req: Request, res: Response) => {
294
+ const modelId = req.params.model;
295
+
296
+ if (modelId === 'jina-deepsearch-v1') {
297
+ res.json({
298
+ id: 'jina-deepsearch-v1',
299
+ object: 'model',
300
+ created: 1686935002,
301
+ owned_by: 'jina-ai'
302
+ });
303
+ } else {
304
+ res.status(404).json({
305
+ error: {
306
+ message: `Model '${modelId}' not found`,
307
+ type: 'invalid_request_error',
308
+ param: null,
309
+ code: 'model_not_found'
310
+ }
311
+ });
312
+ }
313
+ }) as RequestHandler);
314
+
315
+ if (secret) {
316
+ // Check authentication only if secret is set
317
+ app.use((req, res, next) => {
318
+ const authHeader = req.headers.authorization;
319
+ if (!authHeader || !authHeader.startsWith('Bearer ') || authHeader.split(' ')[1] !== secret) {
320
+ console.log('[chat/completions] Unauthorized request');
321
+ res.status(401).json({error: 'Unauthorized'});
322
+ return;
323
+ }
324
+
325
+ return next();
326
+ });
327
+ }
328
+
329
/**
 * Drain the streaming queue one item at a time, emitting each item's content
 * word-by-word as SSE chat.completion.chunk events, followed by a newline
 * chunk. The `processingQueue` flag guarantees a single drain loop even when
 * this is called concurrently from multiple action events. Each queue item's
 * `resolve` is called once its content has been fully emitted (or failed).
 */
async function processQueue(streamingState: StreamingState, res: Response, requestId: string, created: number, model: string) {
  // Another drain loop is already running; it will pick up new items.
  if (streamingState.processingQueue) return;

  streamingState.processingQueue = true;

  while (streamingState.queue.length > 0) {
    // Peek (not shift) so completeCurrentStreaming can see the active item.
    const current = streamingState.queue[0];

    // Reset streaming state for new content
    streamingState.currentlyStreaming = true;
    streamingState.remainingContent = current.content;
    streamingState.isEmitting = true;

    try {
      // streamTextNaturally yields words with human-like pacing; each word
      // becomes its own delta chunk.
      for await (const word of streamTextNaturally(current.content, streamingState)) {
        const chunk: ChatCompletionChunk = {
          id: requestId,
          object: 'chat.completion.chunk',
          created,
          model,
          system_fingerprint: 'fp_' + requestId,
          choices: [{
            index: 0,
            delta: {content: word},
            logprobs: null,
            finish_reason: null
          }]
        };
        res.write(`data: ${JSON.stringify(chunk)}\n\n`);

        // Small delay between words
        await new Promise(resolve => setTimeout(resolve, 30));
      }

      // Add newline after content
      const newlineChunk: ChatCompletionChunk = {
        id: requestId,
        object: 'chat.completion.chunk',
        created,
        model,
        system_fingerprint: 'fp_' + requestId,
        choices: [{
          index: 0,
          delta: {content: '\n'},
          logprobs: null,
          finish_reason: null
        }]
      };
      res.write(`data: ${JSON.stringify(newlineChunk)}\n\n`);

    } catch (error) {
      // Streaming of this item failed; state is still reset in finally so the
      // queue keeps draining.
      console.error('Error in streaming:', error);
    } finally {
      // Reset state and remove from queue
      streamingState.isEmitting = false;
      streamingState.currentlyStreaming = false;
      streamingState.remainingContent = '';
      streamingState.queue.shift();
      // Unblock the producer awaiting this item.
      current.resolve();

      // Small delay between queue items
      await new Promise(resolve => setTimeout(resolve, 50));
    }
  }

  streamingState.processingQueue = false;
}
396
+
397
// OpenAI-compatible chat completions endpoint. Validates the request, runs the
// deep-search agent, and returns the result either as an SSE stream (with the
// agent's intermediate reasoning wrapped in <think>…</think>) or as a single
// JSON chat.completion response.
app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
  // Check authentication only if secret is set
  if (secret) {
    const authHeader = req.headers.authorization;
    if (!authHeader || !authHeader.startsWith('Bearer ') || authHeader.split(' ')[1] !== secret) {
      console.log('[chat/completions] Unauthorized request');
      res.status(401).json({error: 'Unauthorized'});
      return;
    }
  }

  // Log request details (excluding sensitive data)
  console.log('[chat/completions] Request:', {
    model: req.body.model,
    stream: req.body.stream,
    messageCount: req.body.messages?.length,
    hasAuth: !!req.headers.authorization,
    requestId: Date.now().toString()
  });

  const body = req.body as ChatCompletionRequest;
  if (!body.messages?.length) {
    return res.status(400).json({error: 'Messages array is required and must not be empty'});
  }
  // The agent answers the final user turn; earlier messages are context.
  const lastMessage = body.messages[body.messages.length - 1];
  if (lastMessage.role !== 'user') {
    return res.status(400).json({error: 'Last message must be from user'});
  }

  // Translate OpenAI knobs (reasoning_effort / max_completion_tokens) into
  // the agent's budget and retry limits.
  const {tokenBudget, maxBadAttempts} = getTokenBudgetAndMaxAttempts(
    body.reasoning_effort,
    body.max_completion_tokens
  );

  // NOTE(review): Date.now().toString() as a request id can collide under
  // concurrent requests — consider a UUID. Kept as-is here.
  const requestId = Date.now().toString();
  const created = Math.floor(Date.now() / 1000);
  const context: TrackerContext = {
    tokenTracker: new TokenTracker(),
    actionTracker: new ActionTracker()
  };

  // Shared state for the simulated word-by-word streaming of think steps.
  const streamingState: StreamingState = {
    currentlyStreaming: false,
    currentGenerator: null,
    remainingContent: '',
    isEmitting: false,
    queue: [],
    processingQueue: false
  };

  if (body.stream) {
    // Standard SSE headers.
    res.setHeader('Content-Type', 'text/event-stream');
    res.setHeader('Cache-Control', 'no-cache');
    res.setHeader('Connection', 'keep-alive');


    // Send initial chunk with opening think tag
    const initialChunk: ChatCompletionChunk = {
      id: requestId,
      object: 'chat.completion.chunk',
      created,
      model: body.model,
      system_fingerprint: 'fp_' + requestId,
      choices: [{
        index: 0,
        delta: {role: 'assistant', content: '<think>'},
        logprobs: null,
        finish_reason: null
      }]
    };
    res.write(`data: ${JSON.stringify(initialChunk)}\n\n`);

    // Set up progress listener with cleanup. Each agent step's `think` text is
    // queued and streamed word-by-word; the await blocks until it has been
    // fully emitted, keeping steps in order.
    const actionListener = async (step: StepAction) => {
      // Add content to queue for both thinking steps and final answer
      if (step.think) {
        const content = step.think;
        await new Promise<void>(resolve => {
          streamingState.queue.push({
            content,
            resolve
          });
          // Single call to process queue is sufficient
          processQueue(streamingState, res, requestId, created, body.model);
        });
      }
    };
    context.actionTracker.on('action', actionListener);

    // Make sure to update the cleanup code
    res.on('finish', () => {
      streamingState.currentlyStreaming = false;
      streamingState.currentGenerator = null;
      streamingState.remainingContent = '';
      context.actionTracker.removeListener('action', actionListener);
    });
  }

  try {
    const {result: finalStep} = await getResponse(lastMessage.content as string, tokenBudget, maxBadAttempts, context, body.messages)

    // Usage reported in OpenAI's snake_case shape.
    const usage = context.tokenTracker.getTotalUsageSnakeCase();
    if (body.stream) {
      // Complete any ongoing streaming before sending final answer
      await completeCurrentStreaming(streamingState, res, requestId, created, body.model);
      const finalAnswer = buildMdFromAnswer(finalStep as AnswerAction);
      // Send closing think tag followed by the markdown answer.
      const closeThinkChunk: ChatCompletionChunk = {
        id: requestId,
        object: 'chat.completion.chunk',
        created,
        model: body.model,
        system_fingerprint: 'fp_' + requestId,
        choices: [{
          index: 0,
          delta: {content: `</think>\n\n${finalAnswer}`},
          logprobs: null,
          finish_reason: null
        }]
      };
      res.write(`data: ${JSON.stringify(closeThinkChunk)}\n\n`);

      // After the content is fully streamed, send the final chunk with finish_reason and usage
      const finalChunk: ChatCompletionChunk = {
        id: requestId,
        object: 'chat.completion.chunk',
        created,
        model: body.model,
        system_fingerprint: 'fp_' + requestId,
        choices: [{
          index: 0,
          delta: {content: ''},
          logprobs: null,
          finish_reason: 'stop'
        }],
        usage
      };
      res.write(`data: ${JSON.stringify(finalChunk)}\n\n`);
      res.end();
    } else {

      // Non-streaming: one JSON response. A non-answer terminal step falls
      // back to exposing its `think` text as the message content.
      const response: ChatCompletionResponse = {
        id: requestId,
        object: 'chat.completion',
        created,
        model: body.model,
        system_fingerprint: 'fp_' + requestId,
        choices: [{
          index: 0,
          message: {
            role: 'assistant',
            content: finalStep.action === 'answer' ? buildMdFromAnswer(finalStep) : finalStep.think
          },
          logprobs: null,
          finish_reason: 'stop'
        }],
        usage
      };

      // Log final response (excluding full content for brevity)
      console.log('[chat/completions] Response:', {
        id: response.id,
        status: 200,
        contentLength: response.choices[0].message.content.length,
        usage: response.usage
      });

      res.json(response);
    }
  } catch (error: any) {
    // Log error details
    console.error('[chat/completions] Error:', {
      message: error?.message || 'An error occurred',
      stack: error?.stack,
      type: error?.constructor?.name,
      requestId
    });

    // Track error as rejected tokens with Vercel token counting
    const errorMessage = error?.message || 'An error occurred';

    // Clean up event listeners
    context.actionTracker.removeAllListeners('action');

    // Get token usage in OpenAI API format
    const usage = context.tokenTracker.getTotalUsageSnakeCase();

    if (body.stream && res.headersSent) {
      // For streaming responses that have already started, send error as a chunk
      // First send closing think tag if we're in the middle of thinking
      const closeThinkChunk: ChatCompletionChunk = {
        id: requestId,
        object: 'chat.completion.chunk',
        created,
        model: body.model,
        system_fingerprint: 'fp_' + requestId,
        choices: [{
          index: 0,
          delta: {content: '</think>'},
          logprobs: null,
          finish_reason: null
        }],
        usage
      };
      res.write(`data: ${JSON.stringify(closeThinkChunk)}\n\n`);


      const errorChunk: ChatCompletionChunk = {
        id: requestId,
        object: 'chat.completion.chunk',
        created,
        model: body.model,
        system_fingerprint: 'fp_' + requestId,
        choices: [{
          index: 0,
          delta: {content: errorMessage},
          logprobs: null,
          finish_reason: 'stop'
        }],
        usage
      };
      res.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
      res.end();
    } else {
      // For non-streaming or not-yet-started responses, send error as JSON
      const response: ChatCompletionResponse = {
        id: requestId,
        object: 'chat.completion',
        created,
        model: body.model,
        system_fingerprint: 'fp_' + requestId,
        choices: [{
          index: 0,
          message: {
            role: 'assistant',
            content: `Error: ${errorMessage}`
          },
          logprobs: null,
          finish_reason: 'stop'
        }],
        usage
      };
      res.json(response);
    }
  }
}) as RequestHandler);
644
+
645
+
646
+ export default app;
src/cli.ts ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env node
2
+ import { Command } from 'commander';
3
+ import { getResponse } from './agent';
4
+ import { version } from '../package.json';
5
+
6
+ const program = new Command();
7
+
8
+ program
9
+ .name('deepresearch')
10
+ .description('AI-powered research assistant that keeps searching until it finds the answer')
11
+ .version(version)
12
+ .argument('<query>', 'The research query to investigate')
13
+ .option('-t, --token-budget <number>', 'Maximum token budget', (val) => {
14
+ const num = parseInt(val);
15
+ if (isNaN(num)) throw new Error('Invalid token budget: must be a number');
16
+ return num;
17
+ }, 1000000)
18
+ .option('-m, --max-attempts <number>', 'Maximum bad attempts before giving up', (val) => {
19
+ const num = parseInt(val);
20
+ if (isNaN(num)) throw new Error('Invalid max attempts: must be a number');
21
+ return num;
22
+ }, 3)
23
+ .option('-v, --verbose', 'Show detailed progress')
24
+ .action(async (query: string, options: any) => {
25
+ try {
26
+ const { result } = await getResponse(
27
+ query,
28
+ parseInt(options.tokenBudget),
29
+ parseInt(options.maxAttempts)
30
+ );
31
+
32
+ if (result.action === 'answer') {
33
+ console.log('\nAnswer:', result.answer);
34
+ if (result.references?.length) {
35
+ console.log('\nReferences:');
36
+ result.references.forEach(ref => {
37
+ console.log(`- ${ref.url}`);
38
+ console.log(` "${ref.exactQuote}"`);
39
+ });
40
+ }
41
+ }
42
+ } catch (error) {
43
+ console.error('Error:', error instanceof Error ? error.message : String(error));
44
+ process.exit(1);
45
+ }
46
+ });
47
+
48
+ program.parse();
src/config.ts ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import dotenv from 'dotenv';
import { ProxyAgent, setGlobalDispatcher } from 'undici';
import { createGoogleGenerativeAI } from '@ai-sdk/google';
import { createOpenAI, OpenAIProviderSettings } from '@ai-sdk/openai';
import configJson from '../config.json';
// Load environment variables
dotenv.config();

// Types
export type LLMProvider = 'openai' | 'gemini' | 'vertex';
// Tool names are keyed off the gemini tools section of config.json; the same
// keys are assumed to exist for other providers.
export type ToolName = keyof typeof configJson.models.gemini.tools;

// Type definitions for our config structure
type EnvConfig = typeof configJson.env;

interface ProviderConfig {
  createClient: string;                 // factory identifier from config.json
  clientConfig?: Record<string, any>;   // provider-specific client options
}

// Environment setup: start from config.json defaults, then let any matching
// process environment variable override each key.
const env: EnvConfig = { ...configJson.env };
(Object.keys(env) as (keyof EnvConfig)[]).forEach(key => {
  if (process.env[key]) {
    env[key] = process.env[key] || env[key];
  }
});

// Setup proxy if present: route all undici-based HTTP through https_proxy.
if (env.https_proxy) {
  try {
    // new URL(...) validates the proxy address before installing it.
    const proxyUrl = new URL(env.https_proxy).toString();
    const dispatcher = new ProxyAgent({ uri: proxyUrl });
    setGlobalDispatcher(dispatcher);
  } catch (error) {
    // Invalid proxy URL: log and continue without a proxy.
    console.error('Failed to set proxy:', error);
  }
}

// Export environment variables
export const OPENAI_BASE_URL = env.OPENAI_BASE_URL;
export const GEMINI_API_KEY = env.GEMINI_API_KEY;
export const OPENAI_API_KEY = env.OPENAI_API_KEY;
export const JINA_API_KEY = env.JINA_API_KEY;
export const BRAVE_API_KEY = env.BRAVE_API_KEY;
export const SEARCH_PROVIDER = configJson.defaults.search_provider;
export const STEP_SLEEP = configJson.defaults.step_sleep;

// Determine LLM provider: env var wins over config.json default; fail fast on
// anything outside the supported union.
export const LLM_PROVIDER: LLMProvider = (() => {
  const provider = process.env.LLM_PROVIDER || configJson.defaults.llm_provider;
  if (!isValidProvider(provider)) {
    throw new Error(`Invalid LLM provider: ${provider}`);
  }
  return provider;
})();
57
+
58
+ function isValidProvider(provider: string): provider is LLMProvider {
59
+ return provider === 'openai' || provider === 'gemini' || provider === 'vertex';
60
+ }
61
+
62
// Fully-resolved per-tool model settings.
interface ToolConfig {
  model: string;        // model identifier handed to the provider SDK
  temperature: number;
  maxTokens: number;
}

// Optional per-tool overrides from config.json; anything omitted falls back
// to the provider's default block.
interface ToolOverrides {
  temperature?: number;
  maxTokens?: number;
}

/**
 * Resolve the model/temperature/maxTokens for one tool: provider defaults
 * merged with the tool's overrides. Vertex reuses the gemini model table.
 * DEFAULT_MODEL_NAME in the environment overrides the configured model.
 */
export function getToolConfig(toolName: ToolName): ToolConfig {
  const providerConfig = configJson.models[LLM_PROVIDER === 'vertex' ? 'gemini' : LLM_PROVIDER];
  const defaultConfig = providerConfig.default;
  const toolOverrides = providerConfig.tools[toolName] as ToolOverrides;

  return {
    model: process.env.DEFAULT_MODEL_NAME || defaultConfig.model,
    temperature: toolOverrides.temperature ?? defaultConfig.temperature,
    maxTokens: toolOverrides.maxTokens ?? defaultConfig.maxTokens
  };
}
85
+
86
+ export function getMaxTokens(toolName: ToolName): number {
87
+ return getToolConfig(toolName).maxTokens;
88
+ }
89
+
90
// Get model instance
/**
 * Build a provider SDK model handle for the given tool, based on LLM_PROVIDER.
 * The `searchGrounding` tool additionally enables Google search grounding on
 * gemini/vertex models. Throws when the provider's API key is missing.
 */
export function getModel(toolName: ToolName) {
  const config = getToolConfig(toolName);
  const providerConfig = (configJson.providers as Record<string, ProviderConfig | undefined>)[LLM_PROVIDER];

  if (LLM_PROVIDER === 'openai') {
    if (!OPENAI_API_KEY) {
      throw new Error('OPENAI_API_KEY not found');
    }

    const opt: OpenAIProviderSettings = {
      apiKey: OPENAI_API_KEY,
      compatibility: providerConfig?.clientConfig?.compatibility
    };

    // Optional custom endpoint (proxies, Azure-style gateways, etc.).
    if (OPENAI_BASE_URL) {
      opt.baseURL = OPENAI_BASE_URL;
    }

    return createOpenAI(opt)(config.model);
  }

  if (LLM_PROVIDER === 'vertex') {
    // Lazy require so @ai-sdk/google-vertex is only loaded when vertex is the
    // active provider. NOTE(review): dynamic require in a TS/ESM codebase —
    // confirm the build target supports it.
    const createVertex = require('@ai-sdk/google-vertex').createVertex;
    if (toolName === 'searchGrounding') {
      return createVertex({ project: process.env.GCLOUD_PROJECT, ...providerConfig?.clientConfig })(config.model, { useSearchGrounding: true });
    }
    return createVertex({ project: process.env.GCLOUD_PROJECT, ...providerConfig?.clientConfig })(config.model);
  }

  // Fallthrough: gemini provider.
  if (!GEMINI_API_KEY) {
    throw new Error('GEMINI_API_KEY not found');
  }

  if (toolName === 'searchGrounding') {
    return createGoogleGenerativeAI({ apiKey: GEMINI_API_KEY })(config.model, { useSearchGrounding: true });
  }
  return createGoogleGenerativeAI({ apiKey: GEMINI_API_KEY })(config.model);
}
129
+
130
// Validate required environment variables — fail at import time rather than on
// the first request.
if (LLM_PROVIDER === 'gemini' && !GEMINI_API_KEY) throw new Error("GEMINI_API_KEY not found");
if (LLM_PROVIDER === 'openai' && !OPENAI_API_KEY) throw new Error("OPENAI_API_KEY not found");
if (!JINA_API_KEY) throw new Error("JINA_API_KEY not found");

// Log all configurations (no API keys included) so deployments can confirm
// which provider/models are active.
const configSummary = {
  provider: {
    name: LLM_PROVIDER,
    model: LLM_PROVIDER === 'openai'
      ? configJson.models.openai.default.model
      : configJson.models.gemini.default.model,
    ...(LLM_PROVIDER === 'openai' && { baseUrl: OPENAI_BASE_URL })
  },
  search: {
    provider: SEARCH_PROVIDER
  },
  // Resolved per-tool settings (defaults merged with overrides).
  tools: Object.fromEntries(
    Object.keys(configJson.models[LLM_PROVIDER === 'vertex' ? 'gemini' : LLM_PROVIDER].tools).map(name => [
      name,
      getToolConfig(name as ToolName)
    ])
  ),
  defaults: {
    stepSleep: STEP_SLEEP
  }
};

console.log('Configuration Summary:', JSON.stringify(configSummary, null, 2));
src/evals/batch-evals.ts ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import fs from 'fs/promises';
2
+ import {exec} from 'child_process';
3
+ import {promisify} from 'util';
4
+ import {getResponse} from '../agent';
5
+ import {generateObject} from 'ai';
6
+ import {GEMINI_API_KEY} from '../config';
7
+ import {z} from 'zod';
8
+ import {AnswerAction, TrackerContext} from "../types";
9
+ import {createGoogleGenerativeAI} from "@ai-sdk/google";
10
+
11
+ const execAsync = promisify(exec);
12
+
13
+ interface Question {
14
+ question: string;
15
+ answer: string;
16
+ }
17
+
18
+ interface EvaluationResult {
19
+ pass: boolean;
20
+ reason: string;
21
+ total_steps: number;
22
+ total_tokens: number;
23
+ question: string;
24
+ expected_answer: string;
25
+ actual_answer: string;
26
+ }
27
+
28
+ interface EvaluationStats {
29
+ model_name: string;
30
+ pass_rate: number;
31
+ avg_steps: number;
32
+ max_steps: number;
33
+ min_steps: number;
34
+ median_steps: number;
35
+ avg_tokens: number;
36
+ median_tokens: number;
37
+ max_tokens: number;
38
+ min_tokens: number;
39
+ }
40
+
41
+ function calculateMedian(numbers: number[]): number {
42
+ const sorted = [...numbers].sort((a, b) => a - b);
43
+ const middle = Math.floor(sorted.length / 2);
44
+
45
+ if (sorted.length % 2 === 0) {
46
+ return (sorted[middle - 1] + sorted[middle]) / 2;
47
+ }
48
+ return sorted[middle];
49
+ }
50
+
51
+ function calculateStats(results: EvaluationResult[], modelName: string): EvaluationStats {
52
+ const steps = results.map(r => r.total_steps);
53
+ const tokens = results.map(r => r.total_tokens);
54
+ const passCount = results.filter(r => r.pass).length;
55
+
56
+ return {
57
+ model_name: modelName,
58
+ pass_rate: (passCount / results.length) * 100,
59
+ avg_steps: steps.reduce((a, b) => a + b, 0) / steps.length,
60
+ max_steps: Math.max(...steps),
61
+ min_steps: Math.min(...steps),
62
+ median_steps: calculateMedian(steps),
63
+ avg_tokens: tokens.reduce((a, b) => a + b, 0) / tokens.length,
64
+ median_tokens: calculateMedian(tokens),
65
+ max_tokens: Math.max(...tokens),
66
+ min_tokens: Math.min(...tokens)
67
+ };
68
+ }
69
+
70
+ function printStats(stats: EvaluationStats): void {
71
+ console.log('\n=== Evaluation Statistics ===');
72
+ console.log(`Model: ${stats.model_name}`);
73
+ console.log(`Pass Rate: ${stats.pass_rate.toFixed(0)}%`);
74
+ console.log(`Average Steps: ${stats.avg_steps.toFixed(0)}`);
75
+ console.log(`Maximum Steps: ${stats.max_steps}`);
76
+ console.log(`Minimum Steps: ${stats.min_steps}`);
77
+ console.log(`Median Steps: ${stats.median_steps.toFixed(0)}`);
78
+ console.log(`Average Tokens: ${stats.avg_tokens.toFixed(0)}`);
79
+ console.log(`Median Tokens: ${stats.median_tokens.toFixed(0)}`);
80
+ console.log(`Maximum Tokens: ${stats.max_tokens}`);
81
+ console.log(`Minimum Tokens: ${stats.min_tokens}`);
82
+ console.log('===========================\n');
83
+ }
84
+
85
+ async function getCurrentGitCommit(): Promise<string> {
86
+ try {
87
+ const {stdout} = await execAsync('git rev-parse --short HEAD');
88
+ return stdout.trim();
89
+ } catch (error) {
90
+ console.error('Error getting git commit:', error);
91
+ return 'unknown';
92
+ }
93
+ }
94
+
95
/**
 * LLM-as-judge comparison of an expected vs. actual answer. Always evaluates
 * with gemini-2.0-flash at temperature 0 for (near-)deterministic judgments,
 * regardless of the provider used to generate the answer. Returns a
 * pass/reason pair; evaluation errors are reported as a failed result rather
 * than thrown.
 */
async function evaluateAnswer(expectedAnswer: string, actualAnswer: string): Promise<{ pass: boolean; reason: string }> {
  const prompt = `You are a deterministic evaluator with zero temperature. Compare the following expected answer with the actual answer and determine if they convey the same information.

Expected answer: ${expectedAnswer}
Actual answer: ${actualAnswer}

Minor wording differences are acceptable as long as the core information of the expected answer is preserved in the actual answer.'`;

  // Structured output schema for generateObject.
  const schema = z.object({
    pass: z.boolean().describe('Whether the actual answer matches the expected answer'),
    reason: z.string().describe('Detailed explanation of why the evaluation passed or failed')
  });

  try {
    const result = await generateObject({
      model: createGoogleGenerativeAI({ apiKey: GEMINI_API_KEY })('gemini-2.0-flash'), // fix to gemini-2.0-flash for evaluation
      schema,
      prompt,
      maxTokens: 1000,
      temperature: 0 // Setting temperature to 0 for deterministic output
    });

    return result.object;
  } catch (error) {
    // Judge failure counts as a FAIL with the error embedded in the reason.
    console.error('Evaluation failed:', error);
    return {
      pass: false,
      reason: `Evaluation error: ${error}`
    };
  }
}
126
+
127
/**
 * Run the agent over every question in a JSON file of {question, answer}
 * pairs, judge each response with evaluateAnswer, print aggregate statistics,
 * and save per-question results plus statistics to
 * `eval-<git-commit>-<model>.json`. Questions are processed sequentially;
 * a per-question failure is recorded as a failing result, not a crash.
 */
async function batchEvaluate(inputFile: string): Promise<void> {
  // Read and parse input file
  const questions: Question[] = JSON.parse(await fs.readFile(inputFile, 'utf-8'));
  const results: EvaluationResult[] = [];
  const gitCommit = await getCurrentGitCommit();
  const modelName = process.env.DEFAULT_MODEL_NAME || 'unknown';
  const outputFile = `eval-${gitCommit}-${modelName}.json`;

  // Process each question
  for (let i = 0; i < questions.length; i++) {
    const {question, answer: expectedAnswer} = questions[i];
    console.log(`\nProcessing question ${i + 1}/${questions.length}: ${question}`);

    try {
      // Get response using the agent
      const {
        result: response,
        context
      } = await getResponse(question) as { result: AnswerAction; context: TrackerContext };

      // Get response using the streaming agent
      // const {
      //   result: response,
      //   context
      // } = await getResponseStreamingAgent(question) as { result: AnswerAction; context: TrackerContext };

      const actualAnswer = response.answer;

      // Evaluate the response
      const evaluation = await evaluateAnswer(expectedAnswer, actualAnswer);

      // Record results
      results.push({
        pass: evaluation.pass,
        reason: evaluation.reason,
        total_steps: context.actionTracker.getState().totalStep,
        total_tokens: context.tokenTracker.getTotalUsage().totalTokens,
        question,
        expected_answer: expectedAnswer,
        actual_answer: actualAnswer
      });

      console.log(`Evaluation: ${evaluation.pass ? 'PASS' : 'FAIL'}`);
      console.log(`Reason: ${evaluation.reason}`);
    } catch (error) {
      // Agent failure: record a zero-cost failing entry and continue.
      console.error(`Error processing question: ${question}`, error);
      results.push({
        pass: false,
        reason: `Error: ${error}`,
        total_steps: 0,
        total_tokens: 0,
        question,
        expected_answer: expectedAnswer,
        actual_answer: 'Error occurred'
      });
    }
  }

  // Calculate and print statistics
  const stats = calculateStats(results, modelName);
  printStats(stats);

  // Save results
  await fs.writeFile(outputFile, JSON.stringify({
    results,
    statistics: stats
  }, null, 2));

  console.log(`\nEvaluation results saved to ${outputFile}`);
}

// Run batch evaluation if this is the main module (CLI: first argv is the
// input JSON path).
if (require.main === module) {
  const inputFile = process.argv[2];
  if (!inputFile) {
    console.error('Please provide an input file path');
    process.exit(1);
  }

  batchEvaluate(inputFile).catch(console.error);
}

export {batchEvaluate};
src/evals/ego-questions.json ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "question": "what did jina ai ceo say about deepseek that went viral and become a meme?",
4
+ "answer": "a side project"
5
+ },
6
+ {
7
+ "question": "when was jina ai founded, month and year?",
8
+ "answer": "feb 2020"
9
+ },
10
+ {
11
+ "question": "what is the latest model published by jina ai?",
12
+ "answer": "ReaderLM-2.0"
13
+ },
14
+ {
15
+ "question": "what is the latest blog post that jina ai published?",
16
+ "answer": "A Practical Guide to Deploying Search Foundation Models in Production"
17
+ },
18
+ {
19
+ "question": "what is the context length of readerlm-v2?",
20
+ "answer": "512K"
21
+ },
22
+ {
23
+ "question": "how many employees does jina ai have right now?",
24
+ "answer": "30"
25
+ },
26
+ {
27
+ "question": "when was jina reader api released?",
28
+ "answer": "April 2024"
29
+ },
30
+ {
31
+ "question": "How many offices do Jina AI have and where are they?",
32
+ "answer": "four: sunnyvale, berlin, beijing, shenzhen"
33
+ },
34
+ {
35
+ "question": "what exactly jina-colbert-v2 improves over jina-colbert-v1?",
36
+ "answer": "v2 add multilingual support"
37
+ },
38
+ {
39
+ "question": "who are the authors of jina-clip-v2 paper?",
40
+ "answer": "Andreas Koukounas, Georgios Mastrapas, Bo Wang, Mohammad Kalim Akram, Sedigheh Eslami, Michael Günther, Isabelle Mohr, Saba Sturua, Scott Martens, Nan Wang, Han Xiao"
41
+ },
42
+ {
43
+ "question": "who created the node-deepresearch project?",
44
+ "answer": "Han Xiao / jina ai"
45
+ },
46
+ {
47
+ "question": "Which countries are the investors of Jina AI from?",
48
+ "answer": "USA and China only, no German investors"
49
+ },
50
+ {
51
+ "question": "what is the grounding api endpoint of jina ai?",
52
+ "answer": "g.jina.ai"
53
+ },
54
+ {
55
+ "question": "which of the following models do not support Matryoshka representation? jina-embeddings-v3, jina-embeddings-v2-base-en, jina-clip-v2, jina-clip-v1",
56
+ "answer": "jina-embeddings-v2-base-en and jina-clip-v1"
57
+ },
58
+ {
59
+ "question": "Can I purchase the 2024 yearbook that jina ai published today?",
60
+ "answer": "No it is sold out."
61
+ },
62
+ {
63
+ "question": "How many free tokens do you get from a new jina api key?",
64
+ "answer": "1 million."
65
+ },
66
+ {
67
+ "question": "Who is the legal signatory of Jina AI gmbh?",
68
+ "answer": "Jiao Liu"
69
+ },
70
+ {
71
+ "question": "what is the key idea behind node-deepresearch project?",
72
+ "answer": "It keeps searching, reading webpages, reasoning until an answer is found."
73
+ },
74
+ {
75
+ "question": "what is the name of the jina ai's mascot?",
76
+ "answer": "No, Jina AI does not have a mascot."
77
+ },
78
+ {
79
+ "question": "Does late chunking work with cls pooling?",
80
+ "answer": "No. late chunking only works with mean pooling."
81
+ }
82
+ ]
src/server.ts ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import app from "./app";

// Port from the environment, defaulting to 3000 for local development.
const port = process.env.PORT || 3000;

// Export server startup function for better testing
export function startServer() {
  // Returns the http.Server so tests can close it.
  return app.listen(port, () => {
    console.log(`Server running at http://localhost:${port}`);
  });
}

// Start server if running directly; tests import startServer and manage the
// lifecycle themselves.
if (process.env.NODE_ENV !== 'test') {
  startServer();
}
src/tools/__tests__/error-analyzer.test.ts ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { analyzeSteps } from '../error-analyzer';
2
+ import { LLMProvider } from '../../config';
3
+
4
+ describe('analyzeSteps', () => {
5
+ const providers: Array<LLMProvider> = ['openai', 'gemini'];
6
+ const originalEnv = process.env;
7
+
8
+ beforeEach(() => {
9
+ jest.resetModules();
10
+ process.env = { ...originalEnv };
11
+ });
12
+
13
+ afterEach(() => {
14
+ process.env = originalEnv;
15
+ });
16
+
17
+ providers.forEach(provider => {
18
+ describe(`with ${provider} provider`, () => {
19
+ beforeEach(() => {
20
+ process.env.LLM_PROVIDER = provider;
21
+ });
22
+
23
+ it('should analyze error steps', async () => {
24
+ const { response } = await analyzeSteps(['Step 1: Search failed', 'Step 2: Invalid query']);
25
+ expect(response).toHaveProperty('recap');
26
+ expect(response).toHaveProperty('blame');
27
+ expect(response).toHaveProperty('improvement');
28
+ }, 30000);
29
+ });
30
+ });
31
+ });
src/tools/__tests__/evaluator.test.ts ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { evaluateAnswer } from '../evaluator';
2
+ import { TokenTracker } from '../../utils/token-tracker';
3
+ import { LLMProvider } from '../../config';
4
+
5
+ describe('evaluateAnswer', () => {
6
+ const providers: Array<LLMProvider> = ['openai', 'gemini'];
7
+ const originalEnv = process.env;
8
+
9
+ beforeEach(() => {
10
+ jest.resetModules();
11
+ process.env = { ...originalEnv };
12
+ });
13
+
14
+ afterEach(() => {
15
+ process.env = originalEnv;
16
+ });
17
+
18
+ providers.forEach(provider => {
19
+ describe(`with ${provider} provider`, () => {
20
+ beforeEach(() => {
21
+ process.env.LLM_PROVIDER = provider;
22
+ });
23
+
24
+ it('should evaluate answer definitiveness', async () => {
25
+ const tokenTracker = new TokenTracker();
26
+ const { response } = await evaluateAnswer(
27
+ 'What is TypeScript?',
28
+ {
29
+ action: "answer",
30
+ think: "Providing a clear definition of TypeScript",
31
+ answer: "TypeScript is a strongly typed programming language that builds on JavaScript.",
32
+ references: []
33
+ },
34
+ ['definitive'],
35
+ tokenTracker
36
+ );
37
+ expect(response).toHaveProperty('pass');
38
+ expect(response).toHaveProperty('think');
39
+ expect(response.type).toBe('definitive');
40
+ });
41
+
42
+ it('should evaluate answer plurality', async () => {
43
+ const tokenTracker = new TokenTracker();
44
+ const { response } = await evaluateAnswer(
45
+ 'List three programming languages.',
46
+ {
47
+ action: "answer",
48
+ think: "Providing an example of a programming language",
49
+ answer: "Python is a programming language.",
50
+ references: []
51
+ },
52
+ ['plurality'],
53
+ tokenTracker
54
+ );
55
+ expect(response).toHaveProperty('pass');
56
+ expect(response).toHaveProperty('think');
57
+ expect(response.type).toBe('plurality');
58
+ expect(response.plurality_analysis?.expects_multiple).toBe(true);
59
+ });
60
+ });
61
+ });
62
+ });
src/tools/__tests__/read.test.ts ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { readUrl } from '../read';
2
+ import { TokenTracker } from '../../utils/token-tracker';
3
+
4
+ describe('readUrl', () => {
5
+ it.skip('should read and parse URL content (skipped due to insufficient balance)', async () => {
6
+ const tokenTracker = new TokenTracker();
7
+ const { response } = await readUrl('https://www.typescriptlang.org', tokenTracker);
8
+ expect(response).toHaveProperty('code');
9
+ expect(response).toHaveProperty('status');
10
+ expect(response.data).toHaveProperty('content');
11
+ expect(response.data).toHaveProperty('title');
12
+ }, 15000);
13
+
14
+ it.skip('should handle invalid URLs (skipped due to insufficient balance)', async () => {
15
+ await expect(readUrl('invalid-url')).rejects.toThrow();
16
+ }, 15000);
17
+
18
+ beforeEach(() => {
19
+ jest.setTimeout(15000);
20
+ });
21
+ });
src/tools/__tests__/search.test.ts ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { search } from '../jina-search';
2
+ import { TokenTracker } from '../../utils/token-tracker';
3
+
4
+ describe('search', () => {
5
+ it.skip('should perform search with Jina API (skipped due to insufficient balance)', async () => {
6
+ const tokenTracker = new TokenTracker();
7
+ const { response } = await search('TypeScript programming', tokenTracker);
8
+ expect(response).toBeDefined();
9
+ expect(response.data).toBeDefined();
10
+ if (response.data === null) {
11
+ throw new Error('Response data is null');
12
+ }
13
+ expect(Array.isArray(response.data)).toBe(true);
14
+ expect(response.data.length).toBeGreaterThan(0);
15
+ }, 15000);
16
+
17
+ it('should handle empty query', async () => {
18
+ await expect(search('')).rejects.toThrow();
19
+ }, 15000);
20
+
21
+ beforeEach(() => {
22
+ jest.setTimeout(15000);
23
+ });
24
+ });
src/tools/brave-search.ts ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import axios from 'axios';
2
+ import {BRAVE_API_KEY} from "../config";
3
+
4
+ import { BraveSearchResponse } from '../types';
5
+
6
+ export async function braveSearch(query: string): Promise<{ response: BraveSearchResponse }> {
7
+ const response = await axios.get<BraveSearchResponse>('https://api.search.brave.com/res/v1/web/search', {
8
+ params: {
9
+ q: query,
10
+ count: 10,
11
+ safesearch: 'off'
12
+ },
13
+ headers: {
14
+ 'Accept': 'application/json',
15
+ 'X-Subscription-Token': BRAVE_API_KEY
16
+ },
17
+ timeout: 10000
18
+ });
19
+
20
+ // Maintain the same return structure as the original code
21
+ return { response: response.data };
22
+ }
src/tools/dedup.ts ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import {z} from 'zod';
2
+ import {TokenTracker} from "../utils/token-tracker";
3
+ import {ObjectGeneratorSafe} from "../utils/safe-generator";
4
+
5
+
6
+ const responseSchema = z.object({
7
+ think: z.string().describe('Strategic reasoning about the overall deduplication approach'),
8
+ unique_queries: z.array(z.string().describe('Unique query that passed the deduplication process, must be less than 30 characters'))
9
+ .describe('Array of semantically unique queries').max(3)
10
+ });
11
+
12
+ function getPrompt(newQueries: string[], existingQueries: string[]): string {
13
+ return `You are an expert in semantic similarity analysis. Given a set of queries (setA) and a set of queries (setB)
14
+
15
+ <rules>
16
+ Function FilterSetA(setA, setB, threshold):
17
+ filteredA = empty set
18
+
19
+ for each candidateQuery in setA:
20
+ isValid = true
21
+
22
+ // Check similarity with already accepted queries in filteredA
23
+ for each acceptedQuery in filteredA:
24
+ similarity = calculateSimilarity(candidateQuery, acceptedQuery)
25
+ if similarity >= threshold:
26
+ isValid = false
27
+ break
28
+
29
+ // If passed first check, compare with set B
30
+ if isValid:
31
+ for each queryB in setB:
32
+ similarity = calculateSimilarity(candidateQuery, queryB)
33
+ if similarity >= threshold:
34
+ isValid = false
35
+ break
36
+
37
+ // If passed all checks, add to filtered set
38
+ if isValid:
39
+ add candidateQuery to filteredA
40
+
41
+ return filteredA
42
+ </rules>
43
+
44
+ <similarity-definition>
45
+ 1. Consider semantic meaning and query intent, not just lexical similarity
46
+ 2. Account for different phrasings of the same information need
47
+ 3. Queries with same base keywords but different operators are NOT duplicates
48
+ 4. Different aspects or perspectives of the same topic are not duplicates
49
+ 5. Consider query specificity - a more specific query is not a duplicate of a general one
50
+ 6. Search operators that make queries behave differently:
51
+ - Different site: filters (e.g., site:youtube.com vs site:github.com)
52
+ - Different file types (e.g., filetype:pdf vs filetype:doc)
53
+ - Different language/location filters (e.g., lang:en vs lang:es)
54
+ - Different exact match phrases (e.g., "exact phrase" vs no quotes)
55
+ - Different inclusion/exclusion (+/- operators)
56
+ - Different title/body filters (intitle: vs inbody:)
57
+ </similarity-definition>
58
+
59
+ Now with threshold set to 0.2; run FilterSetA on the following:
60
+ SetA: ${JSON.stringify(newQueries)}
61
+ SetB: ${JSON.stringify(existingQueries)}`;
62
+ }
63
+
64
+
65
+ const TOOL_NAME = 'dedup';
66
+
67
+ export async function dedupQueries(
68
+ newQueries: string[],
69
+ existingQueries: string[],
70
+ tracker?: TokenTracker
71
+ ): Promise<{ unique_queries: string[] }> {
72
+ try {
73
+ const generator = new ObjectGeneratorSafe(tracker);
74
+ const prompt = getPrompt(newQueries, existingQueries);
75
+
76
+ const result = await generator.generateObject({
77
+ model: TOOL_NAME,
78
+ schema: responseSchema,
79
+ prompt,
80
+ });
81
+
82
+ console.log(TOOL_NAME, result.object.unique_queries);
83
+ return {unique_queries: result.object.unique_queries};
84
+
85
+ } catch (error) {
86
+ console.error(`Error in ${TOOL_NAME}`, error);
87
+ throw error;
88
+ }
89
+ }
src/tools/error-analyzer.ts ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import {z} from 'zod';
2
+ import {TokenTracker} from "../utils/token-tracker";
3
+ import {ErrorAnalysisResponse} from '../types';
4
+ import {ObjectGeneratorSafe} from "../utils/safe-generator";
5
+
6
+
7
+ const responseSchema = z.object({
8
+ recap: z.string().describe('Recap of the actions taken and the steps conducted'),
9
+ blame: z.string().describe('Which action or the step was the root cause of the answer rejection'),
10
+ improvement: z.string().describe('Suggested key improvement for the next iteration, do not use bullet points, be concise and hot-take vibe.'),
11
+ questionsToAnswer: z.array(
12
+ z.string().describe("each question must be a single line, concise and clear. not composite or compound, less than 20 words.")
13
+ ).max(2)
14
+ .describe("List of most important reflect questions to fill the knowledge gaps"),
15
+ });
16
+
17
+
18
+ function getPrompt(diaryContext: string[]): string {
19
+ return `You are an expert at analyzing search and reasoning processes. Your task is to analyze the given sequence of steps and identify what went wrong in the search process.
20
+
21
+ <rules>
22
+ 1. The sequence of actions taken
23
+ 2. The effectiveness of each step
24
+ 3. The logic between consecutive steps
25
+ 4. Alternative approaches that could have been taken
26
+ 5. Signs of getting stuck in repetitive patterns
27
+ 6. Whether the final answer matches the accumulated information
28
+
29
+ Analyze the steps and provide detailed feedback following these guidelines:
30
+ - In the recap: Summarize key actions chronologically, highlight patterns, and identify where the process started to go wrong
31
+ - In the blame: Point to specific steps or patterns that led to the inadequate answer
32
+ - In the improvement: Provide actionable suggestions that could have led to a better outcome
33
+
34
+ Generate a JSON response following JSON schema.
35
+ </rules>
36
+
37
+ <example>
38
+ <input>
39
+ <steps>
40
+
41
+ At step 1, you took the **search** action and look for external information for the question: "how old is jina ai ceo?".
42
+ In particular, you tried to search for the following keywords: "jina ai ceo age".
43
+ You found quite some information and add them to your URL list and **visit** them later when needed.
44
+
45
+
46
+ At step 2, you took the **visit** action and deep dive into the following URLs:
47
+ https://www.linkedin.com/in/hxiao87
48
+ https://www.crunchbase.com/person/han-xiao
49
+ You found some useful information on the web and add them to your knowledge for future reference.
50
+
51
+
52
+ At step 3, you took the **search** action and look for external information for the question: "how old is jina ai ceo?".
53
+ In particular, you tried to search for the following keywords: "Han Xiao birthdate, Jina AI founder birthdate".
54
+ You found quite some information and add them to your URL list and **visit** them later when needed.
55
+
56
+
57
+ At step 4, you took the **search** action and look for external information for the question: "how old is jina ai ceo?".
58
+ In particular, you tried to search for the following keywords: han xiao birthday.
59
+ But then you realized you have already searched for these keywords before.
60
+ You decided to think out of the box or cut from a completely different angle.
61
+
62
+
63
+ At step 5, you took the **search** action and look for external information for the question: "how old is jina ai ceo?".
64
+ In particular, you tried to search for the following keywords: han xiao birthday.
65
+ But then you realized you have already searched for these keywords before.
66
+ You decided to think out of the box or cut from a completely different angle.
67
+
68
+
69
+ At step 6, you took the **visit** action and deep dive into the following URLs:
70
+ https://kpopwall.com/han-xiao/
71
+ https://www.idolbirthdays.net/han-xiao
72
+ You found some useful information on the web and add them to your knowledge for future reference.
73
+
74
+
75
+ At step 7, you took **answer** action but evaluator thinks it is not a good answer:
76
+
77
+ </steps>
78
+
79
+ Original question:
80
+ how old is jina ai ceo?
81
+
82
+ Your answer:
83
+ The age of the Jina AI CEO cannot be definitively determined from the provided information.
84
+
85
+ The evaluator thinks your answer is bad because:
86
+ The answer is not definitive and fails to provide the requested information. Lack of information is unacceptable, more search and deep reasoning is needed.
87
+ </input>
88
+
89
+
90
+ <output>
91
+ {
92
+ "recap": "The search process consisted of 7 steps with multiple search and visit actions. The initial searches focused on basic biographical information through LinkedIn and Crunchbase (steps 1-2). When this didn't yield the specific age information, additional searches were conducted for birthdate information (steps 3-5). The process showed signs of repetition in steps 4-5 with identical searches. Final visits to entertainment websites (step 6) suggested a loss of focus on reliable business sources.",
93
+
94
+ "blame": "The root cause of failure was getting stuck in a repetitive search pattern without adapting the strategy. Steps 4-5 repeated the same search, and step 6 deviated to less reliable entertainment sources instead of exploring business journals, news articles, or professional databases. Additionally, the process didn't attempt to triangulate age through indirect information like education history or career milestones.",
95
+
96
+ "improvement": "1. Avoid repeating identical searches and implement a strategy to track previously searched terms. 2. When direct age/birthdate searches fail, try indirect approaches like: searching for earliest career mentions, finding university graduation years, or identifying first company founding dates. 3. Focus on high-quality business sources and avoid entertainment websites for professional information. 4. Consider using industry event appearances or conference presentations where age-related context might be mentioned. 5. If exact age cannot be determined, provide an estimated range based on career timeline and professional achievements.",
97
+
98
+ "questionsToAnswer": [
99
+ "What alternative professional databases or news archives could provide reliable biographical information?",
100
+ "How can we use education history or career milestones to estimate age range?"
101
+ ]
102
+ }
103
+ </output>
104
+ </example>
105
+ Review the steps below carefully and generate your analysis following this format.
106
+
107
+ ${diaryContext.join('\n')}
108
+ `;
109
+ }
110
+
111
+ const TOOL_NAME = 'errorAnalyzer';
112
+ export async function analyzeSteps(
113
+ diaryContext: string[],
114
+ tracker?: TokenTracker
115
+ ): Promise<{ response: ErrorAnalysisResponse }> {
116
+ try {
117
+ const generator = new ObjectGeneratorSafe(tracker);
118
+ const prompt = getPrompt(diaryContext);
119
+
120
+ const result = await generator.generateObject({
121
+ model: TOOL_NAME,
122
+ schema: responseSchema,
123
+ prompt,
124
+ });
125
+
126
+ console.log(TOOL_NAME, result.object);
127
+
128
+ return { response: result.object };
129
+
130
+ } catch (error) {
131
+ console.error(`Error in ${TOOL_NAME}`, error);
132
+ throw error;
133
+ }
134
+ }
src/tools/evaluator.ts ADDED
@@ -0,0 +1,553 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import {z} from 'zod';
2
+ import {GenerateObjectResult} from 'ai';
3
+ import {TokenTracker} from "../utils/token-tracker";
4
+ import {AnswerAction, EvaluationCriteria, EvaluationResponse, EvaluationType} from '../types';
5
+ import {readUrl, removeAllLineBreaks} from "./read";
6
+ import {ObjectGeneratorSafe} from "../utils/safe-generator";
7
+
8
+
9
+
10
+ const baseSchema = {
11
+ pass: z.boolean().describe('Whether the answer passes the evaluation criteria defined by the evaluator'),
12
+ think: z.string().describe('Explanation the thought process why the answer does not pass the evaluation criteria')
13
+ };
14
+
15
+ const definitiveSchema = z.object({
16
+ ...baseSchema,
17
+ type: z.literal('definitive')
18
+ });
19
+
20
+ const freshnessSchema = z.object({
21
+ ...baseSchema,
22
+ type: z.literal('freshness'),
23
+ freshness_analysis: z.object({
24
+ likely_outdated: z.boolean().describe('Whether the answer content is likely outdated based on dates and current time'),
25
+ dates_mentioned: z.array(z.string()).describe('All dates mentioned in the answer'),
26
+ current_time: z.string().describe('Current system time when evaluation was performed'),
27
+ max_age_days: z.number().optional().describe('Maximum allowed age in days before content is considered outdated')
28
+ })
29
+ });
30
+
31
+ const pluralitySchema = z.object({
32
+ ...baseSchema,
33
+ type: z.literal('plurality'),
34
+ plurality_analysis: z.object({
35
+ expects_multiple: z.boolean().describe('Whether the question asks for multiple items'),
36
+ provides_multiple: z.boolean().describe('Whether the answer provides multiple items'),
37
+ count_expected: z.number().optional().describe('Number of items expected if specified in question'),
38
+ count_provided: z.number().describe('Number of items provided in answer')
39
+ })
40
+ });
41
+
42
+ const attributionSchema = z.object({
43
+ ...baseSchema,
44
+ type: z.literal('attribution'),
45
+ attribution_analysis: z.object({
46
+ sources_provided: z.boolean().describe('Whether the answer provides source references'),
47
+ sources_verified: z.boolean().describe('Whether the provided sources contain the claimed information'),
48
+ quotes_accurate: z.boolean().describe('Whether the quotes accurately represent the source content')
49
+ })
50
+ });
51
+
52
+ function getAttributionPrompt(question: string, answer: string, sourceContent: string): string {
53
+ return `You are an evaluator that verifies if answer content is properly attributed to and supported by the provided sources.
54
+
55
+ <rules>
56
+ 1. Source Verification:
57
+ - Check if answer claims are supported by the provided source content
58
+ - Verify that quotes are accurate and in proper context
59
+ - Ensure numerical data and statistics match the source
60
+ - Flag any claims that go beyond what the sources support
61
+
62
+ 2. Attribution Analysis:
63
+ - Check if answer properly references its sources
64
+ - Verify that important claims have clear source attribution
65
+ - Ensure quotes are properly marked and cited
66
+ - Check for any unsupported generalizations
67
+
68
+ 3. Accuracy Requirements:
69
+ - Direct quotes must match source exactly
70
+ - Paraphrasing must maintain original meaning
71
+ - Statistics and numbers must be precise
72
+ - Context must be preserved
73
+ </rules>
74
+
75
+ <examples>
76
+ Question: "What are Jina AI's main products?"
77
+ Answer: "According to Jina AI's website, their main products are DocArray and Jina Framework."
78
+ Source Content: "Jina AI's flagship products include DocArray, Jina Framework, and JCloud, offering a complete ecosystem for neural search applications."
79
+ Evaluation: {
80
+ "pass": false,
81
+ "think": "The answer omits JCloud which is mentioned as a main product in the source. The information provided is incomplete and potentially misleading as it fails to mention a significant product from the company's ecosystem.",
82
+ "attribution_analysis": {
83
+ "sources_provided": true,
84
+ "sources_verified": false,
85
+ "quotes_accurate": false
86
+ }
87
+ }
88
+
89
+ Question: "When was Python first released?"
90
+ Answer: "Python was first released in 1991 by Guido van Rossum."
91
+ Source Content: "Python was first released in 1991 by Guido van Rossum while working at CWI."
92
+ Evaluation: {
93
+ "pass": true,
94
+ "think": "The answer accurately reflects the core information from the source about Python's release date and creator, though it omits the additional context about CWI which isn't essential to the question.",
95
+ "attribution_analysis": {
96
+ "sources_provided": true,
97
+ "sources_verified": true,
98
+ "quotes_accurate": true
99
+ }
100
+ }
101
+ </examples>
102
+
103
+ Now evaluate this pair:
104
+ Question: ${JSON.stringify(question)}
105
+ Answer: ${JSON.stringify(answer)}
106
+ Source Content: ${JSON.stringify(sourceContent)}`;
107
+ }
108
+
109
+ function getDefinitivePrompt(question: string, answer: string): string {
110
+ return `You are an evaluator of answer definitiveness. Analyze if the given answer provides a definitive response or not.
111
+
112
+ <rules>
113
+ First, if the answer is not a direct response to the question, it must return false.
114
+ Definitiveness is the king! The following types of responses are NOT definitive and must return false:
115
+ 1. Expressions of uncertainty: "I don't know", "not sure", "might be", "probably"
116
+ 2. Lack of information statements: "doesn't exist", "lack of information", "could not find"
117
+ 3. Inability statements: "I cannot provide", "I am unable to", "we cannot"
118
+ 4. Negative statements that redirect: "However, you can...", "Instead, try..."
119
+ 5. Non-answers that suggest alternatives
120
+ </rules>
121
+
122
+ <examples>
123
+ Question: "What are the system requirements for running Python 3.9?"
124
+ Answer: "I'm not entirely sure, but I think you need a computer with some RAM."
125
+ Evaluation: {
126
+ "pass": false,
127
+ "think": "The answer contains uncertainty markers like 'not entirely sure' and 'I think', making it non-definitive."
128
+ }
129
+
130
+ Question: "What are the system requirements for running Python 3.9?"
131
+ Answer: "Python 3.9 requires Windows 7 or later, macOS 10.11 or later, or Linux."
132
+ Evaluation: {
133
+ "pass": true,
134
+ "think": "The answer makes clear, definitive statements without uncertainty markers or ambiguity."
135
+ }
136
+
137
+ Question: "Who will be the president of the United States in 2032?"
138
+ Answer: "I cannot predict the future, it depends on the election results."
139
+ Evaluation: {
140
+ "pass": false,
141
+ "think": "The answer contains a statement of inability to predict the future, making it non-definitive."
142
+ }
143
+
144
+ Question: "Who is the sales director at Company X?"
145
+ Answer: "I cannot provide the name of the sales director, but you can contact their sales team at [email protected]"
146
+ Evaluation: {
147
+ "pass": false,
148
+ "think": "The answer starts with 'I cannot provide' and redirects to an alternative contact method instead of answering the original question."
149
+ }
150
+
151
+ Question: "what is the twitter account of jina ai's founder?"
152
+ Answer: "The provided text does not contain the Twitter account of Jina AI's founder."
153
+ Evaluation: {
154
+ "pass": false,
155
+ "think": "The answer indicates a lack of information rather than providing a definitive response."
156
+ }
157
+ </examples>
158
+
159
+ Now evaluate this pair:
160
+ Question: ${JSON.stringify(question)}
161
+ Answer: ${JSON.stringify(answer)}`;
162
+ }
163
+
164
+ function getFreshnessPrompt(question: string, answer: string, currentTime: string): string {
165
+ return `You are an evaluator that analyzes if answer content is likely outdated based on mentioned dates and current time.
166
+
167
+ <rules>
168
+ 1. Date Analysis:
169
+ - Extract all dates mentioned in the answer
170
+ - Compare against current system time: ${currentTime}
171
+ - Consider content outdated if:
172
+ * It refers to a "latest" or "current" state from more than 30 days ago
173
+ * It mentions specific dates/events that have been superseded
174
+ * It contains time-sensitive information (e.g., "current CEO", "latest version") from more than 60 days ago
175
+ - For product versions, releases, or announcements, max age is 30 days
176
+ - For company positions, leadership, or general facts, max age is 60 days
177
+
178
+ 2. Context Hints:
179
+ - Words indicating recency: "latest", "current", "newest", "just released", "recently"
180
+ - Time-sensitive terms: "CEO", "price", "version", "release"
181
+ - Future dates should be ignored in outdated calculation
182
+ </rules>
183
+
184
+ <examples>
185
+ Question: "What is Jina AI's latest embedding model?"
186
+ Answer: "The latest embedding model from Jina AI is jina-embeddings-v2, released on March 15, 2024."
187
+ Current Time: "2024-10-06T00:00:00Z"
188
+ Evaluation: {
189
+ "pass": false,
190
+ "think": "The answer refers to a 'latest' model release from over 6 months ago, which is likely outdated for product version information",
191
+ "freshness_analysis": {
192
+ "likely_outdated": true,
193
+ "dates_mentioned": ["2024-03-15"],
194
+ "current_time": "2024-10-06T00:00:00Z",
195
+ "max_age_days": 30
196
+ }
197
+ }
198
+
199
+ Question: "Who is OpenAI's CEO?"
200
+ Answer: "Sam Altman is the CEO of OpenAI as of December 2023."
201
+ Current Time: "2024-02-06T00:00:00Z"
202
+ Evaluation: {
203
+ "pass": true,
204
+ "think": "The answer is about company leadership and is within the 60-day threshold for such information",
205
+ "freshness_analysis": {
206
+ "likely_outdated": false,
207
+ "dates_mentioned": ["2023-12"],
208
+ "current_time": "2024-02-06T00:00:00Z",
209
+ "max_age_days": 60
210
+ }
211
+ }
212
+ </examples>
213
+
214
+ Now evaluate this pair:
215
+ Question: ${JSON.stringify(question)}
216
+ Answer: ${JSON.stringify(answer)}`;
217
+ }
218
+
219
+ function getPluralityPrompt(question: string, answer: string): string {
220
+ return `You are an evaluator that analyzes if answers provide the appropriate number of items requested in the question.
221
+
222
+ <rules>
223
+ 1. Question Analysis:
224
+ - Check if question asks for multiple items using indicators like:
225
+ * Plural nouns: "companies", "people", "names"
226
+ * Quantifiers: "all", "many", "several", "various", "multiple"
227
+ * List requests: "list", "enumerate", "name all", "give me all"
228
+ * Numbers: "5 examples", "top 10"
229
+ - Otherwise skip the analysis and return pass to true
230
+
231
+ 2. Answer Analysis:
232
+ - Count distinct items provided in the answer
233
+ - Check if answer uses limiting words like "only", "just", "single"
234
+ - Identify if answer acknowledges there are more items but only provides some
235
+
236
+ 3. Definitiveness Rules:
237
+ - If question asks for multiple items but answer provides only one → NOT definitive
238
+ - If question asks for specific number (e.g., "top 5") but answer provides fewer → NOT definitive
239
+ - If answer clearly states it's providing a partial list → NOT definitive
240
+ - If question asks for "all" or "every" but answer seems incomplete → NOT definitive
241
+ </rules>
242
+
243
+ <examples>
244
+ Question: "Who works in Jina AI's sales team?"
245
+ Answer: "John Smith is a sales representative at Jina AI."
246
+ Evaluation: {
247
+ "pass": true,
248
+ "think": "The question doesn't specifically ask for multiple team members, so a single name can be considered a definitive answer.",
249
+ "plurality_analysis": {
250
+ "expects_multiple": false,
251
+ "provides_multiple": false,
252
+ "count_provided": 1
253
+ }
254
+ }
255
+
256
+ Question: "List all the salespeople who work at Jina AI"
257
+ Answer: "John Smith is a sales representative at Jina AI."
258
+ Evaluation: {
259
+ "pass": false,
260
+ "think": "The question asks for 'all salespeople' but the answer only provides one name without indicating if this is the complete list.",
261
+ "plurality_analysis": {
262
+ "expects_multiple": true,
263
+ "provides_multiple": false,
264
+ "count_provided": 1
265
+ }
266
+ }
267
+
268
+ Question: "Name the top 3 products sold by Jina AI"
269
+ Answer: "Jina AI's product lineup includes DocArray and Jina."
270
+ Evaluation: {
271
+ "pass": false,
272
+ "think": "The question asks for top 3 products but only 2 are provided.",
273
+ "plurality_analysis": {
274
+ "expects_multiple": true,
275
+ "provides_multiple": true,
276
+ "count_expected": 3,
277
+ "count_provided": 2
278
+ }
279
+ }
280
+
281
+ Question: "List as many AI companies in Berlin as you can find"
282
+ Answer: "Here are several AI companies in Berlin: Ada Health, Merantix, DeepL, Understand.ai, and Zeitgold. There are many more AI companies in Berlin, but these are some notable examples."
283
+ Evaluation: {
284
+ "pass": false,
285
+ "think": "While the answer provides multiple companies, it explicitly states it's an incomplete list when the question asks to list as many as possible.",
286
+ "plurality_analysis": {
287
+ "expects_multiple": true,
288
+ "provides_multiple": true,
289
+ "count_provided": 5
290
+ }
291
+ }
292
+ </examples>
293
+
294
+ Now evaluate this pair:
295
+ Question: ${JSON.stringify(question)}
296
+ Answer: ${JSON.stringify(answer)}`;
297
+ }
298
+
299
+
300
+ const questionEvaluationSchema = z.object({
301
+ needsFreshness: z.boolean().describe('Whether the question requires freshness check'),
302
+ needsPlurality: z.boolean().describe('Whether the question requires plurality check'),
303
+ reasoning: z.string().describe('Explanation of why these checks are needed or not needed'),
304
+ languageStyle: z.string().describe('The language being used and the overall vibe/mood of the question'),
305
+ });
306
+
307
+ function getQuestionEvaluationPrompt(question: string): string {
308
+ return `You are an evaluator that determines if a question requires freshness and/or plurality checks in addition to the required definitiveness check.
309
+
310
+ <evaluation_types>
311
+ 1. freshness - Checks if the question is time-sensitive or requires very recent information
312
+ 2. plurality - Checks if the question asks for multiple items or a specific count or enumeration
313
+ 3. language style - Identifies both the language used and the overall vibe of the question
314
+ </evaluation_types>
315
+
316
+ <rules>
317
+ If question is a simple greeting, chit-chat, or general knowledge, provide the answer directly.
318
+
319
+ 1. Freshness Evaluation:
320
+ - Required for questions about current state, recent events, or time-sensitive information
321
+ - Required for: prices, versions, leadership positions, status updates
322
+ - Look for terms: "current", "latest", "recent", "now", "today", "new"
323
+ - Consider company positions, product versions, market data time-sensitive
324
+
325
+ 2. Plurality Evaluation:
326
+ - Required when question asks for multiple items or specific counts
327
+ - Check for: numbers ("5 examples"), plural nouns, list requests
328
+ - Look for: "all", "list", "enumerate", "examples", plural forms
329
+ - Required when question implies completeness ("all the reasons", "every factor")
330
+
331
+ 3. Language Style Analysis:
332
+ Combine both language and emotional vibe in a descriptive phrase, considering:
333
+ - Language: The primary language or mix of languages used
334
+ - Emotional tone: panic, excitement, frustration, curiosity, etc.
335
+ - Formality level: academic, casual, professional, etc.
336
+ - Domain context: technical, academic, social, etc.
337
+ </rules>
338
+
339
+ <examples>
340
+ Question: "fam PLEASE help me calculate the eigenvalues of this 4x4 matrix ASAP!! [matrix details] got an exam tmrw 😭"
341
+ Evaluation: {
342
+ "needsFreshness": false,
343
+ "needsPlurality": true,
344
+ "reasoning": "Multiple eigenvalues needed but no time-sensitive information required",
345
+ "languageStyle": "panicked student English with math jargon"
346
+ }
347
+
348
+ Question: "Can someone explain how tf did Ferrari mess up their pit stop strategy AGAIN?! 🤦‍♂️ #MonacoGP"
349
+ Evaluation: {
350
+ "needsFreshness": true,
351
+ "needsPlurality": true,
352
+ "reasoning": "Refers to recent race event and requires analysis of multiple strategic decisions",
353
+ "languageStyle": "frustrated fan English with F1 terminology"
354
+ }
355
+
356
+ Question: "肖老师您好,请您介绍一下最近��子计算领域的三个重大突破,特别是它们在密码学领域的应用价值吗?🤔"
357
+ Evaluation: {
358
+ "needsFreshness": true,
359
+ "needsPlurality": true,
360
+ "reasoning": "Asks for recent breakthroughs (freshness) and specifically requests three examples (plurality)",
361
+ "languageStyle": "formal technical Chinese with academic undertones"
362
+ }
363
+
364
+ Question: "Bruder krass, kannst du mir erklären warum meine neural network training loss komplett durchdreht? Hab schon alles probiert 😤"
365
+ Evaluation: {
366
+ "needsFreshness": false,
367
+ "needsPlurality": true,
368
+ "reasoning": "Requires comprehensive debugging analysis of multiple potential issues",
369
+ "languageStyle": "frustrated German-English tech slang"
370
+ }
371
+
372
+ Question: "Does anyone have insights into the sociopolitical implications of GPT-4's emergence in the Global South, particularly regarding indigenous knowledge systems and linguistic diversity? Looking for a nuanced analysis."
373
+ Evaluation: {
374
+ "needsFreshness": true,
375
+ "needsPlurality": true,
376
+ "reasoning": "Requires analysis of current impacts (freshness) across multiple dimensions: sociopolitical, cultural, and linguistic (plurality)",
377
+ "languageStyle": "formal academic English with sociological terminology"
378
+ }
379
+ </examples>
380
+
381
+ Now evaluate this question:
382
+ Question: ${JSON.stringify(question)}`;
383
+ }
384
+
385
+ const TOOL_NAME = 'evaluator';
386
+
387
+ export async function evaluateQuestion(
388
+ question: string,
389
+ tracker?: TokenTracker
390
+ ): Promise<EvaluationCriteria> {
391
+ try {
392
+ const generator = new ObjectGeneratorSafe(tracker);
393
+
394
+ const result = await generator.generateObject({
395
+ model: TOOL_NAME,
396
+ schema: questionEvaluationSchema,
397
+ prompt: getQuestionEvaluationPrompt(question),
398
+ });
399
+
400
+ console.log('Question Evaluation:', result.object);
401
+
402
+ // Always include definitive in types
403
+ const types: EvaluationType[] = ['definitive'];
404
+ if (result.object.needsFreshness) types.push('freshness');
405
+ if (result.object.needsPlurality) types.push('plurality');
406
+
407
+ console.log('Question Metrics:', types);
408
+
409
+ // Always evaluate definitive first, then freshness (if needed), then plurality (if needed)
410
+ return {types, languageStyle: result.object.languageStyle};
411
+
412
+ } catch (error) {
413
+ console.error('Error in question evaluation:', error);
414
+ // Default to all evaluation types in case of error
415
+ return {types: ['definitive', 'freshness', 'plurality'], languageStyle: 'plain English'};
416
+ }
417
+ }
418
+
419
+
420
+ async function performEvaluation<T>(
421
+ evaluationType: EvaluationType,
422
+ params: {
423
+ schema: z.ZodType<T>;
424
+ prompt: string;
425
+ },
426
+ tracker?: TokenTracker
427
+ ): Promise<GenerateObjectResult<T>> {
428
+ const generator = new ObjectGeneratorSafe(tracker);
429
+
430
+ const result = await generator.generateObject({
431
+ model: TOOL_NAME,
432
+ schema: params.schema,
433
+ prompt: params.prompt,
434
+ });
435
+
436
+ console.log(`${evaluationType} ${TOOL_NAME}`, result.object);
437
+
438
+ return result as GenerateObjectResult<any>;
439
+ }
440
+
441
+
442
+ // Main evaluation function
443
+ export async function evaluateAnswer(
444
+ question: string,
445
+ action: AnswerAction,
446
+ evaluationCri: EvaluationCriteria,
447
+ tracker?: TokenTracker
448
+ ): Promise<{ response: EvaluationResponse }> {
449
+ let result;
450
+
451
+ // Only add attribution if we have valid references
452
+ if (action.references && action.references.length > 0) {
453
+ evaluationCri.types = ['attribution', ...evaluationCri.types];
454
+ }
455
+
456
+ for (const evaluationType of evaluationCri.types) {
457
+ switch (evaluationType) {
458
+ case 'attribution': {
459
+ // Safely handle references and ensure we have content
460
+ const urls = action.references?.map(ref => ref.url) ?? [];
461
+ const uniqueURLs = [...new Set(urls)];
462
+ const allKnowledge = await fetchSourceContent(uniqueURLs, tracker);
463
+
464
+ if (!allKnowledge.trim()) {
465
+ return {
466
+ response: {
467
+ pass: false,
468
+ think: "The answer does not provide any valid attribution references that could be verified. No accessible source content was found to validate the claims made in the answer.",
469
+ type: 'attribution',
470
+ }
471
+ };
472
+ }
473
+
474
+ result = await performEvaluation(
475
+ 'attribution',
476
+ {
477
+ schema: attributionSchema,
478
+ prompt: getAttributionPrompt(question, action.answer, allKnowledge),
479
+ },
480
+ tracker
481
+ );
482
+ break;
483
+ }
484
+
485
+ case 'definitive':
486
+ result = await performEvaluation(
487
+ 'definitive',
488
+ {
489
+ schema: definitiveSchema,
490
+ prompt: getDefinitivePrompt(question, action.answer),
491
+ },
492
+ tracker
493
+ );
494
+ break;
495
+
496
+ case 'freshness':
497
+ result = await performEvaluation(
498
+ 'freshness',
499
+ {
500
+ schema: freshnessSchema,
501
+ prompt: getFreshnessPrompt(question, action.answer, new Date().toISOString()),
502
+ },
503
+ tracker
504
+ );
505
+ break;
506
+
507
+ case 'plurality':
508
+ result = await performEvaluation(
509
+ 'plurality',
510
+ {
511
+ schema: pluralitySchema,
512
+ prompt: getPluralityPrompt(question, action.answer),
513
+ },
514
+ tracker
515
+ );
516
+ break;
517
+ }
518
+
519
+ if (!result?.object.pass) {
520
+ return {response: result.object};
521
+ }
522
+ }
523
+
524
+ return {response: result!.object};
525
+ }
526
+
527
+ // Helper function to fetch and combine source content
528
+ async function fetchSourceContent(urls: string[], tracker?: TokenTracker): Promise<string> {
529
+ if (!urls.length) return '';
530
+
531
+ try {
532
+ const results = await Promise.all(
533
+ urls.map(async (url) => {
534
+ try {
535
+ const {response} = await readUrl(url, tracker);
536
+ const content = response?.data?.content || '';
537
+ return removeAllLineBreaks(content);
538
+ } catch (error) {
539
+ console.error('Error reading URL:', error);
540
+ return '';
541
+ }
542
+ })
543
+ );
544
+
545
+ // Filter out empty results and join with proper separation
546
+ return results
547
+ .filter(content => content.trim())
548
+ .join('\n\n');
549
+ } catch (error) {
550
+ console.error('Error fetching source content:', error);
551
+ return '';
552
+ }
553
+ }
src/tools/grounding.ts ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { generateText } from 'ai';
2
+ import {getModel} from "../config";
3
+ import { GoogleGenerativeAIProviderMetadata } from '@ai-sdk/google';
4
+ import {TokenTracker} from "../utils/token-tracker";
5
+
6
// Resolved once at module load: the model configured for search grounding.
const model = getModel('searchGrounding')

/**
 * Answer a query using the search-grounding model and return the answer text
 * concatenated with the grounded source snippets, separated by '|'.
 *
 * Token usage is recorded under the 'grounding' tool on the given tracker
 * (or a throwaway tracker when none is supplied). Rethrows any SDK error
 * after logging it.
 */
export async function grounding(query: string, tracker?: TokenTracker): Promise<string> {
  try {
    const { text, experimental_providerMetadata, usage } = await generateText({
      model,
      prompt:
        `Current date is ${new Date().toISOString()}. Find the latest answer to the following question:
<query>
${query}
</query>
Must include the date and time of the latest answer.`,
    });

    // Provider-specific metadata: Google's grounding details live under the
    // 'google' key of the experimental provider metadata.
    const metadata = experimental_providerMetadata?.google as
      | GoogleGenerativeAIProviderMetadata
      | undefined;
    const groundingMetadata = metadata?.groundingMetadata;

    // Extract and concatenate all groundingSupport text into a single line.
    const groundedText = groundingMetadata?.groundingSupports
      ?.map(support => support.segment.text)
      .join(' ') || '';

    (tracker || new TokenTracker()).trackUsage('grounding', usage);
    console.log('Grounding:', {text, groundedText});
    return text + '|' + groundedText;

  } catch (error) {
    console.error('Error in search:', error);
    throw error;
  }
}
src/tools/jina-dedup.ts ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import axios, {AxiosError} from 'axios';
2
+ import {TokenTracker} from "../utils/token-tracker";
3
+ import {JINA_API_KEY} from "../config";
4
+
5
// Endpoint for Jina's batch embedding API.
const JINA_API_URL = 'https://api.jina.ai/v1/embeddings';
// Queries with cosine similarity at or above this are considered duplicates.
const SIMILARITY_THRESHOLD = 0.93; // Adjustable threshold for cosine similarity

// Fixed request parameters sent with every embedding call.
const JINA_API_CONFIG = {
  MODEL: 'jina-embeddings-v3',
  TASK: 'text-matching',
  DIMENSIONS: 1024,
  EMBEDDING_TYPE: 'float',
  LATE_CHUNKING: false
} as const;

// Types for Jina API

// Request payload for POST /v1/embeddings.
interface JinaEmbeddingRequest {
  model: string;
  task: string;
  late_chunking: boolean;
  dimensions: number;
  embedding_type: string;
  input: string[];
}

// Response shape: one embedding per input, tagged with its input index
// (order in `data` is not guaranteed to match the input order).
interface JinaEmbeddingResponse {
  model: string;
  object: string;
  usage: {
    total_tokens: number;
    prompt_tokens: number;
  };
  data: Array<{
    object: string;
    index: number;
    embedding: number[];
  }>;
}
39
+
40
+
41
+ // Compute cosine similarity between two vectors
42
+ function cosineSimilarity(vecA: number[], vecB: number[]): number {
43
+ const dotProduct = vecA.reduce((sum, a, i) => sum + a * vecB[i], 0);
44
+ const normA = Math.sqrt(vecA.reduce((sum, a) => sum + a * a, 0));
45
+ const normB = Math.sqrt(vecB.reduce((sum, b) => sum + b * b, 0));
46
+ return dotProduct / (normA * normB);
47
+ }
48
+
49
+ // Get embeddings for all queries in one batch
50
+ async function getEmbeddings(queries: string[]): Promise<{ embeddings: number[][], tokens: number }> {
51
+ if (!JINA_API_KEY) {
52
+ throw new Error('JINA_API_KEY is not set');
53
+ }
54
+
55
+ const request: JinaEmbeddingRequest = {
56
+ model: JINA_API_CONFIG.MODEL,
57
+ task: JINA_API_CONFIG.TASK,
58
+ late_chunking: JINA_API_CONFIG.LATE_CHUNKING,
59
+ dimensions: JINA_API_CONFIG.DIMENSIONS,
60
+ embedding_type: JINA_API_CONFIG.EMBEDDING_TYPE,
61
+ input: queries
62
+ };
63
+
64
+ try {
65
+ const response = await axios.post<JinaEmbeddingResponse>(
66
+ JINA_API_URL,
67
+ request,
68
+ {
69
+ headers: {
70
+ 'Content-Type': 'application/json',
71
+ 'Authorization': `Bearer ${JINA_API_KEY}`
72
+ }
73
+ }
74
+ );
75
+
76
+ // Validate response format
77
+ if (!response.data.data || response.data.data.length !== queries.length) {
78
+ console.error('Invalid response from Jina API:', response.data);
79
+ return {
80
+ embeddings: [],
81
+ tokens: 0
82
+ };
83
+ }
84
+
85
+ // Sort embeddings by index to maintain original order
86
+ const embeddings = response.data.data
87
+ .sort((a, b) => a.index - b.index)
88
+ .map(item => item.embedding);
89
+
90
+ return {
91
+ embeddings,
92
+ tokens: response.data.usage.total_tokens
93
+ };
94
+ } catch (error) {
95
+ console.error('Error getting embeddings from Jina:', error);
96
+ if (error instanceof AxiosError && error.response?.status === 402) {
97
+ return {
98
+ embeddings: [],
99
+ tokens: 0
100
+ };
101
+ }
102
+ throw error;
103
+ }
104
+ }
105
+
106
+ export async function dedupQueries(
107
+ newQueries: string[],
108
+ existingQueries: string[],
109
+ tracker?: TokenTracker
110
+ ): Promise<{ unique_queries: string[] }> {
111
+ try {
112
+ // Quick return for single new query with no existing queries
113
+ if (newQueries.length === 1 && existingQueries.length === 0) {
114
+ return {
115
+ unique_queries: newQueries,
116
+ };
117
+ }
118
+
119
+ // Get embeddings for all queries in one batch
120
+ const allQueries = [...newQueries, ...existingQueries];
121
+ const {embeddings: allEmbeddings, tokens} = await getEmbeddings(allQueries);
122
+
123
+ // If embeddings is empty (due to 402 error), return all new queries
124
+ if (!allEmbeddings.length) {
125
+ return {
126
+ unique_queries: newQueries,
127
+ };
128
+ }
129
+
130
+ // Split embeddings back into new and existing
131
+ const newEmbeddings = allEmbeddings.slice(0, newQueries.length);
132
+ const existingEmbeddings = allEmbeddings.slice(newQueries.length);
133
+
134
+ const uniqueQueries: string[] = [];
135
+ const usedIndices = new Set<number>();
136
+
137
+ // Compare each new query against existing queries and already accepted queries
138
+ for (let i = 0; i < newQueries.length; i++) {
139
+ let isUnique = true;
140
+
141
+ // Check against existing queries
142
+ for (let j = 0; j < existingQueries.length; j++) {
143
+ const similarity = cosineSimilarity(newEmbeddings[i], existingEmbeddings[j]);
144
+ if (similarity >= SIMILARITY_THRESHOLD) {
145
+ isUnique = false;
146
+ break;
147
+ }
148
+ }
149
+
150
+ // Check against already accepted queries
151
+ if (isUnique) {
152
+ for (const usedIndex of usedIndices) {
153
+ const similarity = cosineSimilarity(newEmbeddings[i], newEmbeddings[usedIndex]);
154
+ if (similarity >= SIMILARITY_THRESHOLD) {
155
+ isUnique = false;
156
+ break;
157
+ }
158
+ }
159
+ }
160
+
161
+ // Add to unique queries if passed all checks
162
+ if (isUnique) {
163
+ uniqueQueries.push(newQueries[i]);
164
+ usedIndices.add(i);
165
+ }
166
+ }
167
+
168
+ // Track token usage from the API
169
+ (tracker || new TokenTracker()).trackUsage('dedup', {
170
+ promptTokens: tokens,
171
+ completionTokens: 0,
172
+ totalTokens: tokens
173
+ });
174
+ console.log('Dedup:', uniqueQueries);
175
+ return {
176
+ unique_queries: uniqueQueries,
177
+ };
178
+ } catch (error) {
179
+ console.error('Error in deduplication analysis:', error);
180
+ throw error;
181
+ }
182
+ }
src/tools/jina-search.ts ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import https from 'https';
2
+ import { TokenTracker } from "../utils/token-tracker";
3
+ import { SearchResponse } from '../types';
4
+ import { JINA_API_KEY } from "../config";
5
+
6
+ export function search(query: string, tracker?: TokenTracker): Promise<{ response: SearchResponse}> {
7
+ return new Promise((resolve, reject) => {
8
+ if (!query.trim()) {
9
+ reject(new Error('Query cannot be empty'));
10
+ return;
11
+ }
12
+
13
+ const options = {
14
+ hostname: 's.jina.ai',
15
+ port: 443,
16
+ path: `/${encodeURIComponent(query)}?count=0`,
17
+ method: 'GET',
18
+ headers: {
19
+ 'Accept': 'application/json',
20
+ 'Authorization': `Bearer ${JINA_API_KEY}`,
21
+ 'X-Retain-Images': 'none'
22
+ }
23
+ };
24
+
25
+ const req = https.request(options, (res) => {
26
+ let responseData = '';
27
+
28
+ res.on('data', (chunk) => responseData += chunk);
29
+
30
+ res.on('end', () => {
31
+ // Check HTTP status code first
32
+ if (res.statusCode && res.statusCode >= 400) {
33
+ try {
34
+ // Try to parse error message from response if available
35
+ const errorResponse = JSON.parse(responseData);
36
+ if (res.statusCode === 402) {
37
+ reject(new Error(errorResponse.readableMessage || 'Insufficient balance'));
38
+ return;
39
+ }
40
+ reject(new Error(errorResponse.readableMessage || `HTTP Error ${res.statusCode}`));
41
+ } catch {
42
+ // If parsing fails, just return the status code
43
+ reject(new Error(`HTTP Error ${res.statusCode}`));
44
+ }
45
+ return;
46
+ }
47
+
48
+ // Only parse JSON for successful responses
49
+ let response: SearchResponse;
50
+ try {
51
+ response = JSON.parse(responseData) as SearchResponse;
52
+ } catch (error: unknown) {
53
+ reject(new Error(`Failed to parse response: ${error instanceof Error ? error.message : 'Unknown error'}`));
54
+ return;
55
+ }
56
+
57
+ if (!response.data || !Array.isArray(response.data)) {
58
+ reject(new Error('Invalid response format'));
59
+ return;
60
+ }
61
+
62
+ const totalTokens = response.data.reduce((sum, item) => sum + (item.usage?.tokens || 0), 0);
63
+ console.log('Total URLs:', response.data.length);
64
+
65
+ const tokenTracker = tracker || new TokenTracker();
66
+ tokenTracker.trackUsage('search', {
67
+ totalTokens,
68
+ promptTokens: query.length,
69
+ completionTokens: totalTokens
70
+ });
71
+
72
+ resolve({ response });
73
+ });
74
+ });
75
+
76
+ // Add timeout handling
77
+ req.setTimeout(30000, () => {
78
+ req.destroy();
79
+ reject(new Error('Request timed out'));
80
+ });
81
+
82
+ req.on('error', (error) => {
83
+ reject(new Error(`Request failed: ${error.message}`));
84
+ });
85
+
86
+ req.end();
87
+ });
88
+ }
src/tools/query-rewriter.ts ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { z } from 'zod';
2
+ import { TokenTracker } from "../utils/token-tracker";
3
+ import { SearchAction } from '../types';
4
+ import {ObjectGeneratorSafe} from "../utils/safe-generator";
5
+
6
+
7
// Schema for the query rewriter's structured output: strategic reasoning
// plus 1-3 search queries that should be orthogonal to each other.
const responseSchema = z.object({
  think: z.string().describe('Strategic reasoning about query complexity and search approach'),
  queries: z.array(z.string().describe('Search query, must be less than 30 characters'))
    .min(1)
    .max(3)
    .describe('Array of search queries, orthogonal to each other')
});
14
+
15
+
16
+
17
/**
 * Build the query-optimization prompt for a SearchAction: rules, supported
 * search operators, and few-shot examples, followed by the action's query
 * and intent. The output contract is enforced separately by responseSchema.
 */
function getPrompt(action: SearchAction): string {
  return `You are an expert Information Retrieval query optimizer. Optimize user queries into precise keyword combinations with strategic reasoning and appropriate search operators.

<rules>
1. Generate search queries that directly include appropriate operators
2. Keep base keywords minimal: 2-3 words preferred
3. Use exact match quotes for specific phrases that must stay together
4. Split queries only when necessary for distinctly different aspects
5. Preserve crucial qualifiers while removing fluff words
6. Make the query resistant to SEO manipulation
7. When necessary, append <query-operators> at the end only when must needed


<query-operators>
A query can't only have operators; and operators can't be at the start a query;

- "phrase" : exact match for phrases
- +term : must include term; for critical terms that must appear
- -term : exclude term; exclude irrelevant or ambiguous terms
- filetype:pdf/doc : specific file type
- site:example.com : limit to specific site
- lang:xx : language filter (ISO 639-1 code)
- loc:xx : location filter (ISO 3166-1 code)
- intitle:term : term must be in title
- inbody:term : term must be in body text
</query-operators>

</rules>

<examples>
Input Query: What's the difference between ReactJS and Vue.js for building web applications?
<think>
This is a comparison query. User is likely looking for technical evaluation and objective feature comparisons, possibly for framework selection decisions. We'll split this into separate queries to capture both high-level differences and specific technical aspects.
</think>
Queries: [
"react performance",
"vue performance",
"react vue comparison",
]

Input Query: How to fix a leaking kitchen faucet?
<think>
This is a how-to query seeking practical solutions. User likely wants step-by-step guidance and visual demonstrations for DIY repair. We'll target both video tutorials and written guides.
</think>
Output Queries: [
"kitchen faucet leak repair",
"faucet drip fix site:youtube.com",
"how to repair faucet "
]

Input Query: What are healthy breakfast options for type 2 diabetes?
<think>
This is a health-specific informational query. User needs authoritative medical advice combined with practical meal suggestions. Splitting into medical guidelines and recipes will provide comprehensive coverage.
</think>
Output Queries: [
"what to eat for type 2 diabetes",
"type 2 diabetes breakfast guidelines",
"diabetic breakfast recipes"
]

Input Query: Latest AWS Lambda features for serverless applications
<think>
This is a product research query focused on recent updates. User wants current information about specific technology features, likely for implementation purposes. We'll target official docs and community insights.
</think>
Output Queries: [
"aws lambda features site:aws.amazon.com intitle:2025",
"new features lambda serverless"
]
</examples>

Now, process this query:
Input Query: ${action.searchQuery}
Intention: ${action.think}
`;
}
92
+
93
+ const TOOL_NAME = 'queryRewriter';
94
+
95
+ export async function rewriteQuery(action: SearchAction, tracker?: TokenTracker): Promise<{ queries: string[] }> {
96
+ try {
97
+ const generator = new ObjectGeneratorSafe(tracker);
98
+ const prompt = getPrompt(action);
99
+
100
+ const result = await generator.generateObject({
101
+ model: TOOL_NAME,
102
+ schema: responseSchema,
103
+ prompt,
104
+ });
105
+
106
+ console.log(TOOL_NAME, result.object.queries);
107
+ return { queries: result.object.queries };
108
+ } catch (error) {
109
+ console.error(`Error in ${TOOL_NAME}`, error);
110
+ throw error;
111
+ }
112
+ }
src/tools/read.ts ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import https from 'https';
2
+ import { TokenTracker } from "../utils/token-tracker";
3
+ import { ReadResponse } from '../types';
4
+ import { JINA_API_KEY } from "../config";
5
+
6
+ export function readUrl(url: string, tracker?: TokenTracker): Promise<{ response: ReadResponse }> {
7
+ return new Promise((resolve, reject) => {
8
+ if (!url.trim()) {
9
+ reject(new Error('URL cannot be empty'));
10
+ return;
11
+ }
12
+
13
+ const data = JSON.stringify({ url });
14
+
15
+ const options = {
16
+ hostname: 'r.jina.ai',
17
+ port: 443,
18
+ path: '/',
19
+ method: 'POST',
20
+ headers: {
21
+ 'Accept': 'application/json',
22
+ 'Authorization': `Bearer ${JINA_API_KEY}`,
23
+ 'Content-Type': 'application/json',
24
+ 'Content-Length': data.length,
25
+ 'X-Retain-Images': 'none',
26
+ 'X-Return-Format': 'markdown'
27
+ }
28
+ };
29
+
30
+ const req = https.request(options, (res) => {
31
+ let responseData = '';
32
+
33
+ res.on('data', (chunk) => responseData += chunk);
34
+
35
+ res.on('end', () => {
36
+ // Check HTTP status code first
37
+ if (res.statusCode && res.statusCode >= 400) {
38
+ try {
39
+ // Try to parse error message from response if available
40
+ const errorResponse = JSON.parse(responseData);
41
+ if (res.statusCode === 402) {
42
+ reject(new Error(errorResponse.readableMessage || 'Insufficient balance'));
43
+ return;
44
+ }
45
+ reject(new Error(errorResponse.readableMessage || `HTTP Error ${res.statusCode}`));
46
+ } catch (error: unknown) {
47
+ // If parsing fails, just return the status code
48
+ reject(new Error(`HTTP Error ${res.statusCode}`));
49
+ }
50
+ return;
51
+ }
52
+
53
+ // Only parse JSON for successful responses
54
+ let response: ReadResponse;
55
+ try {
56
+ response = JSON.parse(responseData) as ReadResponse;
57
+ } catch (error: unknown) {
58
+ reject(new Error(`Failed to parse response: ${error instanceof Error ? error.message : 'Unknown error'}`));
59
+ return;
60
+ }
61
+
62
+ if (!response.data) {
63
+ reject(new Error('Invalid response data'));
64
+ return;
65
+ }
66
+
67
+ console.log('Read:', {
68
+ title: response.data.title,
69
+ url: response.data.url,
70
+ tokens: response.data.usage?.tokens || 0
71
+ });
72
+
73
+ const tokens = response.data.usage?.tokens || 0;
74
+ const tokenTracker = tracker || new TokenTracker();
75
+ tokenTracker.trackUsage('read', {
76
+ totalTokens: tokens,
77
+ promptTokens: url.length,
78
+ completionTokens: tokens
79
+ });
80
+
81
+ resolve({ response });
82
+ });
83
+ });
84
+
85
+ // Add timeout handling
86
+ req.setTimeout(30000, () => {
87
+ req.destroy();
88
+ reject(new Error('Request timed out'));
89
+ });
90
+
91
+ req.on('error', (error: Error) => {
92
+ reject(new Error(`Request failed: ${error.message}`));
93
+ });
94
+
95
+ req.write(data);
96
+ req.end();
97
+ });
98
+ }
99
+
100
+ export function removeAllLineBreaks(text: string) {
101
+ return text.replace(/(\r\n|\n|\r)/gm, " ");
102
+ }
src/types.ts ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Action Types
import {CoreAssistantMessage, CoreUserMessage, LanguageModelUsage} from "ai";

// Fields shared by every agent action: the action kind and its reasoning.
type BaseAction = {
  action: "search" | "answer" | "reflect" | "visit";
  think: string;
};

// Run a web search for the given query.
export type SearchAction = BaseAction & {
  action: "search";
  searchQuery: string;
};

// Produce an answer, optionally backed by quoted references.
export type AnswerAction = BaseAction & {
  action: "answer";
  answer: string;
  references: Array<{
    exactQuote: string;
    url: string;
  }>;
  // Set when this answer terminates the agent loop.
  isFinal?: boolean;
};


// A unit of accumulated knowledge (Q/A pair, side info, chat turn, or URL).
export type KnowledgeItem = {
  question: string,
  answer: string,
  references?: Array<{
    exactQuote: string;
    url: string;
  }> | Array<any>;
  type: 'qa' | 'side-info' | 'chat-history' | 'url',
  // Timestamp (or similar marker) of when this item was last updated —
  // format not fixed by this file; verify against producers.
  updated: string,
}

// Break the question down into sub-questions to answer first.
export type ReflectAction = BaseAction & {
  action: "reflect";
  questionsToAnswer: string[];
};

// Visit (read) the listed URLs.
export type VisitAction = BaseAction & {
  action: "visit";
  URLTargets: string[];
};

// Discriminated union over all agent actions, keyed on `action`.
export type StepAction = SearchAction | AnswerAction | ReflectAction | VisitAction;

// The evaluation passes an answer can be checked against.
export type EvaluationType = 'definitive' | 'freshness' | 'plurality' | 'attribution';
export type EvaluationCriteria = {
  types: EvaluationType[];
  // Detected language + tone of the question, used to style the answer.
  languageStyle: string;
};

// Following Vercel AI SDK's token counting interface
export interface TokenUsage {
  tool: string;
  usage: LanguageModelUsage;
}

// Response shape of the Jina search endpoint (s.jina.ai).
export interface SearchResponse {
  code: number;
  status: number;
  data: Array<{
    title: string;
    description: string;
    url: string;
    content: string;
    usage: { tokens: number; };
  }> | null;
  // Error fields, present on failures.
  name?: string;
  message?: string;
  readableMessage?: string;
}

// Response shape of the Brave web search API.
export interface BraveSearchResponse {
  web: {
    results: Array<{
      title: string;
      description: string;
      url: string;
    }>;
  };
}

export type DedupResponse = {
  think: string;
  unique_queries: string[];
};

// Response shape of the Jina reader endpoint (r.jina.ai).
export interface ReadResponse {
  code: number;
  status: number;
  data?: {
    title: string;
    description: string;
    url: string;
    content: string;
    usage: { tokens: number; };
  };
  // Error fields, present on failures.
  name?: string;
  message?: string;
  readableMessage?: string;
}


// Result of one evaluation pass; the optional analyses are populated only
// by their corresponding evaluation type.
export type EvaluationResponse = {
  pass: boolean;
  think: string;
  type?: 'definitive' | 'freshness' | 'plurality' | 'attribution';
  freshness_analysis?: {
    likely_outdated: boolean;
    dates_mentioned: string[];
    current_time: string;
    max_age_days?: number;
  };
  plurality_analysis?: {
    expects_multiple: boolean;
    provides_multiple: boolean;
    count_expected?: number;
    count_provided: number;
  };
};

// Post-mortem of a failed agent run: what happened, why, and what to ask next.
export type ErrorAnalysisResponse = {
  recap: string;
  blame: string;
  improvement: string;
  questionsToAnswer: string[];
};

export interface SearchResult {
  title: string;
  url: string;
  description: string;
}

export interface QueryResult {
  query: string;
  results: SearchResult[];
}

// Snapshot of a single agent step, for logging/streaming.
export interface StepData {
  step: number;
  question: string;
  action: string;
  reasoning: string;
  searchQuery?: string;
  result?: QueryResult[];
}

export type KeywordsResponse = {
  think: string;
  queries: string[];
};

// One server-sent message in a streaming agent session.
export interface StreamMessage {
  type: 'progress' | 'answer' | 'error';
  data: string | StepAction;
  step?: number;
  // Token-budget progress for the run.
  budget?: {
    used: number;
    total: number;
    percentage: string;
  };
}

// OpenAI API Types
export interface Model {
  id: string;
  object: 'model';
  created: number;
  owned_by: string;
}

// OpenAI-compatible chat completion request (subset supported here).
export interface ChatCompletionRequest {
  model: string;
  messages: Array<CoreUserMessage | CoreAssistantMessage>;
  stream?: boolean;
  reasoning_effort?: 'low' | 'medium' | 'high' | null;
  max_completion_tokens?: number | null;
}

// OpenAI-compatible non-streaming chat completion response.
export interface ChatCompletionResponse {
  id: string;
  object: 'chat.completion';
  created: number;
  model: string;
  system_fingerprint: string;
  choices: Array<{
    index: number;
    message: {
      role: 'assistant';
      content: string;
    };
    logprobs: null;
    finish_reason: 'stop';
  }>;
  usage: {
    prompt_tokens: number;
    completion_tokens: number;
    total_tokens: number;
  };
}

// OpenAI-compatible streaming chunk.
export interface ChatCompletionChunk {
  id: string;
  object: 'chat.completion.chunk';
  created: number;
  model: string;
  system_fingerprint: string;
  choices: Array<{
    index: number;
    delta: {
      role?: 'assistant';
      content?: string;
    };
    logprobs: null;
    finish_reason: null | 'stop';
  }>;
  usage?: any;
}

// Tracker Types
import {TokenTracker} from './utils/token-tracker';
import {ActionTracker} from './utils/action-tracker';

// Bundle of per-run trackers threaded through the agent.
export interface TrackerContext {
  tokenTracker: TokenTracker;
  actionTracker: ActionTracker;
}