zhengr commited on
Commit
0bcc252
·
1 Parent(s): cd76fd8
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. Dockerfile +45 -0
  2. LICENSE +193 -0
  3. README2.md +328 -0
  4. config.json +63 -0
  5. docker-compose.yml +14 -0
  6. jest.config.js +6 -0
  7. jest.setup.js +1 -0
  8. jina-ai/.dockerignore +1 -0
  9. jina-ai/Dockerfile +50 -0
  10. jina-ai/config.json +69 -0
  11. jina-ai/package-lock.json +0 -0
  12. jina-ai/package.json +39 -0
  13. jina-ai/src/dto/jina-embeddings-auth.ts +347 -0
  14. jina-ai/src/lib/async-context.ts +9 -0
  15. jina-ai/src/lib/billing.ts +102 -0
  16. jina-ai/src/lib/env-config.ts +59 -0
  17. jina-ai/src/lib/errors.ts +70 -0
  18. jina-ai/src/lib/firestore.ts +223 -0
  19. jina-ai/src/lib/logger.ts +56 -0
  20. jina-ai/src/lib/registry.ts +4 -0
  21. jina-ai/src/patch-express.ts +162 -0
  22. jina-ai/src/rate-limit.ts +278 -0
  23. jina-ai/src/server.ts +56 -0
  24. jina-ai/tsconfig.json +17 -0
  25. package-lock.json +0 -0
  26. package.json +63 -0
  27. src/__tests__/agent.test.ts +51 -0
  28. src/__tests__/docker.test.ts +41 -0
  29. src/__tests__/server.test.ts +300 -0
  30. src/agent.ts +774 -0
  31. src/app.ts +646 -0
  32. src/cli.ts +48 -0
  33. src/config.ts +158 -0
  34. src/evals/batch-evals.ts +209 -0
  35. src/evals/ego-questions.json +82 -0
  36. src/server.ts +15 -0
  37. src/tools/__tests__/error-analyzer.test.ts +31 -0
  38. src/tools/__tests__/evaluator.test.ts +62 -0
  39. src/tools/__tests__/read.test.ts +21 -0
  40. src/tools/__tests__/search.test.ts +24 -0
  41. src/tools/brave-search.ts +22 -0
  42. src/tools/dedup.ts +89 -0
  43. src/tools/error-analyzer.ts +134 -0
  44. src/tools/evaluator.ts +553 -0
  45. src/tools/grounding.ts +38 -0
  46. src/tools/jina-dedup.ts +182 -0
  47. src/tools/jina-search.ts +88 -0
  48. src/tools/query-rewriter.ts +112 -0
  49. src/tools/read.ts +102 -0
  50. src/types.ts +230 -0
Dockerfile ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ---- BUILD STAGE ----
2
+ FROM node:20-slim AS builder
3
+
4
+ # Set working directory
5
+ WORKDIR /app
6
+
7
+ # Copy package.json and package-lock.json
8
+ COPY package*.json ./
9
+
10
+ # Install dependencies
11
+ RUN npm install --ignore-scripts
12
+
13
+ # Copy application code
14
+ COPY . .
15
+
16
+ # Build the application
17
+ RUN npm run build --ignore-scripts
18
+
19
+ # ---- PRODUCTION STAGE ----
20
+ FROM node:20-slim AS production
21
+
22
+ # Set working directory
23
+ WORKDIR /app
24
+
25
+ # Copy package.json and package-lock.json
26
+ COPY package*.json ./
27
+
28
+ # Install production dependencies only
29
+ RUN npm install --production --ignore-scripts
30
+
31
+ # Copy config.json and built files from builder
32
+ COPY --from=builder /app/config.json ./
33
+ COPY --from=builder /app/dist ./dist
34
+
35
+ # Set environment variables (Recommended to set at runtime, avoid hardcoding)
36
+ ENV GEMINI_API_KEY=${GEMINI_API_KEY}
37
+ ENV OPENAI_API_KEY=${OPENAI_API_KEY}
38
+ ENV JINA_API_KEY=${JINA_API_KEY}
39
+ ENV BRAVE_API_KEY=${BRAVE_API_KEY}
40
+
41
+ # Expose the port the app runs on
42
+ EXPOSE 3000
43
+
44
+ # Set startup command
45
+ CMD ["node", "./dist/server.js"]
LICENSE ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Copyright 2020-2025 Jina AI Limited. All rights reserved.
2
+
3
+
4
+ Apache License
5
+ Version 2.0, January 2004
6
+ http://www.apache.org/licenses/
7
+
8
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
9
+
10
+ 1. Definitions.
11
+
12
+ "License" shall mean the terms and conditions for use, reproduction,
13
+ and distribution as defined by Sections 1 through 9 of this document.
14
+
15
+ "Licensor" shall mean the copyright owner or entity authorized by
16
+ the copyright owner that is granting the License.
17
+
18
+ "Legal Entity" shall mean the union of the acting entity and all
19
+ other entities that control, are controlled by, or are under common
20
+ control with that entity. For the purposes of this definition,
21
+ "control" means (i) the power, direct or indirect, to cause the
22
+ direction or management of such entity, whether by contract or
23
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
24
+ outstanding shares, or (iii) beneficial ownership of such entity.
25
+
26
+ "You" (or "Your") shall mean an individual or Legal Entity
27
+ exercising permissions granted by this License.
28
+
29
+ "Source" form shall mean the preferred form for making modifications,
30
+ including but not limited to software source code, documentation
31
+ source, and configuration files.
32
+
33
+ "Object" form shall mean any form resulting from mechanical
34
+ transformation or translation of a Source form, including but
35
+ not limited to compiled object code, generated documentation,
36
+ and conversions to other media types.
37
+
38
+ "Work" shall mean the work of authorship, whether in Source or
39
+ Object form, made available under the License, as indicated by a
40
+ copyright notice that is included in or attached to the work
41
+ (an example is provided in the Appendix below).
42
+
43
+ "Derivative Works" shall mean any work, whether in Source or Object
44
+ form, that is based on (or derived from) the Work and for which the
45
+ editorial revisions, annotations, elaborations, or other modifications
46
+ represent, as a whole, an original work of authorship. For the purposes
47
+ of this License, Derivative Works shall not include works that remain
48
+ separable from, or merely link (or bind by name) to the interfaces of,
49
+ the Work and Derivative Works thereof.
50
+
51
+ "Contribution" shall mean any work of authorship, including
52
+ the original version of the Work and any modifications or additions
53
+ to that Work or Derivative Works thereof, that is intentionally
54
+ submitted to Licensor for inclusion in the Work by the copyright owner
55
+ or by an individual or Legal Entity authorized to submit on behalf of
56
+ the copyright owner. For the purposes of this definition, "submitted"
57
+ means any form of electronic, verbal, or written communication sent
58
+ to the Licensor or its representatives, including but not limited to
59
+ communication on electronic mailing lists, source code control systems,
60
+ and issue tracking systems that are managed by, or on behalf of, the
61
+ Licensor for the purpose of discussing and improving the Work, but
62
+ excluding communication that is conspicuously marked or otherwise
63
+ designated in writing by the copyright owner as "Not a Contribution."
64
+
65
+ "Contributor" shall mean Licensor and any individual or Legal Entity
66
+ on behalf of whom a Contribution has been received by Licensor and
67
+ subsequently incorporated within the Work.
68
+
69
+ 2. Grant of Copyright License. Subject to the terms and conditions of
70
+ this License, each Contributor hereby grants to You a perpetual,
71
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
72
+ copyright license to reproduce, prepare Derivative Works of,
73
+ publicly display, publicly perform, sublicense, and distribute the
74
+ Work and such Derivative Works in Source or Object form.
75
+
76
+ 3. Grant of Patent License. Subject to the terms and conditions of
77
+ this License, each Contributor hereby grants to You a perpetual,
78
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
79
+ (except as stated in this section) patent license to make, have made,
80
+ use, offer to sell, sell, import, and otherwise transfer the Work,
81
+ where such license applies only to those patent claims licensable
82
+ by such Contributor that are necessarily infringed by their
83
+ Contribution(s) alone or by combination of their Contribution(s)
84
+ with the Work to which such Contribution(s) was submitted. If You
85
+ institute patent litigation against any entity (including a
86
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
87
+ or a Contribution incorporated within the Work constitutes direct
88
+ or contributory patent infringement, then any patent licenses
89
+ granted to You under this License for that Work shall terminate
90
+ as of the date such litigation is filed.
91
+
92
+ 4. Redistribution. You may reproduce and distribute copies of the
93
+ Work or Derivative Works thereof in any medium, with or without
94
+ modifications, and in Source or Object form, provided that You
95
+ meet the following conditions:
96
+
97
+ (a) You must give any other recipients of the Work or
98
+ Derivative Works a copy of this License; and
99
+
100
+ (b) You must cause any modified files to carry prominent notices
101
+ stating that You changed the files; and
102
+
103
+ (c) You must retain, in the Source form of any Derivative Works
104
+ that You distribute, all copyright, patent, trademark, and
105
+ attribution notices from the Source form of the Work,
106
+ excluding those notices that do not pertain to any part of
107
+ the Derivative Works; and
108
+
109
+ (d) If the Work includes a "NOTICE" text file as part of its
110
+ distribution, then any Derivative Works that You distribute must
111
+ include a readable copy of the attribution notices contained
112
+ within such NOTICE file, excluding those notices that do not
113
+ pertain to any part of the Derivative Works, in at least one
114
+ of the following places: within a NOTICE text file distributed
115
+ as part of the Derivative Works; within the Source form or
116
+ documentation, if provided along with the Derivative Works; or,
117
+ within a display generated by the Derivative Works, if and
118
+ wherever such third-party notices normally appear. The contents
119
+ of the NOTICE file are for informational purposes only and
120
+ do not modify the License. You may add Your own attribution
121
+ notices within Derivative Works that You distribute, alongside
122
+ or as an addendum to the NOTICE text from the Work, provided
123
+ that such additional attribution notices cannot be construed
124
+ as modifying the License.
125
+
126
+ You may add Your own copyright statement to Your modifications and
127
+ may provide additional or different license terms and conditions
128
+ for use, reproduction, or distribution of Your modifications, or
129
+ for any such Derivative Works as a whole, provided Your use,
130
+ reproduction, and distribution of the Work otherwise complies with
131
+ the conditions stated in this License.
132
+
133
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
134
+ any Contribution intentionally submitted for inclusion in the Work
135
+ by You to the Licensor shall be under the terms and conditions of
136
+ this License, without any additional terms or conditions.
137
+ Notwithstanding the above, nothing herein shall supersede or modify
138
+ the terms of any separate license agreement you may have executed
139
+ with Licensor regarding such Contributions.
140
+
141
+ 6. Trademarks. This License does not grant permission to use the trade
142
+ names, trademarks, service marks, or product names of the Licensor,
143
+ except as required for reasonable and customary use in describing the
144
+ origin of the Work and reproducing the content of the NOTICE file.
145
+
146
+ 7. Disclaimer of Warranty. Unless required by applicable law or
147
+ agreed to in writing, Licensor provides the Work (and each
148
+ Contributor provides its Contributions) on an "AS IS" BASIS,
149
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
150
+ implied, including, without limitation, any warranties or conditions
151
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
152
+ PARTICULAR PURPOSE. You are solely responsible for determining the
153
+ appropriateness of using or redistributing the Work and assume any
154
+ risks associated with Your exercise of permissions under this License.
155
+
156
+ 8. Limitation of Liability. In no event and under no legal theory,
157
+ whether in tort (including negligence), contract, or otherwise,
158
+ unless required by applicable law (such as deliberate and grossly
159
+ negligent acts) or agreed to in writing, shall any Contributor be
160
+ liable to You for damages, including any direct, indirect, special,
161
+ incidental, or consequential damages of any character arising as a
162
+ result of this License or out of the use or inability to use the
163
+ Work (including but not limited to damages for loss of goodwill,
164
+ work stoppage, computer failure or malfunction, or any and all
165
+ other commercial damages or losses), even if such Contributor
166
+ has been advised of the possibility of such damages.
167
+
168
+ 9. Accepting Warranty or Additional Liability. While redistributing
169
+ the Work or Derivative Works thereof, You may choose to offer,
170
+ and charge a fee for, acceptance of support, warranty, indemnity,
171
+ or other liability obligations and/or rights consistent with this
172
+ License. However, in accepting such obligations, You may act only
173
+ on Your own behalf and on Your sole responsibility, not on behalf
174
+ of any other Contributor, and only if You agree to indemnify,
175
+ defend, and hold each Contributor harmless for any liability
176
+ incurred by, or claims asserted against, such Contributor by reason
177
+ of your accepting any such warranty or additional liability.
178
+
179
+ END OF TERMS AND CONDITIONS
180
+
181
+ Copyright 2020-2021 Jina AI Limited
182
+
183
+ Licensed under the Apache License, Version 2.0 (the "License");
184
+ you may not use this file except in compliance with the License.
185
+ You may obtain a copy of the License at
186
+
187
+ http://www.apache.org/licenses/LICENSE-2.0
188
+
189
+ Unless required by applicable law or agreed to in writing, software
190
+ distributed under the License is distributed on an "AS IS" BASIS,
191
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
192
+ See the License for the specific language governing permissions and
193
+ limitations under the License.
README2.md ADDED
@@ -0,0 +1,328 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # DeepResearch
2
+
3
+ [Demo](https://jina.ai/deepsearch#demo) | [API](#official-api) | [Evaluation](#evaluation)
4
+
5
+ Keep searching, reading webpages, reasoning until an answer is found (or the token budget is exceeded). Useful for deeply investigating a query.
6
+
7
+ ```mermaid
8
+ ---
9
+ config:
10
+ theme: mc
11
+ look: handDrawn
12
+ ---
13
+ flowchart LR
14
+ subgraph Loop["until budget exceed"]
15
+ direction LR
16
+ Search["Search"]
17
+ Read["Read"]
18
+ Reason["Reason"]
19
+ end
20
+ Query(["Query"]) --> Loop
21
+ Search --> Read
22
+ Read --> Reason
23
+ Reason --> Search
24
+ Loop --> Answer(["Answer"])
25
+
26
+ ```
27
+
28
+ Unlike OpenAI and Gemini's Deep Research capabilities, we focus solely on **delivering accurate answers through our iterative process**. We don't optimize for long-form articles – if you need quick, precise answers from deep search, you're in the right place. If you're looking for AI-generated reports like OpenAI/Gemini do, this isn't for you.
29
+
30
+ ## Install
31
+
32
+ ```bash
33
+ git clone https://github.com/jina-ai/node-DeepResearch.git
34
+ cd node-DeepResearch
35
+ npm install
36
+ ```
37
+
38
+ [安装部署视频教程 on Youtube](https://youtu.be/vrpraFiPUyA)
39
+
40
+ It is also available on npm but not recommended for now, as the code is still under active development.
41
+
42
+
43
+ ## Usage
44
+
45
+ We use Gemini (latest `gemini-2.0-flash`) / OpenAI / [LocalLLM](#use-local-llm) for reasoning, [Jina Reader](https://jina.ai/reader) for searching and reading webpages, you can get a free API key with 1M tokens from jina.ai.
46
+
47
+ ```bash
48
+ export GEMINI_API_KEY=... # for gemini
49
+ # export OPENAI_API_KEY=... # for openai
50
+ # export LLM_PROVIDER=openai # for openai
51
+ export JINA_API_KEY=jina_... # free jina api key, get from https://jina.ai/reader
52
+
53
+ npm run dev $QUERY
54
+ ```
55
+
56
+ ### Official API
57
+
58
+ You can also use our official DeepSearch API, hosted and optimized by Jina AI:
59
+
60
+ ```
61
+ https://deepsearch.jina.ai/v1/chat/completions
62
+ ```
63
+
64
+ You can use it with any OpenAI-compatible client. For the Bearer authentication token, get your Jina API key from https://jina.ai
65
+
66
+ #### Client integration guidelines
67
+
68
+ If you are building a web/local/mobile client that uses `Jina DeepSearch API`, here are some design guidelines:
69
+ - Our API is fully compatible with [OpenAI API schema](https://platform.openai.com/docs/api-reference/chat/create), this should greatly simplify the integration process. The model name is `jina-deepsearch-v1`.
70
+ - Our DeepSearch API is a reasoning+search grounding LLM, so it's best for questions that require deep reasoning and search.
71
+ - Two special tokens are introduced `<think>...</think>`, `<references>...</references>`. Please render them with care.
72
+ - Guide the user to get a Jina API key from https://jina.ai, with 1M free tokens for new API key.
73
+ - There are rate limits, [between 10RPM to 30RPM depending on the API key tier](https://jina.ai/contact-sales#rate-limit).
74
+ - [Download Jina AI logo here](https://jina.ai/logo-Jina-1024.zip)
75
+
76
+ ## Demo
77
+ > was recorded with `gemini-1.5-flash`, the latest `gemini-2.0-flash` leads to much better results!
78
+
79
+ Query: `"what is the latest blog post's title from jina ai?"`
80
+ 3 steps; answer is correct!
81
+ ![demo1](.github/visuals/demo.gif)
82
+
83
+ Query: `"what is the context length of readerlm-v2?"`
84
+ 2 steps; answer is correct!
85
+ ![demo1](.github/visuals/demo3.gif)
86
+
87
+ Query: `"list all employees from jina ai that u can find, as many as possible"`
88
+ 11 steps; partially correct! but im not in the list :(
89
+ ![demo1](.github/visuals/demo2.gif)
90
+
91
+ Query: `"who will be the biggest competitor of Jina AI"`
92
+ 42 steps; future prediction kind, so it's arguably correct! atm Im not seeing `weaviate` as a competitor, but im open for the future "i told you so" moment.
93
+ ![demo1](.github/visuals/demo4.gif)
94
+
95
+ More examples:
96
+
97
+ ```
98
+ # example: no tool calling
99
+ npm run dev "1+1="
100
+ npm run dev "what is the capital of France?"
101
+
102
+ # example: 2-step
103
+ npm run dev "what is the latest news from Jina AI?"
104
+
105
+ # example: 3-step
106
+ npm run dev "what is the twitter account of jina ai's founder"
107
+
108
+ # example: 13-step, ambiguous question (no def of "big")
109
+ npm run dev "who is bigger? cohere, jina ai, voyage?"
110
+
111
+ # example: open question, research-like, long chain of thoughts
112
+ npm run dev "who will be president of US in 2028?"
113
+ npm run dev "what should be jina ai strategy for 2025?"
114
+ ```
115
+
116
+ ## Use Local LLM
117
+
118
+ > Note, not every LLM works with our reasoning flow; we need those that support structured output (sometimes called JSON Schema output, object output) well. Feel free to propose a PR to add more open-source LLMs to the working list.
119
+
120
+ If you use Ollama or LMStudio, you can redirect the reasoning request to your local LLM by setting the following environment variables:
121
+
122
+ ```bash
123
+ export LLM_PROVIDER=openai # yes, that's right - for local llm we still use openai client
124
+ export OPENAI_BASE_URL=http://127.0.0.1:1234/v1 # your local llm endpoint
125
+ export OPENAI_API_KEY=whatever # random string would do, as we don't use it (unless your local LLM has authentication)
126
+ export DEFAULT_MODEL_NAME=qwen2.5-7b # your local llm model name
127
+ ```
128
+
129
+
130
+ ## OpenAI-Compatible Server API
131
+
132
+ If you have a GUI client that supports OpenAI API (e.g. [CherryStudio](https://docs.cherry-ai.com/), [Chatbox](https://github.com/Bin-Huang/chatbox)), you can simply configure it to use this server.
133
+
134
+ ![demo1](.github/visuals/demo6.gif)
135
+
136
+ Start the server:
137
+ ```bash
138
+ # Without authentication
139
+ npm run serve
140
+
141
+ # With authentication (clients must provide this secret as Bearer token)
142
+ npm run serve --secret=your_secret_token
143
+ ```
144
+
145
+ The server will start on http://localhost:3000 with the following endpoint:
146
+
147
+ ### POST /v1/chat/completions
148
+ ```bash
149
+ # Without authentication
150
+ curl http://localhost:3000/v1/chat/completions \
151
+ -H "Content-Type: application/json" \
152
+ -d '{
153
+ "model": "jina-deepsearch-v1",
154
+ "messages": [
155
+ {
156
+ "role": "user",
157
+ "content": "Hello!"
158
+ }
159
+ ]
160
+ }'
161
+
162
+ # With authentication (when server is started with --secret)
163
+ curl http://localhost:3000/v1/chat/completions \
164
+ -H "Content-Type: application/json" \
165
+ -H "Authorization: Bearer your_secret_token" \
166
+ -d '{
167
+ "model": "jina-deepsearch-v1",
168
+ "messages": [
169
+ {
170
+ "role": "user",
171
+ "content": "Hello!"
172
+ }
173
+ ],
174
+ "stream": true
175
+ }'
176
+ ```
177
+
178
+ Response format:
179
+ ```json
180
+ {
181
+ "id": "chatcmpl-123",
182
+ "object": "chat.completion",
183
+ "created": 1677652288,
184
+ "model": "jina-deepsearch-v1",
185
+ "system_fingerprint": "fp_44709d6fcb",
186
+ "choices": [{
187
+ "index": 0,
188
+ "message": {
189
+ "role": "assistant",
190
+ "content": "YOUR FINAL ANSWER"
191
+ },
192
+ "logprobs": null,
193
+ "finish_reason": "stop"
194
+ }],
195
+ "usage": {
196
+ "prompt_tokens": 9,
197
+ "completion_tokens": 12,
198
+ "total_tokens": 21
199
+ }
200
+ }
201
+ ```
202
+
203
+ For streaming responses (stream: true), the server sends chunks in this format:
204
+ ```json
205
+ {
206
+ "id": "chatcmpl-123",
207
+ "object": "chat.completion.chunk",
208
+ "created": 1694268190,
209
+ "model": "jina-deepsearch-v1",
210
+ "system_fingerprint": "fp_44709d6fcb",
211
+ "choices": [{
212
+ "index": 0,
213
+ "delta": {
214
+ "content": "..."
215
+ },
216
+ "logprobs": null,
217
+ "finish_reason": null
218
+ }]
219
+ }
220
+ ```
221
+
222
+ Note: The think content in streaming responses is wrapped in XML tags:
223
+ ```
224
+ <think>
225
+ [thinking steps...]
226
+ </think>
227
+ [final answer]
228
+ ```
229
+
230
+
231
+ ## Docker Setup
232
+
233
+ ### Build Docker Image
234
+ To build the Docker image for the application, run the following command:
235
+ ```bash
236
+ docker build -t deepresearch:latest .
237
+ ```
238
+
239
+ ### Run Docker Container
240
+ To run the Docker container, use the following command:
241
+ ```bash
242
+ docker run -p 3000:3000 --env GEMINI_API_KEY=your_gemini_api_key --env JINA_API_KEY=your_jina_api_key deepresearch:latest
243
+ ```
244
+
245
+ ### Docker Compose
246
+ You can also use Docker Compose to manage multi-container applications. To start the application with Docker Compose, run:
247
+ ```bash
248
+ docker-compose up
249
+ ```
250
+
251
+ ## How Does it Work?
252
+
253
+ Not sure a flowchart helps, but here it is:
254
+
255
+ ```mermaid
256
+ flowchart TD
257
+ Start([Start]) --> Init[Initialize context & variables]
258
+ Init --> CheckBudget{Token budget<br/>exceeded?}
259
+ CheckBudget -->|No| GetQuestion[Get current question<br/>from gaps]
260
+ CheckBudget -->|Yes| BeastMode[Enter Beast Mode]
261
+
262
+ GetQuestion --> GenPrompt[Generate prompt]
263
+ GenPrompt --> ModelGen[Generate response<br/>using Gemini]
264
+ ModelGen --> ActionCheck{Check action<br/>type}
265
+
266
+ ActionCheck -->|answer| AnswerCheck{Is original<br/>question?}
267
+ AnswerCheck -->|Yes| EvalAnswer[Evaluate answer]
268
+ EvalAnswer --> IsGoodAnswer{Is answer<br/>definitive?}
269
+ IsGoodAnswer -->|Yes| HasRefs{Has<br/>references?}
270
+ HasRefs -->|Yes| End([End])
271
+ HasRefs -->|No| GetQuestion
272
+ IsGoodAnswer -->|No| StoreBad[Store bad attempt<br/>Reset context]
273
+ StoreBad --> GetQuestion
274
+
275
+ AnswerCheck -->|No| StoreKnowledge[Store as intermediate<br/>knowledge]
276
+ StoreKnowledge --> GetQuestion
277
+
278
+ ActionCheck -->|reflect| ProcessQuestions[Process new<br/>sub-questions]
279
+ ProcessQuestions --> DedupQuestions{New unique<br/>questions?}
280
+ DedupQuestions -->|Yes| AddGaps[Add to gaps queue]
281
+ DedupQuestions -->|No| DisableReflect[Disable reflect<br/>for next step]
282
+ AddGaps --> GetQuestion
283
+ DisableReflect --> GetQuestion
284
+
285
+ ActionCheck -->|search| SearchQuery[Execute search]
286
+ SearchQuery --> NewURLs{New URLs<br/>found?}
287
+ NewURLs -->|Yes| StoreURLs[Store URLs for<br/>future visits]
288
+ NewURLs -->|No| DisableSearch[Disable search<br/>for next step]
289
+ StoreURLs --> GetQuestion
290
+ DisableSearch --> GetQuestion
291
+
292
+ ActionCheck -->|visit| VisitURLs[Visit URLs]
293
+ VisitURLs --> NewContent{New content<br/>found?}
294
+ NewContent -->|Yes| StoreContent[Store content as<br/>knowledge]
295
+ NewContent -->|No| DisableVisit[Disable visit<br/>for next step]
296
+ StoreContent --> GetQuestion
297
+ DisableVisit --> GetQuestion
298
+
299
+ BeastMode --> FinalAnswer[Generate final answer] --> End
300
+ ```
301
+
302
+ ## Evaluation
303
+
304
+ I kept the evaluation simple, LLM-as-a-judge and collect some [ego questions](./src/evals/ego-questions.json) for evaluation. These are the questions about Jina AI that I know 100% the answer but LLMs do not.
305
+
306
+ I mainly look at 3 things: total steps, total tokens, and the correctness of the final answer.
307
+
308
+ ```bash
309
+ npm run eval ./src/evals/ego-questions.json
310
+ ```
311
+
312
+ Here's the table comparing plain `gemini-2.0-flash` and `gemini-2.0-flash + node-deepresearch` on the ego set.
313
+
314
+ Plain `gemini-2.0-flash` can be run by setting `tokenBudget` to zero, skipping the while-loop and directly answering the question.
315
+
316
+ It should not be surprising that plain `gemini-2.0-flash` has a 0% pass rate, as I intentionally filtered out the questions that LLMs can answer.
317
+
318
+ | Metric | gemini-2.0-flash | #188f1bb |
319
+ |--------|------------------|----------|
320
+ | Pass Rate | 0% | 75% |
321
+ | Average Steps | 1 | 4 |
322
+ | Maximum Steps | 1 | 13 |
323
+ | Minimum Steps | 1 | 2 |
324
+ | Median Steps | 1 | 3 |
325
+ | Average Tokens | 428 | 68,574 |
326
+ | Median Tokens | 434 | 31,541 |
327
+ | Maximum Tokens | 463 | 363,655 |
328
+ | Minimum Tokens | 374 | 7,963 |
config.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "env": {
3
+ "https_proxy": "",
4
+ "OPENAI_BASE_URL": "",
5
+ "GEMINI_API_KEY": "",
6
+ "OPENAI_API_KEY": "",
7
+ "JINA_API_KEY": "",
8
+ "BRAVE_API_KEY": "",
9
+ "DEFAULT_MODEL_NAME": ""
10
+ },
11
+ "defaults": {
12
+ "search_provider": "jina",
13
+ "llm_provider": "gemini",
14
+ "step_sleep": 0
15
+ },
16
+ "providers": {
17
+ "gemini": {
18
+ "createClient": "createGoogleGenerativeAI"
19
+ },
20
+ "openai": {
21
+ "createClient": "createOpenAI",
22
+ "clientConfig": {
23
+ "compatibility": "strict"
24
+ }
25
+ }
26
+ },
27
+ "models": {
28
+ "gemini": {
29
+ "default": {
30
+ "model": "gemini-2.0-flash",
31
+ "temperature": 0,
32
+ "maxTokens": 8000
33
+ },
34
+ "tools": {
35
+ "searchGrounding": { "temperature": 0 },
36
+ "dedup": { "temperature": 0.1 },
37
+ "evaluator": {},
38
+ "errorAnalyzer": {},
39
+ "queryRewriter": { "temperature": 0.1 },
40
+ "agent": { "temperature": 0.7 },
41
+ "agentBeastMode": { "temperature": 0.7 },
42
+ "fallback": { "temperature": 0 }
43
+ }
44
+ },
45
+ "openai": {
46
+ "default": {
47
+ "model": "gpt-4o-mini",
48
+ "temperature": 0,
49
+ "maxTokens": 8000
50
+ },
51
+ "tools": {
52
+ "searchGrounding": { "temperature": 0 },
53
+ "dedup": { "temperature": 0.1 },
54
+ "evaluator": {},
55
+ "errorAnalyzer": {},
56
+ "queryRewriter": { "temperature": 0.1 },
57
+ "agent": { "temperature": 0.7 },
58
+ "agentBeastMode": { "temperature": 0.7 },
59
+ "fallback": { "temperature": 0 }
60
+ }
61
+ }
62
+ }
63
+ }
docker-compose.yml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: '3.8'
2
+
3
+ services:
4
+ app:
5
+ build:
6
+ context: .
7
+ dockerfile: Dockerfile
8
+ environment:
9
+ - GEMINI_API_KEY=${GEMINI_API_KEY}
10
+ - OPENAI_API_KEY=${OPENAI_API_KEY}
11
+ - JINA_API_KEY=${JINA_API_KEY}
12
+ - BRAVE_API_KEY=${BRAVE_API_KEY}
13
+ ports:
14
+ - "3000:3000"
jest.config.js ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ module.exports = {
2
+ preset: 'ts-jest',
3
+ testEnvironment: 'node',
4
+ testMatch: ['**/__tests__/**/*.test.ts'],
5
+ setupFiles: ['<rootDir>/jest.setup.js'],
6
+ };
jest.setup.js ADDED
@@ -0,0 +1 @@
 
 
1
+ require('dotenv').config();
jina-ai/.dockerignore ADDED
@@ -0,0 +1 @@
 
 
1
+ node_modules
jina-ai/Dockerfile ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ---- BUILD STAGE ----
2
+ FROM node:20-slim AS builder
3
+
4
+ # Set working directory
5
+ WORKDIR /app
6
+
7
+ # Copy package.json and package-lock.json
8
+ COPY ./package*.json ./
9
+ COPY ./jina-ai/package*.json ./jina-ai/
10
+
11
+ # Install dependencies
12
+ RUN npm ci
13
+ WORKDIR /app/jina-ai
14
+ RUN npm ci
15
+
16
+ WORKDIR /app
17
+
18
+ # Copy application code
19
+ COPY ./src ./src
20
+ COPY ./tsconfig.json ./tsconfig.json
21
+ COPY ./jina-ai/config.json ./
22
+ RUN npm run build
23
+
24
+ COPY ./jina-ai/src ./jina-ai/src
25
+ COPY ./jina-ai/tsconfig.json ./jina-ai/tsconfig.json
26
+ WORKDIR /app/jina-ai
27
+ RUN npm run build
28
+
29
+ # ---- PRODUCTION STAGE ----
30
+ FROM node:20 AS production
31
+
32
+ # Set working directory
33
+ WORKDIR /app
34
+
35
+ COPY --from=builder /app ./
36
+ # Copy config.json and built files from builder
37
+
38
+ WORKDIR /app/jina-ai
39
+
40
+ # Set environment variables (Recommended to set at runtime, avoid hardcoding)
41
+ ENV GEMINI_API_KEY=${GEMINI_API_KEY}
42
+ ENV OPENAI_API_KEY=${OPENAI_API_KEY}
43
+ ENV JINA_API_KEY=${JINA_API_KEY}
44
+ ENV BRAVE_API_KEY=${BRAVE_API_KEY}
45
+
46
+ # Expose the port the app runs on
47
+ EXPOSE 3000
48
+
49
+ # Set startup command
50
+ CMD ["node", "./dist/server.js"]
jina-ai/config.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "env": {
3
+ "https_proxy": "",
4
+ "OPENAI_BASE_URL": "",
5
+ "GEMINI_API_KEY": "",
6
+ "OPENAI_API_KEY": "",
7
+ "JINA_API_KEY": "",
8
+ "BRAVE_API_KEY": "",
9
+ "DEFAULT_MODEL_NAME": ""
10
+ },
11
+ "defaults": {
12
+ "search_provider": "brave",
13
+ "llm_provider": "vertex",
14
+ "step_sleep": 0
15
+ },
16
+ "providers": {
17
+ "vertex": {
18
+ "createClient": "createGoogleVertex",
19
+ "clientConfig": {
20
+ "location": "us-central1"
21
+ }
22
+ },
23
+ "gemini": {
24
+ "createClient": "createGoogleGenerativeAI"
25
+ },
26
+ "openai": {
27
+ "createClient": "createOpenAI",
28
+ "clientConfig": {
29
+ "compatibility": "strict"
30
+ }
31
+ }
32
+ },
33
+ "models": {
34
+ "gemini": {
35
+ "default": {
36
+ "model": "gemini-2.0-flash",
37
+ "temperature": 0,
38
+ "maxTokens": 8000
39
+ },
40
+ "tools": {
41
+ "searchGrounding": { "temperature": 0 },
42
+ "dedup": { "temperature": 0.1 },
43
+ "evaluator": {},
44
+ "errorAnalyzer": {},
45
+ "queryRewriter": { "temperature": 0.1 },
46
+ "agent": { "temperature": 0.7 },
47
+ "agentBeastMode": { "temperature": 0.7 },
48
+ "fallback": { "temperature": 0 }
49
+ }
50
+ },
51
+ "openai": {
52
+ "default": {
53
+ "model": "gpt-4o-mini",
54
+ "temperature": 0,
55
+ "maxTokens": 8000
56
+ },
57
+ "tools": {
58
+ "searchGrounding": { "temperature": 0 },
59
+ "dedup": { "temperature": 0.1 },
60
+ "evaluator": {},
61
+ "errorAnalyzer": {},
62
+ "queryRewriter": { "temperature": 0.1 },
63
+ "agent": { "temperature": 0.7 },
64
+ "agentBeastMode": { "temperature": 0.7 },
65
+ "fallback": { "temperature": 0 }
66
+ }
67
+ }
68
+ }
69
+ }
jina-ai/package-lock.json ADDED
The diff for this file is too large to render. See raw diff
 
jina-ai/package.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "@jina-ai/node-deepresearch",
3
+ "version": "1.0.0",
4
+ "main": "dist/app.js",
5
+ "files": [
6
+ "dist",
7
+ "README.md",
8
+ "LICENSE"
9
+ ],
10
+ "scripts": {
11
+ "build": "tsc",
12
+ "dev": "npx ts-node src/agent.ts",
13
+ "search": "npx ts-node src/test-duck.ts",
14
+ "rewrite": "npx ts-node src/tools/query-rewriter.ts",
15
+ "lint": "eslint . --ext .ts",
16
+ "lint:fix": "eslint . --ext .ts --fix",
17
+ "serve": "ts-node src/server.ts",
18
+ "eval": "ts-node src/evals/batch-evals.ts",
19
+ "test": "jest --testTimeout=30000",
20
+ "test:watch": "jest --watch"
21
+ },
22
+ "keywords": [],
23
+ "author": "Jina AI",
24
+ "license": "Apache-2.0",
25
+ "description": "",
26
+ "dependencies": {
27
+ "@ai-sdk/google-vertex": "^2.1.12",
28
+ "@google-cloud/firestore": "^7.11.0",
29
+ "civkit": "^0.8.3-15926cb",
30
+ "dayjs": "^1.11.13",
31
+ "lodash": "^4.17.21",
32
+ "reflect-metadata": "^0.2.2",
33
+ "tsyringe": "^4.8.0"
34
+ },
35
+ "devDependencies": {
36
+ "@types/lodash": "^4.17.15",
37
+ "pino-pretty": "^13.0.0"
38
+ }
39
+ }
jina-ai/src/dto/jina-embeddings-auth.ts ADDED
@@ -0,0 +1,347 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import {
2
+ Also, AuthenticationFailedError, AuthenticationRequiredError,
3
+ DownstreamServiceFailureError, RPC_CALL_ENVIRONMENT,
4
+ ArrayOf, AutoCastable, Prop
5
+ } from 'civkit/civ-rpc';
6
+ import { parseJSONText } from 'civkit/vectorize';
7
+ import { htmlEscape } from 'civkit/escape';
8
+ import { marshalErrorLike } from 'civkit/lang';
9
+
10
+ import type express from 'express';
11
+
12
+ import logger from '../lib/logger';
13
+ import { AsyncLocalContext } from '../lib/async-context';
14
+ import { InjectProperty } from '../lib/registry';
15
+ import { JinaEmbeddingsDashboardHTTP } from '../lib/billing';
16
+ import envConfig from '../lib/env-config';
17
+
18
+ import { FirestoreRecord } from '../lib/firestore';
19
+ import _ from 'lodash';
20
+ import { RateLimitDesc } from '../rate-limit';
21
+
22
// Snapshot of a user's Jina billing wallet as returned by the dashboard API.
// Defaults keep the record castable even when the upstream payload omits fields.
export class JinaWallet extends AutoCastable {
    @Prop({
        default: ''
    })
    user_id!: string;

    // Remaining trial balance (presumably a token count — TODO confirm units).
    @Prop({
        default: 0
    })
    trial_balance!: number;

    // Trial validity window; unset when the user has no trial.
    @Prop()
    trial_start?: Date;

    @Prop()
    trial_end?: Date;

    // Paid (non-trial) balance.
    @Prop({
        default: 0
    })
    regular_balance!: number;

    // Balance the billing middleware checks against and reportUsage() decrements.
    @Prop({
        default: 0
    })
    total_balance!: number;
}
49
+
50
// Firestore-cached copy of a Jina dashboard user account.
// The Firestore document id (_id) is the bearer token itself, so lookups by
// API key are a single doc read (see JinaEmbeddingsAuthDTO.getBrief).
export class JinaEmbeddingsTokenAccount extends FirestoreRecord {
    static override collectionName = 'embeddingsTokenAccounts';

    override _id!: string;

    @Prop({
        required: true
    })
    user_id!: string;

    @Prop({
        nullable: true,
        type: String,
    })
    email?: string;

    @Prop({
        nullable: true,
        type: String,
    })
    full_name?: string;

    @Prop({
        nullable: true,
        type: String,
    })
    customer_id?: string;

    @Prop({
        nullable: true,
        type: String,
    })
    avatar_url?: string;

    // Not keeping sensitive info for now
    // @Prop()
    // billing_address?: object;

    // @Prop()
    // payment_method?: object;

    // Balance snapshot used for pre-flight billing checks.
    @Prop({
        required: true
    })
    wallet!: JinaWallet;

    @Prop({
        type: Object
    })
    metadata?: { [k: string]: any; };

    // When this cache entry was last refreshed from the dashboard API;
    // getBrief() treats entries younger than 180s as fresh.
    @Prop({
        defaultFactory: () => new Date()
    })
    lastSyncedAt!: Date;

    // Optional per-user rate-limit overrides, keyed by app tag.
    @Prop({
        dictOf: [ArrayOf(RateLimitDesc)]
    })
    customRateLimits?: { [k: string]: RateLimitDesc[]; };

    // Fields stored as JSON strings in Firestore: parsed on read (from) and
    // stringified on write (degradeForFireStore). Currently empty.
    static patchedFields = [
    ];

    // Deserialize a raw record, JSON-parsing any patched (stringified) fields first.
    static override from(input: any) {
        for (const field of this.patchedFields) {
            if (typeof input[field] === 'string') {
                input[field] = parseJSONText(input[field]);
            }
        }

        return super.from(input) as JinaEmbeddingsTokenAccount;
    }

    // Produce a plain-object copy Firestore will accept (it rejects values with
    // custom prototypes), re-stringifying patched fields.
    override degradeForFireStore() {
        const copy: any = {
            ...this,
            wallet: { ...this.wallet },
            // Firestore cannot store class instances; flatten to plain objects.
            customRateLimits: _.mapValues(this.customRateLimits, (v) => v.map((x) => ({ ...x }))),
        };

        for (const field of (this.constructor as typeof JinaEmbeddingsTokenAccount).patchedFields) {
            if (typeof copy[field] === 'object') {
                copy[field] = JSON.stringify(copy[field]) as any;
            }
        }

        return copy;
    }

    // Upstream may return extra fields; keep them rather than dropping.
    [k: string]: any;
}
143
+
144
+
145
const authDtoLogger = logger.child({ service: 'JinaAuthDTO' });

// Minimal express request/response pair passed through the RPC call environment.
export interface FireBaseHTTPCtx {
    req: express.Request,
    res: express.Response,
}

// Single shared dashboard HTTP client, reused across all requests/DTO instances.
const THE_VERY_SAME_JINA_EMBEDDINGS_CLIENT = new JinaEmbeddingsDashboardHTTP(envConfig.JINA_EMBEDDINGS_DASHBOARD_API_KEY);
153
+
154
@Also({
    openapi: {
        operation: {
            parameters: {
                'Authorization': {
                    description: htmlEscape`Jina Token for authentication.\n\n` +
                        htmlEscape`- Member of <JinaEmbeddingsAuthDTO>\n\n` +
                        `- Authorization: Bearer {YOUR_JINA_TOKEN}`
                    ,
                    in: 'header',
                    schema: {
                        anyOf: [
                            { type: 'string', format: 'token' }
                        ]
                    }
                }
            }
        }
    }
})
// Per-request authentication DTO: extracts the bearer token, resolves the
// account (Firestore cache first, dashboard API second) and reports usage.
export class JinaEmbeddingsAuthDTO extends AutoCastable {
    uid?: string;
    bearerToken?: string;
    user?: JinaEmbeddingsTokenAccount;

    @InjectProperty(AsyncLocalContext)
    ctxMgr!: AsyncLocalContext;

    jinaEmbeddingsDashboard = THE_VERY_SAME_JINA_EMBEDDINGS_CLIENT;

    // Build the DTO from the RPC environment, pulling the token out of the
    // Authorization header ("Bearer <token>" or a bare token), with an
    // `_token` input field as fallback.
    static override from(input: any) {
        const instance = super.from(input) as JinaEmbeddingsAuthDTO;

        const ctx = input[RPC_CALL_ENVIRONMENT];

        const req = (ctx.rawRequest || ctx.req) as express.Request | undefined;

        if (req) {
            const authorization = req.get('authorization');

            if (authorization) {
                const authToken = authorization.split(' ')[1] || authorization;
                instance.bearerToken = authToken;
            }

        }

        if (!instance.bearerToken && input._token) {
            instance.bearerToken = input._token;
        }

        return instance;
    }

    // Resolve the account behind bearerToken.
    // Cache policy: a Firestore record younger than 180s is served directly
    // (unless ignoreCache). Otherwise the dashboard API is consulted and the
    // merged result written back. On upstream failure other than 401, a stale
    // cached record is served as a degraded fallback.
    async getBrief(ignoreCache?: boolean | string) {
        if (!this.bearerToken) {
            throw new AuthenticationRequiredError({
                message: 'Jina API key is required to authenticate. Please get one from https://jina.ai'
            });
        }

        let account;
        try {
            account = await JinaEmbeddingsTokenAccount.fromFirestore(this.bearerToken);
        } catch (err) {
            // FireStore would not accept any string as input and may throw if not happy with it
            void 0;
        }


        const age = account?.lastSyncedAt ? Date.now() - account.lastSyncedAt.getTime() : Infinity;

        if (account && !ignoreCache) {
            if (account && age < 180_000) {
                this.user = account;
                this.uid = this.user?.user_id;

                return account;
            }
        }

        try {
            const r = await this.jinaEmbeddingsDashboard.validateToken(this.bearerToken);
            const brief = r.data;
            // Merge the fresh brief over any cached fields and persist.
            const draftAccount = JinaEmbeddingsTokenAccount.from({
                ...account, ...brief, _id: this.bearerToken,
                lastSyncedAt: new Date()
            });
            await JinaEmbeddingsTokenAccount.save(draftAccount.degradeForFireStore(), undefined, { merge: true });

            this.user = draftAccount;
            this.uid = this.user?.user_id;

            return draftAccount;
        } catch (err: any) {
            authDtoLogger.warn(`Failed to get user brief: ${err}`, { err: marshalErrorLike(err) });

            // 401 means the key itself is bad — never fall back to cache.
            if (err?.status === 401) {
                throw new AuthenticationFailedError({
                    message: 'Invalid API key, please get a new one from https://jina.ai'
                });
            }

            if (account) {
                this.user = account;
                this.uid = this.user?.user_id;

                return account;
            }


            throw new DownstreamServiceFailureError(`Failed to authenticate: ${err}`);
        }
    }

    // Report consumed tokens to the dashboard. The in-memory balance is
    // decremented optimistically and restored if the report fails; the
    // Firestore cache is decremented best-effort after a successful report.
    async reportUsage(tokenCount: number, mdl: string, endpoint: string = '/encode') {
        const user = await this.assertUser();
        const uid = user.user_id;
        user.wallet.total_balance -= tokenCount;

        return this.jinaEmbeddingsDashboard.reportUsage(this.bearerToken!, {
            model_name: mdl,
            api_endpoint: endpoint,
            consumer: {
                id: uid,
                user_id: uid,
            },
            usage: {
                total_tokens: tokenCount
            },
            labels: {
                model_name: mdl
            }
        }).then((r) => {
            JinaEmbeddingsTokenAccount.COLLECTION.doc(this.bearerToken!)
                .update({ 'wallet.total_balance': JinaEmbeddingsTokenAccount.OPS.increment(-tokenCount) })
                .catch((err) => {
                    authDtoLogger.warn(`Failed to update cache for ${uid}: ${err}`, { err: marshalErrorLike(err) });
                });

            return r;
        }).catch((err) => {
            // Roll back the optimistic decrement; usage report is best-effort.
            user.wallet.total_balance += tokenCount;
            authDtoLogger.warn(`Failed to report usage for ${uid}: ${err}`, { err: marshalErrorLike(err) });
        });
    }

    // Resolve the user id (authenticating if necessary) and stash it in the
    // async-local context for downstream consumers.
    async solveUID() {
        if (this.uid) {
            this.ctxMgr.set('uid', this.uid);

            return this.uid;
        }

        if (this.bearerToken) {
            await this.getBrief();
            this.ctxMgr.set('uid', this.uid);

            return this.uid;
        }

        return undefined;
    }

    // Like solveUID, but authentication is mandatory.
    async assertUID() {
        const uid = await this.solveUID();

        if (!uid) {
            throw new AuthenticationRequiredError('Authentication failed');
        }

        return uid;
    }

    // Return the resolved account, authenticating on first use.
    async assertUser() {
        if (this.user) {
            return this.user;
        }

        await this.getBrief();

        return this.user!;
    }

    // Collect the user's effective custom rate limits for the given tags,
    // or undefined so callers fall back to default policy.
    getRateLimits(...tags: string[]) {
        const descs = tags.map((x) => this.user?.customRateLimits?.[x] || []).flat().filter((x) => x.isEffective());

        if (descs.length) {
            return descs;
        }

        return undefined;
    }
}
+ }
jina-ai/src/lib/async-context.ts ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
import { GlobalAsyncContext } from 'civkit/async-context';
import { container, singleton } from 'tsyringe';

// Process-wide async-local storage for request-scoped context (trace id, uid, ...).
@singleton()
export class AsyncLocalContext extends GlobalAsyncContext {}

const instance = container.resolve(AsyncLocalContext);
// Exposed on `process` so code without DI access can still reach the context.
Reflect.set(process, 'asyncLocalContext', instance);
export default instance;
jina-ai/src/lib/billing.ts ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { HTTPService } from 'civkit';
2
+ import _ from 'lodash';
3
+
4
+
5
// Wallet payload as delivered by the dashboard API (wire format).
export interface JinaWallet {
    trial_balance: number;
    trial_start: Date;
    trial_end: Date;
    regular_balance: number;
    total_balance: number;
}


// User brief returned by /v1/authorization and /v1/api_key/user.
export interface JinaUserBrief {
    user_id: string;
    email: string | null;
    full_name: string | null;
    customer_id: string | null;
    avatar_url?: string;
    billing_address: Partial<{
        address: string;
        city: string;
        state: string;
        country: string;
        postal_code: string;
    }>;
    payment_method: Partial<{
        brand: string;
        last4: string;
        exp_month: number;
        exp_year: number;
    }>;
    wallet: JinaWallet;
    metadata: {
        [k: string]: any;
    };
}

// Usage-report payload posted to /v1/usage.
export interface JinaUsageReport {
    model_name: string;
    api_endpoint: string;
    consumer: {
        user_id: string;
        customer_plan?: string;
        [k: string]: any;
    };
    usage: {
        total_tokens: number;
    };
    labels: {
        user_type?: string;
        model_name?: string;
        [k: string]: any;
    };
}
56
+
57
// Thin HTTP client for the Jina embeddings dashboard (auth + billing) API.
export class JinaEmbeddingsDashboardHTTP extends HTTPService {
    name = 'JinaEmbeddingsDashboardHTTP';

    constructor(
        // Service API key sent as `x-api-key` on privileged calls (usage reports).
        public apiKey: string,
        public baseUri: string = 'https://embeddings-dashboard-api.jina.ai/api'
    ) {
        super(baseUri);

        this.baseOptions.timeout = 30_000; // 30 sec
    }

    // Fetch the brief of the user owning `token` (token sent as Bearer auth).
    async authorization(token: string) {
        const r = await this.get<JinaUserBrief>('/v1/authorization', {
            headers: {
                Authorization: `Bearer ${token}`
            },
            responseType: 'json',
        });

        return r;
    }

    // Validate an API key and fetch its owner's brief (key sent as query param).
    async validateToken(token: string) {
        const r = await this.getWithSearchParams<JinaUserBrief>('/v1/api_key/user', {
            api_key: token,
        }, {
            responseType: 'json',
        });

        return r;
    }

    // Post a usage report on behalf of the token's owner; authenticated with
    // both the user token and this service's apiKey.
    async reportUsage(token: string, query: JinaUsageReport) {
        const r = await this.postJson('/v1/usage', query, {
            headers: {
                Authorization: `Bearer ${token}`,
                'x-api-key': this.apiKey,
            },
            responseType: 'text',
        });

        return r;
    }

}
jina-ai/src/lib/env-config.ts ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { container, singleton } from 'tsyringe';
2
+
3
+ export const SPECIAL_COMBINED_ENV_KEY = 'ENV_COMBINED';
4
+ const CONF_ENV = [
5
+ 'OPENAI_API_KEY',
6
+
7
+ 'ANTHROPIC_API_KEY',
8
+
9
+ 'REPLICATE_API_KEY',
10
+
11
+ 'GOOGLE_AI_STUDIO_API_KEY',
12
+
13
+ 'JINA_EMBEDDINGS_API_KEY',
14
+
15
+ 'JINA_EMBEDDINGS_DASHBOARD_API_KEY',
16
+
17
+ 'BRAVE_SEARCH_API_KEY',
18
+
19
+ ] as const;
20
+
21
+
22
+ @singleton()
23
+ export class EnvConfig {
24
+ dynamic!: Record<string, string>;
25
+
26
+ combined: Record<string, string> = {};
27
+ originalEnv: Record<string, string | undefined> = { ...process.env };
28
+
29
+ constructor() {
30
+ if (process.env[SPECIAL_COMBINED_ENV_KEY]) {
31
+ Object.assign(this.combined, JSON.parse(
32
+ Buffer.from(process.env[SPECIAL_COMBINED_ENV_KEY]!, 'base64').toString('utf-8')
33
+ ));
34
+ delete process.env[SPECIAL_COMBINED_ENV_KEY];
35
+ }
36
+
37
+ // Static config
38
+ for (const x of CONF_ENV) {
39
+ const s = this.combined[x] || process.env[x] || '';
40
+ Reflect.set(this, x, s);
41
+ if (x in process.env) {
42
+ delete process.env[x];
43
+ }
44
+ }
45
+
46
+ // Dynamic config
47
+ this.dynamic = new Proxy({
48
+ get: (_target: any, prop: string) => {
49
+ return this.combined[prop] || process.env[prop] || '';
50
+ }
51
+ }, {}) as any;
52
+ }
53
+ }
54
+
55
+ // eslint-disable-next-line @typescript-eslint/no-empty-interface
56
+ export interface EnvConfig extends Record<typeof CONF_ENV[number], string> { }
57
+
58
+ const instance = container.resolve(EnvConfig);
59
+ export default instance;
jina-ai/src/lib/errors.ts ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import { ApplicationError, Prop, RPC_TRANSFER_PROTOCOL_META_SYMBOL, StatusCode } from 'civkit';
import _ from 'lodash';
import dayjs from 'dayjs';
import utc from 'dayjs/plugin/utc';

// Needed for formatting Retry-After dates in GMT (see RateLimitTriggeredError).
dayjs.extend(utc);

// Application error types. The 5-digit @StatusCode maps to an HTTP status by
// its first three digits (e.g. 50301 -> 503, 42903 -> 429).

@StatusCode(50301)
export class ServiceDisabledError extends ApplicationError { }

@StatusCode(50302)
export class ServiceCrashedError extends ApplicationError { }

@StatusCode(50303)
export class ServiceNodeResourceDrainError extends ApplicationError { }

@StatusCode(40104)
export class EmailUnverifiedError extends ApplicationError { }

@StatusCode(40201)
export class InsufficientCreditsError extends ApplicationError { }

@StatusCode(40202)
export class FreeFeatureLimitError extends ApplicationError { }

@StatusCode(40203)
export class InsufficientBalanceError extends ApplicationError { }

@StatusCode(40903)
export class LockConflictError extends ApplicationError { }

@StatusCode(40904)
export class BudgetExceededError extends ApplicationError { }

@StatusCode(45101)
export class HarmfulContentError extends ApplicationError { }

@StatusCode(45102)
export class SecurityCompromiseError extends ApplicationError { }

@StatusCode(41201)
export class BatchSizeTooLargeError extends ApplicationError { }


// Rate-limit error that augments the transfer metadata with a Retry-After
// header when either retryAfter (seconds) or retryAfterDate is set.
@StatusCode(42903)
export class RateLimitTriggeredError extends ApplicationError {

    @Prop({
        desc: 'Retry after seconds',
    })
    retryAfter?: number;

    @Prop({
        desc: 'Retry after date',
    })
    retryAfterDate?: Date;

    protected override get [RPC_TRANSFER_PROTOCOL_META_SYMBOL]() {
        // Seconds take precedence over the absolute date.
        const retryAfter = this.retryAfter || this.retryAfterDate;
        if (!retryAfter) {
            return super[RPC_TRANSFER_PROTOCOL_META_SYMBOL];
        }

        // Dates are rendered in the RFC 7231 HTTP-date format (GMT).
        return _.merge(_.cloneDeep(super[RPC_TRANSFER_PROTOCOL_META_SYMBOL]), {
            headers: {
                'Retry-After': `${retryAfter instanceof Date ? dayjs(retryAfter).utc().format('ddd, DD MMM YYYY HH:mm:ss [GMT]') : retryAfter}`,
            }
        });
    }
}
jina-ai/src/lib/firestore.ts ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import _ from 'lodash';
2
+ import { AutoCastable, Prop, RPC_MARSHAL } from 'civkit/civ-rpc';
3
+ import {
4
+ Firestore, FieldValue, DocumentReference,
5
+ Query, Timestamp, SetOptions, DocumentSnapshot,
6
+ } from '@google-cloud/firestore';
7
+
8
// Firestore doesn't support JavaScript objects with custom prototypes (i.e. objects that were created via the "new" operator).
// Workaround: while the wrapped Firestore function runs, make the global
// Object.getPrototypeOf report Object.prototype for everything, so Firestore's
// prototype check accepts our AutoCastable instances. The original function is
// restored in `finally`; this is only safe because the wrapped call is
// synchronous for the duration of the patch.
function patchFireStoreArrogance(func: Function) {
    return function (this: unknown) {
        const origObjectGetPrototype = Object.getPrototypeOf;
        Object.getPrototypeOf = function (x) {
            const r = origObjectGetPrototype.call(this, x);
            if (!r) {
                // Preserve null-prototype results (e.g. Object.create(null)).
                return r;
            }
            return Object.prototype;
        };
        try {
            return func.call(this, ...arguments);
        } finally {
            Object.getPrototypeOf = origObjectGetPrototype;
        }
    };
}

// Patch the two entry points where Firestore validates object prototypes.
Reflect.set(DocumentReference.prototype, 'set', patchFireStoreArrogance(Reflect.get(DocumentReference.prototype, 'set')));
Reflect.set(DocumentSnapshot, 'fromObject', patchFireStoreArrogance(Reflect.get(DocumentSnapshot, 'fromObject')));
29
+
30
+ function mapValuesDeep(v: any, fn: (i: any) => any): any {
31
+ if (_.isPlainObject(v)) {
32
+ return _.mapValues(v, (i) => mapValuesDeep(i, fn));
33
+ } else if (_.isArray(v)) {
34
+ return v.map((i) => mapValuesDeep(i, fn));
35
+ } else {
36
+ return fn(v);
37
+ }
38
+ }
39
+
40
export type Constructor<T> = { new(...args: any[]): T; };
export type Constructed<T> = T extends Partial<infer U> ? U : T extends object ? T : object;

// Load a single document by id into an instance of the calling record class.
// Returns undefined when the document does not exist. The Firestore ref and id
// are attached as non-enumerable `_ref` / enumerable `_id`.
export function fromFirestore<T extends FirestoreRecord>(
    this: Constructor<T>, id: string, overrideCollection?: string
): Promise<T | undefined>;
export async function fromFirestore(
    this: any, id: string, overrideCollection?: string
) {
    const collection = overrideCollection || this.collectionName;
    if (!collection) {
        throw new Error(`Missing collection name to construct ${this.name}`);
    }

    const ref = this.DB.collection(overrideCollection || this.collectionName).doc(id);

    const ptr = await ref.get();

    if (!ptr.exists) {
        return undefined;
    }

    const doc = this.from(
        // Fixes non-native firebase types
        mapValuesDeep(ptr.data(), (i: any) => {
            if (i instanceof Timestamp) {
                return i.toDate();
            }

            return i;
        })
    );

    Object.defineProperty(doc, '_ref', { value: ref, enumerable: false });
    Object.defineProperty(doc, '_id', { value: ptr.id, enumerable: true });

    return doc;
}
78
+
79
// Run an arbitrary Firestore query and materialize each hit as an instance of
// the calling record class (Timestamps converted to Dates, _ref/_id attached).
// Returns [] when there are no matches.
export function fromFirestoreQuery<T extends FirestoreRecord>(
    this: Constructor<T>, query: Query
): Promise<T[]>;
export async function fromFirestoreQuery(this: any, query: Query) {
    const ptr = await query.get();

    if (ptr.docs.length) {
        return ptr.docs.map(doc => {
            const r = this.from(
                mapValuesDeep(doc.data(), (i: any) => {
                    if (i instanceof Timestamp) {
                        return i.toDate();
                    }

                    return i;
                })
            );
            Object.defineProperty(r, '_ref', { value: doc.ref, enumerable: false });
            Object.defineProperty(r, '_id', { value: doc.id, enumerable: true });

            return r;
        });
    }

    return [];
}
105
+
106
// Persist a record. Reuses the document's existing _ref when present;
// otherwise allocates one (honoring a pre-set _id, or letting Firestore
// generate one). Writes with merge:true by default; setOptions can override.
export function setToFirestore<T extends FirestoreRecord>(
    this: Constructor<T>, doc: T, overrideCollection?: string, setOptions?: SetOptions
): Promise<T>;
export async function setToFirestore(
    this: any, doc: any, overrideCollection?: string, setOptions?: SetOptions
) {
    let ref: DocumentReference<any> = doc._ref;
    if (!ref) {
        const collection = overrideCollection || this.collectionName;
        if (!collection) {
            throw new Error(`Missing collection name to construct ${this.name}`);
        }

        const predefinedId = doc._id || undefined;
        const hdl = this.DB.collection(overrideCollection || this.collectionName);
        ref = predefinedId ? hdl.doc(predefinedId) : hdl.doc();

        Object.defineProperty(doc, '_ref', { value: ref, enumerable: false });
        Object.defineProperty(doc, '_id', { value: ref.id, enumerable: true });
    }

    await ref.set(doc, { merge: true, ...setOptions });

    return doc;
}
131
+
132
// Delete everything matched by `query`, one batch per query page, rescheduling
// itself via process.nextTick until a page comes back empty.
// NOTE(review): declared to resolve T but actually resolves undefined, and the
// nextTick continuation is fire-and-forget (its errors are unobserved) —
// confirm callers rely only on the kickoff, not on completion.
export function deleteQueryBatch<T extends FirestoreRecord>(
    this: Constructor<T>, query: Query
): Promise<T>;
export async function deleteQueryBatch(this: any, query: Query) {
    const snapshot = await query.get();

    const batchSize = snapshot.size;
    if (batchSize === 0) {
        return;
    }

    // Delete documents in a batch
    const batch = this.DB.batch();
    snapshot.docs.forEach((doc) => {
        batch.delete(doc.ref);
    });
    await batch.commit();

    // Recurse asynchronously for the next page.
    process.nextTick(() => {
        this.deleteQueryBatch(query);
    });
};
154
+
155
// Materialize an already-fetched DocumentSnapshot as an instance of the
// calling record class (Timestamps converted to Dates, _ref/_id attached).
export function fromFirestoreDoc<T extends FirestoreRecord>(
    this: Constructor<T>, snapshot: DocumentSnapshot,
): T | undefined;
export function fromFirestoreDoc(
    this: any, snapshot: DocumentSnapshot,
) {
    const doc = this.from(
        // Fixes non-native firebase types
        mapValuesDeep(snapshot.data(), (i: any) => {
            if (i instanceof Timestamp) {
                return i.toDate();
            }

            return i;
        })
    );

    Object.defineProperty(doc, '_ref', { value: snapshot.ref, enumerable: false });
    Object.defineProperty(doc, '_id', { value: snapshot.id, enumerable: true });

    return doc;
}
177
// Shared Firestore client; project id comes from the GCLOUD_PROJECT env var.
const defaultFireStore = new Firestore({
    projectId: process.env.GCLOUD_PROJECT,
});
// Base class for Firestore-backed records. Subclasses set `collectionName`
// and inherit the static load/save helpers defined above.
export class FirestoreRecord extends AutoCastable {
    static collectionName?: string;
    // FieldValue operators (increment, arrayUnion, ...) for partial updates.
    static OPS = FieldValue;
    static DB = defaultFireStore;
    static get COLLECTION() {
        if (!this.collectionName) {
            throw new Error('Not implemented');
        }

        return this.DB.collection(this.collectionName);
    }

    @Prop()
    _id?: string;
    // Backing document reference; attached non-enumerably by the loaders.
    _ref?: DocumentReference<Partial<Omit<this, '_ref' | '_id'>>>;

    static fromFirestore = fromFirestore;
    static fromFirestoreDoc = fromFirestoreDoc;
    static fromFirestoreQuery = fromFirestoreQuery;

    static save = setToFirestore;
    static deleteQueryBatch = deleteQueryBatch;

    // RPC serialization: include _id and render _ref as its document path.
    [RPC_MARSHAL]() {
        return {
            ...this,
            _id: this._id,
            _ref: this._ref?.path
        };
    }

    // Deep-convert to plain JSON-able data Firestore will accept, letting
    // nested records apply their own degradeForFireStore.
    degradeForFireStore(): this {
        return JSON.parse(JSON.stringify(this, function (k, v) {
            if (k === '') {
                return v;
            }
            if (typeof v === 'object' && v && (typeof v.degradeForFireStore === 'function')) {
                return v.degradeForFireStore();
            }

            return v;
        }));
    }
}
jina-ai/src/lib/logger.ts ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import { AbstractPinoLogger } from 'civkit/pino-logger';
import { singleton, container } from 'tsyringe';
import { threadId } from 'node:worker_threads';
import { getTraceCtx } from 'civkit/async-context';


// Map pino level names to Google Cloud Logging severity strings.
const levelToSeverityMap: { [k: string]: string | undefined; } = {
    trace: 'DEFAULT',
    debug: 'DEBUG',
    info: 'INFO',
    warn: 'WARNING',
    error: 'ERROR',
    fatal: 'CRITICAL',
};

// Process-wide logger. In production emits raw JSON to stdout (for Cloud
// Logging ingestion); in development uses pino-pretty for readable output.
@singleton()
export class GlobalLogger extends AbstractPinoLogger {
    loggerOptions = {
        level: 'debug',
        base: {
            tid: threadId,
        }
    };

    override init(): void {
        if (process.env['NODE_ENV']?.startsWith('prod')) {
            super.init(process.stdout);
        } else {
            // Lazy require keeps pino-pretty a dev-only dependency.
            const PinoPretty = require('pino-pretty').PinoPretty;
            super.init(PinoPretty({
                singleLine: true,
                colorize: true,
                messageFormat(log: any, messageKey: any) {
                    return `${log['tid'] ? `[${log['tid']}]` : ''}[${log['service'] || 'ROOT'}] ${log[messageKey]}`;
                },
            }));
        }


        this.emit('ready');
    }

    // Enrich every record with a GCP severity and, when a trace context and
    // GCLOUD_PROJECT are available, the Cloud Trace correlation field.
    override log(...args: any[]) {
        const [levelObj, ...rest] = args;
        const severity = levelToSeverityMap[levelObj?.level];
        const traceCtx = getTraceCtx();
        const patched: any= { ...levelObj, severity };
        if (traceCtx?.traceId && process.env['GCLOUD_PROJECT']) {
            patched['logging.googleapis.com/trace'] = `projects/${process.env['GCLOUD_PROJECT']}/traces/${traceCtx.traceId}`;
        }
        return super.log(patched, ...rest);
    }
}

const instance = container.resolve(GlobalLogger);
export default instance;
+ export default instance;
jina-ai/src/lib/registry.ts ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
import { container } from 'tsyringe';
import { propertyInjectorFactory } from 'civkit/property-injector';

// Property-injection decorator bound to the app's global tsyringe container.
export const InjectProperty = propertyInjectorFactory(container);
jina-ai/src/patch-express.ts ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { ApplicationError, Prop, RPC_CALL_ENVIRONMENT } from "civkit/civ-rpc";
2
+ import { marshalErrorLike } from "civkit/lang";
3
+ import { randomUUID } from "crypto";
4
+ import { once } from "events";
5
+ import type { NextFunction, Request, Response } from "express";
6
+
7
+ import { JinaEmbeddingsAuthDTO } from "./dto/jina-embeddings-auth";
8
+ import rateLimitControl, { API_CALL_STATUS, RateLimitDesc } from "./rate-limit";
9
+ import asyncLocalContext from "./lib/async-context";
10
+ import globalLogger from "./lib/logger";
11
+ import { InsufficientBalanceError } from "./lib/errors";
12
+ import { FirestoreRecord } from "./lib/firestore";
13
+ import cors from "cors";
14
+
15
globalLogger.serviceReady();
const logger = globalLogger.child({ service: 'JinaAISaaSMiddleware' });
// App tag used for rate-limit records and usage reports.
const appName = 'DEEPRESEARCH';

// Persisted knowledge snippet collected during a request, keyed back to its
// trace and owner; saved fire-and-forget by the middleware after response close.
export class KnowledgeItem extends FirestoreRecord {
    static override collectionName = 'knowledgeItems';

    @Prop({
        required: true
    })
    traceId!: string;

    @Prop({
        required: true
    })
    uid!: string;

    @Prop({
        default: ''
    })
    question!: string;

    @Prop({
        default: ''
    })
    answer!: string;

    @Prop({
        default: ''
    })
    type!: string;

    @Prop({
        arrayOf: Object,
        default: []
    })
    references!: any[];

    @Prop({
        defaultFactory: () => new Date()
    })
    createdAt!: Date;

    @Prop({
        defaultFactory: () => new Date()
    })
    updatedAt!: Date;
}
63
const corsMiddleware = cors();
// SaaS gatekeeper middleware: authenticates the Jina token, checks balance,
// enforces rate limits, then — after the response closes — reports usage,
// finalizes the rate-limit record and persists any collected knowledge.
export const jinaAiMiddleware = (req: Request, res: Response, next: NextFunction) => {
    // Health probe short-circuit.
    // NOTE(review): 'pone' looks like a typo of 'pong' — confirm no probe
    // matches the body before changing it.
    if (req.path === '/ping') {
        res.status(200).end('pone');
        return;
    }
    // Model listing is public; skip auth/billing.
    if (req.path.startsWith('/v1/models')) {
        next();
        return;
    }
    // Only gate POST/GET; other verbs pass through untouched.
    if (req.method !== 'POST' && req.method !== 'GET') {
        next();
        return;
    }
    asyncLocalContext.run(async () => {
        // Establish the request-scoped context (trace id, start time, ip).
        const googleTraceId = req.get('x-cloud-trace-context')?.split('/')?.[0];
        const ctx = asyncLocalContext.ctx;
        ctx.traceId = req.get('x-request-id') || req.get('request-id') || googleTraceId || randomUUID();
        ctx.traceT0 = new Date();
        ctx.ip = req?.ip;

        try {
            const authDto = JinaEmbeddingsAuthDTO.from({
                [RPC_CALL_ENVIRONMENT]: { req, res }
            });

            const user = await authDto.assertUser();
            const uid = await authDto.assertUID();
            // Written as a negated comparison so NaN/undefined balances also fail.
            if (!(user.wallet.total_balance > 0)) {
                throw new InsufficientBalanceError(`Account balance not enough to run this query, please recharge.`);
            }
            await rateLimitControl.serviceReady();
            // Custom per-user limits win; otherwise 30 req/min for speed_level >= 2,
            // 10 req/min for everyone else.
            const rateLimitPolicy = authDto.getRateLimits(appName) || [
                parseInt(user.metadata?.speed_level) >= 2 ?
                    RateLimitDesc.from({
                        occurrence: 30,
                        periodSeconds: 60
                    }) :
                    RateLimitDesc.from({
                        occurrence: 10,
                        periodSeconds: 60
                    })
            ];
            const criterions = rateLimitPolicy.map((c) => rateLimitControl.rateLimitDescToCriterion(c));
            await Promise.all(criterions.map(([pointInTime, n]) => rateLimitControl.assertUidPeriodicLimit(uid, pointInTime, n, appName)));

            // Open a pending API-call record; saved best-effort.
            const apiRoll = rateLimitControl.record({ uid, tags: [appName] })
            apiRoll.save().catch((err) => logger.warn(`Failed to save rate limit record`, { err: marshalErrorLike(err) }));

            const pResClose = once(res, 'close');

            // Hand off to the actual route handler.
            next();

            // Post-response accounting starts once the connection closes.
            await pResClose;
            // ctx.chargeAmount is expected to be set by the handler — TODO confirm.
            const chargeAmount = ctx.chargeAmount;
            if (chargeAmount) {
                authDto.reportUsage(chargeAmount, `reader-${appName}`).catch((err) => {
                    logger.warn(`Unable to report usage for ${uid}`, { err: marshalErrorLike(err) });
                });
                apiRoll.chargeAmount = chargeAmount;
            }
            apiRoll.status = res.statusCode === 200 ? API_CALL_STATUS.SUCCESS : API_CALL_STATUS.ERROR;
            apiRoll.save().catch((err) => logger.warn(`Failed to save rate limit record`, { err: marshalErrorLike(err) }));
            logger.info(`HTTP ${res.statusCode} for request ${ctx.traceId} after ${Date.now() - ctx.traceT0.valueOf()}ms`, {
                uid,
                chargeAmount,
            });

            // Persist knowledge gathered during the request, fire-and-forget.
            if (ctx.promptContext?.knowledge?.length) {
                Promise.all(ctx.promptContext.knowledge.map((x: any) => KnowledgeItem.save(
                    KnowledgeItem.from({
                        ...x,
                        uid,
                        traceId: ctx.traceId,
                    })
                ))).catch((err: any) => {
                    logger.warn(`Failed to save knowledge`, { err: marshalErrorLike(err) });
                });
            }

        } catch (err: any) {
            if (!res.headersSent) {
                // Ensure CORS headers are present on error responses too.
                corsMiddleware(req, res, () => 'noop');
                if (err instanceof ApplicationError) {
                    res.status(parseInt(err.code as string) || 500).json({ error: err.message });

                    return;
                }

                res.status(500).json({ error: 'Internal' });
            }

            logger.error(`Error in billing middleware`, { err: marshalErrorLike(err) });
            if (err.stack) {
                logger.error(err.stack);
            }
        }

    });
}
jina-ai/src/rate-limit.ts ADDED
@@ -0,0 +1,278 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { AutoCastable, ResourcePolicyDenyError, Also, Prop } from 'civkit/civ-rpc';
2
+ import { AsyncService } from 'civkit/async-service';
3
+ import { getTraceId } from 'civkit/async-context';
4
+ import { singleton, container } from 'tsyringe';
5
+
6
+ import { RateLimitTriggeredError } from './lib/errors';
7
+ import { FirestoreRecord } from './lib/firestore';
8
+ import { GlobalLogger } from './lib/logger';
9
+
10
/** Lifecycle states of a recorded API call. */
export enum API_CALL_STATUS {
    SUCCESS = 'success',
    ERROR = 'error',
    PENDING = 'pending',
}

/**
 * Firestore-backed record of a single API call, stored in the `apiRoll`
 * collection and used as the unit of rate-limit accounting.
 * The dictOf/index signature allows arbitrary extra fields (e.g. chargeAmount)
 * to ride along on the document.
 */
@Also({ dictOf: Object })
export class APICall extends FirestoreRecord {
    static override collectionName = 'apiRoll';

    // Trace id of the originating request; defaults to the async-context trace id.
    @Prop({
        required: true,
        defaultFactory: () => getTraceId()
    })
    traceId!: string;

    // User the call is attributed to (drives per-UID limits).
    @Prop()
    uid?: string;

    // Client IP the call is attributed to (drives per-IP limits).
    @Prop()
    ip?: string;

    // Free-form labels (e.g. app name) used to scope rate-limit queries.
    @Prop({
        arrayOf: String,
        default: [],
    })
    tags!: string[];

    @Prop({
        required: true,
        defaultFactory: () => new Date(),
    })
    createdAt!: Date;

    @Prop()
    completedAt?: Date;

    // Rate-limit accounting counts records in SUCCESS or PENDING state.
    @Prop({
        required: true,
        default: API_CALL_STATUS.PENDING,
    })
    status!: API_CALL_STATUS;

    // TTL marker: records expire 90 days after creation.
    @Prop({
        required: true,
        defaultFactory: () => new Date(Date.now() + 1000 * 60 * 60 * 24 * 90),
    })
    expireAt!: Date;

    // Arbitrary extra payload fields are permitted (see @Also above).
    [k: string]: any;

    // Append the given tags, skipping any already present.
    tag(...tags: string[]) {
        for (const t of tags) {
            if (!this.tags.includes(t)) {
                this.tags.push(t);
            }
        }
    }

    // Instance-level convenience wrapper around the static save().
    save() {
        return (this.constructor as typeof APICall).save(this);
    }
}
73
+
74
+
75
+ export class RateLimitDesc extends AutoCastable {
76
+ @Prop({
77
+ default: 1000
78
+ })
79
+ occurrence!: number;
80
+
81
+ @Prop({
82
+ default: 3600
83
+ })
84
+ periodSeconds!: number;
85
+
86
+ @Prop()
87
+ notBefore?: Date;
88
+
89
+ @Prop()
90
+ notAfter?: Date;
91
+
92
+ isEffective() {
93
+ const now = new Date();
94
+ if (this.notBefore && this.notBefore > now) {
95
+ return false;
96
+ }
97
+ if (this.notAfter && this.notAfter < now) {
98
+ return false;
99
+ }
100
+
101
+ return true;
102
+ }
103
+ }
104
+
105
+
106
+ @singleton()
107
+ export class RateLimitControl extends AsyncService {
108
+
109
+ logger = this.globalLogger.child({ service: this.constructor.name });
110
+
111
+ constructor(
112
+ protected globalLogger: GlobalLogger,
113
+ ) {
114
+ super(...arguments);
115
+ }
116
+
117
+ override async init() {
118
+ await this.dependencyReady();
119
+
120
+ this.emit('ready');
121
+ }
122
+
123
+ async queryByUid(uid: string, pointInTime: Date, ...tags: string[]) {
124
+ let q = APICall.COLLECTION
125
+ .orderBy('createdAt', 'asc')
126
+ .where('createdAt', '>=', pointInTime)
127
+ .where('status', 'in', [API_CALL_STATUS.SUCCESS, API_CALL_STATUS.PENDING])
128
+ .where('uid', '==', uid);
129
+ if (tags.length) {
130
+ q = q.where('tags', 'array-contains-any', tags);
131
+ }
132
+
133
+ return APICall.fromFirestoreQuery(q);
134
+ }
135
+
136
+ async queryByIp(ip: string, pointInTime: Date, ...tags: string[]) {
137
+ let q = APICall.COLLECTION
138
+ .orderBy('createdAt', 'asc')
139
+ .where('createdAt', '>=', pointInTime)
140
+ .where('status', 'in', [API_CALL_STATUS.SUCCESS, API_CALL_STATUS.PENDING])
141
+ .where('ip', '==', ip);
142
+ if (tags.length) {
143
+ q = q.where('tags', 'array-contains-any', tags);
144
+ }
145
+
146
+ return APICall.fromFirestoreQuery(q);
147
+ }
148
+
149
+ async assertUidPeriodicLimit(uid: string, pointInTime: Date, limit: number, ...tags: string[]) {
150
+ if (limit <= 0) {
151
+ throw new ResourcePolicyDenyError(`This UID(${uid}) is not allowed to call this endpoint (rate limit quota is 0).`);
152
+ }
153
+
154
+ let q = APICall.COLLECTION
155
+ .orderBy('createdAt', 'asc')
156
+ .where('createdAt', '>=', pointInTime)
157
+ .where('status', 'in', [API_CALL_STATUS.SUCCESS, API_CALL_STATUS.PENDING])
158
+ .where('uid', '==', uid);
159
+ if (tags.length) {
160
+ q = q.where('tags', 'array-contains-any', tags);
161
+ }
162
+ const count = (await q.count().get()).data().count;
163
+
164
+ if (count >= limit) {
165
+ const r = await APICall.fromFirestoreQuery(q.limit(1));
166
+ const [r1] = r;
167
+
168
+ const dtMs = Math.abs(r1.createdAt?.valueOf() - pointInTime.valueOf());
169
+ const dtSec = Math.ceil(dtMs / 1000);
170
+
171
+ throw RateLimitTriggeredError.from({
172
+ message: `Per UID rate limit exceeded (${tags.join(',') || 'called'} ${limit} times since ${pointInTime})`,
173
+ retryAfter: dtSec,
174
+ });
175
+ }
176
+
177
+ return count + 1;
178
+ }
179
+
180
+ async assertIPPeriodicLimit(ip: string, pointInTime: Date, limit: number, ...tags: string[]) {
181
+ let q = APICall.COLLECTION
182
+ .orderBy('createdAt', 'asc')
183
+ .where('createdAt', '>=', pointInTime)
184
+ .where('status', 'in', [API_CALL_STATUS.SUCCESS, API_CALL_STATUS.PENDING])
185
+ .where('ip', '==', ip);
186
+ if (tags.length) {
187
+ q = q.where('tags', 'array-contains-any', tags);
188
+ }
189
+
190
+ const count = (await q.count().get()).data().count;
191
+
192
+ if (count >= limit) {
193
+ const r = await APICall.fromFirestoreQuery(q.limit(1));
194
+ const [r1] = r;
195
+
196
+ const dtMs = Math.abs(r1.createdAt?.valueOf() - pointInTime.valueOf());
197
+ const dtSec = Math.ceil(dtMs / 1000);
198
+
199
+ throw RateLimitTriggeredError.from({
200
+ message: `Per IP rate limit exceeded (${tags.join(',') || 'called'} ${limit} times since ${pointInTime})`,
201
+ retryAfter: dtSec,
202
+ });
203
+ }
204
+
205
+ return count + 1;
206
+ }
207
+
208
+ record(partialRecord: Partial<APICall>) {
209
+ const record = APICall.from(partialRecord);
210
+ const newId = APICall.COLLECTION.doc().id;
211
+ record._id = newId;
212
+
213
+ return record;
214
+ }
215
+
216
+ // async simpleRPCUidBasedLimit(rpcReflect: RPCReflection, uid: string, tags: string[] = [],
217
+ // ...inputCriterion: RateLimitDesc[] | [Date, number][]) {
218
+ // const criterion = inputCriterion.map((c) => { return Array.isArray(c) ? c : this.rateLimitDescToCriterion(c); });
219
+
220
+ // await Promise.all(criterion.map(([pointInTime, n]) =>
221
+ // this.assertUidPeriodicLimit(uid, pointInTime, n, ...tags)));
222
+
223
+ // const r = this.record({
224
+ // uid,
225
+ // tags,
226
+ // });
227
+
228
+ // r.save().catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
229
+ // rpcReflect.then(() => {
230
+ // r.status = API_CALL_STATUS.SUCCESS;
231
+ // r.save()
232
+ // .catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
233
+ // });
234
+ // rpcReflect.catch((err) => {
235
+ // r.status = API_CALL_STATUS.ERROR;
236
+ // r.error = err.toString();
237
+ // r.save()
238
+ // .catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
239
+ // });
240
+
241
+ // return r;
242
+ // }
243
+
244
+ rateLimitDescToCriterion(rateLimitDesc: RateLimitDesc) {
245
+ return [new Date(Date.now() - rateLimitDesc.periodSeconds * 1000), rateLimitDesc.occurrence] as [Date, number];
246
+ }
247
+
248
+ // async simpleRpcIPBasedLimit(rpcReflect: RPCReflection, ip: string, tags: string[] = [],
249
+ // ...inputCriterion: RateLimitDesc[] | [Date, number][]) {
250
+ // const criterion = inputCriterion.map((c) => { return Array.isArray(c) ? c : this.rateLimitDescToCriterion(c); });
251
+ // await Promise.all(criterion.map(([pointInTime, n]) =>
252
+ // this.assertIPPeriodicLimit(ip, pointInTime, n, ...tags)));
253
+
254
+ // const r = this.record({
255
+ // ip,
256
+ // tags,
257
+ // });
258
+
259
+ // r.save().catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
260
+ // rpcReflect.then(() => {
261
+ // r.status = API_CALL_STATUS.SUCCESS;
262
+ // r.save()
263
+ // .catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
264
+ // });
265
+ // rpcReflect.catch((err) => {
266
+ // r.status = API_CALL_STATUS.ERROR;
267
+ // r.error = err.toString();
268
+ // r.save()
269
+ // .catch((err) => this.logger.warn(`Failed to save rate limit record`, { err }));
270
+ // });
271
+
272
+ // return r;
273
+ // }
274
+ }
275
+
276
// Eagerly resolve a shared singleton through the DI container at module load.
const instance = container.resolve(RateLimitControl);

export default instance;
jina-ai/src/server.ts ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import 'reflect-metadata'
2
+ import express from 'express';
3
+ import { jinaAiMiddleware } from "./patch-express";
4
+ import { Server } from 'http';
5
+
6
+ const app = require('../..').default;
7
+
8
+ const rootApp = express();
9
+ rootApp.use(jinaAiMiddleware, app);
10
+
11
+
12
+ const port = process.env.PORT || 3000;
13
+
14
+ let server: Server | undefined;
15
+ // Export server startup function for better testing
16
+ export function startServer() {
17
+ return rootApp.listen(port, () => {
18
+ console.log(`Server running at http://localhost:${port}`);
19
+ });
20
+ }
21
+
22
+ // Start server if running directly
23
+ if (process.env.NODE_ENV !== 'test') {
24
+ server = startServer();
25
+ }
26
+
27
+ process.on('unhandledRejection', (_err) => `Is false alarm`);
28
+
29
+ process.on('uncaughtException', (err) => {
30
+ console.log('Uncaught exception', err);
31
+
32
+ // Looks like Firebase runtime does not handle error properly.
33
+ // Make sure to quit the process.
34
+ process.nextTick(() => process.exit(1));
35
+ console.error('Uncaught exception, process quit.');
36
+ throw err;
37
+ });
38
+
39
+ const sigHandler = (signal: string) => {
40
+ console.log(`Received ${signal}, exiting...`);
41
+ if (server && server.listening) {
42
+ console.log(`Shutting down gracefully...`);
43
+ console.log(`Waiting for the server to drain and close...`);
44
+ server.close((err) => {
45
+ if (err) {
46
+ console.error('Error while closing server', err);
47
+ return;
48
+ }
49
+ process.exit(0);
50
+ });
51
+ server.closeIdleConnections();
52
+ }
53
+
54
+ }
55
+ process.on('SIGTERM', sigHandler);
56
+ process.on('SIGINT', sigHandler);
jina-ai/tsconfig.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "compilerOptions": {
3
+ "target": "ES2020",
4
+ "module": "node16",
5
+ "outDir": "./dist",
6
+ "rootDir": "./src",
7
+ "sourceMap": true,
8
+ "esModuleInterop": true,
9
+ "skipLibCheck": true,
10
+ "forceConsistentCasingInFileNames": true,
11
+ "strict": true,
12
+ "experimentalDecorators": true,
13
+ "emitDecoratorMetadata": true,
14
+ "resolveJsonModule": true
15
+ }
16
+ }
17
+
package-lock.json ADDED
The diff for this file is too large to render. See raw diff
 
package.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "node-deepresearch",
3
+ "version": "1.0.0",
4
+ "main": "dist/app.js",
5
+ "files": [
6
+ "dist",
7
+ "README.md",
8
+ "LICENSE"
9
+ ],
10
+ "scripts": {
11
+ "build": "tsc",
12
+ "dev": "npx ts-node src/agent.ts",
13
+ "search": "npx ts-node src/test-duck.ts",
14
+ "rewrite": "npx ts-node src/tools/query-rewriter.ts",
15
+ "lint": "eslint . --ext .ts",
16
+ "lint:fix": "eslint . --ext .ts --fix",
17
+ "serve": "ts-node src/server.ts",
18
+ "start": "ts-node src/server.ts",
19
+ "eval": "ts-node src/evals/batch-evals.ts",
20
+ "test": "jest --testTimeout=30000",
21
+ "test:watch": "jest --watch",
22
+ "test:docker": "jest src/__tests__/docker.test.ts --testTimeout=300000"
23
+ },
24
+ "keywords": [],
25
+ "author": "Jina AI",
26
+ "license": "Apache-2.0",
27
+ "description": "",
28
+ "dependencies": {
29
+ "@ai-sdk/google": "^1.0.0",
30
+ "@ai-sdk/openai": "^1.1.9",
31
+ "ai": "^4.1.26",
32
+ "axios": "^1.7.9",
33
+ "commander": "^13.1.0",
34
+ "cors": "^2.8.5",
35
+ "dotenv": "^16.4.7",
36
+ "duck-duck-scrape": "^2.2.7",
37
+ "express": "^4.21.2",
38
+ "node-fetch": "^3.3.2",
39
+ "undici": "^7.3.0",
40
+ "zod": "^3.22.4",
41
+ "zod-to-json-schema": "^3.24.1"
42
+ },
43
+ "devDependencies": {
44
+ "@types/commander": "^2.12.0",
45
+ "@types/cors": "^2.8.17",
46
+ "@types/express": "^5.0.0",
47
+ "@types/jest": "^29.5.14",
48
+ "@types/node": "^22.10.10",
49
+ "@types/node-fetch": "^2.6.12",
50
+ "@types/supertest": "^6.0.2",
51
+ "@typescript-eslint/eslint-plugin": "^7.0.1",
52
+ "@typescript-eslint/parser": "^7.0.1",
53
+ "eslint": "^8.56.0",
54
+ "jest": "^29.7.0",
55
+ "supertest": "^7.0.0",
56
+ "ts-jest": "^29.2.5",
57
+ "ts-node": "^10.9.2",
58
+ "typescript": "^5.7.3"
59
+ },
60
+ "optionalDependencies": {
61
+ "@ai-sdk/google-vertex": "^2.1.12"
62
+ }
63
+ }
src/__tests__/agent.test.ts ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import { getResponse } from '../agent';
import { generateObject } from 'ai';
import { search } from '../tools/jina-search';
import { readUrl } from '../tools/read';

// Mock external dependencies
jest.mock('ai', () => ({
  generateObject: jest.fn()
}));

jest.mock('../tools/jina-search', () => ({
  search: jest.fn()
}));

jest.mock('../tools/read', () => ({
  readUrl: jest.fn()
}));

// Smoke-tests the agent loop with all external LLM/search/read calls mocked out,
// so the loop terminates quickly on the mocked 'answer' action.
describe('getResponse', () => {
  beforeEach(() => {
    // Mock generateObject to return a valid response
    (generateObject as jest.Mock).mockResolvedValue({
      object: { action: 'answer', answer: 'mocked response', references: [], think: 'mocked thought' },
      usage: { totalTokens: 100 }
    });

    // Mock search to return empty results
    (search as jest.Mock).mockResolvedValue({
      response: { data: [] }
    });

    // Mock readUrl to return empty content
    (readUrl as jest.Mock).mockResolvedValue({
      response: { data: { content: '', url: 'test-url' } },
      tokens: 0
    });
  });

  afterEach(() => {
    jest.useRealTimers();
    jest.clearAllMocks();
  });

  // Only asserts the shape of the result/context; the mocked 'answer' action
  // should end the loop on the first step.
  it('should handle search action', async () => {
    const result = await getResponse('What is TypeScript?', 50000); // Increased token budget to handle real-world usage
    expect(result.result.action).toBeDefined();
    expect(result.context).toBeDefined();
    expect(result.context.tokenTracker).toBeDefined();
    expect(result.context.actionTracker).toBeDefined();
  }, 30000);
});
src/__tests__/docker.test.ts ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import { exec } from 'child_process';
import { promisify } from 'util';

const execAsync = promisify(exec);

// Integration test: builds the project's Docker image and checks the container
// boots and serves /health. Requires a local Docker daemon; run via test:docker.
describe('Docker build', () => {
  jest.setTimeout(300000); // 5 minutes for build

  it('should build Docker image successfully', async () => {
    const { stderr } = await execAsync('docker build -t node-deepresearch-test .');
    expect(stderr).not.toContain('error');
  });

  it('should start container and respond to health check', async () => {
    // Start container with mock API keys
    await execAsync(
      'docker run -d --name test-container -p 3001:3000 ' +
      '-e GEMINI_API_KEY=mock_key ' +
      '-e JINA_API_KEY=mock_key ' +
      'node-deepresearch-test'
    );

    // Wait for container to start
    await new Promise(resolve => setTimeout(resolve, 5000));

    try {
      // Check if server responds
      const { stdout } = await execAsync('curl -s http://localhost:3001/health');
      expect(stdout).toContain('ok');
    } finally {
      // Cleanup
      await execAsync('docker rm -f test-container').catch(console.error);
    }
  });

  afterAll(async () => {
    // Clean up any leftover containers
    await execAsync('docker rm -f test-container').catch(() => {});
    await execAsync('docker rmi node-deepresearch-test').catch(() => {});
  });
});
src/__tests__/server.test.ts ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import request from 'supertest';
2
+ import { EventEmitter } from 'events';
3
+ import type { Express } from 'express';
4
+
5
+ const TEST_SECRET = 'test-secret';
6
+ let app: Express;
7
+
8
+ describe('/v1/chat/completions', () => {
9
+ jest.setTimeout(120000); // Increase timeout for all tests in this suite
10
+
11
+ beforeEach(async () => {
12
+ // Set up test environment
13
+ process.env.NODE_ENV = 'test';
14
+ process.env.LLM_PROVIDER = 'openai'; // Use OpenAI provider for tests
15
+ process.env.OPENAI_API_KEY = 'test-key';
16
+ process.env.JINA_API_KEY = 'test-key';
17
+
18
+ // Clean up any existing secret
19
+ const existingSecretIndex = process.argv.findIndex(arg => arg.startsWith('--secret='));
20
+ if (existingSecretIndex !== -1) {
21
+ process.argv.splice(existingSecretIndex, 1);
22
+ }
23
+
24
+ // Set up test secret and import server module
25
+ process.argv.push(`--secret=${TEST_SECRET}`);
26
+
27
+ // Import server module (jest.resetModules() is called automatically before each test)
28
+ const { default: serverModule } = await require('../app');
29
+ app = serverModule;
30
+ });
31
+
32
+ afterEach(async () => {
33
+ // Clean up environment variables
34
+ delete process.env.OPENAI_API_KEY;
35
+ delete process.env.JINA_API_KEY;
36
+
37
+ // Clean up any remaining event listeners
38
+ const emitter = EventEmitter.prototype;
39
+ emitter.removeAllListeners();
40
+ emitter.setMaxListeners(emitter.getMaxListeners() + 1);
41
+
42
+ // Clean up test secret
43
+ const secretIndex = process.argv.findIndex(arg => arg.startsWith('--secret='));
44
+ if (secretIndex !== -1) {
45
+ process.argv.splice(secretIndex, 1);
46
+ }
47
+
48
+ // Wait for any pending promises to settle
49
+ await new Promise(resolve => setTimeout(resolve, 500));
50
+
51
+ // Reset module cache to ensure clean state
52
+ jest.resetModules();
53
+ });
54
+ it('should require authentication when secret is set', async () => {
55
+ // Note: secret is already set in beforeEach
56
+
57
+ const response = await request(app)
58
+ .post('/v1/chat/completions')
59
+ .send({
60
+ model: 'test-model',
61
+ messages: [{ role: 'user', content: 'test' }]
62
+ });
63
+ expect(response.status).toBe(401);
64
+ });
65
+
66
+ it('should allow requests without auth when no secret is set', async () => {
67
+ // Remove secret for this test
68
+ const secretIndex = process.argv.findIndex(arg => arg.startsWith('--secret='));
69
+ if (secretIndex !== -1) {
70
+ process.argv.splice(secretIndex, 1);
71
+ }
72
+
73
+ // Reset module cache to ensure clean state
74
+ jest.resetModules();
75
+
76
+ // Reload server module without secret
77
+ const { default: serverModule } = await require('../app');
78
+ app = serverModule;
79
+
80
+ const response = await request(app)
81
+ .post('/v1/chat/completions')
82
+ .send({
83
+ model: 'test-model',
84
+ messages: [{ role: 'user', content: 'test' }]
85
+ });
86
+ expect(response.status).toBe(200);
87
+ });
88
+
89
+ it('should reject requests without user message', async () => {
90
+ const response = await request(app)
91
+ .post('/v1/chat/completions')
92
+ .set('Authorization', `Bearer ${TEST_SECRET}`)
93
+ .send({
94
+ model: 'test-model',
95
+ messages: [{ role: 'developer', content: 'test' }]
96
+ });
97
+ expect(response.status).toBe(400);
98
+ expect(response.body.error).toBe('Last message must be from user');
99
+ });
100
+
101
+ it('should handle non-streaming request', async () => {
102
+ const response = await request(app)
103
+ .post('/v1/chat/completions')
104
+ .set('Authorization', `Bearer ${TEST_SECRET}`)
105
+ .send({
106
+ model: 'test-model',
107
+ messages: [{ role: 'user', content: 'test' }]
108
+ });
109
+ expect(response.status).toBe(200);
110
+ expect(response.body).toMatchObject({
111
+ object: 'chat.completion',
112
+ choices: [{
113
+ message: {
114
+ role: 'assistant'
115
+ }
116
+ }]
117
+ });
118
+ });
119
+
120
+ it('should handle streaming request and track tokens correctly', async () => {
121
+ return new Promise<void>((resolve, reject) => {
122
+ let isDone = false;
123
+ let totalCompletionTokens = 0;
124
+
125
+ const cleanup = () => {
126
+ clearTimeout(timeoutHandle);
127
+ isDone = true;
128
+ resolve();
129
+ };
130
+
131
+ const timeoutHandle = setTimeout(() => {
132
+ if (!isDone) {
133
+ cleanup();
134
+ reject(new Error('Test timed out'));
135
+ }
136
+ }, 30000);
137
+
138
+ request(app)
139
+ .post('/v1/chat/completions')
140
+ .set('Authorization', `Bearer ${TEST_SECRET}`)
141
+ .send({
142
+ model: 'test-model',
143
+ messages: [{ role: 'user', content: 'test' }],
144
+ stream: true
145
+ })
146
+ .buffer(true)
147
+ .parse((res, callback) => {
148
+ const response = res as unknown as {
149
+ on(event: 'data', listener: (chunk: Buffer) => void): void;
150
+ on(event: 'end', listener: () => void): void;
151
+ on(event: 'error', listener: (err: Error) => void): void;
152
+ };
153
+ let responseData = '';
154
+
155
+ response.on('error', (err) => {
156
+ cleanup();
157
+ callback(err, null);
158
+ });
159
+
160
+ response.on('data', (chunk) => {
161
+ responseData += chunk.toString();
162
+ });
163
+
164
+ response.on('end', () => {
165
+ try {
166
+ callback(null, responseData);
167
+ } catch (err) {
168
+ cleanup();
169
+ callback(err instanceof Error ? err : new Error(String(err)), null);
170
+ }
171
+ });
172
+ })
173
+ .end((err, res) => {
174
+ if (err) return reject(err);
175
+
176
+ expect(res.status).toBe(200);
177
+ expect(res.headers['content-type']).toBe('text/event-stream');
178
+
179
+ // Verify stream format and content
180
+ if (isDone) return; // Prevent multiple resolves
181
+
182
+ const responseText = res.body as string;
183
+ const chunks = responseText
184
+ .split('\n\n')
185
+ .filter((line: string) => line.startsWith('data: '))
186
+ .map((line: string) => JSON.parse(line.replace('data: ', '')));
187
+
188
+ // Process all chunks
189
+ expect(chunks.length).toBeGreaterThan(0);
190
+
191
+ // Verify initial chunk format
192
+ expect(chunks[0]).toMatchObject({
193
+ id: expect.any(String),
194
+ object: 'chat.completion.chunk',
195
+ choices: [{
196
+ index: 0,
197
+ delta: { role: 'assistant' },
198
+ logprobs: null,
199
+ finish_reason: null
200
+ }]
201
+ });
202
+
203
+ // Verify content chunks have content
204
+ chunks.slice(1).forEach(chunk => {
205
+ const content = chunk.choices[0].delta.content;
206
+ if (content && content.trim()) {
207
+ totalCompletionTokens += 1; // Count 1 token per chunk as per Vercel convention
208
+ }
209
+ expect(chunk).toMatchObject({
210
+ object: 'chat.completion.chunk',
211
+ choices: [{
212
+ delta: expect.objectContaining({
213
+ content: expect.any(String)
214
+ })
215
+ }]
216
+ });
217
+ });
218
+
219
+ // Verify final chunk format if present
220
+ const lastChunk = chunks[chunks.length - 1];
221
+ if (lastChunk?.choices?.[0]?.finish_reason === 'stop') {
222
+ expect(lastChunk).toMatchObject({
223
+ object: 'chat.completion.chunk',
224
+ choices: [{
225
+ delta: {},
226
+ finish_reason: 'stop'
227
+ }]
228
+ });
229
+ }
230
+
231
+ // Verify we tracked some completion tokens
232
+ expect(totalCompletionTokens).toBeGreaterThan(0);
233
+
234
+ // Clean up and resolve
235
+ if (!isDone) {
236
+ cleanup();
237
+ }
238
+ });
239
+ });
240
+ });
241
+
242
+ it('should track tokens correctly in error response', async () => {
243
+ const response = await request(app)
244
+ .post('/v1/chat/completions')
245
+ .set('Authorization', `Bearer ${TEST_SECRET}`)
246
+ .send({
247
+ model: 'test-model',
248
+ messages: [] // Invalid messages array
249
+ });
250
+
251
+ expect(response.status).toBe(400);
252
+ expect(response.body).toHaveProperty('error');
253
+ expect(response.body.error).toBe('Messages array is required and must not be empty');
254
+
255
+ // Make another request to verify token tracking after error
256
+ const validResponse = await request(app)
257
+ .post('/v1/chat/completions')
258
+ .set('Authorization', `Bearer ${TEST_SECRET}`)
259
+ .send({
260
+ model: 'test-model',
261
+ messages: [{ role: 'user', content: 'test' }]
262
+ });
263
+
264
+ // Verify token tracking still works after error
265
+ expect(validResponse.body.usage).toMatchObject({
266
+ prompt_tokens: expect.any(Number),
267
+ completion_tokens: expect.any(Number),
268
+ total_tokens: expect.any(Number)
269
+ });
270
+
271
+ // Basic token tracking structure should be present
272
+ expect(validResponse.body.usage.total_tokens).toBe(
273
+ validResponse.body.usage.prompt_tokens + validResponse.body.usage.completion_tokens
274
+ );
275
+ });
276
+
277
+ it('should provide token usage in Vercel AI SDK format', async () => {
278
+ const response = await request(app)
279
+ .post('/v1/chat/completions')
280
+ .set('Authorization', `Bearer ${TEST_SECRET}`)
281
+ .send({
282
+ model: 'test-model',
283
+ messages: [{ role: 'user', content: 'test' }]
284
+ });
285
+
286
+ expect(response.status).toBe(200);
287
+ const usage = response.body.usage;
288
+
289
+ expect(usage).toMatchObject({
290
+ prompt_tokens: expect.any(Number),
291
+ completion_tokens: expect.any(Number),
292
+ total_tokens: expect.any(Number)
293
+ });
294
+
295
+ // Basic token tracking structure should be present
296
+ expect(usage.total_tokens).toBe(
297
+ usage.prompt_tokens + usage.completion_tokens
298
+ );
299
+ });
300
+ });
src/agent.ts ADDED
@@ -0,0 +1,774 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import {z, ZodObject} from 'zod';
2
+ import {CoreAssistantMessage, CoreUserMessage} from 'ai';
3
+ import {SEARCH_PROVIDER, STEP_SLEEP} from "./config";
4
+ import {readUrl, removeAllLineBreaks} from "./tools/read";
5
+ import fs from 'fs/promises';
6
+ import {SafeSearchType, search as duckSearch} from "duck-duck-scrape";
7
+ import {braveSearch} from "./tools/brave-search";
8
+ import {rewriteQuery} from "./tools/query-rewriter";
9
+ import {dedupQueries} from "./tools/jina-dedup";
10
+ import {evaluateAnswer, evaluateQuestion} from "./tools/evaluator";
11
+ import {analyzeSteps} from "./tools/error-analyzer";
12
+ import {TokenTracker} from "./utils/token-tracker";
13
+ import {ActionTracker} from "./utils/action-tracker";
14
+ import {StepAction, AnswerAction, KnowledgeItem, EvaluationCriteria} from "./types";
15
+ import {TrackerContext} from "./types";
16
+ import {search} from "./tools/jina-search";
17
+ // import {grounding} from "./tools/grounding";
18
+ import {zodToJsonSchema} from "zod-to-json-schema";
19
+ import {ObjectGeneratorSafe} from "./utils/safe-generator";
20
+
21
+ async function sleep(ms: number) {
22
+ const seconds = Math.ceil(ms / 1000);
23
+ console.log(`Waiting ${seconds}s...`);
24
+ return new Promise(resolve => setTimeout(resolve, ms));
25
+ }
26
+
27
/**
 * Build the zod schema constraining the agent's next step.
 * Only the permitted action types (search/answer/reflect/visit) are included
 * in the `action` enum, and each action's payload field is added as an
 * optional property whose description states when it is required.
 * @param languageStyle target language/style hint embedded in the answer field's description
 */
function getSchema(allowReflect: boolean, allowRead: boolean, allowAnswer: boolean, allowSearch: boolean, languageStyle: string = 'same language as the question') {
    const actions: string[] = [];
    const properties: Record<string, z.ZodTypeAny> = {
        action: z.enum(['placeholder']), // Will update later with actual actions
        think: z.string().describe("Explain why choose this action, what's the thought process behind choosing this action")
    };

    if (allowSearch) {
        actions.push("search");
        properties.searchQuery = z.string().max(30)
            .describe("Required when action='search'. Must be a short, keyword-based query that BM25, tf-idf based search engines can understand. Existing queries must be avoided").optional();
    }

    if (allowAnswer) {
        actions.push("answer");
        properties.answer = z.string()
            .describe(`Required when action='answer'. Must in ${languageStyle}`).optional();
        properties.references = z.array(
            z.object({
                exactQuote: z.string().describe("Exact relevant quote from the document"),
                url: z.string().describe("source URL; must be directly from the context")
            }).required()
        ).describe("Required when action='answer'. Must be an array of references that support the answer, each reference must contain an exact quote and the URL of the document").optional();
    }

    if (allowReflect) {
        actions.push("reflect");
        properties.questionsToAnswer = z.array(
            z.string().describe("each question must be a single line, concise and clear. not composite or compound, less than 20 words.")
        ).max(2)
            .describe("Required when action='reflect'. List of most important questions to fill the knowledge gaps of finding the answer to the original question").optional();
    }

    if (allowRead) {
        actions.push("visit");
        properties.URLTargets = z.array(z.string())
            .max(2)
            .describe("Required when action='visit'. Must be an array of URLs, choose up the most relevant 2 URLs to visit").optional();
    }

    // Update the enum values after collecting all actions
    properties.action = z.enum(actions as [string, ...string[]])
        .describe("Must match exactly one action type");

    return z.object(properties);

}
74
+
75
+
76
/**
 * Build the per-step prompt for the agent LLM.
 *
 * Assembles, in order: a header with the current date and the question, the
 * action diary (`context`), gathered knowledge, failed attempts plus the
 * learned strategy, the list of currently-permitted actions, and a strict
 * JSON-only output instruction. Consecutive blank lines are collapsed via
 * removeExtraLineBreaks before returning.
 *
 * NOTE(review): `allQuestions` is accepted but never referenced in this body —
 * confirm whether callers can drop it or it was meant to appear in the prompt.
 *
 * @param question      the question the agent must answer at this step
 * @param context       diary entries describing the actions taken so far
 * @param allKeywords   search queries already tried (rendered as <bad-queries>)
 * @param badContext    rejected answer attempts with evaluator feedback
 * @param knowledge     Q/A pairs gathered so far, rendered as <knowledge-*>
 * @param allURLs       url -> title map of candidate pages for <action-visit>
 * @param beastMode     when true, appends a permissive <action-answer> block;
 *                      the beast-mode caller passes allowAnswer=false so only
 *                      this permissive block is present
 * @param languageStyle style/language instruction injected into answer rules
 * @returns the fully assembled prompt string
 */
function getPrompt(
  question: string,
  context?: string[],
  allQuestions?: string[],
  allKeywords?: string[],
  allowReflect: boolean = true,
  allowAnswer: boolean = true,
  allowRead: boolean = true,
  allowSearch: boolean = true,
  badContext?: { question: string, answer: string, evaluation: string, recap: string; blame: string; improvement: string; }[],
  knowledge?: KnowledgeItem[],
  allURLs?: Record<string, string>,
  beastMode?: boolean,
  languageStyle?: string
): string {
  const sections: string[] = [];
  const actionSections: string[] = [];

  // Add header section
  sections.push(`Current date: ${new Date().toUTCString()}

You are an advanced AI research agent from Jina AI. You are specialized in multistep reasoning. Using your training data and prior lessons learned, answer the following question with absolute certainty:

<question>
${question}
</question>
`);

  // Add context section if exists
  if (context?.length) {
    sections.push(`
You have conducted the following actions:
<context>
${context.join('\n')}

</context>
`);
  }

  // Add knowledge section if exists
  if (knowledge?.length) {
    const knowledgeItems = knowledge
      .map((k, i) => `
<knowledge-${i + 1}>
<question>
${k.question}
</question>
<answer>
${k.answer}
</answer>
${k.references ? `
<references>
${JSON.stringify(k.references)}
</references>
` : ''}
</knowledge-${i + 1}>
`)
      .join('\n\n');

    sections.push(`
You have successfully gathered some knowledge which might be useful for answering the original question. Here is the knowledge you have gathered so far:
<knowledge>

${knowledgeItems}

</knowledge>
`);
  }

  // Add bad context section if exists
  if (badContext?.length) {
    const attempts = badContext
      .map((c, i) => `
<attempt-${i + 1}>
- Question: ${c.question}
- Answer: ${c.answer}
- Reject Reason: ${c.evaluation}
- Actions Recap: ${c.recap}
- Actions Blame: ${c.blame}
</attempt-${i + 1}>
`)
      .join('\n\n');

    const learnedStrategy = badContext.map(c => c.improvement).join('\n');

    sections.push(`
Your have tried the following actions but failed to find the answer to the question:
<bad-attempts>

${attempts}

</bad-attempts>

Based on the failed attempts, you have learned the following strategy:
<learned-strategy>
${learnedStrategy}
</learned-strategy>
`);
  }

  // Build actions section
  // Each allow* flag gates whether the corresponding <action-*> block is
  // offered to the model; the same flags drive getSchema so the prompt and
  // the structured-output schema stay in sync.

  if (allowRead) {
    let urlList = '';
    if (allURLs && Object.keys(allURLs).length > 0) {
      urlList = Object.entries(allURLs)
        .map(([url, desc]) => ` + "${url}": "${desc}"`)
        .join('\n');
    }

    actionSections.push(`
<action-visit>
- This allows you to access the full content behind any URLs.
- If the <question> contains a URL, you must visit the URL to gather more information.
${urlList ? `
- Visit any URLs from below to gather external knowledge, choose the most relevant URLs that might contain the answer
<url-list>
${urlList}
</url-list>
`.trim() : ''}
</action-visit>
`);
  }

  if (allowSearch) {

    actionSections.push(`
<action-search>
${allKeywords?.length ? `
- Avoid the searched queries below as they do not give any useful information, you need to think out of the box and propose queries from a completely different angle:
<bad-queries>
${allKeywords.join('\n')}
</bad-queries>
`.trim() : ''}
- Propose some unique new queries that might help you find the answer to the question
- Focus on solving one specific aspect of the original question
- Only use keywords, not full sentences
</action-search>
`);
  }

  if (allowAnswer) {
    actionSections.push(`
<action-answer>
- If <question> is a simple greeting, chit-chat, or general knowledge, provide the answer directly;
- Must provide "references" and each must specify "exactQuote" and "url";
- In the answer, use markdown footnote syntax like [^1], [^2] to refer to the references;
- Responses must be definitive (no ambiguity, uncertainty, or disclaimers) and in the style of ${languageStyle};
- Provide final response only when 100% certain;${allowReflect ? '\n- If doubts remain, use <action-reflect> instead' : ''}
</action-answer>
`);
  }

  // Beast mode appends a second, much more permissive <action-answer> block
  // that encourages an educated guess; the beast-mode caller disables the
  // strict allowAnswer block above, so the two do not appear together.
  if (beastMode) {
    actionSections.push(`
<action-answer>
- Any answer is better than no answer
- Partial answers are allowed, but make sure they are based on the context and knowledge you have gathered
- When uncertain, educated guess based on the context and knowledge is allowed and encouraged.
- Responses must be definitive (no ambiguity, uncertainty, or disclaimers)
</action-answer>
`);
  }

  if (allowReflect) {
    actionSections.push(`
<action-reflect>
- Perform critical analysis through hypothetical scenarios or systematic breakdowns
- Identify knowledge gaps and formulate essential clarifying questions
- Questions must be:
- Original (not variations of existing questions)
- Focused on single concepts
- Under 20 words
- Non-compound/non-complex
</action-reflect>
`);
  }

  sections.push(`
Based on the current context, you must choose one of the following actions:
<actions>
${actionSections.join('\n\n')}
</actions>
`);

  // Add footer
  sections.push(`Respond exclusively in valid JSON format matching exact JSON schema.

Critical Requirements:
- Include ONLY ONE action type
- Never add unsupported keys
- Exclude all non-JSON text, markdown, or explanations
- Maintain strict JSON syntax`);

  return removeExtraLineBreaks(sections.join('\n\n'));
}
272
+
273
+ const removeExtraLineBreaks = (text: string) => {
274
+ return text.replace(/\n{2,}/gm, '\n\n');
275
+ }
276
+
277
// Session-wide trace: every step taken in the current session, including the
// ones that led to wrong results. Written by updateContext() during the agent
// loop and persisted by storeContext().
const allContext: StepAction[] = []; // all steps in the current session, including those leads to wrong results

// Append a finished step (spread together with its outcome fields, hence the
// loose `any` type) to the session-wide trace above.
function updateContext(step: any) {
  allContext.push(step)
}
282
+
283
+
284
+ function removeHTMLtags(text: string) {
285
+ return text.replace(/<[^>]*>?/gm, '');
286
+ }
287
+
288
+
289
/**
 * Run the multistep research agent loop for `question` until it produces a
 * final answer (or exhausts its budget and falls back to "beast mode").
 *
 * Loop invariants:
 * - runs while total token usage stays under `tokenBudget` AND
 *   `badAttempts <= maxBadAttempts`;
 * - `gaps` is the work queue of open questions (the original question is
 *   re-appended whenever new sub-questions are queued);
 * - each iteration builds a prompt + schema restricted to the currently
 *   allowed actions, asks the model for one structured action, and executes it.
 *
 * If the loop exits without a final answer, a last "beast mode" generation is
 * forced where only a permissive answer action is allowed.
 *
 * @param question        the user question to research
 * @param tokenBudget     max total tokens the whole session may consume
 * @param maxBadAttempts  rejected-answer attempts tolerated before giving up
 * @param existingContext optional trackers to reuse (e.g. across server calls)
 * @param historyMessages prior chat turns; user/assistant pairs are imported
 *                        as 'chat-history' knowledge
 * @returns the final step (answer) plus the tracker context
 */
export async function getResponse(question: string,
  tokenBudget: number = 1_000_000,
  maxBadAttempts: number = 3,
  existingContext?: Partial<TrackerContext>,
  historyMessages?: Array<CoreAssistantMessage | CoreUserMessage>
): Promise<{ result: StepAction; context: TrackerContext }> {
  const context: TrackerContext = {
    tokenTracker: existingContext?.tokenTracker || new TokenTracker(tokenBudget),
    actionTracker: existingContext?.actionTracker || new ActionTracker()
  };
  let step = 0;       // steps since the last bad attempt (reset on rejection)
  let totalStep = 0;  // monotonically increasing step counter for the session
  let badAttempts = 0;
  let schema: ZodObject<any> = getSchema(true, true, true, true)
  question = question.trim()
  const gaps: string[] = [question]; // All questions to be answered including the original question
  const allQuestions = [question];
  const allKeywords = [];
  const allKnowledge: KnowledgeItem[] = []; // knowledge are intermediate questions that are answered
  // Import chat history: each user message followed by an assistant message
  // becomes a Q/A knowledge item and the question joins allQuestions.
  historyMessages?.forEach((message, i) => {
    if (message.role === 'user' && message.content && historyMessages[i + 1]?.role === 'assistant') {
      allQuestions.push(message.content as string)
      allKnowledge.push({
        question: message.content as string,
        answer: (historyMessages[i + 1]?.content || '') as string,
        type: 'chat-history',
        updated: new Date().toISOString()
      });
    }
  })

  const badContext = [];
  let diaryContext = [];
  let allowAnswer = true;
  let allowSearch = true;
  let allowRead = true;
  let allowReflect = true;
  let prompt = '';
  let thisStep: StepAction = {action: 'answer', answer: '', references: [], think: '', isFinal: false};

  const allURLs: Record<string, string> = {};       // url -> title, candidates for 'visit'
  const visitedURLs: string[] = [];                 // urls already read, never revisited
  const evaluationMetrics: Record<string, EvaluationCriteria> = {};
  while (context.tokenTracker.getTotalUsage().totalTokens < tokenBudget && badAttempts <= maxBadAttempts) {
    // add 1s delay to avoid rate limiting
    await sleep(STEP_SLEEP);
    step++;
    totalStep++;
    const budgetPercentage = (context.tokenTracker.getTotalUsage().totalTokens / tokenBudget * 100).toFixed(2);
    console.log(`Step ${totalStep} / Budget used ${budgetPercentage}%`);
    console.log('Gaps:', gaps);
    // Only allow further reflection when the work queue is nearly empty,
    // so the agent doesn't keep spawning sub-questions.
    allowReflect = allowReflect && (gaps.length <= 1);
    const currentQuestion = gaps.length > 0 ? gaps.shift()! : question
    // Evaluation criteria are computed once per distinct question and cached.
    if (!evaluationMetrics[currentQuestion]) {
      evaluationMetrics[currentQuestion] = await evaluateQuestion(currentQuestion, context.tokenTracker)
    }

    // update all urls with buildURLMap
    // allowRead = allowRead && (Object.keys(allURLs).length > 0);
    allowSearch = allowSearch && (Object.keys(allURLs).length < 50); // disable search when too many urls already

    // generate prompt for this step
    prompt = getPrompt(
      currentQuestion,
      diaryContext,
      allQuestions,
      allKeywords,
      allowReflect,
      allowAnswer,
      allowRead,
      allowSearch,
      badContext,
      allKnowledge,
      allURLs,
      false,
      evaluationMetrics[currentQuestion].languageStyle
    );
    schema = getSchema(allowReflect, allowRead, allowAnswer, allowSearch,
      evaluationMetrics[currentQuestion].languageStyle)
    const generator = new ObjectGeneratorSafe(context.tokenTracker);
    const result = await generator.generateObject({
      model: 'agent',
      schema,
      prompt,
    });
    thisStep = result.object as StepAction;
    // print allowed and chose action
    const actionsStr = [allowSearch, allowRead, allowAnswer, allowReflect].map((a, i) => a ? ['search', 'read', 'answer', 'reflect'][i] : null).filter(a => a).join(', ');
    console.log(`${thisStep.action} <- [${actionsStr}]`);
    console.log(thisStep)

    context.actionTracker.trackAction({totalStep, thisStep, gaps, badAttempts});

    // reset allowAnswer to true
    // (all permissions re-enable by default; the branches below selectively
    // disable one for the *next* step when the current action was fruitless)
    allowAnswer = true;
    allowReflect = true;
    allowRead = true;
    allowSearch = true;

    // execute the step and action
    if (thisStep.action === 'answer') {
      if (step === 1) {
        // LLM is so confident and answer immediately, skip all evaluations
        thisStep.isFinal = true;
        break
      }

      updateContext({
        totalStep,
        question: currentQuestion,
        ...thisStep,
      });

      const {response: evaluation} = await evaluateAnswer(currentQuestion, thisStep,
        evaluationMetrics[currentQuestion], context.tokenTracker);

      if (currentQuestion.trim() === question) {
        // Answering the ORIGINAL question: pass ends the session, fail counts
        // as a bad attempt (and resets the diary + per-attempt step counter).
        if (evaluation.pass) {
          diaryContext.push(`
At step ${step}, you took **answer** action and finally found the answer to the original question:

Original question:
${currentQuestion}

Your answer:
${thisStep.answer}

The evaluator thinks your answer is good because:
${evaluation.think}

Your journey ends here. You have successfully answered the original question. Congratulations! 🎉
`);
          thisStep.isFinal = true;
          break
        } else {
          if (badAttempts >= maxBadAttempts) {
            thisStep.isFinal = false;
            break
          } else {
            diaryContext.push(`
At step ${step}, you took **answer** action but evaluator thinks it is not a good answer:

Original question:
${currentQuestion}

Your answer:
${thisStep.answer}

The evaluator thinks your answer is bad because:
${evaluation.think}
`);
            // store the bad context and reset the diary context
            const {response: errorAnalysis} = await analyzeSteps(diaryContext, context.tokenTracker);

            allKnowledge.push({
              question: currentQuestion,
              answer: thisStep.answer,
              references: thisStep.references,
              type: 'qa',
              updated: new Date().toISOString()
            });

            badContext.push({
              question: currentQuestion,
              answer: thisStep.answer,
              evaluation: evaluation.think,
              ...errorAnalysis
            });

            if (errorAnalysis.questionsToAnswer) {
              // reranker? maybe
              gaps.push(...errorAnalysis.questionsToAnswer.slice(0, 2));
              allQuestions.push(...errorAnalysis.questionsToAnswer.slice(0, 2));
              gaps.push(question); // always keep the original question in the gaps
            }

            badAttempts++;
            allowAnswer = false; // disable answer action in the immediate next step
            diaryContext = [];
            step = 0;
          }
        }
      } else if (evaluation.pass) {
        // Answered a SUB-question successfully: record it as knowledge and
        // keep looping toward the original question.
        diaryContext.push(`
At step ${step}, you took **answer** action. You found a good answer to the sub-question:

Sub-question:
${currentQuestion}

Your answer:
${thisStep.answer}

The evaluator thinks your answer is good because:
${evaluation.think}

Although you solved a sub-question, you still need to find the answer to the original question. You need to keep going.
`);
        allKnowledge.push({
          question: currentQuestion,
          answer: thisStep.answer,
          references: thisStep.references,
          type: 'qa',
          updated: new Date().toISOString()
        });
      }
    } else if (thisStep.action === 'reflect' && thisStep.questionsToAnswer) {
      let newGapQuestions = thisStep.questionsToAnswer
      const oldQuestions = newGapQuestions;
      // Drop proposed sub-questions that duplicate anything already asked.
      newGapQuestions = (await dedupQueries(newGapQuestions, allQuestions, context.tokenTracker)).unique_queries;
      if (newGapQuestions.length > 0) {
        // found new gap questions
        diaryContext.push(`
At step ${step}, you took **reflect** and think about the knowledge gaps. You found some sub-questions are important to the question: "${currentQuestion}"
You realize you need to know the answers to the following sub-questions:
${newGapQuestions.map((q: string) => `- ${q}`).join('\n')}

You will now figure out the answers to these sub-questions and see if they can help you find the answer to the original question.
`);
        gaps.push(...newGapQuestions.slice(0, 2));
        allQuestions.push(...newGapQuestions.slice(0, 2));
        gaps.push(question); // always keep the original question in the gaps
      } else {
        diaryContext.push(`
At step ${step}, you took **reflect** and think about the knowledge gaps. You tried to break down the question "${currentQuestion}" into gap-questions like this: ${oldQuestions.join(', ')}
But then you realized you have asked them before. You decided to to think out of the box or cut from a completely different angle.
`);
        updateContext({
          totalStep,
          ...thisStep,
          result: 'You have tried all possible questions and found no useful information. You must think out of the box or different angle!!!'
        });

        allowReflect = false;
      }
    } else if (thisStep.action === 'search' && thisStep.searchQuery) {
      // rewrite queries
      let {queries: keywordsQueries} = await rewriteQuery(thisStep, context.tokenTracker);

      const oldKeywords = keywordsQueries;
      // avoid existing searched queries
      const {unique_queries: dedupedQueries} = await dedupQueries(keywordsQueries, allKeywords, context.tokenTracker);
      keywordsQueries = dedupedQueries;

      if (keywordsQueries.length > 0) {
        // let googleGrounded = '';
        const searchResults = [];
        context.actionTracker.trackThink(`Let me search for "${keywordsQueries.join(', ')}" to gather more information.`)
        for (const query of keywordsQueries) {
          console.log(`Search query: ${query}`);

          let results;

          // Dispatch on the configured provider; every branch normalizes to
          // an object with a `results` array of {title, url, description}.
          switch (SEARCH_PROVIDER) {
            case 'jina':
              // use jinaSearch
              results = {results: (await search(query, context.tokenTracker)).response?.data || []};
              // if (LLM_PROVIDER === 'gemini') {
              //   googleGrounded = await grounding(query, context.tokenTracker);
              // }
              break;
            case 'duck':
              results = await duckSearch(query, {safeSearch: SafeSearchType.STRICT});
              break;
            case 'brave':
              try {
                const {response} = await braveSearch(query);
                results = {
                  results: response.web?.results?.map(r => ({
                    title: r.title,
                    url: r.url,
                    description: r.description
                  })) || []
                };
              } catch (error) {
                // Brave failures degrade to an empty result set rather than
                // aborting the whole agent loop.
                console.error('Brave search failed:', error);
                results = {results: []};
              }
              await sleep(STEP_SLEEP)
              break;
            default:
              results = {results: []};
          }
          const minResults = results.results.map(r => ({
            title: r.title,
            url: r.url,
            description: r.description
          }));

          // Merge discovered URLs into the visit-candidate map (url -> title).
          Object.assign(allURLs, Object.fromEntries(
            minResults.map(r => [r.url, r.title])
          ));
          searchResults.push({query, results: minResults});
          allKeywords.push(query);
        }

        allKnowledge.push({
          question: `What do Internet say about ${thisStep.searchQuery}?`,
          answer: removeHTMLtags(searchResults.map(r => r.results.map(r => r.description).join('; ')).join('; ')),
          // answer: googleGrounded + removeHTMLtags(searchResults.map(r => r.results.map(r => r.description).join('; ')).join('; ')),
          type: 'side-info',
          updated: new Date().toISOString()
        });

        diaryContext.push(`
At step ${step}, you took the **search** action and look for external information for the question: "${currentQuestion}".
In particular, you tried to search for the following keywords: "${keywordsQueries.join(', ')}".
You found quite some information and add them to your URL list and **visit** them later when needed.
`);

        updateContext({
          totalStep,
          question: currentQuestion,
          ...thisStep,
          result: searchResults
        });
      } else {
        diaryContext.push(`
At step ${step}, you took the **search** action and look for external information for the question: "${currentQuestion}".
In particular, you tried to search for the following keywords: ${oldKeywords.join(', ')}.
But then you realized you have already searched for these keywords before.
You decided to think out of the box or cut from a completely different angle.
`);


        updateContext({
          totalStep,
          ...thisStep,
          result: 'You have tried all possible queries and found no new information. You must think out of the box or different angle!!!'
        });

        allowSearch = false;
      }
    } else if (thisStep.action === 'visit' && thisStep.URLTargets?.length) {

      let uniqueURLs = thisStep.URLTargets;
      if (visitedURLs.length > 0) {
        // check duplicate urls
        uniqueURLs = uniqueURLs.filter((url: string) => !visitedURLs.includes(url));
      }

      if (uniqueURLs.length > 0) {
        context.actionTracker.trackThink(`Let me read ${uniqueURLs.join(', ')} to gather more information.`);
        // Read all target URLs concurrently; a failed read logs and yields
        // undefined in urlResults (hence the r?.url below).
        const urlResults = await Promise.all(
          uniqueURLs.map(async (url: string) => {
            try {
              const {response} = await readUrl(url, context.tokenTracker);
              allKnowledge.push({
                question: `What is in ${response.data?.url || 'the URL'}?`,
                answer: removeAllLineBreaks(response.data?.content || 'No content available'),
                references: [response.data?.url],
                type: 'url',
                updated: new Date().toISOString()
              });
              visitedURLs.push(url);
              delete allURLs[url]; // no longer a visit candidate
              return {url, result: response};
            } catch (error) {
              console.error('Error reading URL:', error);
            }
          })
        );
        diaryContext.push(`
At step ${step}, you took the **visit** action and deep dive into the following URLs:
${urlResults.map(r => r?.url).join('\n')}
You found some useful information on the web and add them to your knowledge for future reference.
`);
        updateContext({
          totalStep,
          question: currentQuestion,
          ...thisStep,
          result: urlResults
        });
      } else {

        diaryContext.push(`
At step ${step}, you took the **visit** action and try to visit the following URLs:
${thisStep.URLTargets.join('\n')}
But then you realized you have already visited these URLs and you already know very well about their contents.

You decided to think out of the box or cut from a completely different angle.`);

        updateContext({
          totalStep,
          ...thisStep,
          result: 'You have visited all possible URLs and found no new information. You must think out of the box or different angle!!!'
        });

        allowRead = false;
      }
    }

    await storeContext(prompt, schema, [allContext, allKeywords, allQuestions, allKnowledge], totalStep);
  }

  await storeContext(prompt, schema, [allContext, allKeywords, allQuestions, allKnowledge], totalStep);
  if (!(thisStep as AnswerAction).isFinal) {
    console.log('Enter Beast mode!!!')
    // any answer is better than no answer, humanity last resort
    // All regular actions are disabled; only the permissive beast-mode
    // <action-answer> block is offered, and the result is forced final.
    step++;
    totalStep++;
    const prompt = getPrompt(
      question,
      diaryContext,
      allQuestions,
      allKeywords,
      false,
      false,
      false,
      false,
      badContext,
      allKnowledge,
      allURLs,
      true,
      evaluationMetrics[question]?.languageStyle || 'same language as the question'
    );

    schema = getSchema(false, false, true, false,
      evaluationMetrics[question]?.languageStyle || 'same language as the question');
    const generator = new ObjectGeneratorSafe(context.tokenTracker);
    const result = await generator.generateObject({
      model: 'agentBeastMode',
      schema,
      prompt,
    });
    thisStep = result.object as AnswerAction;
    (thisStep as AnswerAction).isFinal = true;
    context.actionTracker.trackAction({totalStep, thisStep, gaps, badAttempts});
  }
  console.log(thisStep)

  await storeContext(prompt, schema, [allContext, allKeywords, allQuestions, allKnowledge], totalStep);
  return {result: thisStep, context};

}
726
+
727
+ async function storeContext(prompt: string, schema: any, memory: any[][], step: number) {
728
+ if ((process as any).asyncLocalContext?.available?.()) {
729
+ const [context, keywords, questions, knowledge] = memory;
730
+ (process as any).asyncLocalContext.ctx.promptContext = {
731
+ prompt,
732
+ schema,
733
+ context,
734
+ keywords,
735
+ questions,
736
+ knowledge,
737
+ step
738
+ };
739
+ return;
740
+ }
741
+
742
+ try {
743
+ await fs.writeFile(`prompt-${step}.txt`, `
744
+ Prompt:
745
+ ${prompt}
746
+
747
+ JSONSchema:
748
+ ${JSON.stringify(zodToJsonSchema(schema), null, 2)}
749
+ `);
750
+ const [context, keywords, questions, knowledge] = memory;
751
+ await fs.writeFile('context.json', JSON.stringify(context, null, 2));
752
+ await fs.writeFile('queries.json', JSON.stringify(keywords, null, 2));
753
+ await fs.writeFile('questions.json', JSON.stringify(questions, null, 2));
754
+ await fs.writeFile('knowledge.json', JSON.stringify(knowledge, null, 2));
755
+ } catch (error) {
756
+ console.error('Context storage failed:', error);
757
+ }
758
+ }
759
+
760
+
761
+ export async function main() {
762
+ const question = process.argv[2] || "";
763
+ const {
764
+ result: finalStep,
765
+ context: tracker
766
+ } = await getResponse(question) as { result: AnswerAction; context: TrackerContext };
767
+ console.log('Final Answer:', finalStep.answer);
768
+
769
+ tracker.tokenTracker.printSummary();
770
+ }
771
+
772
// Allow running this module directly from the CLI (e.g. `node agent.js "question"`);
// when imported as a library, main() is not invoked.
if (require.main === module) {
  main().catch(console.error);
}
src/app.ts ADDED
@@ -0,0 +1,646 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import express, {Request, Response, RequestHandler} from 'express';
2
+ import cors from 'cors';
3
+ import {getResponse} from './agent';
4
+ import {
5
+ TrackerContext,
6
+ ChatCompletionRequest,
7
+ ChatCompletionResponse,
8
+ ChatCompletionChunk,
9
+ AnswerAction,
10
+ Model, StepAction
11
+ } from './types';
12
+ import {TokenTracker} from "./utils/token-tracker";
13
+ import {ActionTracker} from "./utils/action-tracker";
14
+
15
+ const app = express();
16
+
17
+ // Get secret from command line args for optional authentication
18
+ const secret = process.argv.find(arg => arg.startsWith('--secret='))?.split('=')[1];
19
+
20
+ app.use(cors());
21
+ app.use(express.json({
22
+ limit: '10mb'
23
+ }));
24
+
25
+ // Add health check endpoint for Docker container verification
26
+ app.get('/health', (req, res) => {
27
+ res.json({status: 'ok'});
28
+ });
29
+
30
+ function buildMdFromAnswer(answer: AnswerAction) {
31
+ if (!answer.references?.length || !answer.references.some(ref => ref.url.startsWith('http'))) {
32
+ return answer.answer;
33
+ }
34
+
35
+ const references = answer.references.map((ref, i) => {
36
+ const escapedQuote = ref.exactQuote
37
+ .replace(/([[\]_*`])/g, '\\$1')
38
+ .replace(/\n/g, ' ')
39
+ .trim();
40
+
41
+ return `[^${i + 1}]: [${escapedQuote}](${ref.url})`;
42
+ }).join('\n\n');
43
+
44
+ return `
45
+ ${answer.answer.replace(/\(REF_(\d+)\)/g, (_, num) => `[^${num}]`)}
46
+
47
+
48
+ ${references}
49
+
50
+ `.trim();
51
+ }
52
+
53
+ async function* streamTextNaturally(text: string, streamingState: StreamingState) {
54
+ // Split text into chunks that preserve CJK characters, URLs, and regular words
55
+ const chunks = splitTextIntoChunks(text);
56
+ let burstMode = false;
57
+ let consecutiveShortItems = 0;
58
+
59
+ for (const chunk of chunks) {
60
+ if (!streamingState.currentlyStreaming) {
61
+ yield chunks.slice(chunks.indexOf(chunk)).join('');
62
+ return;
63
+ }
64
+
65
+ const delay = calculateDelay(chunk, burstMode);
66
+
67
+ // Handle consecutive short items
68
+ if (getEffectiveLength(chunk) <= 3 && chunk.trim().length > 0) {
69
+ consecutiveShortItems++;
70
+ if (consecutiveShortItems >= 3) {
71
+ burstMode = true;
72
+ }
73
+ } else {
74
+ consecutiveShortItems = 0;
75
+ burstMode = false;
76
+ }
77
+
78
+ await new Promise(resolve => setTimeout(resolve, delay));
79
+ yield chunk;
80
+ }
81
+ }
82
+
83
+ function splitTextIntoChunks(text: string): string[] {
84
+ const chunks: string[] = [];
85
+ let currentChunk = '';
86
+ let inURL = false;
87
+
88
+ const pushCurrentChunk = () => {
89
+ if (currentChunk) {
90
+ chunks.push(currentChunk);
91
+ currentChunk = '';
92
+ }
93
+ };
94
+
95
+ for (let i = 0; i < text.length; i++) {
96
+ const char = text[i];
97
+ const nextChar = text[i + 1] || '';
98
+
99
+ // URL detection
100
+ if (char === 'h' && text.slice(i, i + 8).match(/https?:\/\//)) {
101
+ pushCurrentChunk();
102
+ inURL = true;
103
+ }
104
+
105
+ if (inURL) {
106
+ currentChunk += char;
107
+ // End of URL detection (whitespace or certain punctuation)
108
+ if (/[\s\])}"']/.test(nextChar) || i === text.length - 1) {
109
+ pushCurrentChunk();
110
+ inURL = false;
111
+ }
112
+ continue;
113
+ }
114
+
115
+ // CJK character detection (including kana and hangul)
116
+ if (/[\u4e00-\u9fff\u3040-\u30ff\uac00-\ud7af]/.test(char)) {
117
+ pushCurrentChunk();
118
+ chunks.push(char);
119
+ continue;
120
+ }
121
+
122
+ // Whitespace handling
123
+ if (/\s/.test(char)) {
124
+ pushCurrentChunk();
125
+ chunks.push(char);
126
+ continue;
127
+ }
128
+
129
+ // Regular word building
130
+ currentChunk += char;
131
+
132
+ // Break on punctuation
133
+ if (/[.!?,;:]/.test(nextChar)) {
134
+ pushCurrentChunk();
135
+ }
136
+ }
137
+
138
+ pushCurrentChunk();
139
+ return chunks.filter(chunk => chunk !== '');
140
+ }
141
+
142
+ function calculateDelay(chunk: string, burstMode: boolean): number {
143
+ const trimmedChunk = chunk.trim();
144
+
145
+ // Handle whitespace
146
+ if (trimmedChunk.length === 0) {
147
+ return Math.random() * 20 + 10;
148
+ }
149
+
150
+ // Special handling for URLs
151
+ if (chunk.match(/^https?:\/\//)) {
152
+ return Math.random() * 50 + 100; // Slower typing for URLs
153
+ }
154
+
155
+ // Special handling for CJK characters
156
+ if (/^[\u4e00-\u9fff\u3040-\u30ff\uac00-\ud7af]$/.test(chunk)) {
157
+ return Math.random() * 100 + 150; // Longer delay for individual CJK characters
158
+ }
159
+
160
+ // Base delay calculation
161
+ let baseDelay;
162
+ if (burstMode) {
163
+ baseDelay = Math.random() * 30 + 20;
164
+ } else {
165
+ const effectiveLength = getEffectiveLength(chunk);
166
+ const perCharacterDelay = Math.max(10, 40 - effectiveLength * 2);
167
+ baseDelay = Math.random() * perCharacterDelay + perCharacterDelay;
168
+ }
169
+
170
+ // Add variance based on chunk characteristics
171
+ if (/[A-Z]/.test(chunk[0])) {
172
+ baseDelay += Math.random() * 20 + 10;
173
+ }
174
+
175
+ if (/[^a-zA-Z\s]/.test(chunk)) {
176
+ baseDelay += Math.random() * 30 + 15;
177
+ }
178
+
179
+ // Add pauses for punctuation
180
+ if (/[.!?]$/.test(chunk)) {
181
+ baseDelay += Math.random() * 350 + 200;
182
+ } else if (/[,;:]$/.test(chunk)) {
183
+ baseDelay += Math.random() * 150 + 100;
184
+ }
185
+
186
+ return baseDelay;
187
+ }
188
+
189
+ function getEffectiveLength(chunk: string): number {
190
+ // Count CJK characters as 2 units
191
+ const cjkCount = (chunk.match(/[\u4e00-\u9fff\u3040-\u30ff\uac00-\ud7af]/g) || []).length;
192
+ const regularCount = chunk.length - cjkCount;
193
+ return regularCount + (cjkCount * 2);
194
+ }
195
+
196
+ // Helper function to emit remaining content immediately
197
+ async function emitRemainingContent(
198
+ res: Response,
199
+ requestId: string,
200
+ created: number,
201
+ model: string,
202
+ content: string,
203
+ ) {
204
+ if (!content) return;
205
+
206
+ const chunk: ChatCompletionChunk = {
207
+ id: requestId,
208
+ object: 'chat.completion.chunk',
209
+ created,
210
+ model: model,
211
+ system_fingerprint: 'fp_' + requestId,
212
+ choices: [{
213
+ index: 0,
214
+ delta: {content},
215
+ logprobs: null,
216
+ finish_reason: null
217
+ }],
218
+ };
219
+ res.write(`data: ${JSON.stringify(chunk)}\n\n`);
220
+ }
221
+
222
// Mutable state shared between the natural-typing generator and the request
// handler driving it.
interface StreamingState {
  // Setting this false aborts streamTextNaturally, which flushes the rest at once.
  currentlyStreaming: boolean;
  // The generator currently emitting text, if any.
  currentGenerator: AsyncGenerator<string> | null;
  // Text not yet flushed to the client; completeCurrentStreaming emits it in one chunk.
  remainingContent: string;
  // Presumably a guard so only one emit loop runs at a time — usage not visible here; verify against the handler.
  isEmitting: boolean;
  // Presumably pending content waiting to be streamed, with resolve() called once emitted — verify against the handler.
  queue: { content: string; resolve: () => void }[];
  // Presumably whether the queue-drain loop is active — verify against the handler.
  processingQueue: boolean;
}
230
+
231
+ function getTokenBudgetAndMaxAttempts(
232
+ reasoningEffort: 'low' | 'medium' | 'high' | null = 'medium',
233
+ maxCompletionTokens: number | null = null
234
+ ): { tokenBudget: number, maxBadAttempts: number } {
235
+ if (maxCompletionTokens !== null) {
236
+ return {
237
+ tokenBudget: maxCompletionTokens,
238
+ maxBadAttempts: 3 // Default to medium setting for max attempts
239
+ };
240
+ }
241
+
242
+ switch (reasoningEffort) {
243
+ case 'low':
244
+ return {tokenBudget: 100000, maxBadAttempts: 1};
245
+ case 'high':
246
+ return {tokenBudget: 1000000, maxBadAttempts: 3};
247
+ case 'medium':
248
+ default:
249
+ return {tokenBudget: 500000, maxBadAttempts: 2};
250
+ }
251
+ }
252
+
253
+
254
+ async function completeCurrentStreaming(
255
+ streamingState: StreamingState,
256
+ res: Response,
257
+ requestId: string,
258
+ created: number,
259
+ model: string
260
+ ) {
261
+ if (streamingState.currentlyStreaming && streamingState.remainingContent) {
262
+ // Force completion of current streaming
263
+ await emitRemainingContent(
264
+ res,
265
+ requestId,
266
+ created,
267
+ model,
268
+ streamingState.remainingContent
269
+ );
270
+ // Reset streaming state
271
+ streamingState.currentlyStreaming = false;
272
+ streamingState.remainingContent = '';
273
+ streamingState.currentGenerator = null;
274
+ }
275
+ }
276
+
277
+ // OpenAI-compatible chat completions endpoint
278
+ // Models API endpoints
279
+ app.get('/v1/models', (async (_req: Request, res: Response) => {
280
+ const models: Model[] = [{
281
+ id: 'jina-deepsearch-v1',
282
+ object: 'model',
283
+ created: 1686935002,
284
+ owned_by: 'jina-ai'
285
+ }];
286
+
287
+ res.json({
288
+ object: 'list',
289
+ data: models
290
+ });
291
+ }) as RequestHandler);
292
+
293
+ app.get('/v1/models/:model', (async (req: Request, res: Response) => {
294
+ const modelId = req.params.model;
295
+
296
+ if (modelId === 'jina-deepsearch-v1') {
297
+ res.json({
298
+ id: 'jina-deepsearch-v1',
299
+ object: 'model',
300
+ created: 1686935002,
301
+ owned_by: 'jina-ai'
302
+ });
303
+ } else {
304
+ res.status(404).json({
305
+ error: {
306
+ message: `Model '${modelId}' not found`,
307
+ type: 'invalid_request_error',
308
+ param: null,
309
+ code: 'model_not_found'
310
+ }
311
+ });
312
+ }
313
+ }) as RequestHandler);
314
+
315
+ if (secret) {
316
+ // Check authentication only if secret is set
317
+ app.use((req, res, next) => {
318
+ const authHeader = req.headers.authorization;
319
+ if (!authHeader || !authHeader.startsWith('Bearer ') || authHeader.split(' ')[1] !== secret) {
320
+ console.log('[chat/completions] Unauthorized request');
321
+ res.status(401).json({error: 'Unauthorized'});
322
+ return;
323
+ }
324
+
325
+ return next();
326
+ });
327
+ }
328
+
329
/**
 * Drain the streaming queue one item at a time, emitting each item's content
 * word-by-word as SSE chat.completion.chunk events, followed by a newline
 * chunk. The `processingQueue` flag guarantees a single drain loop even when
 * this is called concurrently from multiple action events. Each queue item's
 * `resolve` is called once its content has been fully emitted (or failed).
 */
async function processQueue(streamingState: StreamingState, res: Response, requestId: string, created: number, model: string) {
  // Another drain loop is already running; it will pick up new items.
  if (streamingState.processingQueue) return;

  streamingState.processingQueue = true;

  while (streamingState.queue.length > 0) {
    // Peek (not shift) so completeCurrentStreaming can see the active item.
    const current = streamingState.queue[0];

    // Reset streaming state for new content
    streamingState.currentlyStreaming = true;
    streamingState.remainingContent = current.content;
    streamingState.isEmitting = true;

    try {
      // streamTextNaturally yields words with human-like pacing; each word
      // becomes its own delta chunk.
      for await (const word of streamTextNaturally(current.content, streamingState)) {
        const chunk: ChatCompletionChunk = {
          id: requestId,
          object: 'chat.completion.chunk',
          created,
          model,
          system_fingerprint: 'fp_' + requestId,
          choices: [{
            index: 0,
            delta: {content: word},
            logprobs: null,
            finish_reason: null
          }]
        };
        res.write(`data: ${JSON.stringify(chunk)}\n\n`);

        // Small delay between words
        await new Promise(resolve => setTimeout(resolve, 30));
      }

      // Add newline after content
      const newlineChunk: ChatCompletionChunk = {
        id: requestId,
        object: 'chat.completion.chunk',
        created,
        model,
        system_fingerprint: 'fp_' + requestId,
        choices: [{
          index: 0,
          delta: {content: '\n'},
          logprobs: null,
          finish_reason: null
        }]
      };
      res.write(`data: ${JSON.stringify(newlineChunk)}\n\n`);

    } catch (error) {
      // Streaming of this item failed; state is still reset in finally so the
      // queue keeps draining.
      console.error('Error in streaming:', error);
    } finally {
      // Reset state and remove from queue
      streamingState.isEmitting = false;
      streamingState.currentlyStreaming = false;
      streamingState.remainingContent = '';
      streamingState.queue.shift();
      // Unblock the producer awaiting this item.
      current.resolve();

      // Small delay between queue items
      await new Promise(resolve => setTimeout(resolve, 50));
    }
  }

  streamingState.processingQueue = false;
}
396
+
397
// OpenAI-compatible chat completions endpoint. Validates the request, runs the
// deep-search agent, and returns the result either as an SSE stream (with the
// agent's intermediate reasoning wrapped in <think>…</think>) or as a single
// JSON chat.completion response.
app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
  // Check authentication only if secret is set
  if (secret) {
    const authHeader = req.headers.authorization;
    if (!authHeader || !authHeader.startsWith('Bearer ') || authHeader.split(' ')[1] !== secret) {
      console.log('[chat/completions] Unauthorized request');
      res.status(401).json({error: 'Unauthorized'});
      return;
    }
  }

  // Log request details (excluding sensitive data)
  console.log('[chat/completions] Request:', {
    model: req.body.model,
    stream: req.body.stream,
    messageCount: req.body.messages?.length,
    hasAuth: !!req.headers.authorization,
    requestId: Date.now().toString()
  });

  const body = req.body as ChatCompletionRequest;
  if (!body.messages?.length) {
    return res.status(400).json({error: 'Messages array is required and must not be empty'});
  }
  // The agent answers the final user turn; earlier messages are context.
  const lastMessage = body.messages[body.messages.length - 1];
  if (lastMessage.role !== 'user') {
    return res.status(400).json({error: 'Last message must be from user'});
  }

  // Translate OpenAI knobs (reasoning_effort / max_completion_tokens) into
  // the agent's budget and retry limits.
  const {tokenBudget, maxBadAttempts} = getTokenBudgetAndMaxAttempts(
    body.reasoning_effort,
    body.max_completion_tokens
  );

  // NOTE(review): Date.now().toString() as a request id can collide under
  // concurrent requests — consider a UUID. Kept as-is here.
  const requestId = Date.now().toString();
  const created = Math.floor(Date.now() / 1000);
  const context: TrackerContext = {
    tokenTracker: new TokenTracker(),
    actionTracker: new ActionTracker()
  };

  // Shared state for the simulated word-by-word streaming of think steps.
  const streamingState: StreamingState = {
    currentlyStreaming: false,
    currentGenerator: null,
    remainingContent: '',
    isEmitting: false,
    queue: [],
    processingQueue: false
  };

  if (body.stream) {
    // Standard SSE headers.
    res.setHeader('Content-Type', 'text/event-stream');
    res.setHeader('Cache-Control', 'no-cache');
    res.setHeader('Connection', 'keep-alive');


    // Send initial chunk with opening think tag
    const initialChunk: ChatCompletionChunk = {
      id: requestId,
      object: 'chat.completion.chunk',
      created,
      model: body.model,
      system_fingerprint: 'fp_' + requestId,
      choices: [{
        index: 0,
        delta: {role: 'assistant', content: '<think>'},
        logprobs: null,
        finish_reason: null
      }]
    };
    res.write(`data: ${JSON.stringify(initialChunk)}\n\n`);

    // Set up progress listener with cleanup. Each agent step's `think` text is
    // queued and streamed word-by-word; the await blocks until it has been
    // fully emitted, keeping steps in order.
    const actionListener = async (step: StepAction) => {
      // Add content to queue for both thinking steps and final answer
      if (step.think) {
        const content = step.think;
        await new Promise<void>(resolve => {
          streamingState.queue.push({
            content,
            resolve
          });
          // Single call to process queue is sufficient
          processQueue(streamingState, res, requestId, created, body.model);
        });
      }
    };
    context.actionTracker.on('action', actionListener);

    // Make sure to update the cleanup code
    res.on('finish', () => {
      streamingState.currentlyStreaming = false;
      streamingState.currentGenerator = null;
      streamingState.remainingContent = '';
      context.actionTracker.removeListener('action', actionListener);
    });
  }

  try {
    const {result: finalStep} = await getResponse(lastMessage.content as string, tokenBudget, maxBadAttempts, context, body.messages)

    // Usage reported in OpenAI's snake_case shape.
    const usage = context.tokenTracker.getTotalUsageSnakeCase();
    if (body.stream) {
      // Complete any ongoing streaming before sending final answer
      await completeCurrentStreaming(streamingState, res, requestId, created, body.model);
      const finalAnswer = buildMdFromAnswer(finalStep as AnswerAction);
      // Send closing think tag followed by the markdown answer.
      const closeThinkChunk: ChatCompletionChunk = {
        id: requestId,
        object: 'chat.completion.chunk',
        created,
        model: body.model,
        system_fingerprint: 'fp_' + requestId,
        choices: [{
          index: 0,
          delta: {content: `</think>\n\n${finalAnswer}`},
          logprobs: null,
          finish_reason: null
        }]
      };
      res.write(`data: ${JSON.stringify(closeThinkChunk)}\n\n`);

      // After the content is fully streamed, send the final chunk with finish_reason and usage
      const finalChunk: ChatCompletionChunk = {
        id: requestId,
        object: 'chat.completion.chunk',
        created,
        model: body.model,
        system_fingerprint: 'fp_' + requestId,
        choices: [{
          index: 0,
          delta: {content: ''},
          logprobs: null,
          finish_reason: 'stop'
        }],
        usage
      };
      res.write(`data: ${JSON.stringify(finalChunk)}\n\n`);
      res.end();
    } else {

      // Non-streaming: one JSON response. A non-answer terminal step falls
      // back to exposing its `think` text as the message content.
      const response: ChatCompletionResponse = {
        id: requestId,
        object: 'chat.completion',
        created,
        model: body.model,
        system_fingerprint: 'fp_' + requestId,
        choices: [{
          index: 0,
          message: {
            role: 'assistant',
            content: finalStep.action === 'answer' ? buildMdFromAnswer(finalStep) : finalStep.think
          },
          logprobs: null,
          finish_reason: 'stop'
        }],
        usage
      };

      // Log final response (excluding full content for brevity)
      console.log('[chat/completions] Response:', {
        id: response.id,
        status: 200,
        contentLength: response.choices[0].message.content.length,
        usage: response.usage
      });

      res.json(response);
    }
  } catch (error: any) {
    // Log error details
    console.error('[chat/completions] Error:', {
      message: error?.message || 'An error occurred',
      stack: error?.stack,
      type: error?.constructor?.name,
      requestId
    });

    // Track error as rejected tokens with Vercel token counting
    const errorMessage = error?.message || 'An error occurred';

    // Clean up event listeners
    context.actionTracker.removeAllListeners('action');

    // Get token usage in OpenAI API format
    const usage = context.tokenTracker.getTotalUsageSnakeCase();

    if (body.stream && res.headersSent) {
      // For streaming responses that have already started, send error as a chunk
      // First send closing think tag if we're in the middle of thinking
      const closeThinkChunk: ChatCompletionChunk = {
        id: requestId,
        object: 'chat.completion.chunk',
        created,
        model: body.model,
        system_fingerprint: 'fp_' + requestId,
        choices: [{
          index: 0,
          delta: {content: '</think>'},
          logprobs: null,
          finish_reason: null
        }],
        usage
      };
      res.write(`data: ${JSON.stringify(closeThinkChunk)}\n\n`);


      const errorChunk: ChatCompletionChunk = {
        id: requestId,
        object: 'chat.completion.chunk',
        created,
        model: body.model,
        system_fingerprint: 'fp_' + requestId,
        choices: [{
          index: 0,
          delta: {content: errorMessage},
          logprobs: null,
          finish_reason: 'stop'
        }],
        usage
      };
      res.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
      res.end();
    } else {
      // For non-streaming or not-yet-started responses, send error as JSON
      const response: ChatCompletionResponse = {
        id: requestId,
        object: 'chat.completion',
        created,
        model: body.model,
        system_fingerprint: 'fp_' + requestId,
        choices: [{
          index: 0,
          message: {
            role: 'assistant',
            content: `Error: ${errorMessage}`
          },
          logprobs: null,
          finish_reason: 'stop'
        }],
        usage
      };
      res.json(response);
    }
  }
}) as RequestHandler);
644
+
645
+
646
+ export default app;
src/cli.ts ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env node
2
+ import { Command } from 'commander';
3
+ import { getResponse } from './agent';
4
+ import { version } from '../package.json';
5
+
6
+ const program = new Command();
7
+
8
+ program
9
+ .name('deepresearch')
10
+ .description('AI-powered research assistant that keeps searching until it finds the answer')
11
+ .version(version)
12
+ .argument('<query>', 'The research query to investigate')
13
+ .option('-t, --token-budget <number>', 'Maximum token budget', (val) => {
14
+ const num = parseInt(val);
15
+ if (isNaN(num)) throw new Error('Invalid token budget: must be a number');
16
+ return num;
17
+ }, 1000000)
18
+ .option('-m, --max-attempts <number>', 'Maximum bad attempts before giving up', (val) => {
19
+ const num = parseInt(val);
20
+ if (isNaN(num)) throw new Error('Invalid max attempts: must be a number');
21
+ return num;
22
+ }, 3)
23
+ .option('-v, --verbose', 'Show detailed progress')
24
+ .action(async (query: string, options: any) => {
25
+ try {
26
+ const { result } = await getResponse(
27
+ query,
28
+ parseInt(options.tokenBudget),
29
+ parseInt(options.maxAttempts)
30
+ );
31
+
32
+ if (result.action === 'answer') {
33
+ console.log('\nAnswer:', result.answer);
34
+ if (result.references?.length) {
35
+ console.log('\nReferences:');
36
+ result.references.forEach(ref => {
37
+ console.log(`- ${ref.url}`);
38
+ console.log(` "${ref.exactQuote}"`);
39
+ });
40
+ }
41
+ }
42
+ } catch (error) {
43
+ console.error('Error:', error instanceof Error ? error.message : String(error));
44
+ process.exit(1);
45
+ }
46
+ });
47
+
48
+ program.parse();
src/config.ts ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import dotenv from 'dotenv';
import { ProxyAgent, setGlobalDispatcher } from 'undici';
import { createGoogleGenerativeAI } from '@ai-sdk/google';
import { createOpenAI, OpenAIProviderSettings } from '@ai-sdk/openai';
import configJson from '../config.json';
// Load environment variables
dotenv.config();

// Types
export type LLMProvider = 'openai' | 'gemini' | 'vertex';
// Tool names are keyed off the gemini tools section of config.json; the same
// keys are assumed to exist for other providers.
export type ToolName = keyof typeof configJson.models.gemini.tools;

// Type definitions for our config structure
type EnvConfig = typeof configJson.env;

interface ProviderConfig {
  createClient: string;                 // factory identifier from config.json
  clientConfig?: Record<string, any>;   // provider-specific client options
}

// Environment setup: start from config.json defaults, then let any matching
// process environment variable override each key.
const env: EnvConfig = { ...configJson.env };
(Object.keys(env) as (keyof EnvConfig)[]).forEach(key => {
  if (process.env[key]) {
    env[key] = process.env[key] || env[key];
  }
});

// Setup proxy if present: route all undici-based HTTP through https_proxy.
if (env.https_proxy) {
  try {
    // new URL(...) validates the proxy address before installing it.
    const proxyUrl = new URL(env.https_proxy).toString();
    const dispatcher = new ProxyAgent({ uri: proxyUrl });
    setGlobalDispatcher(dispatcher);
  } catch (error) {
    // Invalid proxy URL: log and continue without a proxy.
    console.error('Failed to set proxy:', error);
  }
}

// Export environment variables
export const OPENAI_BASE_URL = env.OPENAI_BASE_URL;
export const GEMINI_API_KEY = env.GEMINI_API_KEY;
export const OPENAI_API_KEY = env.OPENAI_API_KEY;
export const JINA_API_KEY = env.JINA_API_KEY;
export const BRAVE_API_KEY = env.BRAVE_API_KEY;
export const SEARCH_PROVIDER = configJson.defaults.search_provider;
export const STEP_SLEEP = configJson.defaults.step_sleep;

// Determine LLM provider: env var wins over config.json default; fail fast on
// anything outside the supported union.
export const LLM_PROVIDER: LLMProvider = (() => {
  const provider = process.env.LLM_PROVIDER || configJson.defaults.llm_provider;
  if (!isValidProvider(provider)) {
    throw new Error(`Invalid LLM provider: ${provider}`);
  }
  return provider;
})();
57
+
58
+ function isValidProvider(provider: string): provider is LLMProvider {
59
+ return provider === 'openai' || provider === 'gemini' || provider === 'vertex';
60
+ }
61
+
62
// Fully-resolved per-tool model settings.
interface ToolConfig {
  model: string;        // model identifier handed to the provider SDK
  temperature: number;
  maxTokens: number;
}

// Optional per-tool overrides from config.json; anything omitted falls back
// to the provider's default block.
interface ToolOverrides {
  temperature?: number;
  maxTokens?: number;
}

/**
 * Resolve the model/temperature/maxTokens for one tool: provider defaults
 * merged with the tool's overrides. Vertex reuses the gemini model table.
 * DEFAULT_MODEL_NAME in the environment overrides the configured model.
 */
export function getToolConfig(toolName: ToolName): ToolConfig {
  const providerConfig = configJson.models[LLM_PROVIDER === 'vertex' ? 'gemini' : LLM_PROVIDER];
  const defaultConfig = providerConfig.default;
  const toolOverrides = providerConfig.tools[toolName] as ToolOverrides;

  return {
    model: process.env.DEFAULT_MODEL_NAME || defaultConfig.model,
    temperature: toolOverrides.temperature ?? defaultConfig.temperature,
    maxTokens: toolOverrides.maxTokens ?? defaultConfig.maxTokens
  };
}
85
+
86
+ export function getMaxTokens(toolName: ToolName): number {
87
+ return getToolConfig(toolName).maxTokens;
88
+ }
89
+
90
// Get model instance
/**
 * Build a provider SDK model handle for the given tool, based on LLM_PROVIDER.
 * The `searchGrounding` tool additionally enables Google search grounding on
 * gemini/vertex models. Throws when the provider's API key is missing.
 */
export function getModel(toolName: ToolName) {
  const config = getToolConfig(toolName);
  const providerConfig = (configJson.providers as Record<string, ProviderConfig | undefined>)[LLM_PROVIDER];

  if (LLM_PROVIDER === 'openai') {
    if (!OPENAI_API_KEY) {
      throw new Error('OPENAI_API_KEY not found');
    }

    const opt: OpenAIProviderSettings = {
      apiKey: OPENAI_API_KEY,
      compatibility: providerConfig?.clientConfig?.compatibility
    };

    // Optional custom endpoint (proxies, Azure-style gateways, etc.).
    if (OPENAI_BASE_URL) {
      opt.baseURL = OPENAI_BASE_URL;
    }

    return createOpenAI(opt)(config.model);
  }

  if (LLM_PROVIDER === 'vertex') {
    // Lazy require so @ai-sdk/google-vertex is only loaded when vertex is the
    // active provider. NOTE(review): dynamic require in a TS/ESM codebase —
    // confirm the build target supports it.
    const createVertex = require('@ai-sdk/google-vertex').createVertex;
    if (toolName === 'searchGrounding') {
      return createVertex({ project: process.env.GCLOUD_PROJECT, ...providerConfig?.clientConfig })(config.model, { useSearchGrounding: true });
    }
    return createVertex({ project: process.env.GCLOUD_PROJECT, ...providerConfig?.clientConfig })(config.model);
  }

  // Fallthrough: gemini provider.
  if (!GEMINI_API_KEY) {
    throw new Error('GEMINI_API_KEY not found');
  }

  if (toolName === 'searchGrounding') {
    return createGoogleGenerativeAI({ apiKey: GEMINI_API_KEY })(config.model, { useSearchGrounding: true });
  }
  return createGoogleGenerativeAI({ apiKey: GEMINI_API_KEY })(config.model);
}
129
+
130
// Validate required environment variables — fail at import time rather than on
// the first request.
if (LLM_PROVIDER === 'gemini' && !GEMINI_API_KEY) throw new Error("GEMINI_API_KEY not found");
if (LLM_PROVIDER === 'openai' && !OPENAI_API_KEY) throw new Error("OPENAI_API_KEY not found");
if (!JINA_API_KEY) throw new Error("JINA_API_KEY not found");

// Log all configurations (no API keys included) so deployments can confirm
// which provider/models are active.
const configSummary = {
  provider: {
    name: LLM_PROVIDER,
    model: LLM_PROVIDER === 'openai'
      ? configJson.models.openai.default.model
      : configJson.models.gemini.default.model,
    ...(LLM_PROVIDER === 'openai' && { baseUrl: OPENAI_BASE_URL })
  },
  search: {
    provider: SEARCH_PROVIDER
  },
  // Resolved per-tool settings (defaults merged with overrides).
  tools: Object.fromEntries(
    Object.keys(configJson.models[LLM_PROVIDER === 'vertex' ? 'gemini' : LLM_PROVIDER].tools).map(name => [
      name,
      getToolConfig(name as ToolName)
    ])
  ),
  defaults: {
    stepSleep: STEP_SLEEP
  }
};

console.log('Configuration Summary:', JSON.stringify(configSummary, null, 2));
src/evals/batch-evals.ts ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import fs from 'fs/promises';
2
+ import {exec} from 'child_process';
3
+ import {promisify} from 'util';
4
+ import {getResponse} from '../agent';
5
+ import {generateObject} from 'ai';
6
+ import {GEMINI_API_KEY} from '../config';
7
+ import {z} from 'zod';
8
+ import {AnswerAction, TrackerContext} from "../types";
9
+ import {createGoogleGenerativeAI} from "@ai-sdk/google";
10
+
11
+ const execAsync = promisify(exec);
12
+
13
+ interface Question {
14
+ question: string;
15
+ answer: string;
16
+ }
17
+
18
+ interface EvaluationResult {
19
+ pass: boolean;
20
+ reason: string;
21
+ total_steps: number;
22
+ total_tokens: number;
23
+ question: string;
24
+ expected_answer: string;
25
+ actual_answer: string;
26
+ }
27
+
28
+ interface EvaluationStats {
29
+ model_name: string;
30
+ pass_rate: number;
31
+ avg_steps: number;
32
+ max_steps: number;
33
+ min_steps: number;
34
+ median_steps: number;
35
+ avg_tokens: number;
36
+ median_tokens: number;
37
+ max_tokens: number;
38
+ min_tokens: number;
39
+ }
40
+
41
+ function calculateMedian(numbers: number[]): number {
42
+ const sorted = [...numbers].sort((a, b) => a - b);
43
+ const middle = Math.floor(sorted.length / 2);
44
+
45
+ if (sorted.length % 2 === 0) {
46
+ return (sorted[middle - 1] + sorted[middle]) / 2;
47
+ }
48
+ return sorted[middle];
49
+ }
50
+
51
+ function calculateStats(results: EvaluationResult[], modelName: string): EvaluationStats {
52
+ const steps = results.map(r => r.total_steps);
53
+ const tokens = results.map(r => r.total_tokens);
54
+ const passCount = results.filter(r => r.pass).length;
55
+
56
+ return {
57
+ model_name: modelName,
58
+ pass_rate: (passCount / results.length) * 100,
59
+ avg_steps: steps.reduce((a, b) => a + b, 0) / steps.length,
60
+ max_steps: Math.max(...steps),
61
+ min_steps: Math.min(...steps),
62
+ median_steps: calculateMedian(steps),
63
+ avg_tokens: tokens.reduce((a, b) => a + b, 0) / tokens.length,
64
+ median_tokens: calculateMedian(tokens),
65
+ max_tokens: Math.max(...tokens),
66
+ min_tokens: Math.min(...tokens)
67
+ };
68
+ }
69
+
70
+ function printStats(stats: EvaluationStats): void {
71
+ console.log('\n=== Evaluation Statistics ===');
72
+ console.log(`Model: ${stats.model_name}`);
73
+ console.log(`Pass Rate: ${stats.pass_rate.toFixed(0)}%`);
74
+ console.log(`Average Steps: ${stats.avg_steps.toFixed(0)}`);
75
+ console.log(`Maximum Steps: ${stats.max_steps}`);
76
+ console.log(`Minimum Steps: ${stats.min_steps}`);
77
+ console.log(`Median Steps: ${stats.median_steps.toFixed(0)}`);
78
+ console.log(`Average Tokens: ${stats.avg_tokens.toFixed(0)}`);
79
+ console.log(`Median Tokens: ${stats.median_tokens.toFixed(0)}`);
80
+ console.log(`Maximum Tokens: ${stats.max_tokens}`);
81
+ console.log(`Minimum Tokens: ${stats.min_tokens}`);
82
+ console.log('===========================\n');
83
+ }
84
+
85
+ async function getCurrentGitCommit(): Promise<string> {
86
+ try {
87
+ const {stdout} = await execAsync('git rev-parse --short HEAD');
88
+ return stdout.trim();
89
+ } catch (error) {
90
+ console.error('Error getting git commit:', error);
91
+ return 'unknown';
92
+ }
93
+ }
94
+
95
/**
 * LLM-as-judge comparison of an expected vs. actual answer. Always evaluates
 * with gemini-2.0-flash at temperature 0 for (near-)deterministic judgments,
 * regardless of the provider used to generate the answer. Returns a
 * pass/reason pair; evaluation errors are reported as a failed result rather
 * than thrown.
 */
async function evaluateAnswer(expectedAnswer: string, actualAnswer: string): Promise<{ pass: boolean; reason: string }> {
  const prompt = `You are a deterministic evaluator with zero temperature. Compare the following expected answer with the actual answer and determine if they convey the same information.

Expected answer: ${expectedAnswer}
Actual answer: ${actualAnswer}

Minor wording differences are acceptable as long as the core information of the expected answer is preserved in the actual answer.'`;

  // Structured output schema for generateObject.
  const schema = z.object({
    pass: z.boolean().describe('Whether the actual answer matches the expected answer'),
    reason: z.string().describe('Detailed explanation of why the evaluation passed or failed')
  });

  try {
    const result = await generateObject({
      model: createGoogleGenerativeAI({ apiKey: GEMINI_API_KEY })('gemini-2.0-flash'), // fix to gemini-2.0-flash for evaluation
      schema,
      prompt,
      maxTokens: 1000,
      temperature: 0 // Setting temperature to 0 for deterministic output
    });

    return result.object;
  } catch (error) {
    // Judge failure counts as a FAIL with the error embedded in the reason.
    console.error('Evaluation failed:', error);
    return {
      pass: false,
      reason: `Evaluation error: ${error}`
    };
  }
}
126
+
127
/**
 * Run the agent over every question in a JSON file of {question, answer}
 * pairs, judge each response with evaluateAnswer, print aggregate statistics,
 * and save per-question results plus statistics to
 * `eval-<git-commit>-<model>.json`. Questions are processed sequentially;
 * a per-question failure is recorded as a failing result, not a crash.
 */
async function batchEvaluate(inputFile: string): Promise<void> {
  // Read and parse input file
  const questions: Question[] = JSON.parse(await fs.readFile(inputFile, 'utf-8'));
  const results: EvaluationResult[] = [];
  const gitCommit = await getCurrentGitCommit();
  const modelName = process.env.DEFAULT_MODEL_NAME || 'unknown';
  const outputFile = `eval-${gitCommit}-${modelName}.json`;

  // Process each question
  for (let i = 0; i < questions.length; i++) {
    const {question, answer: expectedAnswer} = questions[i];
    console.log(`\nProcessing question ${i + 1}/${questions.length}: ${question}`);

    try {
      // Get response using the agent
      const {
        result: response,
        context
      } = await getResponse(question) as { result: AnswerAction; context: TrackerContext };

      // Get response using the streaming agent
      // const {
      //   result: response,
      //   context
      // } = await getResponseStreamingAgent(question) as { result: AnswerAction; context: TrackerContext };

      const actualAnswer = response.answer;

      // Evaluate the response
      const evaluation = await evaluateAnswer(expectedAnswer, actualAnswer);

      // Record results
      results.push({
        pass: evaluation.pass,
        reason: evaluation.reason,
        total_steps: context.actionTracker.getState().totalStep,
        total_tokens: context.tokenTracker.getTotalUsage().totalTokens,
        question,
        expected_answer: expectedAnswer,
        actual_answer: actualAnswer
      });

      console.log(`Evaluation: ${evaluation.pass ? 'PASS' : 'FAIL'}`);
      console.log(`Reason: ${evaluation.reason}`);
    } catch (error) {
      // Agent failure: record a zero-cost failing entry and continue.
      console.error(`Error processing question: ${question}`, error);
      results.push({
        pass: false,
        reason: `Error: ${error}`,
        total_steps: 0,
        total_tokens: 0,
        question,
        expected_answer: expectedAnswer,
        actual_answer: 'Error occurred'
      });
    }
  }

  // Calculate and print statistics
  const stats = calculateStats(results, modelName);
  printStats(stats);

  // Save results
  await fs.writeFile(outputFile, JSON.stringify({
    results,
    statistics: stats
  }, null, 2));

  console.log(`\nEvaluation results saved to ${outputFile}`);
}

// Run batch evaluation if this is the main module (CLI: first argv is the
// input JSON path).
if (require.main === module) {
  const inputFile = process.argv[2];
  if (!inputFile) {
    console.error('Please provide an input file path');
    process.exit(1);
  }

  batchEvaluate(inputFile).catch(console.error);
}

export {batchEvaluate};
src/evals/ego-questions.json ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "question": "what did jina ai ceo say about deepseek that went viral and become a meme?",
4
+ "answer": "a side project"
5
+ },
6
+ {
7
+ "question": "when was jina ai founded, month and year?",
8
+ "answer": "feb 2020"
9
+ },
10
+ {
11
+ "question": "what is the latest model published by jina ai?",
12
+ "answer": "ReaderLM-2.0"
13
+ },
14
+ {
15
+ "question": "what is the latest blog post that jina ai published?",
16
+ "answer": "A Practical Guide to Deploying Search Foundation Models in Production"
17
+ },
18
+ {
19
+ "question": "what is the context length of readerlm-v2?",
20
+ "answer": "512K"
21
+ },
22
+ {
23
+ "question": "how many employees does jina ai have right now?",
24
+ "answer": "30"
25
+ },
26
+ {
27
+ "question": "when was jina reader api released?",
28
+ "answer": "April 2024"
29
+ },
30
+ {
31
+ "question": "How many offices do Jina AI have and where are they?",
32
+ "answer": "four: sunnyvale, berlin, beijing, shenzhen"
33
+ },
34
+ {
35
+ "question": "what exactly jina-colbert-v2 improves over jina-colbert-v1?",
36
+ "answer": "v2 add multilingual support"
37
+ },
38
+ {
39
+ "question": "who are the authors of jina-clip-v2 paper?",
40
+ "answer": "Andreas Koukounas, Georgios Mastrapas, Bo Wang, Mohammad Kalim Akram, Sedigheh Eslami, Michael Günther, Isabelle Mohr, Saba Sturua, Scott Martens, Nan Wang, Han Xiao"
41
+ },
42
+ {
43
+ "question": "who created the node-deepresearch project?",
44
+ "answer": "Han Xiao / jina ai"
45
+ },
46
+ {
47
+ "question": "Which countries are the investors of Jina AI from?",
48
+ "answer": "USA and China only, no German investors"
49
+ },
50
+ {
51
+ "question": "what is the grounding api endpoint of jina ai?",
52
+ "answer": "g.jina.ai"
53
+ },
54
+ {
55
+ "question": "which of the following models do not support Matryoshka representation? jina-embeddings-v3, jina-embeddings-v2-base-en, jina-clip-v2, jina-clip-v1",
56
+ "answer": "jina-embeddings-v2-base-en and jina-clip-v1"
57
+ },
58
+ {
59
+ "question": "Can I purchase the 2024 yearbook that jina ai published today?",
60
+ "answer": "No it is sold out."
61
+ },
62
+ {
63
+ "question": "How many free tokens do you get from a new jina api key?",
64
+ "answer": "1 million."
65
+ },
66
+ {
67
+ "question": "Who is the legal signatory of Jina AI gmbh?",
68
+ "answer": "Jiao Liu"
69
+ },
70
+ {
71
+ "question": "what is the key idea behind node-deepresearch project?",
72
+ "answer": "It keeps searching, reading webpages, reasoning until an answer is found."
73
+ },
74
+ {
75
+ "question": "what is the name of the jina ai's mascot?",
76
+ "answer": "No, Jina AI does not have a mascot."
77
+ },
78
+ {
79
+ "question": "Does late chunking work with cls pooling?",
80
+ "answer": "No. late chunking only works with mean pooling."
81
+ }
82
+ ]
src/server.ts ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import app from "./app";

// Port from the environment, defaulting to 3000 for local development.
const port = process.env.PORT || 3000;

// Export server startup function for better testing
export function startServer() {
  // Returns the http.Server so tests can close it.
  return app.listen(port, () => {
    console.log(`Server running at http://localhost:${port}`);
  });
}

// Start server if running directly; tests import startServer and manage the
// lifecycle themselves.
if (process.env.NODE_ENV !== 'test') {
  startServer();
}
src/tools/__tests__/error-analyzer.test.ts ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { analyzeSteps } from '../error-analyzer';
2
+ import { LLMProvider } from '../../config';
3
+
4
+ describe('analyzeSteps', () => {
5
+ const providers: Array<LLMProvider> = ['openai', 'gemini'];
6
+ const originalEnv = process.env;
7
+
8
+ beforeEach(() => {
9
+ jest.resetModules();
10
+ process.env = { ...originalEnv };
11
+ });
12
+
13
+ afterEach(() => {
14
+ process.env = originalEnv;
15
+ });
16
+
17
+ providers.forEach(provider => {
18
+ describe(`with ${provider} provider`, () => {
19
+ beforeEach(() => {
20
+ process.env.LLM_PROVIDER = provider;
21
+ });
22
+
23
+ it('should analyze error steps', async () => {
24
+ const { response } = await analyzeSteps(['Step 1: Search failed', 'Step 2: Invalid query']);
25
+ expect(response).toHaveProperty('recap');
26
+ expect(response).toHaveProperty('blame');
27
+ expect(response).toHaveProperty('improvement');
28
+ }, 30000);
29
+ });
30
+ });
31
+ });
src/tools/__tests__/evaluator.test.ts ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { evaluateAnswer } from '../evaluator';
2
+ import { TokenTracker } from '../../utils/token-tracker';
3
+ import { LLMProvider } from '../../config';
4
+
5
+ describe('evaluateAnswer', () => {
6
+ const providers: Array<LLMProvider> = ['openai', 'gemini'];
7
+ const originalEnv = process.env;
8
+
9
+ beforeEach(() => {
10
+ jest.resetModules();
11
+ process.env = { ...originalEnv };
12
+ });
13
+
14
+ afterEach(() => {
15
+ process.env = originalEnv;
16
+ });
17
+
18
+ providers.forEach(provider => {
19
+ describe(`with ${provider} provider`, () => {
20
+ beforeEach(() => {
21
+ process.env.LLM_PROVIDER = provider;
22
+ });
23
+
24
+ it('should evaluate answer definitiveness', async () => {
25
+ const tokenTracker = new TokenTracker();
26
+ const { response } = await evaluateAnswer(
27
+ 'What is TypeScript?',
28
+ {
29
+ action: "answer",
30
+ think: "Providing a clear definition of TypeScript",
31
+ answer: "TypeScript is a strongly typed programming language that builds on JavaScript.",
32
+ references: []
33
+ },
34
+ ['definitive'],
35
+ tokenTracker
36
+ );
37
+ expect(response).toHaveProperty('pass');
38
+ expect(response).toHaveProperty('think');
39
+ expect(response.type).toBe('definitive');
40
+ });
41
+
42
+ it('should evaluate answer plurality', async () => {
43
+ const tokenTracker = new TokenTracker();
44
+ const { response } = await evaluateAnswer(
45
+ 'List three programming languages.',
46
+ {
47
+ action: "answer",
48
+ think: "Providing an example of a programming language",
49
+ answer: "Python is a programming language.",
50
+ references: []
51
+ },
52
+ ['plurality'],
53
+ tokenTracker
54
+ );
55
+ expect(response).toHaveProperty('pass');
56
+ expect(response).toHaveProperty('think');
57
+ expect(response.type).toBe('plurality');
58
+ expect(response.plurality_analysis?.expects_multiple).toBe(true);
59
+ });
60
+ });
61
+ });
62
+ });
src/tools/__tests__/read.test.ts ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { readUrl } from '../read';
2
+ import { TokenTracker } from '../../utils/token-tracker';
3
+
4
+ describe('readUrl', () => {
5
+ it.skip('should read and parse URL content (skipped due to insufficient balance)', async () => {
6
+ const tokenTracker = new TokenTracker();
7
+ const { response } = await readUrl('https://www.typescriptlang.org', tokenTracker);
8
+ expect(response).toHaveProperty('code');
9
+ expect(response).toHaveProperty('status');
10
+ expect(response.data).toHaveProperty('content');
11
+ expect(response.data).toHaveProperty('title');
12
+ }, 15000);
13
+
14
+ it.skip('should handle invalid URLs (skipped due to insufficient balance)', async () => {
15
+ await expect(readUrl('invalid-url')).rejects.toThrow();
16
+ }, 15000);
17
+
18
+ beforeEach(() => {
19
+ jest.setTimeout(15000);
20
+ });
21
+ });
src/tools/__tests__/search.test.ts ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { search } from '../jina-search';
2
+ import { TokenTracker } from '../../utils/token-tracker';
3
+
4
+ describe('search', () => {
5
+ it.skip('should perform search with Jina API (skipped due to insufficient balance)', async () => {
6
+ const tokenTracker = new TokenTracker();
7
+ const { response } = await search('TypeScript programming', tokenTracker);
8
+ expect(response).toBeDefined();
9
+ expect(response.data).toBeDefined();
10
+ if (response.data === null) {
11
+ throw new Error('Response data is null');
12
+ }
13
+ expect(Array.isArray(response.data)).toBe(true);
14
+ expect(response.data.length).toBeGreaterThan(0);
15
+ }, 15000);
16
+
17
+ it('should handle empty query', async () => {
18
+ await expect(search('')).rejects.toThrow();
19
+ }, 15000);
20
+
21
+ beforeEach(() => {
22
+ jest.setTimeout(15000);
23
+ });
24
+ });
src/tools/brave-search.ts ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import axios from 'axios';
2
+ import {BRAVE_API_KEY} from "../config";
3
+
4
+ import { BraveSearchResponse } from '../types';
5
+
6
+ export async function braveSearch(query: string): Promise<{ response: BraveSearchResponse }> {
7
+ const response = await axios.get<BraveSearchResponse>('https://api.search.brave.com/res/v1/web/search', {
8
+ params: {
9
+ q: query,
10
+ count: 10,
11
+ safesearch: 'off'
12
+ },
13
+ headers: {
14
+ 'Accept': 'application/json',
15
+ 'X-Subscription-Token': BRAVE_API_KEY
16
+ },
17
+ timeout: 10000
18
+ });
19
+
20
+ // Maintain the same return structure as the original code
21
+ return { response: response.data };
22
+ }
src/tools/dedup.ts ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import {z} from 'zod';
2
+ import {TokenTracker} from "../utils/token-tracker";
3
+ import {ObjectGeneratorSafe} from "../utils/safe-generator";
4
+
5
+
6
+ const responseSchema = z.object({
7
+ think: z.string().describe('Strategic reasoning about the overall deduplication approach'),
8
+ unique_queries: z.array(z.string().describe('Unique query that passed the deduplication process, must be less than 30 characters'))
9
+ .describe('Array of semantically unique queries').max(3)
10
+ });
11
+
12
+ function getPrompt(newQueries: string[], existingQueries: string[]): string {
13
+ return `You are an expert in semantic similarity analysis. Given a set of queries (setA) and a set of queries (setB)
14
+
15
+ <rules>
16
+ Function FilterSetA(setA, setB, threshold):
17
+ filteredA = empty set
18
+
19
+ for each candidateQuery in setA:
20
+ isValid = true
21
+
22
+ // Check similarity with already accepted queries in filteredA
23
+ for each acceptedQuery in filteredA:
24
+ similarity = calculateSimilarity(candidateQuery, acceptedQuery)
25
+ if similarity >= threshold:
26
+ isValid = false
27
+ break
28
+
29
+ // If passed first check, compare with set B
30
+ if isValid:
31
+ for each queryB in setB:
32
+ similarity = calculateSimilarity(candidateQuery, queryB)
33
+ if similarity >= threshold:
34
+ isValid = false
35
+ break
36
+
37
+ // If passed all checks, add to filtered set
38
+ if isValid:
39
+ add candidateQuery to filteredA
40
+
41
+ return filteredA
42
+ </rules>
43
+
44
+ <similarity-definition>
45
+ 1. Consider semantic meaning and query intent, not just lexical similarity
46
+ 2. Account for different phrasings of the same information need
47
+ 3. Queries with same base keywords but different operators are NOT duplicates
48
+ 4. Different aspects or perspectives of the same topic are not duplicates
49
+ 5. Consider query specificity - a more specific query is not a duplicate of a general one
50
+ 6. Search operators that make queries behave differently:
51
+ - Different site: filters (e.g., site:youtube.com vs site:github.com)
52
+ - Different file types (e.g., filetype:pdf vs filetype:doc)
53
+ - Different language/location filters (e.g., lang:en vs lang:es)
54
+ - Different exact match phrases (e.g., "exact phrase" vs no quotes)
55
+ - Different inclusion/exclusion (+/- operators)
56
+ - Different title/body filters (intitle: vs inbody:)
57
+ </similarity-definition>
58
+
59
+ Now with threshold set to 0.2; run FilterSetA on the following:
60
+ SetA: ${JSON.stringify(newQueries)}
61
+ SetB: ${JSON.stringify(existingQueries)}`;
62
+ }
63
+
64
+
65
+ const TOOL_NAME = 'dedup';
66
+
67
+ export async function dedupQueries(
68
+ newQueries: string[],
69
+ existingQueries: string[],
70
+ tracker?: TokenTracker
71
+ ): Promise<{ unique_queries: string[] }> {
72
+ try {
73
+ const generator = new ObjectGeneratorSafe(tracker);
74
+ const prompt = getPrompt(newQueries, existingQueries);
75
+
76
+ const result = await generator.generateObject({
77
+ model: TOOL_NAME,
78
+ schema: responseSchema,
79
+ prompt,
80
+ });
81
+
82
+ console.log(TOOL_NAME, result.object.unique_queries);
83
+ return {unique_queries: result.object.unique_queries};
84
+
85
+ } catch (error) {
86
+ console.error(`Error in ${TOOL_NAME}`, error);
87
+ throw error;
88
+ }
89
+ }
src/tools/error-analyzer.ts ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import {z} from 'zod';
2
+ import {TokenTracker} from "../utils/token-tracker";
3
+ import {ErrorAnalysisResponse} from '../types';
4
+ import {ObjectGeneratorSafe} from "../utils/safe-generator";
5
+
6
+
7
+ const responseSchema = z.object({
8
+ recap: z.string().describe('Recap of the actions taken and the steps conducted'),
9
+ blame: z.string().describe('Which action or the step was the root cause of the answer rejection'),
10
+ improvement: z.string().describe('Suggested key improvement for the next iteration, do not use bullet points, be concise and hot-take vibe.'),
11
+ questionsToAnswer: z.array(
12
+ z.string().describe("each question must be a single line, concise and clear. not composite or compound, less than 20 words.")
13
+ ).max(2)
14
+ .describe("List of most important reflect questions to fill the knowledge gaps"),
15
+ });
16
+
17
+
18
+ function getPrompt(diaryContext: string[]): string {
19
+ return `You are an expert at analyzing search and reasoning processes. Your task is to analyze the given sequence of steps and identify what went wrong in the search process.
20
+
21
+ <rules>
22
+ 1. The sequence of actions taken
23
+ 2. The effectiveness of each step
24
+ 3. The logic between consecutive steps
25
+ 4. Alternative approaches that could have been taken
26
+ 5. Signs of getting stuck in repetitive patterns
27
+ 6. Whether the final answer matches the accumulated information
28
+
29
+ Analyze the steps and provide detailed feedback following these guidelines:
30
+ - In the recap: Summarize key actions chronologically, highlight patterns, and identify where the process started to go wrong
31
+ - In the blame: Point to specific steps or patterns that led to the inadequate answer
32
+ - In the improvement: Provide actionable suggestions that could have led to a better outcome
33
+
34
+ Generate a JSON response following JSON schema.
35
+ </rules>
36
+
37
+ <example>
38
+ <input>
39
+ <steps>
40
+
41
+ At step 1, you took the **search** action and look for external information for the question: "how old is jina ai ceo?".
42
+ In particular, you tried to search for the following keywords: "jina ai ceo age".
43
+ You found quite some information and add them to your URL list and **visit** them later when needed.
44
+
45
+
46
+ At step 2, you took the **visit** action and deep dive into the following URLs:
47
+ https://www.linkedin.com/in/hxiao87
48
+ https://www.crunchbase.com/person/han-xiao
49
+ You found some useful information on the web and add them to your knowledge for future reference.
50
+
51
+
52
+ At step 3, you took the **search** action and look for external information for the question: "how old is jina ai ceo?".
53
+ In particular, you tried to search for the following keywords: "Han Xiao birthdate, Jina AI founder birthdate".
54
+ You found quite some information and add them to your URL list and **visit** them later when needed.
55
+
56
+
57
+ At step 4, you took the **search** action and look for external information for the question: "how old is jina ai ceo?".
58
+ In particular, you tried to search for the following keywords: han xiao birthday.
59
+ But then you realized you have already searched for these keywords before.
60
+ You decided to think out of the box or cut from a completely different angle.
61
+
62
+
63
+ At step 5, you took the **search** action and look for external information for the question: "how old is jina ai ceo?".
64
+ In particular, you tried to search for the following keywords: han xiao birthday.
65
+ But then you realized you have already searched for these keywords before.
66
+ You decided to think out of the box or cut from a completely different angle.
67
+
68
+
69
+ At step 6, you took the **visit** action and deep dive into the following URLs:
70
+ https://kpopwall.com/han-xiao/
71
+ https://www.idolbirthdays.net/han-xiao
72
+ You found some useful information on the web and add them to your knowledge for future reference.
73
+
74
+
75
+ At step 7, you took **answer** action but evaluator thinks it is not a good answer:
76
+
77
+ </steps>
78
+
79
+ Original question:
80
+ how old is jina ai ceo?
81
+
82
+ Your answer:
83
+ The age of the Jina AI CEO cannot be definitively determined from the provided information.
84
+
85
+ The evaluator thinks your answer is bad because:
86
+ The answer is not definitive and fails to provide the requested information. Lack of information is unacceptable, more search and deep reasoning is needed.
87
+ </input>
88
+
89
+
90
+ <output>
91
+ {
92
+ "recap": "The search process consisted of 7 steps with multiple search and visit actions. The initial searches focused on basic biographical information through LinkedIn and Crunchbase (steps 1-2). When this didn't yield the specific age information, additional searches were conducted for birthdate information (steps 3-5). The process showed signs of repetition in steps 4-5 with identical searches. Final visits to entertainment websites (step 6) suggested a loss of focus on reliable business sources.",
93
+
94
+ "blame": "The root cause of failure was getting stuck in a repetitive search pattern without adapting the strategy. Steps 4-5 repeated the same search, and step 6 deviated to less reliable entertainment sources instead of exploring business journals, news articles, or professional databases. Additionally, the process didn't attempt to triangulate age through indirect information like education history or career milestones.",
95
+
96
+ "improvement": "1. Avoid repeating identical searches and implement a strategy to track previously searched terms. 2. When direct age/birthdate searches fail, try indirect approaches like: searching for earliest career mentions, finding university graduation years, or identifying first company founding dates. 3. Focus on high-quality business sources and avoid entertainment websites for professional information. 4. Consider using industry event appearances or conference presentations where age-related context might be mentioned. 5. If exact age cannot be determined, provide an estimated range based on career timeline and professional achievements.",
97
+
98
+ "questionsToAnswer": [
99
+ "What alternative professional databases or news archives could provide reliable biographical information?",
100
+ "How can we use education history or career milestones to estimate age range?"
101
+ ]
102
+ }
103
+ </output>
104
+ </example>
105
+ Review the steps below carefully and generate your analysis following this format.
106
+
107
+ ${diaryContext.join('\n')}
108
+ `;
109
+ }
110
+
111
+ const TOOL_NAME = 'errorAnalyzer';
112
+ export async function analyzeSteps(
113
+ diaryContext: string[],
114
+ tracker?: TokenTracker
115
+ ): Promise<{ response: ErrorAnalysisResponse }> {
116
+ try {
117
+ const generator = new ObjectGeneratorSafe(tracker);
118
+ const prompt = getPrompt(diaryContext);
119
+
120
+ const result = await generator.generateObject({
121
+ model: TOOL_NAME,
122
+ schema: responseSchema,
123
+ prompt,
124
+ });
125
+
126
+ console.log(TOOL_NAME, result.object);
127
+
128
+ return { response: result.object };
129
+
130
+ } catch (error) {
131
+ console.error(`Error in ${TOOL_NAME}`, error);
132
+ throw error;
133
+ }
134
+ }
src/tools/evaluator.ts ADDED
@@ -0,0 +1,553 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import {z} from 'zod';
2
+ import {GenerateObjectResult} from 'ai';
3
+ import {TokenTracker} from "../utils/token-tracker";
4
+ import {AnswerAction, EvaluationCriteria, EvaluationResponse, EvaluationType} from '../types';
5
+ import {readUrl, removeAllLineBreaks} from "./read";
6
+ import {ObjectGeneratorSafe} from "../utils/safe-generator";
7
+
8
+
9
+
10
+ const baseSchema = {
11
+ pass: z.boolean().describe('Whether the answer passes the evaluation criteria defined by the evaluator'),
12
+ think: z.string().describe('Explanation the thought process why the answer does not pass the evaluation criteria')
13
+ };
14
+
15
+ const definitiveSchema = z.object({
16
+ ...baseSchema,
17
+ type: z.literal('definitive')
18
+ });
19
+
20
+ const freshnessSchema = z.object({
21
+ ...baseSchema,
22
+ type: z.literal('freshness'),
23
+ freshness_analysis: z.object({
24
+ likely_outdated: z.boolean().describe('Whether the answer content is likely outdated based on dates and current time'),
25
+ dates_mentioned: z.array(z.string()).describe('All dates mentioned in the answer'),
26
+ current_time: z.string().describe('Current system time when evaluation was performed'),
27
+ max_age_days: z.number().optional().describe('Maximum allowed age in days before content is considered outdated')
28
+ })
29
+ });
30
+
31
+ const pluralitySchema = z.object({
32
+ ...baseSchema,
33
+ type: z.literal('plurality'),
34
+ plurality_analysis: z.object({
35
+ expects_multiple: z.boolean().describe('Whether the question asks for multiple items'),
36
+ provides_multiple: z.boolean().describe('Whether the answer provides multiple items'),
37
+ count_expected: z.number().optional().describe('Number of items expected if specified in question'),
38
+ count_provided: z.number().describe('Number of items provided in answer')
39
+ })
40
+ });
41
+
42
+ const attributionSchema = z.object({
43
+ ...baseSchema,
44
+ type: z.literal('attribution'),
45
+ attribution_analysis: z.object({
46
+ sources_provided: z.boolean().describe('Whether the answer provides source references'),
47
+ sources_verified: z.boolean().describe('Whether the provided sources contain the claimed information'),
48
+ quotes_accurate: z.boolean().describe('Whether the quotes accurately represent the source content')
49
+ })
50
+ });
51
+
52
+ function getAttributionPrompt(question: string, answer: string, sourceContent: string): string {
53
+ return `You are an evaluator that verifies if answer content is properly attributed to and supported by the provided sources.
54
+
55
+ <rules>
56
+ 1. Source Verification:
57
+ - Check if answer claims are supported by the provided source content
58
+ - Verify that quotes are accurate and in proper context
59
+ - Ensure numerical data and statistics match the source
60
+ - Flag any claims that go beyond what the sources support
61
+
62
+ 2. Attribution Analysis:
63
+ - Check if answer properly references its sources
64
+ - Verify that important claims have clear source attribution
65
+ - Ensure quotes are properly marked and cited
66
+ - Check for any unsupported generalizations
67
+
68
+ 3. Accuracy Requirements:
69
+ - Direct quotes must match source exactly
70
+ - Paraphrasing must maintain original meaning
71
+ - Statistics and numbers must be precise
72
+ - Context must be preserved
73
+ </rules>
74
+
75
+ <examples>
76
+ Question: "What are Jina AI's main products?"
77
+ Answer: "According to Jina AI's website, their main products are DocArray and Jina Framework."
78
+ Source Content: "Jina AI's flagship products include DocArray, Jina Framework, and JCloud, offering a complete ecosystem for neural search applications."
79
+ Evaluation: {
80
+ "pass": false,
81
+ "think": "The answer omits JCloud which is mentioned as a main product in the source. The information provided is incomplete and potentially misleading as it fails to mention a significant product from the company's ecosystem.",
82
+ "attribution_analysis": {
83
+ "sources_provided": true,
84
+ "sources_verified": false,
85
+ "quotes_accurate": false
86
+ }
87
+ }
88
+
89
+ Question: "When was Python first released?"
90
+ Answer: "Python was first released in 1991 by Guido van Rossum."
91
+ Source Content: "Python was first released in 1991 by Guido van Rossum while working at CWI."
92
+ Evaluation: {
93
+ "pass": true,
94
+ "think": "The answer accurately reflects the core information from the source about Python's release date and creator, though it omits the additional context about CWI which isn't essential to the question.",
95
+ "attribution_analysis": {
96
+ "sources_provided": true,
97
+ "sources_verified": true,
98
+ "quotes_accurate": true
99
+ }
100
+ }
101
+ </examples>
102
+
103
+ Now evaluate this pair:
104
+ Question: ${JSON.stringify(question)}
105
+ Answer: ${JSON.stringify(answer)}
106
+ Source Content: ${JSON.stringify(sourceContent)}`;
107
+ }
108
+
109
+ function getDefinitivePrompt(question: string, answer: string): string {
110
+ return `You are an evaluator of answer definitiveness. Analyze if the given answer provides a definitive response or not.
111
+
112
+ <rules>
113
+ First, if the answer is not a direct response to the question, it must return false.
114
+ Definitiveness is the king! The following types of responses are NOT definitive and must return false:
115
+ 1. Expressions of uncertainty: "I don't know", "not sure", "might be", "probably"
116
+ 2. Lack of information statements: "doesn't exist", "lack of information", "could not find"
117
+ 3. Inability statements: "I cannot provide", "I am unable to", "we cannot"
118
+ 4. Negative statements that redirect: "However, you can...", "Instead, try..."
119
+ 5. Non-answers that suggest alternatives
120
+ </rules>
121
+
122
+ <examples>
123
+ Question: "What are the system requirements for running Python 3.9?"
124
+ Answer: "I'm not entirely sure, but I think you need a computer with some RAM."
125
+ Evaluation: {
126
+ "pass": false,
127
+ "think": "The answer contains uncertainty markers like 'not entirely sure' and 'I think', making it non-definitive."
128
+ }
129
+
130
+ Question: "What are the system requirements for running Python 3.9?"
131
+ Answer: "Python 3.9 requires Windows 7 or later, macOS 10.11 or later, or Linux."
132
+ Evaluation: {
133
+ "pass": true,
134
+ "think": "The answer makes clear, definitive statements without uncertainty markers or ambiguity."
135
+ }
136
+
137
+ Question: "Who will be the president of the United States in 2032?"
138
+ Answer: "I cannot predict the future, it depends on the election results."
139
+ Evaluation: {
140
+ "pass": false,
141
+ "think": "The answer contains a statement of inability to predict the future, making it non-definitive."
142
+ }
143
+
144
+ Question: "Who is the sales director at Company X?"
145
+ Answer: "I cannot provide the name of the sales director, but you can contact their sales team at [email protected]"
146
+ Evaluation: {
147
+ "pass": false,
148
+ "think": "The answer starts with 'I cannot provide' and redirects to an alternative contact method instead of answering the original question."
149
+ }
150
+
151
+ Question: "what is the twitter account of jina ai's founder?"
152
+ Answer: "The provided text does not contain the Twitter account of Jina AI's founder."
153
+ Evaluation: {
154
+ "pass": false,
155
+ "think": "The answer indicates a lack of information rather than providing a definitive response."
156
+ }
157
+ </examples>
158
+
159
+ Now evaluate this pair:
160
+ Question: ${JSON.stringify(question)}
161
+ Answer: ${JSON.stringify(answer)}`;
162
+ }
163
+
164
+ function getFreshnessPrompt(question: string, answer: string, currentTime: string): string {
165
+ return `You are an evaluator that analyzes if answer content is likely outdated based on mentioned dates and current time.
166
+
167
+ <rules>
168
+ 1. Date Analysis:
169
+ - Extract all dates mentioned in the answer
170
+ - Compare against current system time: ${currentTime}
171
+ - Consider content outdated if:
172
+ * It refers to a "latest" or "current" state from more than 30 days ago
173
+ * It mentions specific dates/events that have been superseded
174
+ * It contains time-sensitive information (e.g., "current CEO", "latest version") from more than 60 days ago
175
+ - For product versions, releases, or announcements, max age is 30 days
176
+ - For company positions, leadership, or general facts, max age is 60 days
177
+
178
+ 2. Context Hints:
179
+ - Words indicating recency: "latest", "current", "newest", "just released", "recently"
180
+ - Time-sensitive terms: "CEO", "price", "version", "release"
181
+ - Future dates should be ignored in outdated calculation
182
+ </rules>
183
+
184
+ <examples>
185
+ Question: "What is Jina AI's latest embedding model?"
186
+ Answer: "The latest embedding model from Jina AI is jina-embeddings-v2, released on March 15, 2024."
187
+ Current Time: "2024-10-06T00:00:00Z"
188
+ Evaluation: {
189
+ "pass": false,
190
+ "think": "The answer refers to a 'latest' model release from over 6 months ago, which is likely outdated for product version information",
191
+ "freshness_analysis": {
192
+ "likely_outdated": true,
193
+ "dates_mentioned": ["2024-03-15"],
194
+ "current_time": "2024-10-06T00:00:00Z",
195
+ "max_age_days": 30
196
+ }
197
+ }
198
+
199
+ Question: "Who is OpenAI's CEO?"
200
+ Answer: "Sam Altman is the CEO of OpenAI as of December 2023."
201
+ Current Time: "2024-02-06T00:00:00Z"
202
+ Evaluation: {
203
+ "pass": true,
204
+ "think": "The answer is about company leadership and is within the 60-day threshold for such information",
205
+ "freshness_analysis": {
206
+ "likely_outdated": false,
207
+ "dates_mentioned": ["2023-12"],
208
+ "current_time": "2024-02-06T00:00:00Z",
209
+ "max_age_days": 60
210
+ }
211
+ }
212
+ </examples>
213
+
214
+ Now evaluate this pair:
215
+ Question: ${JSON.stringify(question)}
216
+ Answer: ${JSON.stringify(answer)}`;
217
+ }
218
+
219
+ function getPluralityPrompt(question: string, answer: string): string {
220
+ return `You are an evaluator that analyzes if answers provide the appropriate number of items requested in the question.
221
+
222
+ <rules>
223
+ 1. Question Analysis:
224
+ - Check if question asks for multiple items using indicators like:
225
+ * Plural nouns: "companies", "people", "names"
226
+ * Quantifiers: "all", "many", "several", "various", "multiple"
227
+ * List requests: "list", "enumerate", "name all", "give me all"
228
+ * Numbers: "5 examples", "top 10"
229
+ - Otherwise skip the analysis and return pass to true
230
+
231
+ 2. Answer Analysis:
232
+ - Count distinct items provided in the answer
233
+ - Check if answer uses limiting words like "only", "just", "single"
234
+ - Identify if answer acknowledges there are more items but only provides some
235
+
236
+ 3. Definitiveness Rules:
237
+ - If question asks for multiple items but answer provides only one → NOT definitive
238
+ - If question asks for specific number (e.g., "top 5") but answer provides fewer → NOT definitive
239
+ - If answer clearly states it's providing a partial list → NOT definitive
240
+ - If question asks for "all" or "every" but answer seems incomplete → NOT definitive
241
+ </rules>
242
+
243
+ <examples>
244
+ Question: "Who works in Jina AI's sales team?"
245
+ Answer: "John Smith is a sales representative at Jina AI."
246
+ Evaluation: {
247
+ "pass": true,
248
+ "think": "The question doesn't specifically ask for multiple team members, so a single name can be considered a definitive answer.",
249
+ "plurality_analysis": {
250
+ "expects_multiple": false,
251
+ "provides_multiple": false,
252
+ "count_provided": 1
253
+ }
254
+ }
255
+
256
+ Question: "List all the salespeople who work at Jina AI"
257
+ Answer: "John Smith is a sales representative at Jina AI."
258
+ Evaluation: {
259
+ "pass": false,
260
+ "think": "The question asks for 'all salespeople' but the answer only provides one name without indicating if this is the complete list.",
261
+ "plurality_analysis": {
262
+ "expects_multiple": true,
263
+ "provides_multiple": false,
264
+ "count_provided": 1
265
+ }
266
+ }
267
+
268
+ Question: "Name the top 3 products sold by Jina AI"
269
+ Answer: "Jina AI's product lineup includes DocArray and Jina."
270
+ Evaluation: {
271
+ "pass": false,
272
+ "think": "The question asks for top 3 products but only 2 are provided.",
273
+ "plurality_analysis": {
274
+ "expects_multiple": true,
275
+ "provides_multiple": true,
276
+ "count_expected": 3,
277
+ "count_provided": 2
278
+ }
279
+ }
280
+
281
+ Question: "List as many AI companies in Berlin as you can find"
282
+ Answer: "Here are several AI companies in Berlin: Ada Health, Merantix, DeepL, Understand.ai, and Zeitgold. There are many more AI companies in Berlin, but these are some notable examples."
283
+ Evaluation: {
284
+ "pass": false,
285
+ "think": "While the answer provides multiple companies, it explicitly states it's an incomplete list when the question asks to list as many as possible.",
286
+ "plurality_analysis": {
287
+ "expects_multiple": true,
288
+ "provides_multiple": true,
289
+ "count_provided": 5
290
+ }
291
+ }
292
+ </examples>
293
+
294
+ Now evaluate this pair:
295
+ Question: ${JSON.stringify(question)}
296
+ Answer: ${JSON.stringify(answer)}`;
297
+ }
298
+
299
+
300
+ const questionEvaluationSchema = z.object({
301
+ needsFreshness: z.boolean().describe('Whether the question requires freshness check'),
302
+ needsPlurality: z.boolean().describe('Whether the question requires plurality check'),
303
+ reasoning: z.string().describe('Explanation of why these checks are needed or not needed'),
304
+ languageStyle: z.string().describe('The language being used and the overall vibe/mood of the question'),
305
+ });
306
+
307
+ function getQuestionEvaluationPrompt(question: string): string {
308
+ return `You are an evaluator that determines if a question requires freshness and/or plurality checks in addition to the required definitiveness check.
309
+
310
+ <evaluation_types>
311
+ 1. freshness - Checks if the question is time-sensitive or requires very recent information
312
+ 2. plurality - Checks if the question asks for multiple items or a specific count or enumeration
313
+ 3. language style - Identifies both the language used and the overall vibe of the question
314
+ </evaluation_types>
315
+
316
+ <rules>
317
+ If question is a simple greeting, chit-chat, or general knowledge, provide the answer directly.
318
+
319
+ 1. Freshness Evaluation:
320
+ - Required for questions about current state, recent events, or time-sensitive information
321
+ - Required for: prices, versions, leadership positions, status updates
322
+ - Look for terms: "current", "latest", "recent", "now", "today", "new"
323
+ - Consider company positions, product versions, market data time-sensitive
324
+
325
+ 2. Plurality Evaluation:
326
+ - Required when question asks for multiple items or specific counts
327
+ - Check for: numbers ("5 examples"), plural nouns, list requests
328
+ - Look for: "all", "list", "enumerate", "examples", plural forms
329
+ - Required when question implies completeness ("all the reasons", "every factor")
330
+
331
+ 3. Language Style Analysis:
332
+ Combine both language and emotional vibe in a descriptive phrase, considering:
333
+ - Language: The primary language or mix of languages used
334
+ - Emotional tone: panic, excitement, frustration, curiosity, etc.
335
+ - Formality level: academic, casual, professional, etc.
336
+ - Domain context: technical, academic, social, etc.
337
+ </rules>
338
+
339
+ <examples>
340
+ Question: "fam PLEASE help me calculate the eigenvalues of this 4x4 matrix ASAP!! [matrix details] got an exam tmrw 😭"
341
+ Evaluation: {
342
+ "needsFreshness": false,
343
+ "needsPlurality": true,
344
+ "reasoning": "Multiple eigenvalues needed but no time-sensitive information required",
345
+ "languageStyle": "panicked student English with math jargon"
346
+ }
347
+
348
+ Question: "Can someone explain how tf did Ferrari mess up their pit stop strategy AGAIN?! 🤦‍♂️ #MonacoGP"
349
+ Evaluation: {
350
+ "needsFreshness": true,
351
+ "needsPlurality": true,
352
+ "reasoning": "Refers to recent race event and requires analysis of multiple strategic decisions",
353
+ "languageStyle": "frustrated fan English with F1 terminology"
354
+ }
355
+
356
+ Question: "肖老师您好,请您介绍一下最近��子计算领域的三个重大突破,特别是它们在密码学领域的应用价值吗?🤔"
357
+ Evaluation: {
358
+ "needsFreshness": true,
359
+ "needsPlurality": true,
360
+ "reasoning": "Asks for recent breakthroughs (freshness) and specifically requests three examples (plurality)",
361
+ "languageStyle": "formal technical Chinese with academic undertones"
362
+ }
363
+
364
+ Question: "Bruder krass, kannst du mir erklären warum meine neural network training loss komplett durchdreht? Hab schon alles probiert 😤"
365
+ Evaluation: {
366
+ "needsFreshness": false,
367
+ "needsPlurality": true,
368
+ "reasoning": "Requires comprehensive debugging analysis of multiple potential issues",
369
+ "languageStyle": "frustrated German-English tech slang"
370
+ }
371
+
372
+ Question: "Does anyone have insights into the sociopolitical implications of GPT-4's emergence in the Global South, particularly regarding indigenous knowledge systems and linguistic diversity? Looking for a nuanced analysis."
373
+ Evaluation: {
374
+ "needsFreshness": true,
375
+ "needsPlurality": true,
376
+ "reasoning": "Requires analysis of current impacts (freshness) across multiple dimensions: sociopolitical, cultural, and linguistic (plurality)",
377
+ "languageStyle": "formal academic English with sociological terminology"
378
+ }
379
+ </examples>
380
+
381
+ Now evaluate this question:
382
+ Question: ${JSON.stringify(question)}`;
383
+ }
384
+
385
+ const TOOL_NAME = 'evaluator';
386
+
387
+ export async function evaluateQuestion(
388
+ question: string,
389
+ tracker?: TokenTracker
390
+ ): Promise<EvaluationCriteria> {
391
+ try {
392
+ const generator = new ObjectGeneratorSafe(tracker);
393
+
394
+ const result = await generator.generateObject({
395
+ model: TOOL_NAME,
396
+ schema: questionEvaluationSchema,
397
+ prompt: getQuestionEvaluationPrompt(question),
398
+ });
399
+
400
+ console.log('Question Evaluation:', result.object);
401
+
402
+ // Always include definitive in types
403
+ const types: EvaluationType[] = ['definitive'];
404
+ if (result.object.needsFreshness) types.push('freshness');
405
+ if (result.object.needsPlurality) types.push('plurality');
406
+
407
+ console.log('Question Metrics:', types);
408
+
409
+ // Always evaluate definitive first, then freshness (if needed), then plurality (if needed)
410
+ return {types, languageStyle: result.object.languageStyle};
411
+
412
+ } catch (error) {
413
+ console.error('Error in question evaluation:', error);
414
+ // Default to all evaluation types in case of error
415
+ return {types: ['definitive', 'freshness', 'plurality'], languageStyle: 'plain English'};
416
+ }
417
+ }
418
+
419
+
420
+ async function performEvaluation<T>(
421
+ evaluationType: EvaluationType,
422
+ params: {
423
+ schema: z.ZodType<T>;
424
+ prompt: string;
425
+ },
426
+ tracker?: TokenTracker
427
+ ): Promise<GenerateObjectResult<T>> {
428
+ const generator = new ObjectGeneratorSafe(tracker);
429
+
430
+ const result = await generator.generateObject({
431
+ model: TOOL_NAME,
432
+ schema: params.schema,
433
+ prompt: params.prompt,
434
+ });
435
+
436
+ console.log(`${evaluationType} ${TOOL_NAME}`, result.object);
437
+
438
+ return result as GenerateObjectResult<any>;
439
+ }
440
+
441
+
442
+ // Main evaluation function
443
+ export async function evaluateAnswer(
444
+ question: string,
445
+ action: AnswerAction,
446
+ evaluationCri: EvaluationCriteria,
447
+ tracker?: TokenTracker
448
+ ): Promise<{ response: EvaluationResponse }> {
449
+ let result;
450
+
451
+ // Only add attribution if we have valid references
452
+ if (action.references && action.references.length > 0) {
453
+ evaluationCri.types = ['attribution', ...evaluationCri.types];
454
+ }
455
+
456
+ for (const evaluationType of evaluationCri.types) {
457
+ switch (evaluationType) {
458
+ case 'attribution': {
459
+ // Safely handle references and ensure we have content
460
+ const urls = action.references?.map(ref => ref.url) ?? [];
461
+ const uniqueURLs = [...new Set(urls)];
462
+ const allKnowledge = await fetchSourceContent(uniqueURLs, tracker);
463
+
464
+ if (!allKnowledge.trim()) {
465
+ return {
466
+ response: {
467
+ pass: false,
468
+ think: "The answer does not provide any valid attribution references that could be verified. No accessible source content was found to validate the claims made in the answer.",
469
+ type: 'attribution',
470
+ }
471
+ };
472
+ }
473
+
474
+ result = await performEvaluation(
475
+ 'attribution',
476
+ {
477
+ schema: attributionSchema,
478
+ prompt: getAttributionPrompt(question, action.answer, allKnowledge),
479
+ },
480
+ tracker
481
+ );
482
+ break;
483
+ }
484
+
485
+ case 'definitive':
486
+ result = await performEvaluation(
487
+ 'definitive',
488
+ {
489
+ schema: definitiveSchema,
490
+ prompt: getDefinitivePrompt(question, action.answer),
491
+ },
492
+ tracker
493
+ );
494
+ break;
495
+
496
+ case 'freshness':
497
+ result = await performEvaluation(
498
+ 'freshness',
499
+ {
500
+ schema: freshnessSchema,
501
+ prompt: getFreshnessPrompt(question, action.answer, new Date().toISOString()),
502
+ },
503
+ tracker
504
+ );
505
+ break;
506
+
507
+ case 'plurality':
508
+ result = await performEvaluation(
509
+ 'plurality',
510
+ {
511
+ schema: pluralitySchema,
512
+ prompt: getPluralityPrompt(question, action.answer),
513
+ },
514
+ tracker
515
+ );
516
+ break;
517
+ }
518
+
519
+ if (!result?.object.pass) {
520
+ return {response: result.object};
521
+ }
522
+ }
523
+
524
+ return {response: result!.object};
525
+ }
526
+
527
+ // Helper function to fetch and combine source content
528
+ async function fetchSourceContent(urls: string[], tracker?: TokenTracker): Promise<string> {
529
+ if (!urls.length) return '';
530
+
531
+ try {
532
+ const results = await Promise.all(
533
+ urls.map(async (url) => {
534
+ try {
535
+ const {response} = await readUrl(url, tracker);
536
+ const content = response?.data?.content || '';
537
+ return removeAllLineBreaks(content);
538
+ } catch (error) {
539
+ console.error('Error reading URL:', error);
540
+ return '';
541
+ }
542
+ })
543
+ );
544
+
545
+ // Filter out empty results and join with proper separation
546
+ return results
547
+ .filter(content => content.trim())
548
+ .join('\n\n');
549
+ } catch (error) {
550
+ console.error('Error fetching source content:', error);
551
+ return '';
552
+ }
553
+ }
src/tools/grounding.ts ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { generateText } from 'ai';
2
+ import {getModel} from "../config";
3
+ import { GoogleGenerativeAIProviderMetadata } from '@ai-sdk/google';
4
+ import {TokenTracker} from "../utils/token-tracker";
5
+
6
// Resolved once at module load: the model configured for search grounding.
const model = getModel('searchGrounding')

/**
 * Answer a query using the search-grounding model and return the answer text
 * concatenated with the grounded source snippets, separated by '|'.
 *
 * Token usage is recorded under the 'grounding' tool on the given tracker
 * (or a throwaway tracker when none is supplied). Rethrows any SDK error
 * after logging it.
 */
export async function grounding(query: string, tracker?: TokenTracker): Promise<string> {
  try {
    const { text, experimental_providerMetadata, usage } = await generateText({
      model,
      prompt:
        `Current date is ${new Date().toISOString()}. Find the latest answer to the following question:
<query>
${query}
</query>
Must include the date and time of the latest answer.`,
    });

    // Provider-specific metadata: Google's grounding details live under the
    // 'google' key of the experimental provider metadata.
    const metadata = experimental_providerMetadata?.google as
      | GoogleGenerativeAIProviderMetadata
      | undefined;
    const groundingMetadata = metadata?.groundingMetadata;

    // Extract and concatenate all groundingSupport text into a single line.
    const groundedText = groundingMetadata?.groundingSupports
      ?.map(support => support.segment.text)
      .join(' ') || '';

    (tracker || new TokenTracker()).trackUsage('grounding', usage);
    console.log('Grounding:', {text, groundedText});
    return text + '|' + groundedText;

  } catch (error) {
    console.error('Error in search:', error);
    throw error;
  }
}
src/tools/jina-dedup.ts ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import axios, {AxiosError} from 'axios';
2
+ import {TokenTracker} from "../utils/token-tracker";
3
+ import {JINA_API_KEY} from "../config";
4
+
5
// Endpoint for Jina's batch embedding API.
const JINA_API_URL = 'https://api.jina.ai/v1/embeddings';
// Queries with cosine similarity at or above this are considered duplicates.
const SIMILARITY_THRESHOLD = 0.93; // Adjustable threshold for cosine similarity

// Fixed request parameters sent with every embedding call.
const JINA_API_CONFIG = {
  MODEL: 'jina-embeddings-v3',
  TASK: 'text-matching',
  DIMENSIONS: 1024,
  EMBEDDING_TYPE: 'float',
  LATE_CHUNKING: false
} as const;

// Types for Jina API

// Request payload for POST /v1/embeddings.
interface JinaEmbeddingRequest {
  model: string;
  task: string;
  late_chunking: boolean;
  dimensions: number;
  embedding_type: string;
  input: string[];
}

// Response shape: one embedding per input, tagged with its input index
// (order in `data` is not guaranteed to match the input order).
interface JinaEmbeddingResponse {
  model: string;
  object: string;
  usage: {
    total_tokens: number;
    prompt_tokens: number;
  };
  data: Array<{
    object: string;
    index: number;
    embedding: number[];
  }>;
}
39
+
40
+
41
+ // Compute cosine similarity between two vectors
42
+ function cosineSimilarity(vecA: number[], vecB: number[]): number {
43
+ const dotProduct = vecA.reduce((sum, a, i) => sum + a * vecB[i], 0);
44
+ const normA = Math.sqrt(vecA.reduce((sum, a) => sum + a * a, 0));
45
+ const normB = Math.sqrt(vecB.reduce((sum, b) => sum + b * b, 0));
46
+ return dotProduct / (normA * normB);
47
+ }
48
+
49
+ // Get embeddings for all queries in one batch
50
+ async function getEmbeddings(queries: string[]): Promise<{ embeddings: number[][], tokens: number }> {
51
+ if (!JINA_API_KEY) {
52
+ throw new Error('JINA_API_KEY is not set');
53
+ }
54
+
55
+ const request: JinaEmbeddingRequest = {
56
+ model: JINA_API_CONFIG.MODEL,
57
+ task: JINA_API_CONFIG.TASK,
58
+ late_chunking: JINA_API_CONFIG.LATE_CHUNKING,
59
+ dimensions: JINA_API_CONFIG.DIMENSIONS,
60
+ embedding_type: JINA_API_CONFIG.EMBEDDING_TYPE,
61
+ input: queries
62
+ };
63
+
64
+ try {
65
+ const response = await axios.post<JinaEmbeddingResponse>(
66
+ JINA_API_URL,
67
+ request,
68
+ {
69
+ headers: {
70
+ 'Content-Type': 'application/json',
71
+ 'Authorization': `Bearer ${JINA_API_KEY}`
72
+ }
73
+ }
74
+ );
75
+
76
+ // Validate response format
77
+ if (!response.data.data || response.data.data.length !== queries.length) {
78
+ console.error('Invalid response from Jina API:', response.data);
79
+ return {
80
+ embeddings: [],
81
+ tokens: 0
82
+ };
83
+ }
84
+
85
+ // Sort embeddings by index to maintain original order
86
+ const embeddings = response.data.data
87
+ .sort((a, b) => a.index - b.index)
88
+ .map(item => item.embedding);
89
+
90
+ return {
91
+ embeddings,
92
+ tokens: response.data.usage.total_tokens
93
+ };
94
+ } catch (error) {
95
+ console.error('Error getting embeddings from Jina:', error);
96
+ if (error instanceof AxiosError && error.response?.status === 402) {
97
+ return {
98
+ embeddings: [],
99
+ tokens: 0
100
+ };
101
+ }
102
+ throw error;
103
+ }
104
+ }
105
+
106
+ export async function dedupQueries(
107
+ newQueries: string[],
108
+ existingQueries: string[],
109
+ tracker?: TokenTracker
110
+ ): Promise<{ unique_queries: string[] }> {
111
+ try {
112
+ // Quick return for single new query with no existing queries
113
+ if (newQueries.length === 1 && existingQueries.length === 0) {
114
+ return {
115
+ unique_queries: newQueries,
116
+ };
117
+ }
118
+
119
+ // Get embeddings for all queries in one batch
120
+ const allQueries = [...newQueries, ...existingQueries];
121
+ const {embeddings: allEmbeddings, tokens} = await getEmbeddings(allQueries);
122
+
123
+ // If embeddings is empty (due to 402 error), return all new queries
124
+ if (!allEmbeddings.length) {
125
+ return {
126
+ unique_queries: newQueries,
127
+ };
128
+ }
129
+
130
+ // Split embeddings back into new and existing
131
+ const newEmbeddings = allEmbeddings.slice(0, newQueries.length);
132
+ const existingEmbeddings = allEmbeddings.slice(newQueries.length);
133
+
134
+ const uniqueQueries: string[] = [];
135
+ const usedIndices = new Set<number>();
136
+
137
+ // Compare each new query against existing queries and already accepted queries
138
+ for (let i = 0; i < newQueries.length; i++) {
139
+ let isUnique = true;
140
+
141
+ // Check against existing queries
142
+ for (let j = 0; j < existingQueries.length; j++) {
143
+ const similarity = cosineSimilarity(newEmbeddings[i], existingEmbeddings[j]);
144
+ if (similarity >= SIMILARITY_THRESHOLD) {
145
+ isUnique = false;
146
+ break;
147
+ }
148
+ }
149
+
150
+ // Check against already accepted queries
151
+ if (isUnique) {
152
+ for (const usedIndex of usedIndices) {
153
+ const similarity = cosineSimilarity(newEmbeddings[i], newEmbeddings[usedIndex]);
154
+ if (similarity >= SIMILARITY_THRESHOLD) {
155
+ isUnique = false;
156
+ break;
157
+ }
158
+ }
159
+ }
160
+
161
+ // Add to unique queries if passed all checks
162
+ if (isUnique) {
163
+ uniqueQueries.push(newQueries[i]);
164
+ usedIndices.add(i);
165
+ }
166
+ }
167
+
168
+ // Track token usage from the API
169
+ (tracker || new TokenTracker()).trackUsage('dedup', {
170
+ promptTokens: tokens,
171
+ completionTokens: 0,
172
+ totalTokens: tokens
173
+ });
174
+ console.log('Dedup:', uniqueQueries);
175
+ return {
176
+ unique_queries: uniqueQueries,
177
+ };
178
+ } catch (error) {
179
+ console.error('Error in deduplication analysis:', error);
180
+ throw error;
181
+ }
182
+ }
src/tools/jina-search.ts ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import https from 'https';
2
+ import { TokenTracker } from "../utils/token-tracker";
3
+ import { SearchResponse } from '../types';
4
+ import { JINA_API_KEY } from "../config";
5
+
6
+ export function search(query: string, tracker?: TokenTracker): Promise<{ response: SearchResponse}> {
7
+ return new Promise((resolve, reject) => {
8
+ if (!query.trim()) {
9
+ reject(new Error('Query cannot be empty'));
10
+ return;
11
+ }
12
+
13
+ const options = {
14
+ hostname: 's.jina.ai',
15
+ port: 443,
16
+ path: `/${encodeURIComponent(query)}?count=0`,
17
+ method: 'GET',
18
+ headers: {
19
+ 'Accept': 'application/json',
20
+ 'Authorization': `Bearer ${JINA_API_KEY}`,
21
+ 'X-Retain-Images': 'none'
22
+ }
23
+ };
24
+
25
+ const req = https.request(options, (res) => {
26
+ let responseData = '';
27
+
28
+ res.on('data', (chunk) => responseData += chunk);
29
+
30
+ res.on('end', () => {
31
+ // Check HTTP status code first
32
+ if (res.statusCode && res.statusCode >= 400) {
33
+ try {
34
+ // Try to parse error message from response if available
35
+ const errorResponse = JSON.parse(responseData);
36
+ if (res.statusCode === 402) {
37
+ reject(new Error(errorResponse.readableMessage || 'Insufficient balance'));
38
+ return;
39
+ }
40
+ reject(new Error(errorResponse.readableMessage || `HTTP Error ${res.statusCode}`));
41
+ } catch {
42
+ // If parsing fails, just return the status code
43
+ reject(new Error(`HTTP Error ${res.statusCode}`));
44
+ }
45
+ return;
46
+ }
47
+
48
+ // Only parse JSON for successful responses
49
+ let response: SearchResponse;
50
+ try {
51
+ response = JSON.parse(responseData) as SearchResponse;
52
+ } catch (error: unknown) {
53
+ reject(new Error(`Failed to parse response: ${error instanceof Error ? error.message : 'Unknown error'}`));
54
+ return;
55
+ }
56
+
57
+ if (!response.data || !Array.isArray(response.data)) {
58
+ reject(new Error('Invalid response format'));
59
+ return;
60
+ }
61
+
62
+ const totalTokens = response.data.reduce((sum, item) => sum + (item.usage?.tokens || 0), 0);
63
+ console.log('Total URLs:', response.data.length);
64
+
65
+ const tokenTracker = tracker || new TokenTracker();
66
+ tokenTracker.trackUsage('search', {
67
+ totalTokens,
68
+ promptTokens: query.length,
69
+ completionTokens: totalTokens
70
+ });
71
+
72
+ resolve({ response });
73
+ });
74
+ });
75
+
76
+ // Add timeout handling
77
+ req.setTimeout(30000, () => {
78
+ req.destroy();
79
+ reject(new Error('Request timed out'));
80
+ });
81
+
82
+ req.on('error', (error) => {
83
+ reject(new Error(`Request failed: ${error.message}`));
84
+ });
85
+
86
+ req.end();
87
+ });
88
+ }
src/tools/query-rewriter.ts ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { z } from 'zod';
2
+ import { TokenTracker } from "../utils/token-tracker";
3
+ import { SearchAction } from '../types';
4
+ import {ObjectGeneratorSafe} from "../utils/safe-generator";
5
+
6
+
7
// Schema for the query rewriter's structured output: strategic reasoning
// plus 1-3 search queries that should be orthogonal to each other.
const responseSchema = z.object({
  think: z.string().describe('Strategic reasoning about query complexity and search approach'),
  queries: z.array(z.string().describe('Search query, must be less than 30 characters'))
    .min(1)
    .max(3)
    .describe('Array of search queries, orthogonal to each other')
});
14
+
15
+
16
+
17
/**
 * Build the query-optimization prompt for a SearchAction: rules, supported
 * search operators, and few-shot examples, followed by the action's query
 * and intent. The output contract is enforced separately by responseSchema.
 */
function getPrompt(action: SearchAction): string {
  return `You are an expert Information Retrieval query optimizer. Optimize user queries into precise keyword combinations with strategic reasoning and appropriate search operators.

<rules>
1. Generate search queries that directly include appropriate operators
2. Keep base keywords minimal: 2-3 words preferred
3. Use exact match quotes for specific phrases that must stay together
4. Split queries only when necessary for distinctly different aspects
5. Preserve crucial qualifiers while removing fluff words
6. Make the query resistant to SEO manipulation
7. When necessary, append <query-operators> at the end only when must needed


<query-operators>
A query can't only have operators; and operators can't be at the start a query;

- "phrase" : exact match for phrases
- +term : must include term; for critical terms that must appear
- -term : exclude term; exclude irrelevant or ambiguous terms
- filetype:pdf/doc : specific file type
- site:example.com : limit to specific site
- lang:xx : language filter (ISO 639-1 code)
- loc:xx : location filter (ISO 3166-1 code)
- intitle:term : term must be in title
- inbody:term : term must be in body text
</query-operators>

</rules>

<examples>
Input Query: What's the difference between ReactJS and Vue.js for building web applications?
<think>
This is a comparison query. User is likely looking for technical evaluation and objective feature comparisons, possibly for framework selection decisions. We'll split this into separate queries to capture both high-level differences and specific technical aspects.
</think>
Queries: [
"react performance",
"vue performance",
"react vue comparison",
]

Input Query: How to fix a leaking kitchen faucet?
<think>
This is a how-to query seeking practical solutions. User likely wants step-by-step guidance and visual demonstrations for DIY repair. We'll target both video tutorials and written guides.
</think>
Output Queries: [
"kitchen faucet leak repair",
"faucet drip fix site:youtube.com",
"how to repair faucet "
]

Input Query: What are healthy breakfast options for type 2 diabetes?
<think>
This is a health-specific informational query. User needs authoritative medical advice combined with practical meal suggestions. Splitting into medical guidelines and recipes will provide comprehensive coverage.
</think>
Output Queries: [
"what to eat for type 2 diabetes",
"type 2 diabetes breakfast guidelines",
"diabetic breakfast recipes"
]

Input Query: Latest AWS Lambda features for serverless applications
<think>
This is a product research query focused on recent updates. User wants current information about specific technology features, likely for implementation purposes. We'll target official docs and community insights.
</think>
Output Queries: [
"aws lambda features site:aws.amazon.com intitle:2025",
"new features lambda serverless"
]
</examples>

Now, process this query:
Input Query: ${action.searchQuery}
Intention: ${action.think}
`;
}
92
+
93
+ const TOOL_NAME = 'queryRewriter';
94
+
95
+ export async function rewriteQuery(action: SearchAction, tracker?: TokenTracker): Promise<{ queries: string[] }> {
96
+ try {
97
+ const generator = new ObjectGeneratorSafe(tracker);
98
+ const prompt = getPrompt(action);
99
+
100
+ const result = await generator.generateObject({
101
+ model: TOOL_NAME,
102
+ schema: responseSchema,
103
+ prompt,
104
+ });
105
+
106
+ console.log(TOOL_NAME, result.object.queries);
107
+ return { queries: result.object.queries };
108
+ } catch (error) {
109
+ console.error(`Error in ${TOOL_NAME}`, error);
110
+ throw error;
111
+ }
112
+ }
src/tools/read.ts ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import https from 'https';
2
+ import { TokenTracker } from "../utils/token-tracker";
3
+ import { ReadResponse } from '../types';
4
+ import { JINA_API_KEY } from "../config";
5
+
6
+ export function readUrl(url: string, tracker?: TokenTracker): Promise<{ response: ReadResponse }> {
7
+ return new Promise((resolve, reject) => {
8
+ if (!url.trim()) {
9
+ reject(new Error('URL cannot be empty'));
10
+ return;
11
+ }
12
+
13
+ const data = JSON.stringify({ url });
14
+
15
+ const options = {
16
+ hostname: 'r.jina.ai',
17
+ port: 443,
18
+ path: '/',
19
+ method: 'POST',
20
+ headers: {
21
+ 'Accept': 'application/json',
22
+ 'Authorization': `Bearer ${JINA_API_KEY}`,
23
+ 'Content-Type': 'application/json',
24
+ 'Content-Length': data.length,
25
+ 'X-Retain-Images': 'none',
26
+ 'X-Return-Format': 'markdown'
27
+ }
28
+ };
29
+
30
+ const req = https.request(options, (res) => {
31
+ let responseData = '';
32
+
33
+ res.on('data', (chunk) => responseData += chunk);
34
+
35
+ res.on('end', () => {
36
+ // Check HTTP status code first
37
+ if (res.statusCode && res.statusCode >= 400) {
38
+ try {
39
+ // Try to parse error message from response if available
40
+ const errorResponse = JSON.parse(responseData);
41
+ if (res.statusCode === 402) {
42
+ reject(new Error(errorResponse.readableMessage || 'Insufficient balance'));
43
+ return;
44
+ }
45
+ reject(new Error(errorResponse.readableMessage || `HTTP Error ${res.statusCode}`));
46
+ } catch (error: unknown) {
47
+ // If parsing fails, just return the status code
48
+ reject(new Error(`HTTP Error ${res.statusCode}`));
49
+ }
50
+ return;
51
+ }
52
+
53
+ // Only parse JSON for successful responses
54
+ let response: ReadResponse;
55
+ try {
56
+ response = JSON.parse(responseData) as ReadResponse;
57
+ } catch (error: unknown) {
58
+ reject(new Error(`Failed to parse response: ${error instanceof Error ? error.message : 'Unknown error'}`));
59
+ return;
60
+ }
61
+
62
+ if (!response.data) {
63
+ reject(new Error('Invalid response data'));
64
+ return;
65
+ }
66
+
67
+ console.log('Read:', {
68
+ title: response.data.title,
69
+ url: response.data.url,
70
+ tokens: response.data.usage?.tokens || 0
71
+ });
72
+
73
+ const tokens = response.data.usage?.tokens || 0;
74
+ const tokenTracker = tracker || new TokenTracker();
75
+ tokenTracker.trackUsage('read', {
76
+ totalTokens: tokens,
77
+ promptTokens: url.length,
78
+ completionTokens: tokens
79
+ });
80
+
81
+ resolve({ response });
82
+ });
83
+ });
84
+
85
+ // Add timeout handling
86
+ req.setTimeout(30000, () => {
87
+ req.destroy();
88
+ reject(new Error('Request timed out'));
89
+ });
90
+
91
+ req.on('error', (error: Error) => {
92
+ reject(new Error(`Request failed: ${error.message}`));
93
+ });
94
+
95
+ req.write(data);
96
+ req.end();
97
+ });
98
+ }
99
+
100
+ export function removeAllLineBreaks(text: string) {
101
+ return text.replace(/(\r\n|\n|\r)/gm, " ");
102
+ }
src/types.ts ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Action Types
import {CoreAssistantMessage, CoreUserMessage, LanguageModelUsage} from "ai";

// Fields shared by every agent action: the action kind and its reasoning.
type BaseAction = {
  action: "search" | "answer" | "reflect" | "visit";
  think: string;
};

// Run a web search for the given query.
export type SearchAction = BaseAction & {
  action: "search";
  searchQuery: string;
};

// Produce an answer, optionally backed by quoted references.
export type AnswerAction = BaseAction & {
  action: "answer";
  answer: string;
  references: Array<{
    exactQuote: string;
    url: string;
  }>;
  // Set when this answer terminates the agent loop.
  isFinal?: boolean;
};


// A unit of accumulated knowledge (Q/A pair, side info, chat turn, or URL).
export type KnowledgeItem = {
  question: string,
  answer: string,
  references?: Array<{
    exactQuote: string;
    url: string;
  }> | Array<any>;
  type: 'qa' | 'side-info' | 'chat-history' | 'url',
  // Timestamp (or similar marker) of when this item was last updated —
  // format not fixed by this file; verify against producers.
  updated: string,
}

// Break the question down into sub-questions to answer first.
export type ReflectAction = BaseAction & {
  action: "reflect";
  questionsToAnswer: string[];
};

// Visit (read) the listed URLs.
export type VisitAction = BaseAction & {
  action: "visit";
  URLTargets: string[];
};

// Discriminated union over all agent actions, keyed on `action`.
export type StepAction = SearchAction | AnswerAction | ReflectAction | VisitAction;

// The evaluation passes an answer can be checked against.
export type EvaluationType = 'definitive' | 'freshness' | 'plurality' | 'attribution';
export type EvaluationCriteria = {
  types: EvaluationType[];
  // Detected language + tone of the question, used to style the answer.
  languageStyle: string;
};

// Following Vercel AI SDK's token counting interface
export interface TokenUsage {
  tool: string;
  usage: LanguageModelUsage;
}

// Response shape of the Jina search endpoint (s.jina.ai).
export interface SearchResponse {
  code: number;
  status: number;
  data: Array<{
    title: string;
    description: string;
    url: string;
    content: string;
    usage: { tokens: number; };
  }> | null;
  // Error fields, present on failures.
  name?: string;
  message?: string;
  readableMessage?: string;
}

// Response shape of the Brave web search API.
export interface BraveSearchResponse {
  web: {
    results: Array<{
      title: string;
      description: string;
      url: string;
    }>;
  };
}

export type DedupResponse = {
  think: string;
  unique_queries: string[];
};

// Response shape of the Jina reader endpoint (r.jina.ai).
export interface ReadResponse {
  code: number;
  status: number;
  data?: {
    title: string;
    description: string;
    url: string;
    content: string;
    usage: { tokens: number; };
  };
  // Error fields, present on failures.
  name?: string;
  message?: string;
  readableMessage?: string;
}


// Result of one evaluation pass; the optional analyses are populated only
// by their corresponding evaluation type.
export type EvaluationResponse = {
  pass: boolean;
  think: string;
  type?: 'definitive' | 'freshness' | 'plurality' | 'attribution';
  freshness_analysis?: {
    likely_outdated: boolean;
    dates_mentioned: string[];
    current_time: string;
    max_age_days?: number;
  };
  plurality_analysis?: {
    expects_multiple: boolean;
    provides_multiple: boolean;
    count_expected?: number;
    count_provided: number;
  };
};

// Post-mortem of a failed agent run: what happened, why, and what to ask next.
export type ErrorAnalysisResponse = {
  recap: string;
  blame: string;
  improvement: string;
  questionsToAnswer: string[];
};

export interface SearchResult {
  title: string;
  url: string;
  description: string;
}

export interface QueryResult {
  query: string;
  results: SearchResult[];
}

// Snapshot of a single agent step, for logging/streaming.
export interface StepData {
  step: number;
  question: string;
  action: string;
  reasoning: string;
  searchQuery?: string;
  result?: QueryResult[];
}

export type KeywordsResponse = {
  think: string;
  queries: string[];
};

// One server-sent message in a streaming agent session.
export interface StreamMessage {
  type: 'progress' | 'answer' | 'error';
  data: string | StepAction;
  step?: number;
  // Token-budget progress for the run.
  budget?: {
    used: number;
    total: number;
    percentage: string;
  };
}

// OpenAI API Types
export interface Model {
  id: string;
  object: 'model';
  created: number;
  owned_by: string;
}

// OpenAI-compatible chat completion request (subset supported here).
export interface ChatCompletionRequest {
  model: string;
  messages: Array<CoreUserMessage | CoreAssistantMessage>;
  stream?: boolean;
  reasoning_effort?: 'low' | 'medium' | 'high' | null;
  max_completion_tokens?: number | null;
}

// OpenAI-compatible non-streaming chat completion response.
export interface ChatCompletionResponse {
  id: string;
  object: 'chat.completion';
  created: number;
  model: string;
  system_fingerprint: string;
  choices: Array<{
    index: number;
    message: {
      role: 'assistant';
      content: string;
    };
    logprobs: null;
    finish_reason: 'stop';
  }>;
  usage: {
    prompt_tokens: number;
    completion_tokens: number;
    total_tokens: number;
  };
}

// OpenAI-compatible streaming chunk.
export interface ChatCompletionChunk {
  id: string;
  object: 'chat.completion.chunk';
  created: number;
  model: string;
  system_fingerprint: string;
  choices: Array<{
    index: number;
    delta: {
      role?: 'assistant';
      content?: string;
    };
    logprobs: null;
    finish_reason: null | 'stop';
  }>;
  usage?: any;
}

// Tracker Types
import {TokenTracker} from './utils/token-tracker';
import {ActionTracker} from './utils/action-tracker';

// Bundle of per-run trackers threaded through the agent.
export interface TrackerContext {
  tokenTracker: TokenTracker;
  actionTracker: ActionTracker;
}