import express, {Request, Response, RequestHandler} from 'express';
import cors from 'cors';
import {getResponse} from './agent';
import {
  TrackerContext,
  ChatCompletionRequest,
  ChatCompletionResponse,
  ChatCompletionChunk,
  AnswerAction,
  Model,
  StepAction
} from './types';
import {TokenTracker} from "./utils/token-tracker";
import {ActionTracker} from "./utils/action-tracker";
const app = express();

// Get secret from command line args for optional authentication
const secret = process.argv.find(arg => arg.startsWith('--secret='))?.split('=')[1];

app.use(cors());
app.use(express.json({
  limit: '10mb'
}));

// Add health check endpoint for Docker container verification
app.get('/health', (req, res) => {
  res.json({status: 'ok'});
});
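// Illustrative check (assumes the app listens on port 3000, which is configured outside this file):
//   curl http://localhost:3000/health  =>  {"status":"ok"}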
function buildMdFromAnswer(answer: AnswerAction) {
  if (!answer.references?.length || !answer.references.some(ref => ref.url.startsWith('http'))) {
    return answer.answer;
  }

  const references = answer.references.map((ref, i) => {
    const escapedQuote = ref.exactQuote
      .replace(/([[\]_*`])/g, '\\$1')
      .replace(/\n/g, ' ')
      .trim();
    return `[^${i + 1}]: [${escapedQuote}](${ref.url})`;
  }).join('\n\n');

  return `
${answer.answer.replace(/\(REF_(\d+)\)/g, (_, num) => `[^${num}]`)}
${references}
`.trim();
}
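// Illustrative example (values are made up, not from the codebase): for an AnswerAction such as
//   {answer: 'Cats sleep a lot (REF_1).', references: [{exactQuote: 'Cats sleep 12 to 16 hours a day', url: 'https://example.com/cats'}]}
// buildMdFromAnswer returns markdown with footnote-style references:
//   Cats sleep a lot [^1].
//   [^1]: [Cats sleep 12 to 16 hours a day](https://example.com/cats)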
async function* streamTextNaturally(text: string, streamingState: StreamingState) {
  // Split text into chunks that preserve CJK characters, URLs, and regular words
  const chunks = splitTextIntoChunks(text);
  let burstMode = false;
  let consecutiveShortItems = 0;

  for (const chunk of chunks) {
    if (!streamingState.currentlyStreaming) {
      // Streaming was cancelled: flush everything that is left in one piece
      yield chunks.slice(chunks.indexOf(chunk)).join('');
      return;
    }

    const delay = calculateDelay(chunk, burstMode);

    // Handle consecutive short items: several in a row switch the generator into burst mode
    if (getEffectiveLength(chunk) <= 3 && chunk.trim().length > 0) {
      consecutiveShortItems++;
      if (consecutiveShortItems >= 3) {
        burstMode = true;
      }
    } else {
      consecutiveShortItems = 0;
      burstMode = false;
    }

    await new Promise(resolve => setTimeout(resolve, delay));
    yield chunk;
  }
}
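// Illustrative consumption (hypothetical caller, not part of this module):
//   const state: StreamingState = {currentlyStreaming: true, currentGenerator: null,
//     remainingContent: '', isEmitting: false, queue: [], processingQueue: false};
//   for await (const piece of streamTextNaturally('Hello world.', state)) {
//     process.stdout.write(piece); // yields 'Hello', ' ', 'world', '.' with human-like pauses
//   }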
function splitTextIntoChunks(text: string): string[] {
  const chunks: string[] = [];
  let currentChunk = '';
  let inURL = false;

  const pushCurrentChunk = () => {
    if (currentChunk) {
      chunks.push(currentChunk);
      currentChunk = '';
    }
  };

  for (let i = 0; i < text.length; i++) {
    const char = text[i];
    const nextChar = text[i + 1] || '';

    // URL detection
    if (char === 'h' && text.slice(i, i + 8).match(/https?:\/\//)) {
      pushCurrentChunk();
      inURL = true;
    }

    if (inURL) {
      currentChunk += char;
      // End of URL detection (whitespace or certain punctuation)
      if (/[\s\])}"']/.test(nextChar) || i === text.length - 1) {
        pushCurrentChunk();
        inURL = false;
      }
      continue;
    }

    // CJK character detection (including kana and hangul)
    if (/[\u4e00-\u9fff\u3040-\u30ff\uac00-\ud7af]/.test(char)) {
      pushCurrentChunk();
      chunks.push(char);
      continue;
    }

    // Whitespace handling
    if (/\s/.test(char)) {
      pushCurrentChunk();
      chunks.push(char);
      continue;
    }

    // Regular word building
    currentChunk += char;

    // Break on punctuation
    if (/[.!?,;:]/.test(nextChar)) {
      pushCurrentChunk();
    }
  }

  pushCurrentChunk();
  return chunks.filter(chunk => chunk !== '');
}
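// Illustrative example (input string is hypothetical):
//   splitTextIntoChunks('Hello world, see https://example.com 你好')
//   => ['Hello', ' ', 'world', ',', ' ', 'see', ' ', 'https://example.com', ' ', '你', '好']
// Words, whitespace, punctuation, whole URLs, and individual CJK characters each become their own chunk.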
function calculateDelay(chunk: string, burstMode: boolean): number {
  const trimmedChunk = chunk.trim();

  // Handle whitespace
  if (trimmedChunk.length === 0) {
    return Math.random() * 20 + 10;
  }

  // Special handling for URLs
  if (chunk.match(/^https?:\/\//)) {
    return Math.random() * 50 + 100; // Slower typing for URLs
  }

  // Special handling for CJK characters
  if (/^[\u4e00-\u9fff\u3040-\u30ff\uac00-\ud7af]$/.test(chunk)) {
    return Math.random() * 100 + 150; // Longer delay for individual CJK characters
  }

  // Base delay calculation
  let baseDelay;
  if (burstMode) {
    baseDelay = Math.random() * 30 + 20;
  } else {
    const effectiveLength = getEffectiveLength(chunk);
    const perCharacterDelay = Math.max(10, 40 - effectiveLength * 2);
    baseDelay = Math.random() * perCharacterDelay + perCharacterDelay;
  }

  // Add variance based on chunk characteristics
  if (/[A-Z]/.test(chunk[0])) {
    baseDelay += Math.random() * 20 + 10;
  }
  if (/[^a-zA-Z\s]/.test(chunk)) {
    baseDelay += Math.random() * 30 + 15;
  }

  // Add pauses for punctuation
  if (/[.!?]$/.test(chunk)) {
    baseDelay += Math.random() * 350 + 200;
  } else if (/[,;:]$/.test(chunk)) {
    baseDelay += Math.random() * 150 + 100;
  }

  return baseDelay;
}
function getEffectiveLength(chunk: string): number {
  // Count CJK characters as 2 units
  const cjkCount = (chunk.match(/[\u4e00-\u9fff\u3040-\u30ff\uac00-\ud7af]/g) || []).length;
  const regularCount = chunk.length - cjkCount;
  return regularCount + (cjkCount * 2);
}
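// Illustrative example: getEffectiveLength('hi你好') returns 6, because the two ASCII characters
// count as 1 unit each and the two CJK characters count as 2 units each.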
// Helper function to emit remaining content immediately
async function emitRemainingContent(
  res: Response,
  requestId: string,
  created: number,
  model: string,
  content: string,
) {
  if (!content) return;

  const chunk: ChatCompletionChunk = {
    id: requestId,
    object: 'chat.completion.chunk',
    created,
    model: model,
    system_fingerprint: 'fp_' + requestId,
    choices: [{
      index: 0,
      delta: {content},
      logprobs: null,
      finish_reason: null
    }],
  };
  res.write(`data: ${JSON.stringify(chunk)}\n\n`);
}
interface StreamingState {
  currentlyStreaming: boolean;
  currentGenerator: AsyncGenerator<string> | null;
  remainingContent: string;
  isEmitting: boolean;
  queue: { content: string; resolve: () => void }[];
  processingQueue: boolean;
}
function getTokenBudgetAndMaxAttempts(
  reasoningEffort: 'low' | 'medium' | 'high' | null = 'medium',
  maxCompletionTokens: number | null = null
): { tokenBudget: number, maxBadAttempts: number } {
  if (maxCompletionTokens !== null) {
    return {
      tokenBudget: maxCompletionTokens,
      maxBadAttempts: 3 // Default max attempts when only a token budget is given
    };
  }

  switch (reasoningEffort) {
    case 'low':
      return {tokenBudget: 100000, maxBadAttempts: 1};
    case 'high':
      return {tokenBudget: 1000000, maxBadAttempts: 3};
    case 'medium':
    default:
      return {tokenBudget: 500000, maxBadAttempts: 2};
  }
}
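// Summary of the mapping above (no additional behavior):
//   getTokenBudgetAndMaxAttempts('low', null)    => {tokenBudget: 100000,  maxBadAttempts: 1}
//   getTokenBudgetAndMaxAttempts('medium', null) => {tokenBudget: 500000,  maxBadAttempts: 2}  (also the default)
//   getTokenBudgetAndMaxAttempts('high', null)   => {tokenBudget: 1000000, maxBadAttempts: 3}
//   getTokenBudgetAndMaxAttempts(null, 200000)   => {tokenBudget: 200000,  maxBadAttempts: 3}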
async function completeCurrentStreaming(
  streamingState: StreamingState,
  res: Response,
  requestId: string,
  created: number,
  model: string
) {
  if (streamingState.currentlyStreaming && streamingState.remainingContent) {
    // Force completion of current streaming
    await emitRemainingContent(
      res,
      requestId,
      created,
      model,
      streamingState.remainingContent
    );

    // Reset streaming state
    streamingState.currentlyStreaming = false;
    streamingState.remainingContent = '';
    streamingState.currentGenerator = null;
  }
}
// Models API endpoints
app.get('/v1/models', (async (_req: Request, res: Response) => {
  const models: Model[] = [{
    id: 'jina-deepsearch-v1',
    object: 'model',
    created: 1686935002,
    owned_by: 'jina-ai'
  }];

  res.json({
    object: 'list',
    data: models
  });
}) as RequestHandler);
app.get('/v1/models/:model', (async (req: Request, res: Response) => {
  const modelId = req.params.model;

  if (modelId === 'jina-deepsearch-v1') {
    res.json({
      id: 'jina-deepsearch-v1',
      object: 'model',
      created: 1686935002,
      owned_by: 'jina-ai'
    });
  } else {
    res.status(404).json({
      error: {
        message: `Model '${modelId}' not found`,
        type: 'invalid_request_error',
        param: null,
        code: 'model_not_found'
      }
    });
  }
}) as RequestHandler);
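// Illustrative request (assumes the app listens on port 3000, which is configured outside this file):
//   curl http://localhost:3000/v1/models/jina-deepsearch-v1
//   => {"id":"jina-deepsearch-v1","object":"model","created":1686935002,"owned_by":"jina-ai"}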
if (secret) {
  // Check authentication only if secret is set
  app.use((req, res, next) => {
    const authHeader = req.headers.authorization;
    if (!authHeader || !authHeader.startsWith('Bearer ') || authHeader.split(' ')[1] !== secret) {
      console.log('[chat/completions] Unauthorized request');
      res.status(401).json({error: 'Unauthorized'});
      return;
    }

    return next();
  });
}
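// Illustrative usage (secret value is hypothetical): if the process is started with a
// --secret=my-secret argument, every route registered after this middleware requires a matching
// bearer token, e.g.
//   curl -H "Authorization: Bearer my-secret" http://localhost:3000/v1/chat/completions ...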
async function processQueue(streamingState: StreamingState, res: Response, requestId: string, created: number, model: string) {
  if (streamingState.processingQueue) return;

  streamingState.processingQueue = true;

  while (streamingState.queue.length > 0) {
    const current = streamingState.queue[0];

    // Reset streaming state for new content
    streamingState.currentlyStreaming = true;
    streamingState.remainingContent = current.content;
    streamingState.isEmitting = true;

    try {
      for await (const word of streamTextNaturally(current.content, streamingState)) {
        const chunk: ChatCompletionChunk = {
          id: requestId,
          object: 'chat.completion.chunk',
          created,
          model,
          system_fingerprint: 'fp_' + requestId,
          choices: [{
            index: 0,
            delta: {content: word},
            logprobs: null,
            finish_reason: null
          }]
        };
        res.write(`data: ${JSON.stringify(chunk)}\n\n`);
        // Small delay between words
        await new Promise(resolve => setTimeout(resolve, 30));
      }

      // Add newline after content
      const newlineChunk: ChatCompletionChunk = {
        id: requestId,
        object: 'chat.completion.chunk',
        created,
        model,
        system_fingerprint: 'fp_' + requestId,
        choices: [{
          index: 0,
          delta: {content: '\n'},
          logprobs: null,
          finish_reason: null
        }]
      };
      res.write(`data: ${JSON.stringify(newlineChunk)}\n\n`);
    } catch (error) {
      console.error('Error in streaming:', error);
    } finally {
      // Reset state and remove from queue
      streamingState.isEmitting = false;
      streamingState.currentlyStreaming = false;
      streamingState.remainingContent = '';
      streamingState.queue.shift();
      current.resolve();
      // Small delay between queue items
      await new Promise(resolve => setTimeout(resolve, 50));
    }
  }

  streamingState.processingQueue = false;
}
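// Illustrative SSE frame (id and timestamp values are made up): each word streamed above is written
// as one "data:" line, for example
//   data: {"id":"1700000000000","object":"chat.completion.chunk","created":1700000000,"model":"jina-deepsearch-v1","system_fingerprint":"fp_1700000000000","choices":[{"index":0,"delta":{"content":"Searching "},"logprobs":null,"finish_reason":null}]}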
// OpenAI-compatible chat completions endpoint
app.post('/v1/chat/completions', (async (req: Request, res: Response) => {
  // Check authentication only if secret is set
  if (secret) {
    const authHeader = req.headers.authorization;
    if (!authHeader || !authHeader.startsWith('Bearer ') || authHeader.split(' ')[1] !== secret) {
      console.log('[chat/completions] Unauthorized request');
      res.status(401).json({error: 'Unauthorized'});
      return;
    }
  }

  // Log request details (excluding sensitive data)
  console.log('[chat/completions] Request:', {
    model: req.body.model,
    stream: req.body.stream,
    messageCount: req.body.messages?.length,
    hasAuth: !!req.headers.authorization,
    requestId: Date.now().toString()
  });

  const body = req.body as ChatCompletionRequest;
  if (!body.messages?.length) {
    return res.status(400).json({error: 'Messages array is required and must not be empty'});
  }

  const lastMessage = body.messages[body.messages.length - 1];
  if (lastMessage.role !== 'user') {
    return res.status(400).json({error: 'Last message must be from user'});
  }

  const {tokenBudget, maxBadAttempts} = getTokenBudgetAndMaxAttempts(
    body.reasoning_effort,
    body.max_completion_tokens
  );

  const requestId = Date.now().toString();
  const created = Math.floor(Date.now() / 1000);
  const context: TrackerContext = {
    tokenTracker: new TokenTracker(),
    actionTracker: new ActionTracker()
  };
  // Per-request streaming state used to pace SSE output from the action listener below
  const streamingState: StreamingState = {
    currentlyStreaming: false,
    currentGenerator: null,
    remainingContent: '',
    isEmitting: false,
    queue: [],
    processingQueue: false
  };
  if (body.stream) {
    res.setHeader('Content-Type', 'text/event-stream');
    res.setHeader('Cache-Control', 'no-cache');
    res.setHeader('Connection', 'keep-alive');

    // Send initial chunk with opening think tag
    const initialChunk: ChatCompletionChunk = {
      id: requestId,
      object: 'chat.completion.chunk',
      created,
      model: body.model,
      system_fingerprint: 'fp_' + requestId,
      choices: [{
        index: 0,
        delta: {role: 'assistant', content: '<think>'},
        logprobs: null,
        finish_reason: null
      }]
    };
    res.write(`data: ${JSON.stringify(initialChunk)}\n\n`);

    // Set up progress listener with cleanup
    const actionListener = async (step: StepAction) => {
      // Add content to queue for both thinking steps and final answer
      if (step.think) {
        const content = step.think;
        await new Promise<void>(resolve => {
          streamingState.queue.push({
            content,
            resolve
          });
          // Kick off queue processing; this is a no-op if the queue is already being drained
          processQueue(streamingState, res, requestId, created, body.model);
        });
      }
    };
    context.actionTracker.on('action', actionListener);

    // Clean up listeners and streaming state when the response finishes
    res.on('finish', () => {
      streamingState.currentlyStreaming = false;
      streamingState.currentGenerator = null;
      streamingState.remainingContent = '';
      context.actionTracker.removeListener('action', actionListener);
    });
  }
  try {
    const {result: finalStep} = await getResponse(lastMessage.content as string, tokenBudget, maxBadAttempts, context, body.messages);
    const usage = context.tokenTracker.getTotalUsageSnakeCase();

    if (body.stream) {
      // Complete any ongoing streaming before sending final answer
      await completeCurrentStreaming(streamingState, res, requestId, created, body.model);
      const finalAnswer = buildMdFromAnswer(finalStep as AnswerAction);

      // Send closing think tag
      const closeThinkChunk: ChatCompletionChunk = {
        id: requestId,
        object: 'chat.completion.chunk',
        created,
        model: body.model,
        system_fingerprint: 'fp_' + requestId,
        choices: [{
          index: 0,
          delta: {content: `</think>\n\n${finalAnswer}`},
          logprobs: null,
          finish_reason: null
        }]
      };
      res.write(`data: ${JSON.stringify(closeThinkChunk)}\n\n`);

      // After the content is fully streamed, send the final chunk with finish_reason and usage
      const finalChunk: ChatCompletionChunk = {
        id: requestId,
        object: 'chat.completion.chunk',
        created,
        model: body.model,
        system_fingerprint: 'fp_' + requestId,
        choices: [{
          index: 0,
          delta: {content: ''},
          logprobs: null,
          finish_reason: 'stop'
        }],
        usage
      };
      res.write(`data: ${JSON.stringify(finalChunk)}\n\n`);
      res.end();
    } else {
      const response: ChatCompletionResponse = {
        id: requestId,
        object: 'chat.completion',
        created,
        model: body.model,
        system_fingerprint: 'fp_' + requestId,
        choices: [{
          index: 0,
          message: {
            role: 'assistant',
            content: finalStep.action === 'answer' ? buildMdFromAnswer(finalStep) : finalStep.think
          },
          logprobs: null,
          finish_reason: 'stop'
        }],
        usage
      };

      // Log final response (excluding full content for brevity)
      console.log('[chat/completions] Response:', {
        id: response.id,
        status: 200,
        contentLength: response.choices[0].message.content.length,
        usage: response.usage
      });

      res.json(response);
    }
  } catch (error: any) {
    // Log error details
    console.error('[chat/completions] Error:', {
      message: error?.message || 'An error occurred',
      stack: error?.stack,
      type: error?.constructor?.name,
      requestId
    });

    // Surface the error message to the client in the response body
    const errorMessage = error?.message || 'An error occurred';

    // Clean up event listeners
    context.actionTracker.removeAllListeners('action');

    // Get token usage in OpenAI API format
    const usage = context.tokenTracker.getTotalUsageSnakeCase();

    if (body.stream && res.headersSent) {
      // For streaming responses that have already started, send the error as a chunk.
      // First send a closing think tag in case we are in the middle of thinking.
      const closeThinkChunk: ChatCompletionChunk = {
        id: requestId,
        object: 'chat.completion.chunk',
        created,
        model: body.model,
        system_fingerprint: 'fp_' + requestId,
        choices: [{
          index: 0,
          delta: {content: '</think>'},
          logprobs: null,
          finish_reason: null
        }],
        usage
      };
      res.write(`data: ${JSON.stringify(closeThinkChunk)}\n\n`);

      const errorChunk: ChatCompletionChunk = {
        id: requestId,
        object: 'chat.completion.chunk',
        created,
        model: body.model,
        system_fingerprint: 'fp_' + requestId,
        choices: [{
          index: 0,
          delta: {content: errorMessage},
          logprobs: null,
          finish_reason: 'stop'
        }],
        usage
      };
      res.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
      res.end();
    } else {
      // For non-streaming or not-yet-started responses, send the error as JSON
      const response: ChatCompletionResponse = {
        id: requestId,
        object: 'chat.completion',
        created,
        model: body.model,
        system_fingerprint: 'fp_' + requestId,
        choices: [{
          index: 0,
          message: {
            role: 'assistant',
            content: `Error: ${errorMessage}`
          },
          logprobs: null,
          finish_reason: 'stop'
        }],
        usage
      };
      res.json(response);
    }
  }
}) as RequestHandler);
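// Illustrative end-to-end request (assumes port 3000, configured outside this file, and no --secret
// flag; the payload follows the OpenAI chat completions schema):
//   curl http://localhost:3000/v1/chat/completions \
//     -H "Content-Type: application/json" \
//     -d '{"model":"jina-deepsearch-v1","stream":true,"messages":[{"role":"user","content":"what is deep research?"}]}'
// The streamed response opens with a <think> block of reasoning steps, then the final answer, and
// ends with a chunk carrying finish_reason "stop" plus token usage.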
export default app;