Spaces:

fullstuckdev
/

ai-document-assistant

Sleeping

App Files Files Community

ai-document-assistant / src /app /api /qa /route.ts

fullstuckdev

Upload 25 files

2341446 verified 5 months ago

raw

history blame

2.2 kB

	import { NextRequest, NextResponse } from 'next/server'
	import { HfInference } from '@huggingface/inference'

	// Hugging Face Inference client, created once when this module is loaded.
	// The access token is read from the HUGGINGFACE_API_KEY environment variable.
	const hf = new HfInference(process.env['HUGGINGFACE_API_KEY'])

	/** Shape of the JSON payload the QA endpoint expects. */
	interface RequestBody {
	  /** The question text submitted by the caller. */
	  query: string
	  /** Documents to answer from; only each entry's `text` field is declared. */
	  documents: { text: string }[]
	}

	/**
	 * Type guard for the `documents` field of the request payload.
	 * Accepts only an array in which every element is a non-null object
	 * carrying a string `text` property.
	 */
	function isDocumentList(value: unknown): value is RequestBody['documents'] {
	  return (
	    Array.isArray(value) &&
	    value.every(
	      (doc: unknown) =>
	        typeof doc === 'object' &&
	        doc !== null &&
	        typeof (doc as { text?: unknown }).text === 'string'
	    )
	  )
	}

	/**
	 * Handles POST requests to the QA route: answers `query` using the text of
	 * the supplied `documents` via a Hugging Face text-generation call.
	 *
	 * Responses:
	 * - `{ answer }` with the generated text, a fixed hint when the query is blank,
	 *   or a fixed fallback when the model returns no usable text;
	 * - `{ error }` with status 400 when the body is not valid JSON or does not
	 *   match {@link RequestBody};
	 * - `{ error }` with status 500 for any other failure (logged via `console.error`).
	 *
	 * @param request Incoming request whose JSON body should match {@link RequestBody}.
	 * @returns A JSON response as described above.
	 */
	export async function POST(request: NextRequest) {
	  try {
	    // Parse as `unknown` and narrow below: the payload is untrusted client input.
	    let body: unknown
	    try {
	      body = await request.json()
	    } catch {
	      return NextResponse.json(
	        { error: 'Request body must be valid JSON' },
	        { status: 400 }
	      )
	    }

	    const payload =
	      typeof body === 'object' && body !== null
	        ? (body as Record<string, unknown>)
	        : {}
	    const { query, documents } = payload

	    // A missing or non-string query is a client error, not a server failure.
	    if (typeof query !== 'string') {
	      return NextResponse.json(
	        { error: 'Request body must include a string "query"' },
	        { status: 400 }
	      )
	    }

	    // If query is empty, return early (checked before `documents` so a blank
	    // question gets the friendly hint even when no documents were sent).
	    if (!query.trim()) {
	      return NextResponse.json({
	        answer: 'Please enter a question.'
	      })
	    }

	    if (!isDocumentList(documents)) {
	      return NextResponse.json(
	        { error: 'Request body must include "documents" as an array of { text: string }' },
	        { status: 400 }
	      )
	    }

	    // Clean and format the document texts
	    const context = documents
	      .map(doc => {
	        // Collapse every run of whitespace to a single space, then trim the ends
	        return doc.text
	          .replace(/\s+/g, ' ')
	          .trim()
	      })
	      .filter(text => text.length > 0) // Drop documents that are empty after cleaning
	      .join('\n\n') // Add clear separation between documents

	    // Build the instruction prompt.
	    // NOTE(review): the [INST] ... [/INST] wrapper looks like the Llama-2-style
	    // format; the Llama 3.1 based model below may expect a different chat
	    // template — confirm against the model card.
	    const prompt = `[INST] You are a helpful AI assistant. Please answer the following question based on the provided context. If the answer cannot be found in the context, say "I cannot find the answer in the provided documents." Answer in the same language as the question. Do not include any instruction tags in your response.

	Context:
	${context}

	Question:
	${query} [/INST]`

	    const response = await hf.textGeneration({
	      model: 'nvidia/Llama-3.1-Nemotron-70B-Instruct-HF',
	      inputs: prompt,
	      parameters: {
	        max_new_tokens: 512,
	        temperature: 0.7,
	        top_p: 0.95,
	        repetition_penalty: 1.15,
	        return_full_text: false // Only return the generated response
	      }
	    })

	    // `||` (not `??`) is deliberate: an empty string after cleanup must also
	    // fall back to the failure message.
	    const answer = response.generated_text?.trim()
	      .replace(/\[INST\]/g, '') // Remove [INST] tags
	      .replace(/\[\/INST\]/g, '') // Remove [/INST] tags
	      .trim() || 'Failed to generate an answer'

	    return NextResponse.json({ answer })
	  } catch (error) {
	    console.error('Error processing QA request:', error)
	    return NextResponse.json(
	      { error: 'Failed to process request' },
	      { status: 500 }
	    )
	  }
	}