File size: 1,791 Bytes
2341446
 
 
a869b9a
2341446
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a869b9a
 
2341446
 
a869b9a
2341446
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a869b9a
2341446
 
 
 
a869b9a
 
2341446
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import { NextRequest, NextResponse } from 'next/server'
import { HfInference } from '@huggingface/inference'

// Hugging Face Inference client, created once when this module is loaded.
// It is constructed with the value of the API_TOKEN environment variable.
const apiToken = process.env.API_TOKEN
const hf = new HfInference(apiToken)

/**
 * JSON payload the POST handler reads from the incoming request.
 */
interface RequestBody {
  /** The question that is placed into the prompt sent to the model. */
  query: string
  /** Documents whose `text` values are joined to form the prompt context. */
  documents: { text: string }[]
}

/** Returns true when a parsed JSON value is a non-null object whose properties can be read. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null
}

/** Returns true when `value` is an array in which every entry has a string `text` property. */
function isDocumentList(value: unknown): value is RequestBody['documents'] {
  return (
    Array.isArray(value) &&
    value.every(doc => isRecord(doc) && typeof doc.text === 'string')
  )
}

/** Builds the 400 response returned for request bodies that cannot be used. */
function invalidRequest(message: string) {
  return NextResponse.json({ error: message }, { status: 400 })
}

/**
 * Answers a question from the supplied documents using a hosted text-generation model.
 *
 * Expects a JSON body with a string `query` and a `documents` array of
 * `{ text: string }` objects. The document texts are whitespace-normalised,
 * joined into a context section, and sent together with the question in a
 * single instruction prompt.
 *
 * @param request - Incoming request whose JSON body carries the question and documents.
 * @returns A JSON response:
 *   - `{ answer }` (status 200) with the model's answer, a prompt to enter a
 *     question when `query` is blank, or a fallback message when the model
 *     returned no text;
 *   - `{ error }` with status 400 when the body is not usable;
 *   - `{ error }` with status 500 when anything else fails.
 */
export async function POST(request: NextRequest) {
  try {
    // Malformed input is the caller's mistake, so it is reported as 400
    // instead of falling through to the generic 500 handler below.
    let body: unknown
    try {
      body = await request.json()
    } catch {
      return invalidRequest('Request body must be valid JSON')
    }

    const query = isRecord(body) ? body.query : undefined
    const documents = isRecord(body) ? body.documents : undefined

    if (typeof query !== 'string') {
      return invalidRequest('"query" must be a string')
    }

    // A blank question is answered before `documents` is inspected, so a
    // request carrying only an empty query still gets this friendly reply.
    if (!query.trim()) {
      return NextResponse.json({ 
        answer: 'Please enter a question.' 
      })
    }

    if (!isDocumentList(documents)) {
      return invalidRequest('"documents" must be an array of objects with a string "text"')
    }

    // Collapse whitespace runs inside each document, drop documents that end
    // up empty, and separate the remaining ones with a blank line.
    const context = documents
      .map(doc => doc.text.replace(/\s+/g, ' ').trim())
      .filter(text => text.length > 0)
      .join('\n\n')

    const prompt = `[INST] You are a helpful AI assistant. Please answer the following question based on the provided context. If the answer cannot be found in the context, say "I cannot find the answer in the provided documents." Answer in the same language as the question. Do not include any instruction tags in your response.

Context:
${context}

Question:
${query} [/INST]`

    const response = await hf.textGeneration({
      model: 'nvidia/Llama-3.1-Nemotron-70B-Instruct-HF',
      inputs: prompt,
      parameters: {
        max_new_tokens: 512,
        temperature: 0.7,
        top_p: 0.95,
        repetition_penalty: 1.15,
        return_full_text: false
      }
    })

    // Strip any instruction tags the model echoed back. `||` (not `??`) is
    // intentional: an empty string after cleanup must also use the fallback.
    const answer = response.generated_text?.trim()
      .replace(/\[INST\]/g, '')
      .replace(/\[\/INST\]/g, '')
      .trim() || 'Failed to generate an answer'

    return NextResponse.json({ answer })
  } catch (error) {
    console.error('Error processing QA request:', error)
    return NextResponse.json(
      { error: 'Failed to process request' },
      { status: 500 }
    )
  }
}