File size: 1,986 Bytes
0bcc252
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import { evaluateAnswer } from '../evaluator';
import { TokenTracker } from '../../utils/token-tracker';
import { LLMProvider } from '../../config';

/**
 * Suite for `evaluateAnswer`, repeated once for every provider in `providers`.
 *
 * Every test runs against a shallow copy of `process.env`; the nested
 * per-provider suites set `LLM_PROVIDER` on that copy, and the original
 * environment object is put back after each test.
 *
 * NOTE(review): `evaluateAnswer` is imported statically at the top of this
 * file, so `jest.resetModules()` does not rebind that import. If the provider
 * is resolved from the environment when the config module is first loaded,
 * both provider suites may end up exercising the same provider — confirm
 * against `../../config`.
 */
describe('evaluateAnswer', () => {
  const providers: Array<LLMProvider> = ['openai', 'gemini'];
  // Keep a handle on the real environment object so it can be restored.
  const envSnapshot = process.env;

  beforeEach(() => {
    jest.resetModules();
    // Shallow copy: mutations made during a test never reach the real env object.
    process.env = { ...envSnapshot };
  });

  afterEach(() => {
    process.env = envSnapshot;
  });

  for (const provider of providers) {
    describe(`with ${provider} provider`, () => {
      beforeEach(() => {
        // Runs after the outer beforeEach, so this writes to the copied env.
        process.env.LLM_PROVIDER = provider;
      });

      // A single 'definitive' check: the response must carry a verdict, a
      // rationale, and be tagged with the evaluation type that was requested.
      it('should evaluate answer definitiveness', async () => {
        const question = 'What is TypeScript?';
        const tracker = new TokenTracker();

        const { response: verdict } = await evaluateAnswer(
          question,
          {
            action: 'answer',
            think: 'Providing a clear definition of TypeScript',
            answer: 'TypeScript is a strongly typed programming language that builds on JavaScript.',
            references: []
          },
          ['definitive'],
          tracker
        );

        expect(verdict).toHaveProperty('pass');
        expect(verdict).toHaveProperty('think');
        expect(verdict.type).toBe('definitive');
      });

      // The question asks for three items while the answer names only one;
      // the evaluator is expected to report that multiple items were expected.
      it('should evaluate answer plurality', async () => {
        const question = 'List three programming languages.';
        const tracker = new TokenTracker();

        const { response: verdict } = await evaluateAnswer(
          question,
          {
            action: 'answer',
            think: 'Providing an example of a programming language',
            answer: 'Python is a programming language.',
            references: []
          },
          ['plurality'],
          tracker
        );

        expect(verdict).toHaveProperty('pass');
        expect(verdict).toHaveProperty('think');
        expect(verdict.type).toBe('plurality');
        expect(verdict.plurality_analysis?.expects_multiple).toBe(true);
      });
    });
  }
});