/** * * Copyright 2023-2025 InspectorRAGet Team * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * **/ import { countBy, isNumber } from 'lodash'; import { Metric, MetricValue } from '@/src/types'; export const MetricDefinitions = { coherence: 'The response is coherent, natural, and not dismissive.', naturalness: 'The response is coherent, natural, and not dismissive.', specificity: 'The response provides appropriate amount of useful information.', appropriateness: 'The response provides appropriate amount of useful information.', faithfulness: 'The response is faithful and grounded on the context.', feedback: "Annotator's comments about quality of response, potential issues etc.", }; export const AgreementLevels = { ABSOLUTE_AGREEMENT: 3, HIGH_AGREEMENT: 2, LOW_AGREEMENT: 1, NO_AGREEMENT: 0, }; export const AgreementLevelDefinitions = { Absolute: 'All annotators selected a same value for a given metric.', High: 'Majority of annotators selected a same value for a given metric and the most common value and the 2nd most common value were less that 2 units apart.', Low: 'Majority of annotators selected a same value for a given metric.', No: 'Majority of annotators selected different values for a given metric.', }; export function extractMetricDisplayValue( value: string | number, references?: MetricValue[], ): string { // If value is of type "string" if (typeof value === 'string') { // Step 1: Check if references are provided to convert "string" value to "numeric" value if (references) { // Step 1.a: Find appropriate reference by comparing "string" values const reference = references.find((entry) => entry.value === value); // Step 1.b: If numeric value exists in reference, then return it if (reference && reference.displayValue) { return reference.displayValue; } else { return value; } } else { return value; } } else { // Value is of type "number" return parseFloat(value.toFixed(2)).toString(); } } export function extractMetricDisplayName(metric: Metric): string { return metric.displayName ? metric.displayName : metric.name.charAt(0).toUpperCase() + metric.name.slice(1).toLowerCase(); } /** * Converts numeric value to metric value using references in case of 'categorical' metrics * @param value numeric value to convert * @param references reference metric values * @returns metric value */ export function castToValue( value: number, references?: MetricValue[], ): string | number { // Step 1: Check if references are provided to convert "numeric" value to "string" value if (references) { // Step 1.a: Find appropriate reference by comparing "string" values const reference = references.find((entry) => entry.numericValue === value); // Step 1.b: If value exists in reference, then return it if (reference && reference.value) { return reference.value; } else { return value; } } // Default return return value; } export function castToNumber( value: string | number, references?: MetricValue[], key?: 'value' | 'displayValue', ): number { // If value is of type "string" if (typeof value === 'string') { // Step 1: Check if references are provided to convert "string" value to "numeric" value if (references) { // Step 1.a: Find appropriate reference by comparing "string" values const reference = references.find((entry) => key ? entry[key] === value : entry.value === value, ); // Step 1.b: If numeric value exists in reference, then return it if ( reference && reference.hasOwnProperty('numericValue') && typeof reference.numericValue === 'number' ) { return reference.numericValue; } else { return parseFloat(value); } } // Step 2: Cast to int, if references are absent else if (value === 'N/A' || value === '') { return 0; } else { return parseFloat(value); } } // Value is of type "number" else { return value; } } /** * Compute mean value * @param metric metric under consideration * @param scores distribution of values * @returns */ function computeMean( metric: Metric, scores: string[] | number[], ): { level: number; value: number | string } { // Step 1: Create counter const counter: { [key: string]: number } = countBy(scores); // Step 2: Sort counter values const sorted_counter = Object.entries(counter); sorted_counter.sort((x, y) => { return y[1] - x[1]; }); // Step 3: Number of unique values, most common value and its count const numberOfUniqueValues = sorted_counter.length; const mostCommonValueCount = sorted_counter[0][1]; // Step 4: Calculate mean let sum: number = 0; for (const [value, count] of Object.entries(counter)) { sum += (typeof value === 'string' ? castToNumber(value, metric.values) : value) * count; } const mean = Math.round((sum / scores.length + Number.EPSILON) * 100) / 100; // Step 5: Common patterns // Step 5.a: Absolute agreement if (mostCommonValueCount === scores.length) return { level: AgreementLevels.ABSOLUTE_AGREEMENT, value: mean, }; // Step 5.b: Absolute disagreement/No agreement if (numberOfUniqueValues === scores.length) return { level: AgreementLevels.NO_AGREEMENT, value: mean, }; // Step 6: Default return return { level: AgreementLevels.HIGH_AGREEMENT, value: mean, }; } /** * Compute median value * @param metric metric under consideration * @param counter distribution of values * @returns */ function computeMedian( metric: Metric, scores: string[] | number[], ): { level: number; value: number | string } { // Step 1: Create counter const counter: { [key: string]: number } = countBy(scores); // Step 2: Sort counter values const sorted_counter = Object.entries(counter); sorted_counter.sort((x, y) => { return y[1] - x[1]; }); // Step 3: Number of unique values, most common value and its count const numberOfUniqueValues = sorted_counter.length; const mostCommonValueCount = sorted_counter[0][1]; // Step 4: Cast score to numbers const numericScores = scores.map((score) => typeof score === 'string' ? castToNumber(score, metric.values) : score, ); // Step 5: Sort the numeric scores const sortedNumericScores = numericScores.toSorted((a, b) => a - b); // Step 6: Calculate median const median = sortedNumericScores.length % 2 == 0 ? sortedNumericScores[sortedNumericScores.length / 2 - 1] : sortedNumericScores[(sortedNumericScores.length + 1) / 2 - 1]; // Step 7: Common patterns // Step 7.a: Absolute agreement if (mostCommonValueCount === scores.length) return { level: AgreementLevels.ABSOLUTE_AGREEMENT, value: castToValue(median, metric.values), }; // Step 7.b: Absolute disagreement/No agreement if (numberOfUniqueValues === scores.length) return { level: AgreementLevels.NO_AGREEMENT, value: castToValue(median, metric.values), }; // Step 8: Default return return { level: AgreementLevels.HIGH_AGREEMENT, value: castToValue(median, metric.values), }; } /** * Compute majority value * @param metric metric under consideration * @param counter distribution of values * @param numberOfAnnotators number of annotators * @returns */ function computeMajority( metric: Metric, counter: { [key: string]: number }, numberOfAnnotators: number, ): { level: number; value: number | string } { // Step 0: Sort counter values const sorted_counter = Object.entries(counter); sorted_counter.sort((x, y) => { return y[1] - x[1]; }); // Step 1: Number of unique values, most common value and its count const numberOfUniqueValues = sorted_counter.length; const mostCommonValue = sorted_counter[0][0]; const mostCommonValueCount = sorted_counter[0][1]; // Step 2: Common patterns // Step 2.a: Absolute agreement if (mostCommonValueCount === numberOfAnnotators) return { level: AgreementLevels.ABSOLUTE_AGREEMENT, value: mostCommonValue, }; // Step 2.b: Absolute disagreement/No agreement if (numberOfUniqueValues === numberOfAnnotators) return { level: AgreementLevels.NO_AGREEMENT, value: 'Indeterminate', }; // Step 3: Calculate agreement levels // Step 3.a: No agreement // * More than half annotators selected different values // OR // * Less than half annotators selected same value and Top-2 most common values are greater than 1 unit apart if ( numberOfUniqueValues > Math.ceil(numberOfAnnotators / 2) || (mostCommonValueCount < Math.ceil(numberOfAnnotators / 2) && numberOfUniqueValues === Math.ceil(numberOfAnnotators / 2) && Math.abs( castToNumber(mostCommonValue, metric.values) - castToNumber(sorted_counter[1][0], metric.values), ) > 1) ) { return { level: AgreementLevels.NO_AGREEMENT, value: 'Indeterminate', }; } // Step 3.b: High agreement // * Maximum two unique values and those are less than 2 unit apart if ( numberOfUniqueValues == 2 && Math.abs( castToNumber(mostCommonValue, metric.values) - castToNumber(sorted_counter[1][0], metric.values), ) < 2 ) { return { level: AgreementLevels.HIGH_AGREEMENT, value: mostCommonValue, }; } // Step 3.c: Default return return { level: AgreementLevels.LOW_AGREEMENT, value: mostCommonValue, }; } export function calculateAggregateValue( metric: Metric, entries: { [key: string]: any }, ) { if (metric.author === 'algorithm') { if (metric.aggregator) { let scores: string[] | number[] = Object.values(entries).map( (entry) => entry.value, ); if (metric.aggregator === 'average' || metric.aggregator === 'mean') { return computeMean(metric, scores); } else if (metric.aggregator === 'median') { return computeMedian(metric, scores); } else { return computeMajority(metric, countBy(scores), scores.length); } } else { return { level: AgreementLevels.NO_AGREEMENT, value: undefined, }; } } else { if (metric.aggregator) { let scores: string[] | number[] = Object.values(entries).map( (entry) => entry.value, ); if (metric.aggregator === 'average' || metric.aggregator === 'mean') { return computeMean(metric, scores); } else if (metric.aggregator === 'median') { return computeMedian(metric, scores); } else { return computeMajority(metric, countBy(scores), scores.length); } } else { return { level: AgreementLevels.NO_AGREEMENT, value: undefined, }; } } } export function mergeAgreementObjects({ source, target, }: { source: object; target: object; }) { if (source) { Object.entries(source).forEach(([group, entry]) => { for (const [key, value] of Object.entries(entry)) { if (target.hasOwnProperty(group)) { if (target[group].hasOwnProperty(key)) { target[group][key] += value; } else { target[group][key] = value; } } else { target[group] = { [key]: value }; } } }); } } export function bin(value: number | string, metric: Metric, n?: number) { if (typeof value === 'number' && metric.type === 'numerical') { if (metric.range && metric.range.length == 3) { for ( let idx: number = 0; metric.range[0] + idx * metric.range[2] + metric.range[2] <= metric.range[1]; idx++ ) { const start: number = parseFloat( (metric.range[0] + idx * metric.range[2]).toFixed(2), ); const end: number = parseFloat( (metric.range[0] + idx * metric.range[2] + metric.range[2]).toFixed( 2, ), ); if (start <= value && value <= end) { return `${start}-${end}`; } } } } return value; } export function compareMetricAggregatedValues( a: { key: string | number; value: number }, b: { key: string | number; value: number }, metric: Metric, ): number { if (metric.aggregator && metric.aggregator === 'average') { if (typeof a.key === 'number' && typeof b.key === 'number') { return a.key - b.key; } else if (typeof a.key === 'string' && typeof b.key === 'string') { return parseFloat(a.key) - parseFloat(b.key); } else { return 0; } } else if (metric.aggregator && metric.aggregator === 'majority') { if (typeof a.key === 'string' && typeof b.key === 'string') { if (a.key === 'Indeterminate' || b.key === 'Indeterminate') { if (b.key === 'Indeterminate' && a.key != 'Indeterminate') { return 1; } else if (a.key === 'Indeterminate' && b.key != 'Indeterminate') { return -1; } return 0; } const aValue = metric.values?.find((entry) => entry.value == a.key); const bValue = metric.values?.find((entry) => entry.value == b.key); if (aValue && bValue) { // Do direct value comparison in numerical values exists if ( (aValue.numericValue != undefined || aValue.numericValue != null) && isNumber(aValue.numericValue) && (bValue.numericValue != undefined || bValue.numericValue != null) && isNumber(bValue.numericValue) ) { return aValue.numericValue - bValue.numericValue; } // For numerical values, do direct value comparison else if (typeof a.value === 'number' && typeof b.value === 'number') { return a.value - b.value; } else { return a.key.localeCompare(b.key); } } // Do string comparison with non-ASCII support return a.key.localeCompare(b.key); } // Default: Preserve same order return 0; } return a.key > b.key ? 1 : -1; }