Spaces:
Runtime error
Runtime error
/* | |
Copyright (c) 2008 - Chris Buckley. | |
Permission is granted for use and modification of this file for | |
research, non-commercial purposes. | |
*/ | |
/* Copyright 2008 Chris Buckley */ | |
/* Takes the top docs and judged prefs for a query, and returns a | |
results_prefs_info object giving the preferences from judged_prefs that | |
are observed in the retrieved docs. | |
Three part process here: | |
1. Add a docid_rank (0..num_judged) to the judgment prefs that for | |
every docno in any judgment pref gives the relative rank at which | |
it occurs in the results (a number 0 to num_judged_ret-1). If the | |
docno does not occur in the results, it is given a (consistent) | |
value from num_judged_ret to num_judged-1. These docid_ranks are | |
used to represent the docs within preferences. | |
2. Go through the judgements again and represent all preferences per judgment | |
group (JG). Two options for representing the preferences within a JG: | |
A. If there is only 1 judgment sub-group (JSG), then the preferences are | |
given by a set of equivalence classes (ECs) where all docs with the | |
same rel_level in the input are in the same equivalence class. Thus all | |
docs in a higher EC.rel_level are preferred to the docs in a lower | |
EC.rel_level. Since there is only 1 JSG, there are complete preferences | |
between any pair of docs not in the same EC. | |
B. If there are multiple JSGs, then the JG preference relation is assumed to | |
be partial. Preferences are given by a preference array, where array[i][j] | |
is 1 iff doc with docid_rank i is preferred to doc with docid_rank j | |
in this JG. Transitive preferences that aren't explicitly in the | |
judgments are added - this happens when (doc A > doc B in JSG jsg1) and | |
(doc B > doc C in JSG jsg2). Note that a JG represents a single information | |
need set of judgments, and is required to be consistent (inconsistent | |
preferences are represented in different JGs). Preference array is of | |
size num_judged * num_judged. | |
3. Go through the preference in each JG, and count num_fulfilled and | |
num_possible preferences in categories "retrieved", "implied" and | |
"not_retrieved" where | |
retrieved means both A and B were retrieved in a pref A > B | |
implied means exactly one of A or B was retrieved. | |
not_retrieved means neither was retrieved. | |
Different evaluation measures may want to do different things with these | |
categories. Counting preferences is accomplished in the two preference | |
representations by: | |
A. When comparing EC1 and EC2 with EC1.rel_level > EC2.rel_level, compare | |
each (docid) rank1 in EC1->ranks with rank2 in EC2->ranks. | |
If rank1 < num_judged_ret then it was retrieved, similarly for rank2. | |
If both retrieved, then if rank1 < rank2 the preference is fulfilled | |
otherwise it wasn't. | |
If rank1 retrieved and rank2 was not, then implied pref fulfilled. | |
If rank1 not retrieved and rank2 was, then preference was not fulfilled. | |
If both not retrieved, then that count is incremented. | |
    B. Given preference array PA there are five areas of importance, divided
       by lines i == NJR (where NJR is num_judged_ret), j == NJR, and i == j
       (rows are i, columns are j):

                          NJR
          \................|...
          .\...............|...
          ..\.......A1.....|.A3
          ...\.............|...
          ....\............|...
                  ...
          ...A2..........\.|...
          ................\|...
      NJR -----------------|---
          .................|...
          ..........A4.....|.A5
          .................|...

       Area A1 is preferences fulfilled, both retrieved.
       Area A2 is preferences not fulfilled, both retrieved.
       Area A3 is preference implied fulfilled (i retrieved, j not)
       Area A4 is preference implied not fulfilled (i not retrieved, j retrieved)
       Area A5 is both i and j not retrieved.
       Simply count the marks (PA[i][j] == 1) in each appropriate area.
As well as storing counts within each JG, a counts array for the | |
entire pref_results is constructed. Counts_array CA is exactly the same | |
format and size as the preference arrays, except CA[i][j] is the sum | |
of the conceptual PA[i][j] over all JGs. This allows counts of | |
confirmations (CA[i][j] > 1) and conflicts (CA[i][j] and CA[j][i] both | |
non-zero). | |
Not mentioned in steps 2 and 3 above since it adds even more confusion, | |
is that the relevant (rel_level > 0.0) and nonrelevant docs are also | |
tracked. Different measures may deal with those preferences differently. | |
This procedure may be called repeatedly for a given topic - returned | |
values are cached until the qid changes. | |
    The results format must be "trec_results" and the rel_info format must be
    either "prefs" or "qrels_prefs".
UNDEF returned if error, 0 if used cache values, 1 if new values. | |
*/ | |
/* One retrieved document as handled inside this file: its docno, its
   similarity score, and a rank value that form_prefs_and_ranks rewrites
   several times while it maps results onto judged documents. */
typedef struct {
    char *docno;   /* document identifier string */
    float sim;     /* similarity score copied from the text results */
    long rank;     /* working rank; -1 marks a doc with no judgment */
} DOCNO_RESULTS;
/* One preference judgment line together with the internal docid rank
   assigned to its document by form_prefs_and_ranks. */
typedef struct {
    char *jg;          /* judgment group id */
    char *jsg;         /* judgment sub-group id */
    float rel_level;   /* relevance level given to docno in this jsg */
    char *docno;       /* document identifier string */
    long rank;         /* internal docid rank (index into the pref arrays) */
} PREFS_AND_RANKS;
static int form_jg_ec (const PREFS_AND_RANKS *prefs, const long num_prefs, | |
long *rank_pool_ptr, JG *jg, | |
RESULTS_PREFS *results_prefs); | |
static int add_ec_pref_to_jg (JG *jg, RESULTS_PREFS *results_prefs); | |
static int form_jg_pa (const PREFS_AND_RANKS *prefs, const long num_prefs, | |
JG *jg, RESULTS_PREFS *results_prefs); | |
static int add_transitives (PREFS_ARRAY *pa); | |
static int add_pa_pref_to_jg (JG *jg, RESULTS_PREFS *results_prefs); | |
static int mult_and_check_change (const PREFS_ARRAY *a1, const PREFS_ARRAY *a2, | |
PREFS_ARRAY *res); | |
static int form_prefs_and_ranks (const EPI*epi, | |
const TEXT_RESULTS_INFO *text_results_info, | |
const TEXT_PREFS_INFO *trec_prefs, | |
PREFS_AND_RANKS *prefs_and_ranks, | |
long *num_judged, long *num_judged_ret); | |
static void init_prefs_array (PREFS_ARRAY *pa); | |
static void init_counts_array (COUNTS_ARRAY *ca); | |
static int comp_prefs_and_ranks_jg_rel_level (); | |
static int comp_prefs_and_ranks_docno(); | |
static int comp_sim_docno (), comp_docno (), comp_results_inc_rank (); | |
static void debug_print_ec (EC *ec),debug_print_prefs_array (PREFS_ARRAY *pa), | |
debug_print_counts_array (COUNTS_ARRAY *ca), debug_print_jg (JG *jg), | |
debug_print_results_prefs (RESULTS_PREFS *rp); | |
static void debug_print_docno_results (DOCNO_RESULTS *dr, long num_results, | |
char *location); | |
static void debug_print_prefs_and_ranks (PREFS_AND_RANKS *par, long num_prefs, | |
char *location); | |
/* Intermediate Temp storage. Not malloc'd and freed every query just | |
for memory management efficiency (avoids fragmentations and | |
thus effects on caching) */ | |
/* Temp Structure for mapping results docno to results rank */ | |
/* Current cached query */ | |
static char *current_query = "no query"; | |
static long max_current_query = 0; | |
/* Space reserved for cached returned values */ | |
static long num_judged_ret; | |
static long num_judged; | |
static long num_jgs; | |
static JG *jgs; | |
static long max_num_jgs = 0; | |
static long *rank_pool; | |
static long max_rank_pool = 0; | |
static EC *ec_pool; | |
static long max_ec_pool = 0; | |
static unsigned short *ca_pool; | |
static long max_ca_pool = 0; | |
static unsigned short **ca_ptr_pool; | |
static long max_ca_ptr_pool = 0; | |
static unsigned char *pa_pool; | |
static long max_pa_pool = 0; | |
static unsigned char **pa_ptr_pool; | |
static long max_pa_ptr_pool = 0; | |
static float *rel_pool; | |
static long max_rel_pool = 0; | |
/* Space reserved for intermediate values */ | |
static PREFS_AND_RANKS *prefs_and_ranks; | |
static long max_prefs_and_ranks = 0; | |
static DOCNO_RESULTS *docno_results; | |
static long max_docno_results = 0; | |
static unsigned char *temp_pa_pool; | |
static long max_temp_pa_pool; | |
static unsigned char **temp_pa_ptr_pool; | |
static long max_temp_pa_ptr_pool; | |
static long saved_num_judged = 0; | |
int | |
form_prefs_counts (const EPI *epi, const REL_INFO *rel_info, | |
const RESULTS *results, RESULTS_PREFS *results_prefs) | |
{ | |
long i; | |
char *jgid, *jsgid; | |
long jg_ind; | |
long num_jgs_with_subgroups; | |
float rel_level; | |
EC * ec_pool_ptr; | |
float *rel_pool_ptr; | |
long *rank_pool_ptr; | |
unsigned char *pa_pool_ptr; | |
unsigned char **pa_ptr_pool_ptr; | |
long start_jg; | |
long num_rel_level; | |
long num_sub_group; | |
TEXT_RESULTS_INFO *text_results_info; | |
TEXT_PREFS_INFO *trec_prefs; | |
if (epi->debug_level >= 3) | |
printf ("Debug: Form_prefs starting query '%s'\n", results->qid); | |
if (0 == strcmp (current_query, results->qid)) { | |
/* Have done this query already. Return cached values */ | |
results_prefs->num_jgs = num_jgs; | |
results_prefs->jgs = jgs; | |
results_prefs->num_judged = num_judged; | |
results_prefs->num_judged_ret = num_judged_ret; | |
results_prefs->pref_counts = (COUNTS_ARRAY) {ca_pool, | |
ca_ptr_pool, | |
num_judged}; | |
if (epi->debug_level >= 3) | |
printf ("Returned Cached Form_prefs\n"); | |
return (0); | |
} | |
/* Check that format type of result info and rel info are correct */ | |
if ((strcmp ("prefs", rel_info->rel_format) && | |
strcmp ("qrels_prefs", rel_info->rel_format)) || | |
strcmp ("trec_results", results->ret_format)) { | |
fprintf (stderr, "trec_eval.form_prefs_info: prefs_info format not (prefs or qrels_prefs) or results format not trec_results\n"); | |
return (UNDEF); | |
} | |
/* Make sure enough space for query and save copy */ | |
i = strlen(results->qid)+1; | |
if (NULL == (current_query = | |
te_chk_and_malloc (current_query, &max_current_query, | |
i, sizeof (char)))) | |
return (UNDEF); | |
(void) strncpy (current_query, results->qid, i); | |
text_results_info = (TEXT_RESULTS_INFO *) results->q_results; | |
trec_prefs = (TEXT_PREFS_INFO *) rel_info->q_rel_info; | |
/* Reserve space for returned and intermediate values, if needed */ | |
if (NULL == (prefs_and_ranks = | |
te_chk_and_malloc (prefs_and_ranks, &max_prefs_and_ranks, | |
trec_prefs->num_text_prefs, | |
sizeof (PREFS_AND_RANKS))) || | |
NULL == (ec_pool = | |
te_chk_and_malloc (ec_pool, &max_ec_pool, | |
trec_prefs->num_text_prefs, sizeof (EC))) || | |
NULL == (rank_pool = | |
te_chk_and_malloc (rank_pool, &max_rank_pool, | |
trec_prefs->num_text_prefs, sizeof (long)))) | |
return (UNDEF); | |
/* get prefs_and_ranks from results and prefs. Will be sorted by | |
jg, jsg, rel_level, rank. Set num_judged, num_judged_ret */ | |
if (UNDEF == form_prefs_and_ranks (epi, text_results_info, trec_prefs, | |
prefs_and_ranks, &num_judged, | |
&num_judged_ret)) | |
return (UNDEF); | |
/* Go through prefs_and ranks, count and reserve space for judgment groups. | |
Also count number of JGs that have subgroups and will need preference | |
arrays. */ | |
jgid = ""; jsgid = ""; | |
num_jgs = 0; num_jgs_with_subgroups = 0; | |
for (i = 0; i < trec_prefs->num_text_prefs; i++) { | |
if (strcmp (jgid, prefs_and_ranks[i].jg)) { | |
/* New JG */ | |
jgid = prefs_and_ranks[i].jg; | |
jsgid = prefs_and_ranks[i].jsg; | |
num_jgs++; | |
} | |
else if (strcmp (jsgid, prefs_and_ranks[i].jsg)) { | |
jsgid = prefs_and_ranks[i].jsg; | |
num_jgs_with_subgroups++; | |
} | |
} | |
/* Reserve pool space for JGs, and final pref_counts */ | |
if (NULL == (jgs = | |
te_chk_and_malloc (jgs, &max_num_jgs, num_jgs, sizeof (JG))) || | |
NULL == (ca_pool = | |
te_chk_and_malloc (ca_pool, &max_ca_pool, | |
num_judged * num_judged, | |
sizeof (unsigned short))) || | |
NULL == (ca_ptr_pool = | |
te_chk_and_malloc (ca_ptr_pool, &max_ca_ptr_pool, num_judged, | |
sizeof (unsigned short *)))) | |
return (UNDEF); | |
if (num_jgs_with_subgroups) { | |
/* Reserve pool space for preference arrays, and rel_level arrays */ | |
if (NULL == (rel_pool = | |
te_chk_and_malloc (rel_pool, &max_rel_pool, | |
num_judged * num_jgs_with_subgroups, | |
sizeof (float))) || | |
NULL == (pa_pool = | |
te_chk_and_malloc (pa_pool, &max_pa_pool, | |
num_judged * num_judged * num_jgs_with_subgroups, | |
sizeof (unsigned char))) || | |
NULL == (pa_ptr_pool = | |
te_chk_and_malloc (pa_ptr_pool, &max_pa_ptr_pool, | |
num_judged * num_jgs_with_subgroups, | |
sizeof (unsigned char *)))) | |
return (UNDEF); | |
} | |
ec_pool_ptr = ec_pool; | |
rel_pool_ptr = rel_pool; | |
rank_pool_ptr = rank_pool; | |
pa_pool_ptr = pa_pool; | |
pa_ptr_pool_ptr = pa_ptr_pool; | |
/* setup returned results_prefs so its summary values can be filled in */ | |
results_prefs->num_jgs = num_jgs; | |
results_prefs->jgs = jgs; | |
results_prefs->num_judged = num_judged; | |
results_prefs->num_judged_ret = num_judged_ret; | |
results_prefs->pref_counts = (COUNTS_ARRAY) {ca_pool, ca_ptr_pool, | |
num_judged}; | |
init_counts_array (&results_prefs->pref_counts); | |
/* Go through prefs_and_ranks, determine and construct appropriate JG | |
preference format. Preferences are counted and add to summary values | |
as each JG is handled. */ | |
jg_ind = 0; | |
start_jg = 0; | |
num_rel_level = 0; | |
num_sub_group = 0; | |
rel_level = -3.0; /* Illegal rel_level */ | |
jgid = prefs_and_ranks[0].jg; | |
jsgid = ""; | |
for (i = 0; i < trec_prefs->num_text_prefs; i++) { | |
if (strcmp (jgid, prefs_and_ranks[i].jg)) { | |
/* New judgment group. Form previous JG and initialize coounts | |
for new JG */ | |
if (num_sub_group > 1) { | |
/* Preference array JG */ | |
jgs[jg_ind].num_ecs = 0; /* Indicator thet prefs_array used */ | |
jgs[jg_ind].prefs_array = (PREFS_ARRAY) {pa_pool_ptr, | |
pa_ptr_pool_ptr, | |
num_judged}; | |
pa_pool_ptr += num_judged * num_judged; | |
pa_ptr_pool_ptr += num_judged; | |
jgs[jg_ind].rel_array = rel_pool_ptr; | |
rel_pool_ptr += num_judged; | |
if (UNDEF == form_jg_pa (&prefs_and_ranks[start_jg], | |
i - start_jg, | |
&jgs[jg_ind], | |
results_prefs)) | |
return (UNDEF); | |
} | |
else { | |
/* EC JG */ | |
jgs[jg_ind].ecs = ec_pool_ptr; | |
ec_pool_ptr += num_rel_level; | |
jgs[jg_ind].num_ecs = num_rel_level; | |
if (UNDEF == form_jg_ec (&prefs_and_ranks[start_jg], | |
i - start_jg, | |
rank_pool_ptr, | |
&jgs[jg_ind], | |
results_prefs)) | |
return (UNDEF); | |
rank_pool_ptr += i - start_jg; | |
} | |
jgid = prefs_and_ranks[i].jg; | |
jg_ind++; | |
jsgid = ""; | |
start_jg = i; | |
num_sub_group = 0; | |
num_rel_level = 0; | |
rel_level = -3.0; /* Illegal rel_level */ | |
} | |
if (strcmp (jsgid, prefs_and_ranks[i].jsg)) { | |
num_sub_group++; | |
jsgid = prefs_and_ranks[i].jsg; | |
} | |
if (rel_level != prefs_and_ranks[i].rel_level) { | |
num_rel_level++; | |
rel_level = prefs_and_ranks[i].rel_level; | |
} | |
} | |
/* Form last JG */ | |
if (num_sub_group > 1) { | |
/* Preference array JG */ | |
jgs[jg_ind].num_ecs = 0; /* Indicator thet prefs_array used */ | |
jgs[jg_ind].prefs_array = (PREFS_ARRAY) {pa_pool_ptr, | |
pa_ptr_pool_ptr, | |
num_judged}; | |
pa_pool_ptr += num_judged * num_judged; | |
pa_ptr_pool_ptr += num_judged; | |
jgs[jg_ind].rel_array = rel_pool_ptr; | |
rel_pool_ptr += num_judged; | |
if (UNDEF == form_jg_pa (&prefs_and_ranks[start_jg], | |
i - start_jg, | |
&jgs[jg_ind], | |
results_prefs)) | |
return (UNDEF); | |
} | |
else { | |
/* EC JG */ | |
jgs[jg_ind].ecs = ec_pool_ptr; | |
ec_pool_ptr += num_rel_level; | |
jgs[jg_ind].num_ecs = num_rel_level; | |
if (UNDEF == form_jg_ec (&prefs_and_ranks[start_jg], | |
i - start_jg, | |
rank_pool_ptr, | |
&jgs[jg_ind], | |
results_prefs)) | |
return (UNDEF); | |
} | |
if (epi->debug_level >= 3) | |
debug_print_results_prefs (results_prefs); | |
return (1); | |
} | |
static int | |
form_jg_ec (const PREFS_AND_RANKS *prefs, const long num_prefs, | |
long *rank_pool_ptr, JG *jg, RESULTS_PREFS *results_prefs) | |
{ | |
EC *ec_ptr =jg->ecs; | |
long *rank_ptr = rank_pool_ptr; | |
long i; | |
float rel_level; | |
/* Fill in prefs array with all known info from prefs */ | |
/* prefs is sorted by jsg, then rel_level, then rank */ | |
rel_level = prefs[0].rel_level; | |
*ec_ptr = (EC) {rel_level, 0, rank_ptr}; | |
for (i = 0; i < num_prefs; i++) { | |
if (prefs[i].rel_level != rel_level) { | |
/* new equivalence class */ | |
rel_level = prefs[i].rel_level; | |
rank_ptr += ec_ptr->num_in_ec; | |
ec_ptr++; | |
*ec_ptr = (EC) {prefs[i].rel_level, 0, rank_ptr}; | |
} | |
ec_ptr->docid_ranks[ec_ptr->num_in_ec++] = prefs[i].rank; | |
} | |
/* Add counts of preference fulfilled and possible to jg and | |
summary counts */ | |
if (UNDEF == add_ec_pref_to_jg (jg, results_prefs)) | |
return (UNDEF); | |
return (1); | |
} | |
/* Add counts of preference fulfilled and possible to jg for EC pref info */ | |
static int | |
add_ec_pref_to_jg (JG *jg, RESULTS_PREFS *results_prefs) | |
{ | |
long num_judged_ret = results_prefs->num_judged_ret; | |
unsigned short **pc = results_prefs->pref_counts.array; | |
long *ptr1, *ptr2; | |
long ec1, ec2; | |
jg->num_prefs_fulfilled_ret = 0; | |
jg->num_prefs_possible_ret = 0; | |
jg->num_prefs_fulfilled_imp = 0; | |
jg->num_prefs_possible_imp = 0; | |
jg->num_prefs_possible_notoccur = 0; | |
jg->num_nonrel = 0; | |
jg->num_nonrel_ret = 0; | |
jg->num_rel = 0; | |
jg->num_rel_ret = 0; | |
/* Go through all ecs counting preferences, and setting up | |
prefs_count */ | |
for (ec1 = 0; ec1 < jg->num_ecs; ec1++) { | |
/* Count num rel and ret */ | |
for (ptr1 = jg->ecs[ec1].docid_ranks; | |
ptr1 < &jg->ecs[ec1].docid_ranks[jg->ecs[ec1].num_in_ec]; | |
ptr1++) { | |
if (*ptr1 >= num_judged_ret) | |
break; | |
} | |
if (jg->ecs[ec1].rel_level > 0.0) { | |
jg->num_rel_ret += ptr1 - jg->ecs[ec1].docid_ranks; | |
jg->num_rel += jg->ecs[ec1].num_in_ec; | |
} | |
else { | |
jg->num_nonrel_ret += ptr1 - jg->ecs[ec1].docid_ranks; | |
jg->num_nonrel += jg->ecs[ec1].num_in_ec; | |
} | |
/* Count prefs */ | |
for (ec2 = ec1 + 1; ec2 < jg->num_ecs; ec2++) { | |
for (ptr1 = jg->ecs[ec1].docid_ranks; | |
ptr1 < &jg->ecs[ec1].docid_ranks[jg->ecs[ec1].num_in_ec]; | |
ptr1++) { | |
for (ptr2 = jg->ecs[ec2].docid_ranks; | |
ptr2 < &jg->ecs[ec2].docid_ranks[jg->ecs[ec2].num_in_ec]; | |
ptr2++) { | |
/* Add pref to summary info */ | |
pc[*ptr1][*ptr2]++; | |
/* check for inconsistency: same doc in multiple ec */ | |
if (*ptr1 == *ptr2) { | |
fprintf (stderr, | |
"trec_eval.form_prefs_counts: Internal docid %ld occurs with different rel_level in same jsg\n", *ptr1); | |
return (UNDEF); | |
/* need to check this in pa also? */ | |
} | |
/* Add count to appropriate jg_num* */ | |
if (*ptr1 < *ptr2) { | |
/* judgment fulfilled */ | |
if (*ptr2 < num_judged_ret) | |
jg->num_prefs_fulfilled_ret++; | |
else if (*ptr1 < num_judged_ret) | |
jg->num_prefs_fulfilled_imp++; | |
else | |
jg->num_prefs_possible_notoccur++; | |
} | |
else { | |
if (*ptr1 < num_judged_ret) | |
jg->num_prefs_possible_ret++; | |
else if (*ptr2 < num_judged_ret) | |
jg->num_prefs_possible_imp++; | |
else | |
jg->num_prefs_possible_notoccur++; | |
} | |
} | |
} | |
} | |
} | |
jg->num_prefs_possible_ret += jg->num_prefs_fulfilled_ret; | |
jg->num_prefs_possible_imp += jg->num_prefs_fulfilled_imp; | |
return (1); | |
} | |
static int | |
form_jg_pa (const PREFS_AND_RANKS *prefs, const long num_prefs, | |
JG *jg, RESULTS_PREFS *results_prefs) | |
{ | |
long i,j; | |
PREFS_ARRAY *pa = &jg->prefs_array; | |
/* Initialize and zero prefs_array */ | |
init_prefs_array(pa); | |
/* Initialize and set rel_level to -1. Will check for inconsistencies | |
(rel_level for some docid 0.0 and some > 0.0) as prefs handled */ | |
for (i = 0; i < pa->num_judged; i++) { | |
jg->rel_array[i] = -1.0; | |
} | |
/* Fill in prefs array with all known info from prefs */ | |
/* prefs is sorted by jsg, then rel_level, then rank */ | |
for (i = 0; i < num_prefs; i++) { | |
/* check for consistency and add rel_level info */ | |
if ((jg->rel_array[prefs[i].rank] > 0.0 && | |
prefs[i].rel_level == 0.0) || | |
(jg->rel_array[prefs[i].rank] == 0.0 && | |
prefs[i].rel_level > 0.0)) { | |
fprintf (stderr, | |
"trec_eval.form_prefs_counts: doc '%s' has both 0 and non-0 rel_level assigned\n", | |
prefs[i].docno); | |
return (UNDEF); | |
} | |
jg->rel_array[prefs[i].rank] = prefs[i].rel_level; | |
/* Add preference for i to all other entries in this JSG with lower | |
rel_levels */ | |
j = i+1; | |
/* Just skip over all docs at same rel_level */ | |
while (j < num_prefs && | |
0 == strcmp (prefs[i].jsg, prefs[j].jsg) && | |
prefs[i].rel_level == prefs[j].rel_level) | |
j++; | |
/* Rest of JSG has lower rel_level */ | |
while (j < num_prefs && | |
0 == strcmp (prefs[i].jsg, prefs[j].jsg)) { | |
pa->array[prefs[i].rank][prefs[j].rank] = 1; | |
j++; | |
} | |
} | |
/* Add transitive preferences to pa */ | |
if (UNDEF == add_transitives (pa)) | |
return (UNDEF); | |
/* Add counts of preference fulfilled and possible to jg and | |
summary counts */ | |
if (UNDEF == add_pa_pref_to_jg (jg, results_prefs)) | |
return (UNDEF); | |
return (1); | |
} | |
static int | |
add_transitives(PREFS_ARRAY *pa) | |
{ | |
PREFS_ARRAY m1; | |
PREFS_ARRAY m2; | |
long i, j; | |
PREFS_ARRAY *array_in, *array_out, *temp; | |
/* Need two temporary arrays of same size as pa. Reserve space and | |
zero out arrays */ | |
if (NULL == (temp_pa_pool = | |
te_chk_and_malloc (temp_pa_pool, &max_temp_pa_pool, | |
2 * pa->num_judged * pa->num_judged, | |
sizeof (unsigned char))) || | |
NULL == (temp_pa_ptr_pool = | |
te_chk_and_malloc (temp_pa_ptr_pool, &max_temp_pa_ptr_pool, | |
2 * pa->num_judged, | |
sizeof (unsigned char *)))) | |
return (UNDEF); | |
m1 = (PREFS_ARRAY) {temp_pa_pool, temp_pa_ptr_pool, pa->num_judged}; | |
m2 = (PREFS_ARRAY) {temp_pa_pool + pa->num_judged * pa->num_judged, | |
temp_pa_ptr_pool + pa->num_judged, | |
pa->num_judged}; | |
if (pa->num_judged != saved_num_judged) { | |
/* if new size array, must reset row pointers */ | |
saved_num_judged = pa->num_judged; | |
for (i = 0; i < pa->num_judged; i++) { | |
m1.array[i] = m1.full_array + i * pa->num_judged; | |
m2.array[i] = m2.full_array + i * pa->num_judged; | |
} | |
} | |
/* Add diagonal all ones in pa */ | |
for (i = 0; i < pa->num_judged; i++) { | |
pa->array[i][i] = 1; | |
} | |
/* Multiply prefs_array by itself until there are no changes */ | |
array_in = pa; | |
array_out = &m1; | |
if (mult_and_check_change (pa, array_in, array_out)) { | |
array_in = array_out; | |
array_out = &m2; | |
while (mult_and_check_change (pa, array_in, array_out)) { | |
temp = array_in; | |
array_in = array_out; | |
array_out = temp; | |
} | |
/* There were changes, now set pa values to those of array_out */ | |
for (i = 0; i < pa->num_judged; i++) { | |
for (j = 0; j <pa-> num_judged; j++) { | |
pa->array[i][j] = array_out->array[i][j]; | |
} | |
} | |
} | |
/* Reset all diagonals to 0 in pa */ | |
for (i = 0; i < pa->num_judged; i++) { | |
pa->array[i][i] = 0; | |
} | |
/* Check for inconsistencies */ | |
for (i = 0; i < pa->num_judged; i++) { | |
for (j = 0; j <pa-> num_judged; j++) { | |
if (i != j && pa->array[i][j] && pa->array[j][i]) { | |
fprintf (stderr, "trec_eval.form_prefs_counts: Pref inconsistency found\n internal rank %ld and internal rank %ld are conflicted\n", i, j); | |
abort(); | |
return (-1); | |
} | |
} | |
} | |
return (1); | |
} | |
/* Add counts of preference fulfilled and possible to jg */ | |
static int | |
add_pa_pref_to_jg (JG *jg, RESULTS_PREFS *results_prefs) | |
{ | |
unsigned char **a = jg->prefs_array.array; | |
unsigned short **c = results_prefs->pref_counts.array; | |
long num_judged = results_prefs->num_judged; | |
long num_judged_ret = results_prefs->num_judged_ret; | |
long i,j; | |
jg->num_prefs_fulfilled_ret = 0; | |
jg->num_prefs_possible_ret = 0; | |
jg->num_prefs_fulfilled_imp = 0; | |
jg->num_prefs_possible_imp = 0; | |
jg->num_prefs_possible_notoccur = 0; | |
jg->num_nonrel = 0; | |
jg->num_nonrel_ret = 0; | |
jg->num_rel = 0; | |
jg->num_rel_ret = 0; | |
for (i = 0; i < num_judged_ret; i++) { | |
if (jg->rel_array[i] > 0.0) | |
jg->num_rel_ret++; | |
else if (jg->rel_array[i] == 0.0) | |
jg->num_nonrel_ret++; | |
} | |
jg->num_rel = jg->num_rel_ret; | |
jg->num_nonrel = jg->num_nonrel_ret; | |
for (i = num_judged_ret; i < num_judged; i++) { | |
if (jg->rel_array[i] > 0.0) | |
jg->num_rel++; | |
else if (jg->rel_array[i] == 0.0) | |
jg->num_nonrel++; | |
} | |
for (i = 0; i < num_judged_ret; i++) { | |
for (j = 0; j < i; j++) { | |
if (a[i][j]) { | |
/* Pref not fulfilled. Area A2 (see comment at top) */ | |
c[i][j]++; | |
jg->num_prefs_possible_ret++; | |
} | |
} | |
for (j = i+1; j < num_judged_ret; j++) { | |
if (a[i][j]) { | |
/* Pref fulfilled. Area A1 (see comment at top) */ | |
c[i][j]++; | |
jg->num_prefs_fulfilled_ret++; | |
} | |
} | |
for (j = num_judged_ret; j < num_judged; j++) { | |
if (a[i][j]) { | |
/* Pref fulfilled implied. Area A3 (see comment at top) */ | |
c[i][j]++; | |
jg->num_prefs_fulfilled_imp++; | |
} | |
} | |
} | |
for (i = num_judged_ret; i < num_judged; i++) { | |
for (j = 0; j < num_judged_ret; j++) { | |
if (a[i][j]) { | |
/* Pref not fulfilled implied. Area A4 (see comment at top) */ | |
c[i][j]++; | |
jg->num_prefs_possible_imp++; | |
} | |
} | |
for (j = num_judged_ret; j < num_judged; j++) { | |
if (a[i][j]) { | |
/* Pref not occur at all. Area A5 (see comment at top) */ | |
c[i][j]++; | |
jg->num_prefs_possible_notoccur++; | |
} | |
} | |
} | |
jg->num_prefs_possible_ret += jg->num_prefs_fulfilled_ret; | |
jg->num_prefs_possible_imp += jg->num_prefs_fulfilled_imp; | |
return (1); | |
} | |
/* Multiply array a1 * array a2 and put result in array res. | |
Return (1) if array res is changed from array a2 after multiplication | |
else return 0 */ | |
static int | |
mult_and_check_change (const PREFS_ARRAY *a1, const PREFS_ARRAY *a2, PREFS_ARRAY *res) | |
{ | |
int change = 0; | |
long i, j, k; | |
for (i = 0; i < a1->num_judged; i++) { | |
for (j = 0; j < a1->num_judged; j++) { | |
res->array[i][j] = 0; | |
for (k = 0; k < a1->num_judged; k++) { | |
if (a1->array[i][k] && a2->array[k][j]) { | |
res->array[i][j] = 1; | |
break; | |
} | |
} | |
if (a2->array[i][j] != res->array[i][j]) change = 1; | |
} | |
} | |
return (change); | |
} | |
static int form_prefs_and_ranks (const EPI*epi, | |
const TEXT_RESULTS_INFO *text_results_info, | |
const TEXT_PREFS_INFO *trec_prefs, | |
PREFS_AND_RANKS *prefs_and_ranks, | |
long *num_judged, long *num_judged_ret) | |
{ | |
long lnum_judged_ret; /* local num_judged_ret */ | |
long next_unretrieved_rank; | |
long i; | |
long num_results; /* Current number of results (changes as docs | |
thrown away from docno_results) */ | |
long num_prefs = trec_prefs->num_text_prefs; | |
PREFS_AND_RANKS *ranks_ptr, *end_ranks, *start_ptr; | |
/* Copy docno results and add ranks */ | |
num_results = text_results_info->num_text_results; | |
if (NULL == (docno_results = | |
te_chk_and_malloc (docno_results, &max_docno_results, | |
num_results, sizeof (DOCNO_RESULTS)))) | |
return (UNDEF); | |
for (i = 0; i < num_results; i++) { | |
docno_results[i].docno = text_results_info->text_results[i].docno; | |
docno_results[i].sim = text_results_info->text_results[i].sim; | |
} | |
/* Sort results by sim, breaking ties lexicographically using docno */ | |
qsort ((char *) docno_results, | |
(int) num_results, | |
sizeof (DOCNO_RESULTS), | |
comp_sim_docno); | |
if (epi->debug_level >= 5) | |
debug_print_docno_results (docno_results, num_results, | |
"After input, before ranks"); | |
/* Only look at epi->max_num_docs_per_topic (not normally an issue) */ | |
if (num_results > epi->max_num_docs_per_topic) | |
num_results = epi->max_num_docs_per_topic; | |
/* Add ranks to docno_results (starting at 1) */ | |
for (i = 0; i < num_results; i++) { | |
docno_results[i].rank = i+1; | |
} | |
/* Sort docno_results by increasing docno */ | |
qsort ((char *) docno_results, | |
(int) num_results, | |
sizeof (DOCNO_RESULTS), | |
comp_docno); | |
/* Error checking for duplicates */ | |
for (i = 1; i < num_results; i++) { | |
if (0 == strcmp (docno_results[i].docno, | |
docno_results[i-1].docno)) { | |
fprintf (stderr, "trec_eval.form_prefs_counts: duplicate docs %s", | |
docno_results[i].docno); | |
return (UNDEF); | |
} | |
} | |
if (epi->debug_level >= 5) | |
debug_print_docno_results (docno_results, num_results, | |
"After -M, ranks"); | |
/* Copy trec_prefs - sort by docno. Space already reserved */ | |
for (i = 0; i < num_prefs; i++) { | |
prefs_and_ranks[i].jg = trec_prefs->text_prefs[i].jg; | |
prefs_and_ranks[i].jsg = trec_prefs->text_prefs[i].jsg; | |
prefs_and_ranks[i].rel_level = trec_prefs->text_prefs[i].rel_level; | |
prefs_and_ranks[i].docno = trec_prefs->text_prefs[i].docno; | |
} | |
qsort ((char *) prefs_and_ranks, | |
(int) num_prefs, | |
sizeof (PREFS_AND_RANKS), | |
comp_prefs_and_ranks_docno); | |
if (epi->debug_level >= 5) | |
debug_print_prefs_and_ranks (prefs_and_ranks, num_prefs, | |
"Input, before ranks"); | |
/* Go through docno_results and prefs_and_ranks in parallel (both sorted | |
by docno) and assign ranks of -1 to those docs in docno_results that | |
are not in prefs_and_ranks */ | |
ranks_ptr = prefs_and_ranks; | |
end_ranks = &prefs_and_ranks[num_prefs]; | |
for (i = 0; i < num_results && ranks_ptr < end_ranks; i++) { | |
while (ranks_ptr < end_ranks && | |
strcmp (ranks_ptr->docno, docno_results[i].docno) < 0) | |
ranks_ptr++; | |
if (ranks_ptr < end_ranks && | |
strcmp (ranks_ptr->docno, docno_results[i].docno) == 0) { | |
do { | |
ranks_ptr++; | |
} while (ranks_ptr < end_ranks && | |
strcmp (ranks_ptr->docno, docno_results[i].docno) == 0); | |
} | |
else | |
/* dpcno_results[i] not judged */ | |
docno_results[i].rank = -1; | |
} | |
/* sort docno_results[0..i] by increasing rank */ | |
num_results = i; | |
qsort ((char *) docno_results, | |
(int) num_results, | |
sizeof (DOCNO_RESULTS), | |
comp_results_inc_rank); | |
if (epi->debug_level >= 5) | |
debug_print_docno_results (docno_results, num_results, | |
"After marking not judged"); | |
/* Assign new docid ranks starting at 0 for only judged docs */ | |
lnum_judged_ret = 0; | |
for (i = 0; i < num_results; i ++) { | |
if (docno_results[i].rank > -1) { | |
docno_results[lnum_judged_ret].docno = docno_results[i].docno; | |
docno_results[lnum_judged_ret].rank = lnum_judged_ret; | |
lnum_judged_ret++; | |
} | |
} | |
num_results = lnum_judged_ret; | |
/* Sort docno_results by increasing docno */ | |
qsort ((char *) docno_results, | |
(int) num_results, | |
sizeof (DOCNO_RESULTS), | |
comp_docno); | |
if (epi->debug_level >= 5) | |
debug_print_docno_results (docno_results, num_results, | |
"After assigning docid_ranks"); | |
/* Go through reduced docno_results and prefs_and_ranks in parallel and | |
assign ranks in prefs_and_ranks from docno_results. Note all docnos | |
in docno_results now guaranteed to be in prefs_and_ranks */ | |
ranks_ptr = prefs_and_ranks; | |
end_ranks = &prefs_and_ranks[num_prefs]; | |
next_unretrieved_rank = num_results; | |
for (i = 0; i < num_results; i++) { | |
while (strcmp (ranks_ptr->docno, docno_results[i].docno) < 0) { | |
ranks_ptr->rank = next_unretrieved_rank++; | |
start_ptr = ranks_ptr++; | |
while (strcmp (ranks_ptr->docno, start_ptr->docno) == 0) { | |
ranks_ptr->rank = start_ptr->rank; | |
ranks_ptr++; | |
} | |
} | |
ranks_ptr->rank = docno_results[i].rank; | |
start_ptr = ranks_ptr++; | |
while (ranks_ptr < end_ranks && | |
strcmp (ranks_ptr->docno, start_ptr->docno) == 0) { | |
ranks_ptr->rank = start_ptr->rank; | |
ranks_ptr++; | |
} | |
} | |
while (ranks_ptr < end_ranks) { | |
ranks_ptr->rank = next_unretrieved_rank++; | |
start_ptr = ranks_ptr++; | |
while (ranks_ptr < end_ranks && | |
strcmp (ranks_ptr->docno, start_ptr->docno) == 0) { | |
ranks_ptr->rank = start_ptr->rank; | |
ranks_ptr++; | |
} | |
} | |
/* Now sort prefs_and_ranks by jg, jsg, rel_level, docid_rank */ | |
qsort ((void *) prefs_and_ranks, | |
num_prefs, | |
sizeof (PREFS_AND_RANKS), | |
comp_prefs_and_ranks_jg_rel_level); | |
if (epi->debug_level >= 4) { | |
printf ("Form_prefs: num_judged %ld, num_judged_ret %ld\n", | |
next_unretrieved_rank, num_results); | |
debug_print_prefs_and_ranks (prefs_and_ranks, num_prefs, | |
"Final prefs"); | |
} | |
*num_judged = next_unretrieved_rank; | |
*num_judged_ret = num_results; | |
return (1); | |
} | |
static void | |
init_prefs_array (PREFS_ARRAY *pa) | |
{ | |
unsigned char *row_ptr; | |
long i; | |
(void) memset ((void *) pa->full_array, 0, | |
pa->num_judged * pa->num_judged * | |
sizeof (unsigned char)); | |
row_ptr = pa->full_array; | |
for (i = 0; i < pa->num_judged; i++) { | |
pa->array[i] = row_ptr; | |
row_ptr += pa->num_judged; | |
} | |
} | |
static void | |
init_counts_array (COUNTS_ARRAY *ca) | |
{ | |
unsigned short *row_ptr; | |
long i; | |
(void) memset ((void *) ca->full_array, 0, | |
ca->num_judged * ca->num_judged * | |
sizeof (unsigned short)); | |
row_ptr = ca->full_array; | |
for (i = 0; i < ca->num_judged; i++) { | |
ca->array[i] = row_ptr; | |
row_ptr += ca->num_judged; | |
} | |
} | |
static int | |
comp_prefs_and_ranks_docno (PREFS_AND_RANKS *ptr1, PREFS_AND_RANKS *ptr2) | |
{ | |
return (strcmp (ptr1->docno, ptr2->docno)); | |
} | |
static int | |
comp_prefs_and_ranks_jg_rel_level (PREFS_AND_RANKS *ptr1, PREFS_AND_RANKS *ptr2) | |
{ | |
int jg_comp = strcmp (ptr1->jg, ptr2->jg); | |
if (jg_comp != 0) return (jg_comp); | |
jg_comp = strcmp (ptr1->jsg, ptr2->jsg); | |
if (jg_comp != 0) return (jg_comp); | |
if (ptr1->rel_level > ptr2->rel_level) return (-1); | |
if (ptr1->rel_level < ptr2->rel_level) return (1); | |
return (ptr1->rank - ptr2->rank); | |
} | |
static int | |
comp_sim_docno (ptr1, ptr2) | |
DOCNO_RESULTS *ptr1; | |
DOCNO_RESULTS *ptr2; | |
{ | |
if (ptr1->sim > ptr2->sim) | |
return (-1); | |
if (ptr1->sim < ptr2->sim) | |
return (1); | |
return (strcmp (ptr1->docno, ptr2->docno)); | |
} | |
static int | |
comp_docno (DOCNO_RESULTS *ptr1, DOCNO_RESULTS *ptr2) | |
{ | |
return (strcmp (ptr1->docno, ptr2->docno)); | |
} | |
static int | |
comp_results_inc_rank (DOCNO_RESULTS *ptr1, DOCNO_RESULTS *ptr2) | |
{ | |
return (ptr1->rank - ptr2->rank); | |
} | |
static void | |
debug_print_prefs_and_ranks (PREFS_AND_RANKS *par, long num_prefs, | |
char *location) | |
{ | |
long i; | |
printf ("Prefs_and_ranks Dump. num_pref_lines %ld, %s\n", | |
num_prefs, location); | |
for (i = 0; i < num_prefs; i++) { | |
printf (" %s\t%s\t%4.2f\t%s\t%3ld\n", | |
par[i].jg, par[i].jsg, par[i].rel_level, par[i].docno, | |
par[i].rank); | |
} | |
fflush (stdout); | |
} | |
static void | |
debug_print_docno_results (DOCNO_RESULTS *dr, long num_results, | |
char *location) | |
{ | |
long i; | |
printf ("Docno_results Dump. num_results %ld, %s\n", | |
num_results, location); | |
for (i = 0; i < num_results; i++) { | |
printf (" %s\t%4.2f\t%3ld\n", | |
dr[i].docno, dr[i].sim, dr[i].rank); | |
} | |
fflush (stdout); | |
} | |
static void | |
debug_print_ec (EC *ec) { | |
long i; | |
printf (" EC Dump. Rel_level %4.2f. Num_docid_ranks %ld", | |
ec->rel_level, ec->num_in_ec); | |
for (i = 0; i < ec->num_in_ec; i++) { | |
if (0 == (i % 10)) | |
printf ("\n "); | |
printf ("%3ld ", ec->docid_ranks[i]); | |
} | |
putchar ('\n'); | |
fflush (stdout); | |
} | |
static void | |
debug_print_prefs_array (PREFS_ARRAY *pa) { | |
long i,j; | |
printf (" Prefs_Array Dump. Num_judged %ld", pa->num_judged); | |
for (i = 0; i < pa->num_judged; i++) { | |
printf ("\n Row %3ld\n ", i); | |
for (j = 0; j < pa->num_judged; j++) { | |
if (j && 0 == (j % 20)) | |
printf (" (%ld)\n ", j); | |
printf ("%2hhd", pa->array[i][j]); | |
} | |
} | |
putchar ('\n'); | |
fflush (stdout); | |
} | |
static void | |
debug_print_counts_array (COUNTS_ARRAY *ca) { | |
long i,j; | |
printf (" Counts_Array Dump. Num_judged %ld", ca->num_judged); | |
for (i = 0; i < ca->num_judged; i++) { | |
printf ("\n Row %3ld\n ", i); | |
for (j = 0; j < ca->num_judged; j++) { | |
if (j && 0 == (j % 20)) | |
printf (" (%ld)\n ", j); | |
printf ("%2hd ", ca->array[i][j]); | |
} | |
} | |
putchar ('\n'); | |
fflush (stdout); | |
} | |
static void | |
debug_print_jg (JG *jg) { | |
long i; | |
printf (" JG Dump. Type %s\n", jg->num_ecs > 0 ? "EC":"Prefs_array"); | |
printf (" num_prefs_fulfilled_ret %ld\n", jg->num_prefs_fulfilled_ret); | |
printf (" num_prefs_possible_ret %ld\n", jg->num_prefs_possible_ret); | |
printf (" num_prefs_fulfilled_imp %ld\n", jg->num_prefs_fulfilled_imp); | |
printf (" num_prefs_possible_imp %ld\n", jg->num_prefs_possible_imp); | |
printf (" num_prefs_possible_notoccur %ld\n", jg->num_prefs_possible_notoccur); | |
printf (" num_nonrel %ld\n", jg->num_nonrel); | |
printf (" num_nonrel_ret %ld\n", jg->num_nonrel_ret); | |
printf (" num_rel %ld\n", jg->num_rel); | |
printf (" num_rel_ret %ld\n", jg->num_rel_ret); | |
if (0 == jg->num_ecs && jg->rel_array) { | |
debug_print_prefs_array (&jg->prefs_array); | |
printf (" Rel_array Dump. %ld values", | |
jg->prefs_array.num_judged); | |
for (i = 0; i < jg->prefs_array.num_judged; i++) { | |
if (0 == (i % 10)) | |
printf ("\n "); | |
printf ("%4.2f ", jg->rel_array[i]); | |
} | |
putchar ('\n'); | |
} | |
else if (0 == jg->num_ecs) | |
printf (" JG is not initialized (0 ECs and no rel_array\n"); | |
else { | |
printf (" Dump of %ld ECs within JG\n", jg->num_ecs); | |
for (i = 0; i < jg->num_ecs; i++) | |
debug_print_ec (&jg->ecs[i]); | |
} | |
fflush (stdout); | |
} | |
static void | |
debug_print_results_prefs (RESULTS_PREFS *rp) { | |
long i; | |
printf ("Results_prefs Dump. %ld Judgment Groups\n", rp->num_jgs); | |
printf (" num_judged_ret %ld, num_judged %ld\n", | |
rp->num_judged_ret, rp->num_judged); | |
for (i = 0; i < rp->num_jgs; i++) | |
debug_print_jg (&rp->jgs[i]); | |
debug_print_counts_array (&rp->pref_counts); | |
} | |
int | |
te_form_pref_counts_cleanup () | |
{ | |
if (max_current_query > 0) { | |
Free (current_query); | |
max_current_query = 0; | |
current_query = "no_query"; | |
} | |
if (max_num_jgs > 0) { | |
Free (jgs); | |
max_num_jgs = 0; | |
} | |
if (max_rank_pool > 0) { | |
Free (rank_pool); | |
max_rank_pool = 0; | |
} | |
if (max_ec_pool > 0) { | |
Free (ec_pool); | |
max_ec_pool = 0; | |
} | |
if (max_ca_pool > 0) { | |
Free (ca_pool); | |
max_ca_pool = 0; | |
} | |
if (max_ca_ptr_pool > 0) { | |
Free (ca_ptr_pool); | |
max_ca_ptr_pool = 0; | |
} | |
if (max_pa_pool > 0) { | |
Free (pa_pool); | |
max_pa_pool = 0; | |
} | |
if (max_pa_ptr_pool > 0) { | |
Free (pa_ptr_pool); | |
max_pa_ptr_pool = 0; | |
} | |
if (max_rel_pool > 0) { | |
Free (rel_pool); | |
max_rel_pool = 0; | |
} | |
if (max_prefs_and_ranks > 0) { | |
Free (prefs_and_ranks); | |
max_prefs_and_ranks = 0; | |
} | |
if (max_docno_results > 0) { | |
Free (docno_results); | |
max_docno_results = 0; | |
} | |
if (max_temp_pa_pool > 0) { | |
Free (temp_pa_pool); | |
max_temp_pa_pool = 0; | |
} | |
if (max_temp_pa_ptr_pool > 0) { | |
Free (temp_pa_ptr_pool); | |
max_temp_pa_ptr_pool = 0; | |
} | |
return (1); | |
} | |