Spaces:

bhavanishankarpullela
/

CoSTA

Sleeping

File size: 1,689 Bytes

2dc7757

import csv
import string
from scipy.stats import wilcoxon
import numpy as np

def process_sentence(sentence):
    if not isinstance(sentence, str):
        return ""
    
    sentence = sentence.split('\n')[0]
    sentence = sentence.strip()
    sentence = sentence.lower()
    
    for punctuation in string.punctuation:
        sentence = sentence.replace(punctuation, "")
    sentence = sentence.strip()
    
    if sentence and sentence[-1] == '।':
        sentence = sentence[:-1]

    return sentence

# Read CSV and generate exact match scores for Prompt A
with open('MT0_xxl_results/result_vi', 'r') as csvfile:
    reader = csv.DictReader(csvfile)
    scores_a = [1 if process_sentence(row['pred_label']) == process_sentence(row['label']) else 0 for row in reader]

# Read CSV and generate exact match scores for Prompt B
with open('MT0_xxl_results/result_vi_80p', 'r') as csvfile:
    reader = csv.DictReader(csvfile)
    scores_b = [1 if process_sentence(row['pred_label']) == process_sentence(row['label']) else 0 for row in reader]

# Count the number of 1s in each list
count_a = scores_a.count(1)
count_b = scores_b.count(1)

# Print the counts
print(f"Number of exact matches for Prompt A: {count_a}")
print(f"Number of exact matches for Prompt B: {count_b}")

# Conduct Wilcoxon Signed Rank test
w_stat, p_val = wilcoxon(scores_a, scores_b)

# Print the results
print(f"Wilcoxon Signed Rank statistic: {w_stat}")
print(f"P-value: {p_val}")

if p_val < 0.05:
    print("The difference in score distributions between the prompts is statistically significant.")
else:
    print("The difference in score distributions between the prompts is not statistically significant.")