File size: 3,978 Bytes
a5ab0b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import random
import math
import pandas as pd
import gradio as gr
import numpy as np
from typing import List
from llm import OpenAI3


# Shared LLM client for this module, created once at import time. The
# functions in this file invoke it as ``llm(prompt_text)`` and treat the
# return value as text. ``OpenAI3`` comes from the project-local ``llm``
# module; presumably a wrapper around an OpenAI chat model -- confirm there.
llm = OpenAI3()


def _scores_recalculate(scores: List):
    scores_array = np.array(scores)
    scores_dis = abs(scores_array - scores_array.mean())
    scores_count = np.ceil(1 / (scores_dis + scores_dis.mean()) * len(scores)).astype(int)
    new_scores = []
    for i in range(len(scores)):
        new_scores.extend([scores[i]] * scores_count[i])
    return new_scores


def generate_scores_and_comments(standard_file_path, rewrite_prompt, topic, num=10):
    """Generate ``num`` random score sets, each with an LLM-rewritten comment.

    The rubric spreadsheet must contain a ``得分`` (score) column. Every other
    column is treated as one criterion; for each criterion a score is drawn
    at random (weighted by ``_scores_recalculate``) and the rubric text of
    the row(s) with that score is collected. The collected texts are joined
    and handed to the LLM through ``rewrite_prompt``.

    ``rewrite_prompt`` is wrapped in an f-string literal and evaluated inside
    this function, so its ``{...}`` placeholders may reference the local
    names in scope here (for example ``topic``, ``stand_comment``,
    ``scores``). Local names are therefore part of the template contract.

    Args:
        standard_file_path: Anything ``pandas.read_excel`` accepts.
        rewrite_prompt: f-string template text for the rewrite request.
        topic: Not used directly; available to the template.
        num: Number of assessments to generate.

    Returns:
        A tuple ``(assessments, df)`` where ``assessments`` is a list of
        ``{'scores': {criterion: score}, 'comment': str}`` dicts and ``df``
        is the rubric DataFrame that was read.
    """
    df = pd.read_excel(standard_file_path)
    # Process-wide pandas display option; presumably set so ``to_string``
    # below does not truncate long rubric text -- confirm.
    pd.set_option('display.max_colwidth', None)
    standard_index = df.columns.to_list()
    standard_index.remove('得分')
    standard_scores = df['得分'].to_list()
    # Guard against an empty weighted pool so ``random.choice`` cannot fail on
    # it; fall back to uniform sampling over the rubric scores.
    scores_choice = _scores_recalculate(standard_scores) or standard_scores
    # SECURITY: the template is executed with ``eval`` below, so whoever
    # controls ``rewrite_prompt`` can run arbitrary Python in this process.
    # Only pass trusted templates. A template containing ``"""`` will also
    # break the generated literal.
    prompt = 'f"""' + rewrite_prompt + '"""'
    assessments = []
    for i in range(num):
        assessment = {}
        scores = {}
        stand_comment = []
        for index in standard_index:
            score = random.choice(scores_choice)
            scores[index] = score
            # Rubric text of every row whose score equals the drawn score.
            content = df[df['得分'] == score][index].to_string(index=False)
            stand_comment.append(content)
        # NOTE(review): despite its name this holds the array of drawn scores,
        # not their mean. It is kept unchanged because the prompt template
        # may reference it.
        mean_score = np.array(list(scores.values()))

        assessment['scores'] = scores
        # Full-width semicolon separator; the literal was previously stored
        # as mojibake (its UTF-8 bytes mis-decoded as cp1252).
        stand_comment = '；'.join(stand_comment)
        r_comment = llm(eval(prompt))
        # Keep only the last line of the model's reply.
        r_comment = r_comment.split('\n')[-1]
        assessment['comment'] = r_comment
        assessments.append(assessment)
    return assessments, df


def load_scores_and_comments(comments_file_path):
    """Load previously saved assessments from a spreadsheet.

    The sheet must have a ``comments`` column; all remaining columns are
    taken as the per-criterion scores of that row.

    Args:
        comments_file_path: Anything ``pandas.read_excel`` accepts.

    Returns:
        One ``{'scores': {column: value}, 'comment': text}`` dict per row,
        in sheet order.
    """
    table = pd.read_excel(comments_file_path)
    pd.set_option('display.max_colwidth', None)
    # ``pop`` removes the column from the table and hands it back in one step,
    # leaving only the score columns behind.
    comment_texts = table.pop('comments').to_list()
    score_rows = table.to_dict(orient='records')
    return [
        {'scores': row_scores, 'comment': text}
        for row_scores, text in zip(score_rows, comment_texts)
    ]


def medium_score_rewrite(standard_file, rewrite_prompt, topic, assessments):
    """Rewrite the rubric comments that match the median score per criterion.

    For every criterion the median of the scores in ``assessments`` is
    computed and cast to int; the rubric text of the row(s) in
    ``standard_file`` whose ``得分`` (score) equals that value is collected,
    joined with ``';'`` and sent to the LLM through ``rewrite_prompt``.

    ``rewrite_prompt`` is wrapped in an f-string literal and evaluated inside
    this function, so its ``{...}`` placeholders may reference the local
    names in scope here (for example ``topic`` and ``s_comment``; note that
    ``stand_comment`` is the un-joined *list* in this function).

    Args:
        standard_file: Rubric DataFrame with a ``得分`` column and one column
            per criterion.
        rewrite_prompt: f-string template text for the rewrite request.
        topic: Not used directly; available to the template.
        assessments: Iterable of dicts, each with a ``'scores'`` mapping.

    Returns:
        Whatever the LLM client returns for the evaluated prompt.
    """
    scores = [i['scores'] for i in assessments]
    scores = pd.DataFrame(scores)
    try:
        medium = scores.quantile(0.5)
    except Exception:  # in case the values in one column are all None
        scores = scores.fillna(0)
        medium = scores.quantile(0.5)
    # The int cast is the effective rounding policy for fractional medians.
    medium = medium.astype(int)

    stand_comment = []
    df = standard_file
    for index in medium.index:
        # NOTE(review): this used to be ``math.ceil(...)``, which was a no-op
        # because the median is already an int here. If rounding *up* was
        # intended, the cast above has to be replaced -- confirm.
        score = int(medium[index])
        content = df[df['得分'] == score][index].to_string(index=False)
        stand_comment.append(content)

    s_comment = ';'.join(stand_comment)
    # SECURITY: the template is executed with ``eval``, so whoever controls
    # ``rewrite_prompt`` can run arbitrary Python in this process. Only pass
    # trusted templates.
    prompt = 'f"""' + rewrite_prompt + '"""'
    r_comment = llm(eval(prompt))
    return r_comment


def quantile_summary(summary_prompt, assessments):
    """Summarise a set of comments with the LLM, preferring mid-range ones.

    If the first element of ``assessments`` is not a dict, ``assessments`` is
    treated as a sequence of comment strings and all of them are used.
    Otherwise each element must provide ``'comment'`` and ``'scores'``; the
    comments are filtered to those whose scores lie strictly between the
    25% and 75% quantiles (after the quantiles are cast to int):

    1. rows that are inside that band on *every* criterion;
    2. if there are none, rows inside the band on *any* criterion;
    3. if there are still none, all comments.

    The selected comments are joined with newlines into the local ``comment``.
    ``summary_prompt`` is wrapped in an f-string literal and evaluated inside
    this function, so its ``{...}`` placeholders may reference the local
    names in scope here (for example ``comment``).

    Args:
        summary_prompt: f-string template text for the summary request.
        assessments: Non-empty sequence of comment strings, or of dicts with
            ``'comment'`` and ``'scores'`` entries.

    Returns:
        Whatever the LLM client returns for the evaluated prompt.
    """
    # NOTE: indexing ``[0]`` raises IndexError when ``assessments`` is empty.
    if not isinstance(assessments[0], dict):
        comment = '\n'.join(assessments)
    else:
        comments = [i['comment'] for i in assessments]
        comments = pd.Series(comments)
        scores = [i['scores'] for i in assessments]
        scores = pd.DataFrame(scores)

        try:
            quartiles = scores.quantile([0.25, 0.75])
        except Exception as e:  # in case the values in one column are all None
            scores = scores.fillna(0)
            quartiles = scores.quantile([0.25, 0.75])
        # Cast the quantile values to int before comparing against the scores.
        quartiles = quartiles.astype(int)
        # Element-wise masks: strictly below the upper quartile / strictly
        # above the lower quartile, per criterion column.
        up = (scores - quartiles.loc[0.75]) < 0
        down = (scores - quartiles.loc[0.25]) > 0
        # First try rows that are inside the band on every criterion ...
        select_index = (up & down).all(axis=1)
        if not select_index.any():
            # ... then relax to rows inside the band on at least one criterion.
            select_index = (up & down).any(axis=1)

        if select_index.any():
            select_comments = comments[select_index].to_list()
        else:
            # Nothing falls inside the band at all: use every comment.
            select_comments = comments.to_list()

        comment = '\n'.join(select_comments)

    # SECURITY NOTE(review): the template is executed with ``eval``, so
    # whoever controls ``summary_prompt`` can run arbitrary Python in this
    # process. Only pass trusted templates.
    prompt = 'f"""' + summary_prompt + '"""'
    s_comment = llm(eval(prompt))
    return s_comment