File size: 3,611 Bytes
9dc1a52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import random
import math
import pandas as pd
import gradio as gr
import numpy as np
from typing import List
from llm import OpenAI3


# Shared module-level LLM client; the functions in this module invoke it as
# `llm(prompt)` with a prompt string.
llm = OpenAI3()


def _scores_recalculate(scores: List):
    scores_array = np.array(scores)
    scores_dis = abs(scores_array - scores_array.mean())
    scores_count = np.ceil(1 / (scores_dis + scores_dis.mean()) * len(scores)).astype(int)
    new_scores = []
    for i in range(len(scores)):
        new_scores.extend([scores[i]] * scores_count[i])
    return new_scores


def generate_scores_and_comments(standard_file_path, rewrite_prompt, topic, num=10):
    """Generate ``num`` random assessments from a scoring-standard spreadsheet.

    The spreadsheet is loaded with ``pandas.read_excel``. It must contain a
    ``得分`` (score) column; every other column is treated as one assessment
    criterion. For each assessment, one score per criterion is drawn at random
    from the weighted pool built by ``_scores_recalculate``, the cells of the
    matching row(s) are collected as the standard comments, and the LLM is
    asked to rewrite them.

    Args:
        standard_file_path: Spreadsheet location, passed to
            ``pandas.read_excel``.
        rewrite_prompt: Prompt template. It is evaluated as an f-string
            against this function's local variables (e.g. ``topic``,
            ``stand_comment``, ``scores``), so local names here must stay
            stable.
        topic: Not used directly by the code; available to ``rewrite_prompt``.
        num: Number of assessments to generate.

    Returns:
        A tuple ``(assessments, df)``. ``assessments`` is a list of dicts with
        keys ``'scores'`` (criterion name -> drawn score) and ``'comment'``
        (the last line of the LLM response). ``df`` is the loaded DataFrame.

    Raises:
        ValueError: If the spreadsheet has no ``得分`` column.
    """
    df = pd.read_excel(standard_file_path)
    # NOTE: this changes a process-wide pandas display option; presumably set
    # so that long comment cells are not truncated by `to_string` below.
    pd.set_option('display.max_colwidth', None)
    standard_index = df.columns.to_list()
    standard_index.remove('得分')
    standard_scores = df['得分'].to_list()
    scores_choice = _scores_recalculate(standard_scores)
    assessments = []
    for i in range(num):
        assessment = dict()
        scores = dict()
        stand_comment = []
        for index in standard_index:
            score = random.choice(scores_choice)
            scores[index] = score
            content = df[df['得分'] == score][index].to_string(index=False)
            stand_comment.append(content)
        # Not used by the code below, but kept because the eval'd template can
        # reference any local name. Despite the name, this is the array of
        # drawn scores, not their mean.
        mean_score = np.array(list(scores.values()))

        assessment['scores'] = scores
        # Full-width semicolon separator. The literal was previously stored as
        # the mojibake sequence produced by mis-decoding its UTF-8 bytes.
        stand_comment = '；'.join(stand_comment)
        # SECURITY: `rewrite_prompt` is executed as Python code via eval().
        # Only pass trusted templates; a template containing `"""` or
        # arbitrary expressions will break or run code.
        prompt = 'f"""' + rewrite_prompt + '"""'
        r_comment = llm(eval(prompt))
        # Keep only the final line of the model output.
        r_comment = r_comment.split('\n')[-1]
        assessment['comment'] = r_comment
        assessments.append(assessment)
    return assessments, df


def medium_score_rewrite(standard_file, rewrite_prompt, topic, assessments):
    """Rewrite the standard comments that correspond to the median scores.

    Args:
        standard_file: Despite the name, this is used as a DataFrame: it is
            filtered on its ``得分`` column and indexed by criterion name.
        rewrite_prompt: Prompt template, evaluated as an f-string against this
            function's local variables (e.g. ``s_comment``, ``topic``).
        topic: Not used directly by the code; available to ``rewrite_prompt``.
        assessments: Sequence of dicts, each with a ``'scores'`` mapping of
            criterion name to score.

    Returns:
        Whatever ``llm`` returns for the evaluated prompt.
    """
    # One row per assessment, one column per criterion.
    scores = [i['scores'] for i in assessments]
    scores = pd.DataFrame(scores)
    try:
        medium = scores.quantile(0.5)
    except Exception as e:  # in case the values in one column are all None
        scores = scores.fillna(0)
        medium = scores.quantile(0.5)
    # Truncates any fractional median toward zero.
    medium = medium.astype(int)

    stand_comment = []
    df = standard_file
    for index in medium.index:
        # NOTE(review): the value is already an int after `astype(int)` above,
        # so `ceil` does not round a fractional median up — confirm intent.
        score = math.ceil(medium[index])
        # Text of this criterion's cell(s) on the row(s) whose score matches.
        content = df[df['得分'] == score][index].to_string(index=False)
        stand_comment.append(content)

    s_comment = ';'.join(stand_comment)
    # SECURITY: `rewrite_prompt` is executed as Python code via eval(); it
    # resolves names against the locals of this function.
    prompt = 'f"""' + rewrite_prompt + '"""'
    r_comment = llm(eval(prompt))
    return r_comment


def quantile_summary(summary_prompt, assessments):
    """Summarise assessment comments, preferring those with mid-range scores.

    Args:
        summary_prompt: Prompt template, evaluated as an f-string against this
            function's local variables (e.g. ``comment``).
        assessments: Either a sequence of strings, which are joined as-is, or
            a sequence of dicts with ``'comment'`` and ``'scores'`` keys.

    Returns:
        Whatever ``llm`` returns for the evaluated prompt.
    """
    # NOTE(review): `assessments[0]` raises IndexError on an empty sequence.
    if not isinstance(assessments[0], dict):
        comment = '\n'.join(assessments)
    else:
        comments = [i['comment'] for i in assessments]
        comments = pd.Series(comments)
        scores = [i['scores'] for i in assessments]
        scores = pd.DataFrame(scores)

        try:
            quartiles = scores.quantile([0.25, 0.75])
        except Exception as e:  # in case the values in one column are all None
            scores = scores.fillna(0)
            quartiles = scores.quantile([0.25, 0.75])
        # Truncates fractional quartiles toward zero.
        quartiles = quartiles.astype(int)
        # Element-wise masks: strictly below the upper quartile / strictly
        # above the lower quartile, per criterion.
        up = (scores - quartiles.loc[0.75]) < 0
        down = (scores - quartiles.loc[0.25]) > 0
        # Prefer assessments inside the band on every criterion ...
        select_index = (up & down).all(axis=1)
        if not select_index.any():
            # ... otherwise accept those inside the band on at least one.
            select_index = (up & down).any(axis=1)

        if select_index.any():
            select_comments = comments[select_index].to_list()
        else:
            # Nothing falls inside the band at all: use every comment.
            select_comments = comments.to_list()

        comment = '\n'.join(select_comments)

    # SECURITY: `summary_prompt` is executed as Python code via eval(); it
    # resolves names against the locals of this function.
    prompt = 'f"""' + summary_prompt + '"""'
    s_comment = llm(eval(prompt))
    return s_comment