"""Sample rubric scores and ask an LLM to rewrite or summarize the comments."""

import math
import random
from typing import List

import numpy as np
import pandas as pd

from llm import OpenAI3

llm = OpenAI3()


def _scores_recalculate(scores: List):
    """Build a sampling pool in which scores near the mean appear more often.

    Each score is repeated ``ceil(len(scores) / (d + mean(d)))`` times, where
    ``d`` is its absolute distance from the mean of ``scores``. Drawing
    uniformly from the returned list therefore favours central scores.

    Args:
        scores: Numeric scores to weight.

    Returns:
        A new list containing every input score one or more times. When all
        scores are identical (or there is only one), a plain copy is returned.
    """
    scores_array = np.array(scores)
    if scores_array.size == 0:
        return []

    scores_dis = abs(scores_array - scores_array.mean())
    denominator = scores_dis + scores_dis.mean()
    if not denominator.any():
        # Every score equals the mean, so the weight formula would divide by
        # zero and cast a non-finite value to int (yielding an empty or
        # nonsensical pool). Fall back to uniform weights.
        return list(scores)

    scores_count = np.ceil(1 / denominator * len(scores)).astype(int)
    new_scores = []
    for score, count in zip(scores, scores_count):
        new_scores.extend([score] * int(count))
    return new_scores


def generate_scores_and_comments(standard_file_path, rewrite_prompt, topic, num=10):
    """Generate ``num`` random assessments and an LLM-rewritten comment for each.

    The Excel sheet at ``standard_file_path`` must contain a '得分' ("score")
    column; every other column is treated as one criterion. For each
    assessment a score is drawn per criterion from the weighted pool built by
    ``_scores_recalculate``, the matching cell texts are joined with ';' into
    ``stand_comment``, and ``rewrite_prompt`` is rendered and sent to ``llm``.

    Args:
        standard_file_path: Path of the Excel file read with ``pd.read_excel``.
        rewrite_prompt: f-string style template; it is evaluated with this
            function's locals in scope (presumably it references
            ``stand_comment`` and ``topic`` -- confirm against the templates).
        topic: Not used directly here; available to the template.
        num: Number of assessments to generate.

    Returns:
        ``(assessments, df)`` where ``assessments`` is a list of
        ``{'scores': {criterion: score}, 'comment': str}`` dicts and ``df`` is
        the loaded sheet.
    """
    df = pd.read_excel(standard_file_path)
    # Global pandas option: keeps ``to_string`` from truncating long cells.
    pd.set_option('display.max_colwidth', None)
    standard_index = df.columns.to_list()
    standard_index.remove('得分')
    standard_scores = df['得分'].to_list()
    scores_choice = _scores_recalculate(standard_scores)

    assessments = []
    # NOTE: local names in this function are kept stable on purpose -- the
    # eval'd template below can reference any of them.
    for i in range(num):
        assessment = {}
        scores = {}
        stand_comment = []
        for index in standard_index:
            score = random.choice(scores_choice)
            scores[index] = score
            content = df[df['得分'] == score][index].to_string(index=False)
            stand_comment.append(content)
        # Not stored in the result; kept only because templates may refer to it.
        mean_score = np.array(list(scores.values()))
        assessment['scores'] = scores
        stand_comment = ';'.join(stand_comment)

        # SECURITY: the template is executed as a Python f-string via eval, so
        # any expression embedded in ``rewrite_prompt`` runs with full
        # privileges. Only pass trusted templates.
        prompt = 'f"""' + rewrite_prompt + '"""'
        r_comment = llm(eval(prompt))
        # Only the last line of the model output is kept as the comment.
        r_comment = r_comment.split('\n')[-1]
        assessment['comment'] = r_comment
        assessments.append(assessment)
    return assessments, df


def load_scores_and_comments(comments_file_path):
    """Load previously saved assessments from an Excel sheet.

    The sheet must have a 'comments' column; all remaining columns are read
    as the per-row scores.

    Args:
        comments_file_path: Path of the Excel file read with ``pd.read_excel``.

    Returns:
        A list of ``{'scores': {column: value}, 'comment': value}`` dicts, one
        per row.
    """
    df = pd.read_excel(comments_file_path)
    # Global pandas option: keeps ``to_string`` from truncating long cells.
    pd.set_option('display.max_colwidth', None)
    comments = df.pop('comments').to_list()
    scores = df.to_dict(orient='records')
    assessments = [
        {'scores': row_scores, 'comment': comment}
        for row_scores, comment in zip(scores, comments)
    ]
    return assessments


def medium_score_rewrite(standard_file, rewrite_prompt, topic, assessments):
    """Ask the LLM for a comment that matches the median score per criterion.

    Args:
        standard_file: DataFrame with a '得分' ("score") column and one column
            per criterion (it is indexed as a DataFrame here; presumably the
            ``df`` returned by ``generate_scores_and_comments`` -- confirm).
        rewrite_prompt: f-string style template evaluated with this
            function's locals in scope.
        topic: Not used directly here; available to the template.
        assessments: List of dicts each holding a 'scores' mapping.

    Returns:
        Whatever ``llm`` returns for the rendered prompt.
    """
    scores = [i['scores'] for i in assessments]
    scores = pd.DataFrame(scores)
    try:
        medium = scores.quantile(0.5)
    except Exception:
        # In case the values in one column are all None.
        scores = scores.fillna(0)
        medium = scores.quantile(0.5)
    # NOTE(review): astype(int) truncates, so the math.ceil below is a no-op;
    # confirm whether rounding up was the intended behaviour.
    medium = medium.astype(int)

    stand_comment = []
    df = standard_file
    for index in medium.index:
        score = math.ceil(medium[index])
        content = df[df['得分'] == score][index].to_string(index=False)
        stand_comment.append(content)
    stand_comment = ';'.join(stand_comment)

    # SECURITY: eval executes any expression embedded in ``rewrite_prompt``;
    # only pass trusted templates.
    prompt = 'f"""' + rewrite_prompt + '"""'
    r_comment = llm(eval(prompt))
    return r_comment


def quantile_summary(summary_prompt, assessments):
    """Summarize the comments whose scores lie between the quartiles.

    If the first element of ``assessments`` is not a dict, the items are
    joined with newlines as-is. Otherwise the 25% and 75% quantiles of every
    score column are computed (cast to int) and comments are selected in
    three fallback steps: rows where all columns lie strictly between the two
    quantiles, else rows where any column does, else every comment.

    Args:
        summary_prompt: f-string style template evaluated with this
            function's locals in scope (presumably it references
            ``comment`` -- confirm against the templates).
        assessments: Non-empty list of strings, or of dicts with 'scores' and
            'comment' keys.

    Returns:
        Whatever ``llm`` returns for the rendered prompt.
    """
    if not isinstance(assessments[0], dict):
        comment = '\n'.join(assessments)
    else:
        comments = [i['comment'] for i in assessments]
        comments = pd.Series(comments)
        scores = [i['scores'] for i in assessments]
        scores = pd.DataFrame(scores)
        try:
            quartiles = scores.quantile([0.25, 0.75])
        except Exception:
            # In case the values in one column are all None.
            scores = scores.fillna(0)
            quartiles = scores.quantile([0.25, 0.75])
        quartiles = quartiles.astype(int)

        # Strict inequalities: values equal to a quartile are excluded.
        up = (scores - quartiles.loc[0.75]) < 0
        down = (scores - quartiles.loc[0.25]) > 0
        select_index = (up & down).all(axis=1)
        if not select_index.any():
            select_index = (up & down).any(axis=1)
        if select_index.any():
            select_comments = comments[select_index].to_list()
        else:
            select_comments = comments.to_list()
        comment = '\n'.join(select_comments)

    # SECURITY: eval executes any expression embedded in ``summary_prompt``;
    # only pass trusted templates.
    prompt = 'f"""' + summary_prompt + '"""'
    s_comment = llm(eval(prompt))
    return s_comment