# LearnAndThink / mock_data.py
import random
import math

import pandas as pd
import gradio as gr
import numpy as np
from typing import List

from llm import OpenAI3

llm = OpenAI3()


def _scores_recalculate(scores: List) -> List:
    """Expand `scores` into a weighted sampling pool: scores close to the mean are
    repeated more often, so random.choice() over the result is biased toward
    mid-range scores."""
    scores_array = np.array(scores)
    # Distance of each score from the mean; smaller distances get larger repeat counts.
    scores_dis = abs(scores_array - scores_array.mean())
    scores_count = np.ceil(1 / (scores_dis + scores_dis.mean()) * len(scores)).astype(int)
    new_scores = []
    for i in range(len(scores)):
        new_scores.extend([scores[i]] * scores_count[i])
    return new_scores


def generate_scores_and_comments(standard_file_path, rewrite_prompt, topic, num=10):
    """Generate `num` mock assessments (per-criterion scores plus an LLM-rewritten
    comment) from a rubric spreadsheet that has one column per criterion and a
    '得分' (score) column."""
    df = pd.read_excel(standard_file_path)
    pd.set_option('display.max_colwidth', None)
    standard_index = df.columns.to_list()
    standard_index.pop(standard_index.index('得分'))
    standard_scores = list(tuple(df['得分'].to_list()))
    scores_choice = _scores_recalculate(standard_scores)
    assessments = []
    for i in range(num):
        assessment = dict()
        scores = dict()
        stand_comment = []
        for index in standard_index:
            # Sample a score for this criterion and look up the matching rubric text.
            score = random.choice(scores_choice)
            scores[index] = score
            content = df[df['得分'] == score][index].to_string(index=False)
            stand_comment.append(content)
        mean_score = np.array(list(scores.values())).mean()  # currently not attached to the output
        # scores['mean_score'] = mean_score
        assessment['scores'] = scores
        stand_comment = '；'.join(stand_comment)
        # Render the prompt template as an f-string so it can reference local variables
        # such as `topic` and `stand_comment`, then ask the LLM to rewrite the comment.
        prompt = 'f"""' + rewrite_prompt + '"""'
        r_comment = llm(eval(prompt))
        r_comment = r_comment.split('\n')[-1]  # keep only the last line of the model reply
        assessment['comment'] = r_comment
        assessments.append(assessment)
    return assessments, df


def medium_score_rewrite(standard_file, rewrite_prompt, topic, assessments):
    """Take the median score per criterion across `assessments`, look up the matching
    rubric text in `standard_file` (the rubric DataFrame returned by
    generate_scores_and_comments), and have the LLM rewrite it into one comment."""
    scores = [i['scores'] for i in assessments]
    scores = pd.DataFrame(scores)
    try:
        medium = scores.quantile(0.5)
    except Exception:  # in case the values in one column are all None
        scores = scores.fillna(0)
        medium = scores.quantile(0.5)
    medium = medium.astype(int)
    stand_comment = []
    df = standard_file
    for index in medium.index:
        score = math.ceil(medium[index])
        content = df[df['得分'] == score][index].to_string(index=False)
        stand_comment.append(content)
    s_comment = ';'.join(stand_comment)
    # Render the prompt template as an f-string so it can reference locals such as
    # `topic` and `s_comment`.
    prompt = 'f"""' + rewrite_prompt + '"""'
    r_comment = llm(eval(prompt))
    return r_comment


def quantile_summary(summary_prompt, assessments):
    """Summarize the comments whose scores fall inside the interquartile range; if
    nothing qualifies, fall back to all comments. `assessments` may also be a plain
    list of comment strings."""
    if not isinstance(assessments[0], dict):
        comment = '\n'.join(assessments)
    else:
        comments = [i['comment'] for i in assessments]
        comments = pd.Series(comments)
        scores = [i['scores'] for i in assessments]
        scores = pd.DataFrame(scores)
        try:
            quartiles = scores.quantile([0.25, 0.75])
        except Exception:  # in case the values in one column are all None
            scores = scores.fillna(0)
            quartiles = scores.quantile([0.25, 0.75])
        quartiles = quartiles.astype(int)
        up = (scores - quartiles.loc[0.75]) < 0    # strictly below the upper quartile
        down = (scores - quartiles.loc[0.25]) > 0  # strictly above the lower quartile
        # Prefer assessments inside the IQR on every criterion, then on any criterion,
        # then fall back to all comments.
        select_index = (up & down).all(axis=1)
        if not select_index.any():
            select_index = (up & down).any(axis=1)
        if select_index.any():
            select_comments = comments[select_index].to_list()
        else:
            select_comments = comments.to_list()
        comment = '\n'.join(select_comments)
    # Render the prompt template as an f-string so it can reference `comment`.
    prompt = 'f"""' + summary_prompt + '"""'
    s_comment = llm(eval(prompt))
    return s_comment
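

if __name__ == "__main__":
    # Minimal usage sketch (an assumption, not part of the original module). The Excel
    # path, topic, and prompt templates below are hypothetical placeholders; each template
    # is rendered with eval() as an f-string inside the function it is passed to, so it
    # may only reference variables that are local to that function when llm() is called
    # (stand_comment in generate_scores_and_comments, s_comment in medium_score_rewrite,
    # comment in quantile_summary). Running this also requires the OpenAI3 backend from
    # llm.py to be configured.
    rubric_path = "standard.xlsx"  # hypothetical rubric spreadsheet with a '得分' column
    topic = "classroom teaching"
    gen_prompt = ("Rewrite these rubric comments about {topic} "
                  "into one fluent comment: {stand_comment}")
    median_prompt = ("Rewrite these rubric comments about {topic} "
                     "into one fluent comment: {s_comment}")
    summary_prompt = "Summarize the following comments into a short paragraph: {comment}"

    assessments, rubric_df = generate_scores_and_comments(rubric_path, gen_prompt, topic, num=5)
    print(medium_score_rewrite(rubric_df, median_prompt, topic, assessments))
    print(quantile_summary(summary_prompt, assessments))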