Meteor / eval /mathvista /utilities.py
BK-Lee's picture
v1
6957169
raw
history blame
6.07 kB
import os
import cv2
import json
import time
import pickle
import openai
import re
from word2number import w2n
def create_dir(output_dir):
    """Create ``output_dir`` and any missing parent directories.

    Calling this on a directory that already exists is a no-op.

    Args:
        output_dir: Path of the directory to create.

    Raises:
        FileExistsError: If ``output_dir`` exists but is not a directory.
    """
    # exist_ok=True avoids the check-then-create race of a separate
    # os.path.exists() test when several processes share an output folder.
    os.makedirs(output_dir, exist_ok=True)
def read_csv(file):
    """Return every line of ``file`` as a whitespace-stripped string.

    No CSV parsing is performed; each list element is one raw line with
    leading and trailing whitespace removed.
    """
    with open(file, 'r') as handle:
        return [row.strip() for row in handle]
def read_pandas_csv(csv_path):
    """Load the CSV file at ``csv_path`` and return it as a pandas DataFrame."""
    # pandas is imported inside the function, so it is only required when
    # this helper is actually called.
    import pandas
    return pandas.read_csv(csv_path)
def read_json(path):
    """Parse the UTF-8 encoded JSON file at ``path`` and return its content."""
    with open(path, 'r', encoding='utf-8') as handle:
        parsed = json.load(handle)
    return parsed
def read_jsonl(file):
    """Read a JSON Lines file and return its records as a list.

    Each non-blank line is parsed as one JSON document. Blank lines
    (including a trailing empty line at the end of the file) are skipped
    instead of raising a decode error.

    Args:
        file: Path of the ``.jsonl`` file, read as UTF-8 like ``read_json``.

    Returns:
        A list with one parsed object per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    records = []
    with open(file, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if line:
                records.append(json.loads(line))
    return records
def read_pickle(path):
    """Unpickle and return the object stored in the file at ``path``.

    NOTE: unpickling can execute arbitrary code, so only use this on files
    from a trusted source.
    """
    with open(path, 'rb') as handle:
        obj = pickle.load(handle)
    return obj
def save_json(data, path):
    """Serialize ``data`` as JSON, indented by 4 spaces, into the file ``path``.

    An existing file at ``path`` is overwritten.
    """
    with open(path, 'w') as sink:
        json.dump(data, sink, indent=4)
def save_array_img(path, image):
    """Write ``image`` to the file ``path`` via ``cv2.imwrite``.

    The value returned by ``cv2.imwrite`` is discarded; this function
    always returns ``None``.
    """
    cv2.imwrite(path, image)
def contains_digit(text):
    """Return True if at least one character of ``text`` is a digit."""
    for ch in text:
        if ch.isdigit():
            return True
    return False
def contains_number_word(text):
    """Return True if ``text`` mentions a number, spelled out or as digits.

    Every word of ``text`` except "a", "an" and "point" is handed to
    ``w2n.word_to_num``; the first word it accepts without raising
    ``ValueError`` makes the result True. If no word qualifies, the text is
    still considered numeric when it contains any digit character.
    """
    skipped = ["a", "an", "point"]
    # \b\w+\b extracts each run of word characters as one token.
    for token in re.findall(r'\b\w+\b', text):
        if token not in skipped:
            try:
                w2n.word_to_num(token)
            except ValueError:
                # Not a number word; try the next token.
                pass
            else:
                return True
    # Fallback: look for a digit character anywhere in the text.
    for ch in text:
        if ch.isdigit():
            return True
    return False
def contains_quantity_word(text, special_keep_words=None):
    """Return True if ``text`` contains a quantity-related word.

    The text is split into words and each word is compared, case-sensitively,
    against a fixed vocabulary of comparison, aggregation, arithmetic and
    shape terms.

    Args:
        text: The string to inspect.
        special_keep_words: Optional iterable of extra, dataset-specific
            words that should also count as quantity words.

    Returns:
        True if any word of ``text`` is in the vocabulary, else False.
    """
    # NOTE: the comma after "fewest" matters. Without it Python concatenates
    # the adjacent literals into "fewestmore" and neither word is detected.
    quantity_words = {
        "most", "least", "fewest",
        "more", "less", "fewer",
        "largest", "smallest", "greatest",
        "larger", "smaller", "greater",
        "highest", "lowest", "higher", "lower",
        "increase", "decrease",
        "minimum", "maximum", "max", "min",
        "mean", "average", "median",
        "total", "sum", "add", "subtract",
        "difference", "quotient", "gap",
        "half", "double", "twice", "triple",
        "square", "cube", "root",
        "approximate", "approximation",
        "triangle", "rectangle", "circle", "sphere", "cylinder", "cone", "pyramid",
        "multiply", "divide",
        "percentage", "percent", "ratio", "proportion", "fraction", "rate",
    }
    if special_keep_words:
        quantity_words.update(special_keep_words)  # dataset specific words
    words = re.findall(r'\b\w+\b', text)  # every run of word characters
    return any(word in quantity_words for word in words)
def is_bool_word(text):
    """Return True if ``text`` is exactly a yes/no/true/false token.

    Only the lowercase, Capitalized and UPPERCASE spellings are accepted;
    other casings and surrounding whitespace do not match.
    """
    accepted = (
        "Yes", "No", "True", "False",
        "yes", "no", "true", "false",
        "YES", "NO", "TRUE", "FALSE",
    )
    return text in accepted
def is_digit_string(text):
    """Return True if ``text`` represents an integer.

    Surrounding whitespace is ignored and a trailing all-zero fraction such
    as ".0000" is dropped before the value is tried with ``int()``.
    """
    candidate = re.sub(r'\.0+$', '', text.strip())
    try:
        int(candidate)
    except ValueError:
        return False
    return True
def is_float_string(text):
    """Return True if ``text`` contains a "." and parses with ``float()``."""
    if "." not in text:
        return False
    try:
        float(text)
    except ValueError:
        return False
    return True
def copy_image(image_path, output_image_path):
    """Copy the file at ``image_path`` to ``output_image_path``.

    The copy is done with ``shutil.copyfile``; nothing is returned.
    """
    import shutil
    shutil.copyfile(image_path, output_image_path)
def copy_dir(src_dir, dst_dir):
    """Recursively copy the directory tree ``src_dir`` to ``dst_dir``.

    The copy is done with ``shutil.copytree``; nothing is returned.
    """
    import shutil
    shutil.copytree(src_dir, dst_dir)
import PIL.Image as Image
def get_image_size(img_path):
    """Return the ``(width, height)`` of the image stored at ``img_path``.

    Args:
        img_path: Path of an image file that ``PIL.Image.open`` can read.

    Returns:
        A ``(width, height)`` tuple taken from the image's ``size``.
    """
    # Open through a context manager so the underlying file handle is
    # closed promptly rather than left open until garbage collection.
    with Image.open(img_path) as img:
        width, height = img.size
    return width, height
def get_chat_response(promot, api_key, model="gpt-3.5-turbo", temperature=0, max_tokens=256, n=1, patience=10000000,
                      sleep_time=0):
    """Query ``openai.ChatCompletion.create`` until a non-empty reply arrives.

    ``promot`` is sent as a single user message. The request is retried up to
    ``patience`` times. Any exception raised during an attempt is caught:
    it is printed unless its text contains "Rate limit", and when its text
    asks to reduce the message length the prompt is cut to its last 90%
    before the next attempt. After a failed attempt the function sleeps
    ``sleep_time`` seconds when that value is positive.

    Returns:
        For ``n == 1`` the stripped content of the first choice; otherwise a
        list with the stripped content of every choice (accepted when its
        first entry is non-empty). An empty string is returned once
        ``patience`` is exhausted.
    """
    messages = [{"role": "user", "content": promot}]
    while patience > 0:
        patience -= 1
        try:
            response = openai.ChatCompletion.create(
                model=model,
                messages=messages,
                api_key=api_key,
                temperature=temperature,
                max_tokens=max_tokens,
                n=n,
            )
            choices = response['choices']
            if n == 1:
                prediction = choices[0]['message']['content'].strip()
                usable = prediction != "" and prediction is not None
            else:
                prediction = [choice['message']['content'].strip() for choice in choices]
                usable = prediction[0] != "" and prediction[0] is not None
            if usable:
                return prediction
        except Exception as e:
            error_text = str(e)
            if "Rate limit" not in error_text:
                print(e)
            if "Please reduce the length of the messages" in error_text:
                print("!!Reduce promot size")
                # Shrink the prompt to 90% of its length, keeping the tail.
                kept = int(len(promot) * 0.9)
                promot = promot[len(promot) - kept:]
                messages = [{"role": "user", "content": promot}]
            if sleep_time > 0:
                time.sleep(sleep_time)
    return ""