import os import cv2 import json import time import pickle import openai import re from word2number import w2n def create_dir(output_dir): if not os.path.exists(output_dir): os.makedirs(output_dir) def read_csv(file): data = [] with open(file, 'r') as f: for line in f: data.append(line.strip()) return data def read_pandas_csv(csv_path): # read a pandas csv sheet import pandas as pd df = pd.read_csv(csv_path) return df def read_json(path): with open(path, 'r', encoding='utf-8') as f: return json.load(f) def read_jsonl(file): with open(file, 'r') as f: data = [json.loads(line) for line in f] return data def read_pickle(path): with open(path, 'rb') as f: return pickle.load(f) def save_json(data, path): with open(path, 'w') as f: json.dump(data, f, indent=4) def save_array_img(path, image): cv2.imwrite(path, image) def contains_digit(text): # check if text contains a digit if any(char.isdigit() for char in text): return True return False def contains_number_word(text): # check if text contains a number word ignore_words = ["a", "an", "point"] words = re.findall(r'\b\w+\b', text) # This regex pattern matches any word in the text for word in words: if word in ignore_words: continue try: w2n.word_to_num(word) return True # If the word can be converted to a number, return True except ValueError: continue # If the word can't be converted to a number, continue with the next word # check if text contains a digit if any(char.isdigit() for char in text): return True return False # If none of the words could be converted to a number, return False def contains_quantity_word(text, special_keep_words=[]): # check if text contains a quantity word quantity_words = ["most", "least", "fewest" "more", "less", "fewer", "largest", "smallest", "greatest", "larger", "smaller", "greater", "highest", "lowest", "higher", "lower", "increase", "decrease", "minimum", "maximum", "max", "min", "mean", "average", "median", "total", "sum", "add", "subtract", "difference", "quotient", "gap", "half", "double", "twice", "triple", "square", "cube", "root", "approximate", "approximation", "triangle", "rectangle", "circle", "square", "cube", "sphere", "cylinder", "cone", "pyramid", "multiply", "divide", "percentage", "percent", "ratio", "proportion", "fraction", "rate", ] quantity_words += special_keep_words # dataset specific words words = re.findall(r'\b\w+\b', text) # This regex pattern matches any word in the text if any(word in quantity_words for word in words): return True return False # If none of the words could be converted to a number, return False def is_bool_word(text): if text in ["Yes", "No", "True", "False", "yes", "no", "true", "false", "YES", "NO", "TRUE", "FALSE"]: return True return False def is_digit_string(text): # remove ".0000" text = text.strip() text = re.sub(r'\.0+$', '', text) try: int(text) return True except ValueError: return False def is_float_string(text): # text is a float string if it contains a "." and can be converted to a float if "." in text: try: float(text) return True except ValueError: return False return False def copy_image(image_path, output_image_path): from shutil import copyfile copyfile(image_path, output_image_path) def copy_dir(src_dir, dst_dir): from shutil import copytree # copy the source directory to the target directory copytree(src_dir, dst_dir) import PIL.Image as Image def get_image_size(img_path): img = Image.open(img_path) width, height = img.size return width, height def get_chat_response(promot, api_key, model="gpt-3.5-turbo", temperature=0, max_tokens=256, n=1, patience=10000000, sleep_time=0): messages = [ {"role": "user", "content": promot}, ] # print("I am here") while patience > 0: patience -= 1 try: response = openai.ChatCompletion.create(model=model, messages=messages, api_key=api_key, temperature=temperature, max_tokens=max_tokens, n=n) if n == 1: prediction = response['choices'][0]['message']['content'].strip() if prediction != "" and prediction != None: return prediction else: prediction = [choice['message']['content'].strip() for choice in response['choices']] if prediction[0] != "" and prediction[0] != None: return prediction except Exception as e: if "Rate limit" not in str(e): print(e) if "Please reduce the length of the messages" in str(e): print("!!Reduce promot size") # reduce input prompt and keep the tail new_size = int(len(promot) * 0.9) new_start = len(promot) - new_size promot = promot[new_start:] messages = [ {"role": "user", "content": promot}, ] if sleep_time > 0: time.sleep(sleep_time) return ""