Spaces:

LanguageBind
/

UniWorld-V1

Runtime error

UniWorld-V1 / univa /eval /gedit /viescore /utils.py

LinB203

init

0c8d55e 5 days ago

14.6 kB

	import os
	from typing import Union, List, Optional
	import json
	import regex as re
	import ast
	import random

	def fix_json(input_str):
	# Add double quotes around keys using regex
	fixed_str = re.sub(r'(\w+):', r'"\1":', input_str)

	# Add double quotes around string values if necessary and wrap int/float values in []
	def format_value(match):
	key, value, comma = match.groups()
	value = value.strip()
	# Check if value is an integer or float
	if re.match(r'^-?\d+(\.\d+)?$', value):
	value = f'[{value}]'
	# Check if value is a boolean or null
	elif re.match(r'^(true\|false\|null)$', value, re.IGNORECASE):
	pass # leave as is
	else:
	# Add quotes around string values
	value = f'"{value}"'
	return f'{key}: {value}{comma}'

	fixed_str = re.sub(r'(".?"):(.?)(,\|})', format_value, fixed_str)

	return fixed_str

	def read_file_to_string(file_path):
	"""
	Reads the contents of a text file and returns it as a string.

	:param file_path: The path to the text file.
	:return: A string containing the contents of the file.
	"""
	try:
	with open(file_path, 'r', encoding='utf-8') as file:
	return file.read()
	except FileNotFoundError:
	print(f"The file {file_path} was not found.")
	return None
	except Exception as e:
	print(f"An error occurred: {e}")
	return None

	def read_files_to_string(file_paths):
	"""
	Reads the contents of multiple text files and returns them as a single string,
	with each file's contents separated by a newline.

	:param file_paths: A list of paths to text files.
	:return: A string containing the concatenated contents of the files.
	"""
	all_contents = [] # List to hold the contents of each file

	for file_path in file_paths:
	try:
	with open(file_path, 'r', encoding='utf-8') as file:
	all_contents.append(file.read())
	except FileNotFoundError:
	print(f"The file {file_path} was not found.")
	except Exception as e:
	print(f"An error occurred while reading {file_path}: {e}")

	# Join all the contents with a newline character
	return "\n".join(all_contents)

	def get_file_path(filename: Union[str, os.PathLike], search_from: Union[str, os.PathLike] = "."):
	"""
	Search for a file across a directory and return its absolute path.

	Args:
	filename (Union[str, os.PathLike]): The name of the file to search for.
	search_from (Union[str, os.PathLike], optional): The directory from which to start the search. Defaults to ".".

	Returns:
	str: Absolute path to the found file.

	Raises:
	FileNotFoundError: If the file is not found.
	"""
	for root, dirs, files in os.walk(search_from):
	for name in files:
	if name == filename:
	return os.path.abspath(os.path.join(root, name))
	raise FileNotFoundError(filename, "not found.")



	#+=========================================================================================
	def verify(s, target_sequence):
	# Count the occurrences of the target sequence
	count = s.count(target_sequence)

	# Check if the target sequence appears exactly twice
	return count == 2


	def is_int_between_0_and_10(s):
	try:
	num = int(s)
	return 0 <= num <= 10
	except ValueError:
	return False

	def is_str_a_list_of_ints_0_to_10(s):
	try:
	# Attempt to parse the string as a Python literal (list, dict, etc.)
	parsed = ast.literal_eval(s)

	# Check if the parsed object is a list
	if not isinstance(parsed, list):
	return False

	# Check if all elements are integers and between 0 to 10
	return all(isinstance(item, int) and 0 <= item <= 10 for item in parsed)

	except (ValueError, SyntaxError):
	# If parsing fails or any other error occurs
	return False

	def is_str_valid_score_format_brackets(s):
	try:
	# Removing brackets and splitting the string by commas
	content = s.strip("[]").split(',')

	length = len(content)

	# Parsing each element and checking the format and range
	scores = {}
	for item in content:
	key, value = item.split(':')
	key = key.strip()
	value = int(value.strip())

	# Check if the key starts with 'score' and the value is in the correct range
	if not key.startswith("score") or not 0 <= value <= 10:
	return False

	scores[key] = value

	fetch_words = [f"score{i+1}" for i in range(length)]
	# Check if at least 'score1' and 'score2' are present
	return all(key in scores for key in fetch_words)

	except (ValueError, SyntaxError):
	# If any parsing error occurs
	return False


	#+=========================================================================================
	def mllm_output_to_dict(input_string, give_up_parsing=False):
	"""
	Args:
	input_string (str): actually the output of the mllm model to be parsed
	output_file_name (str): The name of the output file.
	"""
	# Catch for gpt4v rate_limit_exceeded error
	if input_string == "rate_limit_exceeded":
	return "rate_limit_exceeded"

	# Define the delimiters
	delimiter = '\|\|V^=^V\|\|'

	if input_string.count(delimiter) == 2:
	if not verify(input_string, delimiter):
	print("The required delimiters were not found correctly in the string.")
	return False
	# Extract the content between the delimiters
	start_index = input_string.find(delimiter) + len(delimiter)
	end_index = input_string.rfind(delimiter)
	else:
	# find the json mannually
	# some mllm tends not to output the delimiters, but it does output the json contents
	# so we will find the json content mannually
	start_index = input_string.find('{')
	end_index = input_string.rfind('}') + 1
	if start_index == -1 or end_index == 0:
	# json not found
	# some mllm tends to output only a list of scores like [6, 0],
	# this time we will just get the scores and ignore the reasoning (other part of the json)
	start_index = input_string.find('[')
	end_index = input_string.rfind(']') + 1
	if give_up_parsing: # if we want to give up parsing
	guessed_value = random.randint(0, 10)
	print(f"Failed to find the json content in the string. Guess a value : {guessed_value}.")
	json_content = {'score': [guessed_value], "reasoning": f"guess_if_cannot_parse \| {input_string}"}
	json_str = json.dumps(json_content)
	input_string = json_str
	start_index = 0
	end_index = len(json_str)
	elif re.match(r'^\[\d+, ?\d+\]$', input_string[start_index:end_index]):
	scores = json.loads(input_string[start_index:end_index])
	if not isinstance(scores, list):
	scores = [scores]
	json_content = {'score': scores, "reasoning": "System: output is simply a list of scores"}
	json_str = json.dumps(json_content)
	input_string = json_str
	start_index = 0
	end_index = len(json_str)
	elif is_int_between_0_and_10(input_string): # if output is simply a number
	scores = [int(input_string)]
	json_content = {'score': scores, "reasoning": "System: output is simply a number"}
	json_str = json.dumps(json_content)
	input_string = json_str
	start_index = 0
	end_index = len(json_str)
	else:
	print("Failed to find the json content in the string.")
	return False

	# Check if we found two delimiters
	if start_index != -1 and end_index != -1 and start_index != end_index:
	# Extract the JSON string
	json_str = input_string[start_index:end_index].strip()
	json_str = json_str.replace("\n", "")
	# Parse the JSON string into a dictionary
	try:
	new_data = json.loads(json_str)
	if not isinstance(new_data['score'], list):
	new_data['score'] = [new_data['score']]
	except:
	print("Now fixing: ", json_str)
	try:
	new_data = json.loads(fix_json(json_str))
	return new_data
	except:
	print("Error: Cannot fix", json_str)
	return False
	return new_data
	else:
	print("The required delimiters were not found correctly in the string.")
	return False

	def write_entry_to_json_file(input_string, uid, prompt_input, vision_input, output_file_name, give_up_parsing=False):
	"""
	Args:
	input_string (str): actually the output of the mllm model to be parsed
	uid (str): The unique identifier for the each item in the test data
	prompt_input (str): The prompt input for the entry. text prompt.
	vision_input (str): The vision input for the entry. image links.
	output_file_name (str): The name of the output file.
	"""
	# Catch for gpt4v rate_limit_exceeded error
	if input_string == "rate_limit_exceeded":
	return "rate_limit_exceeded"

	# Define the delimiters
	delimiter = '\|\|V^=^V\|\|'

	if input_string.count(delimiter) == 2:
	if not verify(input_string, delimiter):
	print("The required delimiters were not found correctly in the string.")
	return False
	# Extract the content between the delimiters
	start_index = input_string.find(delimiter) + len(delimiter)
	end_index = input_string.rfind(delimiter)
	else:
	# find the json mannually
	# some mllm tends not to output the delimiters, but it does output the json contents
	# so we will find the json content mannually
	start_index = input_string.find('{')
	end_index = input_string.rfind('}') + 1
	if start_index == -1 or end_index == 0:
	# json not found
	# some mllm tends to output only a list of scores like [6, 0],
	# this time we will just get the scores and ignore the reasoning (other part of the json)
	start_index = input_string.find('[')
	end_index = input_string.rfind(']') + 1
	if give_up_parsing: # if we want to give up parsing
	guessed_value = random.randint(0, 10)
	print(f"Failed to find the json content in the string. Guess a value : {guessed_value}.")
	json_content = {'score': [guessed_value], "reasoning": f"guess_if_cannot_parse \| {input_string}"}
	json_str = json.dumps(json_content)
	input_string = json_str
	start_index = 0
	end_index = len(json_str)
	elif re.match(r'^\[\d+, ?\d+\]$', input_string[start_index:end_index]):
	scores = json.loads(input_string[start_index:end_index])
	json_content = {'score': scores, "reasoning": None}
	json_str = json.dumps(json_content)
	input_string = json_str
	start_index = 0
	end_index = len(json_str)
	elif is_int_between_0_and_10(input_string): # if output is simply a number
	scores = [int(input_string)]
	json_content = {'score': scores, "reasoning": None}
	json_str = json.dumps(json_content)
	input_string = json_str
	start_index = 0
	end_index = len(json_str)
	else:
	print("Failed to find the json content in the string.")
	return False

	# Check if we found two delimiters
	if start_index != -1 and end_index != -1 and start_index != end_index:
	# Extract the JSON string
	json_str = input_string[start_index:end_index].strip()
	json_str = json_str.replace("\n", "")
	try:
	# Parse the JSON string into a dictionary
	new_data = json.loads(json_str)

	# Ensure the directory exists
	os.makedirs(os.path.dirname(output_file_name), exist_ok=True)

	# Initialize or load existing data
	if os.path.exists(output_file_name):
	with open(output_file_name, 'r') as json_file:
	data = json.load(json_file)
	else:
	data = {}

	# If the additional key is already in the data, add or update notes
	if uid in data:
	data[uid].update(new_data) # Update with new data
	if prompt_input: # If there are new notes, update or add them
	data[uid]['prompt_input'] = prompt_input
	if vision_input: # If there are new notes, update or add them
	data[uid]['vision_input'] = vision_input
	else:
	# If it's a new key, add the entry to the dictionary
	data[uid] = new_data
	if prompt_input:
	data[uid]['prompt_input'] = prompt_input
	if vision_input:
	data[uid]['vision_input'] = vision_input

	# Write the updated data to the file
	with open(output_file_name, 'w') as json_file:
	json.dump(data, json_file, indent=4)

	print(f"Data was successfully updated in {output_file_name}")
	return True
	except json.JSONDecodeError as e:
	print(f"An error occurred while parsing the JSON content: {e}")
	return False
	else:
	print("The required delimiters were not found correctly in the string.")
	return False


	def check_key_in_json(file_path, key):
	try:
	with open(file_path, 'r') as json_file:
	data = json.load(json_file)

	# Check if the key exists at the top level of the JSON structure
	if key in data:
	return True
	else:
	return False
	except FileNotFoundError:
	print(f"The file {file_path} was not found.")
	except json.JSONDecodeError as e:
	print(f"Error reading {file_path}: {e}")
	except Exception as e:
	print(f"An error occurred with {file_path}: {e}")
	return False