"""Regex-based helpers for pulling local-variable declarations and function
arguments out of C-like function text (names such as ``sub_401220`` / ``a1``)."""
import argparse
import os
import re
#from utils import *
from typing import Dict, List, Optional, Union

try:
    from tqdm import tqdm
except ImportError:  # tqdm is only a progress bar; the parsers below do not need it
    def tqdm(iterable=None, *args, **kwargs):
        """Pass-through stand-in used when tqdm is not installed."""
        return iterable


class ParseError(Exception):
    """Raised when a signature or one of its arguments cannot be parsed.

    The message is available both as ``str(exc)`` and as ``exc.msg``.
    """

    def __init__(self, msg):
        super().__init__(msg)
        self.msg = msg


HEADER = '#include "/home/ReSym/clang-parser/defs.hh"\n'

# sub_401220 -> captures "401220"
_SUB_NAME_RE = re.compile(r'^sub_([\w\d]+)$')
# Optionally negative hexadecimal literal without a "0x" prefix.
_HEX_RE = re.compile(r'^-?[0-9a-fA-F]+$')
# "<type> <name>; // <comment>"
_VAR_DECL_RE = re.compile(r'^(.+?\s+\**)(\S+);\s+\/\/(.*)$')
# "[rbp-<hex>h]" inside the trailing comment
_RBP_OFFSET_RE = re.compile(r'\[rbp(-[\d\w]+?)h\]')
# "<name>[<size>]"
_ARRAY_NAME_RE = re.compile(r'^(.*?)\[(\d+)\]$')
# "<type> a<N>"
_ARG_RE = re.compile(r'^(.*?)(a\d+)$')
# "(struct |const )?<type> *?<name>"
_ARG_FALLBACK_RE = re.compile(r'^((struct\s|const\s)?\w+?\s+\*?)(\w+)$')

# How many leading lines of the function text are searched for the signature.
_SIGNATURE_SEARCH_LINES = 3


def _neutralize_quotes(text: str) -> str:
    """Replace both single and double quotes in *text* with backticks."""
    return text.replace('"', "`").replace("'", '`')


def process_funname(raw_addr: str) -> Optional[str]:
    """Normalise a function name.

    ``'main'`` is returned unchanged, ``'sub_401220'`` becomes ``'401220'``,
    and anything else yields ``None``.
    """
    if raw_addr == 'main':
        return raw_addr
    match = _SUB_NAME_RE.search(raw_addr)
    return match.group(1) if match else None


def hex_to_decimal(hex_str: str) -> Optional[int]:
    """Convert a (possibly negative) hex string such as ``'-18'`` to an int.

    Returns ``None`` when *hex_str* is not made up solely of hex digits with
    an optional leading minus sign.
    """
    if not _HEX_RE.match(hex_str):
        return None
    return int(hex_str, 16)


def extract_comments(fun_content: List[str]) -> List[Dict]:
    """Collect variable declarations that carry a trailing ``//`` comment.

    Each line of *fun_content* shaped like ``<type> <name>; // <comment>``
    produces one dict with the keys ``name``, ``type``, ``comment``,
    ``array_size``, ``ptr_level``, ``rbp_offset_hex``, ``rbp_offset_dec`` and
    ``original_line``.  Lines that do not match are skipped.  Quotes in
    ``comment`` and ``original_line`` are replaced with backticks.
    """
    var_decl_info = []
    for line in fun_content:
        stripped = line.strip()
        decl = _VAR_DECL_RE.match(stripped)
        if not decl:
            continue

        var_type = decl.group(1).strip()
        var_name = decl.group(2).strip()
        comment = decl.group(3).strip()

        # Split "name[size]" into the bare name and the integer size.
        array_size = None
        array_match = _ARRAY_NAME_RE.match(var_name)
        if array_match:
            var_name = array_match.group(1)
            array_size = int(array_match.group(2))

        # The rbp-relative offset, when present, lives in the comment.
        rbp_offset = None
        rbp_offset_dec = None
        offset_match = _RBP_OFFSET_RE.search(comment)
        if offset_match:
            rbp_offset = offset_match.group(1)
            rbp_offset_dec = hex_to_decimal(rbp_offset)

        # NOTE(review): the declaration regex puts leading '*' into the type
        # group, so stars only remain in the name for forms like "(*v5)[4]" -
        # confirm whether ptr_level is meant to count the stars in the type.
        ptr_level = var_name.count("*")
        var_name = var_name.replace('*', "")

        var_decl_info.append({
            'name': var_name,
            'type': var_type,
            'comment': _neutralize_quotes(comment),
            'array_size': array_size,
            'ptr_level': ptr_level,
            'rbp_offset_hex': rbp_offset,
            'rbp_offset_dec': rbp_offset_dec,
            'original_line': _neutralize_quotes(stripped),
        })
    return var_decl_info


def parse_signature(file_content: Union[str, List[str]],
                    funname: Optional[str] = None) -> List[Dict]:
    """Parse the argument list of the function defined in *file_content*.

    Args:
        file_content: The function text, either as one string (split on
            newlines) or as a list of lines.  Only the first three lines are
            searched for the signature.
        funname: Literal function name to look for.  When omitted, any
            ``sub_<...>`` name is accepted.  ``main`` is always accepted.

    Returns:
        One dict per argument with the keys ``name``, ``type`` and
        ``original_line``.  A variadic ``...`` yields a dict with only
        ``name`` and ``original_line``; ``void`` is skipped.  An empty
        argument list yields ``[]``.

    Raises:
        ParseError: If no signature is found, an argument cannot be split
            into type and name, or an argument name occurs twice.
    """
    if funname:
        # Escape the name so characters such as '+' or '.' match literally.
        pattern = r'(({})|main)\((.*?)\)'.format(re.escape(funname))
    else:
        pattern = r'((sub_[\d\w]+)|main)\((.*?)\)'

    if isinstance(file_content, str):
        file_content = file_content.split('\n')

    # Slicing (rather than indexing 0..2) keeps inputs shorter than three
    # lines from raising IndexError; they fall through to ParseError instead.
    for line in file_content[:_SIGNATURE_SEARCH_LINES]:
        match = re.search(pattern, line)
        if match:
            arglist = match.group(3)
            break
    else:
        raise ParseError('Fail to parse the signature.')

    arg_info = []
    if not arglist:
        return arg_info

    seen_names = set()
    for raw_arg in arglist.split(','):
        arg = raw_arg.strip()
        if arg == '...':
            arg_info.append({'name': arg, 'original_line': arg})
            continue
        if arg == 'void':
            continue

        arg_match = _ARG_RE.match(arg)
        if arg_match:
            argtype, argname = arg_match.group(1).strip(), arg_match.group(2)
        else:
            arg_match = _ARG_FALLBACK_RE.match(arg)
            if not arg_match:
                raise ParseError(f'Cannot find the declaration of argument {arg}.')
            argtype, argname = arg_match.group(1).strip(), arg_match.group(3)

        # arg_info holds dicts, so membership must be tested against names.
        if argname in seen_names:
            raise ParseError(f'{argname} duplicate')
        seen_names.add(argname)

        arg_info.append({
            'name': argname,
            'type': argtype,
            'original_line': arg,
        })
    return arg_info