Spaces:
Running
on
Zero
Running
on
Zero
import argparse | |
import os | |
#from utils import * | |
from typing import Dict, List | |
from tqdm import tqdm | |
import re | |
class ParseError(Exception):
    """Error raised when a piece of source text cannot be parsed.

    Attributes:
        msg: Human-readable description of what failed to parse.
    """

    def __init__(self, msg):
        # Forward the message to Exception so that str(exc), exc.args and
        # tracebacks show it instead of an empty string.
        super().__init__(msg)
        self.msg = msg
# C preprocessor line that includes the clang-parser definitions header.
# The path is absolute and hard-coded.
# NOTE(review): presumably prepended to source text before it is handed to
# the clang-based parser -- confirm against the callers.
HEADER = '#include "/home/ReSym/clang-parser/defs.hh"\n'
def process_funname(raw_addr:str) -> str:
    """Normalize a function name to its bare identifier.

    ``'main'`` is returned unchanged, ``'sub_<suffix>'`` yields ``<suffix>``
    (e.g. ``sub_401220 -> 401220``), and any other name yields ``None``.
    """
    if raw_addr == 'main':
        return raw_addr
    found = re.search(r'^sub_([\w\d]+)$', raw_addr)
    return found.group(1) if found else None
def hex_to_decimal(hex_str : str) -> int:
    """Convert a bare hexadecimal string (optionally negative) to an int.

    Only hex digits with an optional leading ``-`` are accepted; anything
    else (including a ``0x`` prefix or an empty string) yields ``None``.
    """
    looks_like_hex = re.match(r'^-?[0-9a-fA-F]+$', hex_str) is not None
    return int(hex_str, 16) if looks_like_hex else None
def extract_comments(fun_content:List[str]) -> List[Dict]:
    """Collect variable declarations that carry a trailing ``//`` comment.

    Each line of ``fun_content`` is stripped and matched against
    ``<type> <name>; // <comment>``. Lines that do not match are skipped.
    For every match a dict is produced with the keys ``name``, ``type``,
    ``comment``, ``array_size``, ``ptr_level``, ``rbp_offset_hex``,
    ``rbp_offset_dec`` and ``original_line``.

    Single and double quotes in ``comment`` and ``original_line`` are
    replaced by backticks.
    """
    decl_re = re.compile(r'^(.+?\s+\**)(\S+);\s+\/\/(.*)$')  # <g1> <g2>; // <g3>
    rbp_re = re.compile(r'\[rbp(-[\d\w]+?)h\]')  # [rbp-<g1>h]
    array_re = re.compile(r'^(.*?)\[(\d+)\]$')  # <g1>[<g2>]

    def backtick(text):
        # Both quote styles become backticks.
        return text.replace('"', "`").replace("'", '`')

    records = []
    for raw in fun_content:
        stripped = raw.strip()
        decl = decl_re.match(stripped)
        if decl is None:
            continue
        type_part, name_part, note = (piece.strip() for piece in decl.groups())

        # A trailing "[N]" on the name marks an array of N elements.
        array_hit = array_re.match(name_part)
        if array_hit:
            name_part, count = array_hit.group(1), int(array_hit.group(2))
        else:
            count = None

        # The comment may carry a "[rbp-<hex>h]" stack offset.
        offset_hit = rbp_re.search(note)
        offset_hex = offset_hit.group(1) if offset_hit else None
        offset_dec = None if offset_hex is None else hex_to_decimal(offset_hex)

        # Asterisks left in the name are counted, then removed from it.
        records.append({
            'name': name_part.replace('*', ""),
            'type': type_part,
            'comment': backtick(note),
            'array_size': count,
            'ptr_level': name_part.count("*"),
            'rbp_offset_hex': offset_hex,
            'rbp_offset_dec': offset_dec,
            'original_line': backtick(stripped),
        })
    return records
def parse_signature(file_content:List[str], funname:str=None) -> List[Dict]:
    """Parse the argument list out of a function signature.

    The signature is searched for in (at most) the first three lines of
    ``file_content``.

    Args:
        file_content: Lines of the function source. A single string is also
            accepted and is split on newlines.
        funname: Name of the function to look for. When omitted, any
            ``sub_<suffix>`` name is accepted. ``main`` is always accepted.
            The value is interpolated into the regular expression as-is,
            so regex metacharacters in it keep their special meaning.

    Returns:
        One dict per argument, in declaration order, with the keys ``name``,
        ``type`` and ``original_line``. A variadic ``...`` argument only has
        ``name`` and ``original_line``. A lone ``void`` and an empty argument
        list both yield ``[]``.

    Raises:
        ParseError: If no signature is found in the first three lines, an
            argument cannot be split into type and name, or an argument
            name occurs twice.
    """
    if funname:
        pattern = r'(({})|main)\((.*?)\)'.format(funname)  # <g1>(<g3>)
    else:
        pattern = r'((sub_[\d\w]+)|main)\((.*?)\)'  # <g1>(<g3>)
    if isinstance(file_content, str):
        file_content = file_content.split('\n')

    # Slice rather than index so inputs shorter than three lines end in a
    # ParseError below instead of an IndexError.
    arglist = None
    for line in file_content[:3]:
        match = re.search(pattern, line)
        if match:
            arglist = match.group(3)
            break
    if arglist is None:
        raise ParseError('Fail to parse the signature.')

    arg_info = []
    if not arglist:
        return arg_info

    # "\b" keeps names that merely end in a<digits> (e.g. "data1") from being
    # split into type "int dat" and name "a1"; they fall through to
    # arg_pattern2 instead.
    arg_pattern = r'^(.*?)\b(a\d+)$' # xxxx a1: <g1><g2>
    arg_pattern2 = r'^((struct\s|const\s)?\w+?\s+\*?)(\w+)$' # (struct/const )?xxx *?<g3>
    # arg_info holds dicts, so names are tracked separately for the
    # duplicate check.
    seen_names = set()
    for raw_arg in arglist.split(','):
        arg = raw_arg.strip()
        if arg == '...':
            arg_info.append({
                'name': arg,
                'original_line': arg
            })
            continue
        if arg == 'void':
            continue
        arg_match = re.match(arg_pattern, arg)
        if arg_match:
            argtype, argname = arg_match.group(1).strip(), arg_match.group(2)
        else:
            arg_match = re.match(arg_pattern2, arg)
            if not arg_match:
                raise ParseError(f'Cannot find the declaration of argument {arg}.')
            argtype, argname = arg_match.group(1).strip(), arg_match.group(3)
        if argname in seen_names:
            raise ParseError(f'{argname} duplicate')
        seen_names.add(argname)
        arg_info.append({
            'name': argname,
            'type': argtype,
            'original_line': arg
        })
    return arg_info