File size: 1,195 Bytes
ee7776a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import os
import re
from utils import dotdict


def remove_annotations(file_path):
    """Return the text of a source file with C-style comments removed.

    Both ``// ...`` single-line comments and ``/* ... */`` multi-line
    comments are dropped. Comment markers that appear inside a single- or
    double-quoted string literal (for example ``"http://example.com"``) are
    left alone, and a ``//`` inside a ``/* ... */`` comment no longer hides
    the closing ``*/``.

    Args:
        file_path: Path of the file to read; it is decoded as UTF-8.

    Returns:
        The file contents without comments, with runs of blank lines
        collapsed and leading/trailing whitespace stripped.
    """
    # read the file
    with open(file_path, 'r', encoding="utf8") as f:
        data = f.read()

    # Scan once, left to right. Whichever construct starts first wins, so a
    # quote inside a comment cannot open a string and a comment marker inside
    # a string cannot open a comment. Stripping the two comment styles in
    # separate passes cannot guarantee that.
    token = re.compile(
        r'"(?:\\.|[^"\\\n])*"'      # double-quoted string literal (kept)
        r"|'(?:\\.|[^'\\\n])*'"     # single-quoted string literal (kept)
        r"|/\*[\s\S]*?\*/"          # multi-line comment (dropped)
        r"|//[^\n]*"                # single-line comment (dropped)
    )

    def _strip_comment(match):
        text = match.group(0)
        # String literals start with a quote; everything else is a comment.
        return text if text[0] in "\"'" else ""

    data = token.sub(_strip_comment, data)
    data = re.sub(r"\n\s*\n", "\n", data)  # remove empty lines
    data = data.strip()  # remove leading/trailing whitespace

    return data

if __name__ == '__main__':
    # Script entry point: strip comments from every ".sol" file found in
    # data/CVE and write the cleaned copy, under the same file name, to
    # data/CVE_clean.
    output_dir = "data/CVE_clean"
    # Create the destination up front so the first write cannot fail just
    # because the directory has not been created yet.
    os.makedirs(output_dir, exist_ok=True)

    for filename in os.listdir("data/CVE"):
        if not filename.endswith(".sol"):
            continue
        filepath = f"data/CVE/{filename}"
        content = remove_annotations(filepath)
        new_filepath = f"{output_dir}/{filename}"

        # remove_annotations decodes the input as UTF-8; write with the same
        # encoding so non-ASCII text survives regardless of the platform's
        # default encoding.
        with open(new_filepath, 'w', encoding="utf8") as f:
            f.write(content)


def mainfnc(data_dir, source_dir="data/CVE"):
    """Write comment-free copies of the ".sol" files in a directory.

    Every file in ``source_dir`` whose name ends with ".sol" is passed
    through ``remove_annotations`` and the result is written to ``data_dir``
    under the same file name. Other files are skipped.

    Args:
        data_dir: Directory that receives the cleaned files. It is created
            if it does not exist yet.
        source_dir: Directory that is scanned for input files. Defaults to
            "data/CVE", the location this function has always read from.
    """
    # Make sure the destination exists before the first write.
    os.makedirs(data_dir, exist_ok=True)

    for filename in os.listdir(source_dir):
        if not filename.endswith(".sol"):
            continue
        content = remove_annotations(os.path.join(source_dir, filename))

        # The input is decoded as UTF-8, so write with the same encoding
        # instead of the platform default to keep non-ASCII text intact.
        with open(os.path.join(data_dir, filename), 'w', encoding="utf8") as f:
            f.write(content)