huamnifierWithSimpleGrammer / utils /filter_brackets.py
“[shujaatalishariati]”
Initial commit for Gradio app with GECToR
847e3e1
raw
history blame
1.02 kB
import argparse
import re
from helpers import write_lines
def filter_line(line):
    """Clean the ``-LRB-`` / ``-RRB-`` bracket placeholder tokens out of a line.

    Behaviour depends on which tokens occur in ``line``:

    * Both present: every span running from a ``-LRB-`` to the nearest
      following ``-RRB-`` is deleted, tokens included. Text outside those
      spans is left untouched (surrounding whitespace is not collapsed).
      If no ``-LRB-`` is followed by a ``-RRB-``, nothing is removed.
    * Only one kind present: each occurrence of that token is replaced by
      a double-quote character.
    * Neither present: the line is returned as is.

    Args:
        line: The text of a single line.

    Returns:
        The cleaned line as a string.
    """
    has_left = "-LRB-" in line
    has_right = "-RRB-" in line

    if has_left and has_right:
        # Anchor on the literal tokens. The previous pattern started at *any*
        # hyphen that preceded "LRB", so a hyphenated word ahead of the
        # bracket (e.g. "well-known -LRB- ... -RRB-") had its tail deleted
        # together with the bracketed span.
        return re.sub(r'-LRB-.*?-RRB-', '', line)

    if has_left or has_right:
        # Unbalanced bracket token: substitute a double quote for it.
        return line.replace("-LRB-", '"').replace("-RRB-", '"')

    return line
def main(args):
    """Filter every line of the source file and write the result.

    Reads ``args.source`` line by line, strips trailing whitespace from each
    line, passes each one through ``filter_line`` and hands the resulting
    list to ``write_lines`` together with ``args.output``.

    Args:
        args: Object exposing ``source`` and ``output`` attributes (the
            parsed command-line arguments).
    """
    with open(args.source) as source_file:
        # rstrip() drops the trailing newline along with any other
        # trailing whitespace.
        stripped_rows = list(map(str.rstrip, source_file))

    cleaned_rows = list(map(filter_line, stripped_rows))
    write_lines(args.output, cleaned_rows)
if __name__ == '__main__':
    # Command-line entry point: both the input and output paths are required.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '-s', '--source',
        required=True,
        help='Path to the source file',
    )
    cli.add_argument(
        '-o', '--output',
        required=True,
        help='Path to the output file',
    )
    args = cli.parse_args()
    main(args)