whoami02 committed on
Commit
5ac6f83
·
verified ·
1 Parent(s): b9bb47a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -28
app.py CHANGED
@@ -60,8 +60,9 @@ def get_vals(file_path, wh):
60
 
61
  def clean_dir(path):
62
  files = os.listdir(path=path)
63
- for i in range(1,len(files)+1):
64
- os.remove(f"{path}/{i}.jpg")
 
65
 
66
  def html_path(img, counter):
67
  img.save(f"{sub_img_temp}/{counter}.jpg")
@@ -71,40 +72,42 @@ def create_box(l): # l represents the bounds of box
71
  return (l[0], l[2], l[1], l[3])
72
 
73
  def process(filepath, regex, size=(1656,1170)):
74
- clean_dir(path=img_temp)
75
- clean_dir(path=sub_img_temp)
76
- img = Image.open(filepath)
77
- (width, height), parts, counter, dimensions, im_, values = img.size, [], 0, [], [], []
78
- for i in range(0, width, size[0]):
79
- for j in range(0, height, size[1]):
80
- counter += 1
81
- box = (i, j, i+size[0], j+size[1])
82
- img.crop(box).save(f"{img_temp}/{counter}.jpg")
83
- parts.append(img.crop(box))
84
- temp= os.listdir(path=img_temp) # temp represents a temporary variable that contains directory information
85
- if regex == 'Regex-1':
86
- pattern = re.compile(r"^\s\b\d+([\.,]\d+)?")
87
- else:
88
- pattern = re.compile(r"\d+")
 
89
 
90
- data = get_vals(img_temp, wh=math.floor(math.sqrt(len(temp))))
91
- counter, idx = 1, []
92
- for d in data:
93
- dimensions.append(ast.literal_eval(d.split(':')[0]))
94
- im_.append(html_path(img.crop(create_box(ast.literal_eval(d.split(':')[0]))), counter=counter))
95
- values.append(d.split(':')[1])
96
- counter += 1
97
- metadata = pd.DataFrame(zip(dimensions, im_, values), columns=['Coordinates','Image','Value'])
98
- df = metadata[metadata['Value'].str.contains(pattern)] #[img.size] moreover df is a chunk taken from metadata which contains the regex pattern.
99
 
100
- return df#.to_markdown()
101
 
102
  def main():
103
 
104
  demo = gr.Interface(
105
  fn=process,
106
  inputs=[gr.Image(type="filepath", interactive=True),gr.Dropdown(['Regex-1'])],
107
- outputs=gr.DataFrame(wrap=True, datatype = ["str", "markdown", "str"], interactive=True),
 
108
  title="OCR"
109
  )
110
  demo.launch(debug=True, show_error=True)
 
60
 
61
  def clean_dir(path):
62
  files = os.listdir(path=path)
63
+ return files
64
+ # for i in range(1,len(files)+1):
65
+ # os.remove(f"{path}/{i}.jpg")
66
 
67
  def html_path(img, counter):
68
  img.save(f"{sub_img_temp}/{counter}.jpg")
 
72
  return (l[0], l[2], l[1], l[3])
73
 
74
  def process(filepath, regex, size=(1656,1170)):
75
+ f1 = clean_dir(path=img_temp)
76
+ f2 = clean_dir(path=sub_img_temp)
77
+ return [f1, f2]
78
+ # img = Image.open(filepath)
79
+ # (width, height), parts, counter, dimensions, im_, values = img.size, [], 0, [], [], []
80
+ # for i in range(0, width, size[0]):
81
+ # for j in range(0, height, size[1]):
82
+ # counter += 1
83
+ # box = (i, j, i+size[0], j+size[1])
84
+ # img.crop(box).save(f"{img_temp}/{counter}.jpg")
85
+ # parts.append(img.crop(box))
86
+ # temp= os.listdir(path=img_temp) # temp represents a temporary variable that contains directory information
87
+ # if regex == 'Regex-1':
88
+ # pattern = re.compile(r"^\s\b\d+([\.,]\d+)?")
89
+ # else:
90
+ # pattern = re.compile(r"\d+")
91
 
92
+ # data = get_vals(img_temp, wh=math.floor(math.sqrt(len(temp))))
93
+ # counter, idx = 1, []
94
+ # for d in data:
95
+ # dimensions.append(ast.literal_eval(d.split(':')[0]))
96
+ # im_.append(html_path(img.crop(create_box(ast.literal_eval(d.split(':')[0]))), counter=counter))
97
+ # values.append(d.split(':')[1])
98
+ # counter += 1
99
+ # metadata = pd.DataFrame(zip(dimensions, im_, values), columns=['Coordinates','Image','Value'])
100
+ # df = metadata[metadata['Value'].str.contains(pattern)] #[img.size] moreover df is a chunk taken from metadata which contains the regex pattern.
101
 
102
+ # return df#.to_markdown()
103
 
104
  def main():
105
 
106
  demo = gr.Interface(
107
  fn=process,
108
  inputs=[gr.Image(type="filepath", interactive=True),gr.Dropdown(['Regex-1'])],
109
+ # outputs=gr.DataFrame(wrap=True, datatype = ["str", "markdown", "str"], interactive=True),
110
+ outputs = "list"
111
  title="OCR"
112
  )
113
  demo.launch(debug=True, show_error=True)