Ki-Seki committed on
Commit
0269e89
·
1 Parent(s): accf5a5

chore: update

Browse files
Files changed (2) hide show
  1. app.py +6 -3
  2. autotab.py +28 -16
app.py CHANGED
@@ -13,6 +13,7 @@ def auto_tabulator_completion(
13
  max_examples: int,
14
  model_name: str,
15
  generation_config: dict,
 
16
  save_every: int,
17
  api_key: str,
18
  base_url: str,
@@ -20,14 +21,15 @@ def auto_tabulator_completion(
20
  output_file_name = "ouput.xlsx"
21
  autotab = AutoTab(
22
  in_file_path=in_file_path,
23
- instruction=instruction,
24
  out_file_path=output_file_name,
 
25
  max_examples=max_examples,
26
  model_name=model_name,
27
- api_key=api_key,
28
- base_url=base_url,
29
  generation_config=json.loads(generation_config),
 
30
  save_every=save_every,
 
 
31
  )
32
  start = time.time()
33
  autotab.run()
@@ -49,6 +51,7 @@ inputs = [
49
  value='{"temperature": 0, "max_tokens": 128}',
50
  label="Generation Config in Dict",
51
  ),
 
52
  gr.Slider(value=100, minimum=1, maximum=1000, step=1, label="Save Every N Steps"),
53
  gr.Textbox(
54
  value="sk-exhahhjfqyanmwewndukcqtrpegfdbwszkjucvcpajdufiah", label="API Key"
 
13
  max_examples: int,
14
  model_name: str,
15
  generation_config: dict,
16
+ request_interval: float,
17
  save_every: int,
18
  api_key: str,
19
  base_url: str,
 
21
  output_file_name = "ouput.xlsx"
22
  autotab = AutoTab(
23
  in_file_path=in_file_path,
 
24
  out_file_path=output_file_name,
25
+ instruction=instruction,
26
  max_examples=max_examples,
27
  model_name=model_name,
 
 
28
  generation_config=json.loads(generation_config),
29
+ request_interval=request_interval,
30
  save_every=save_every,
31
+ api_key=api_key,
32
+ base_url=base_url,
33
  )
34
  start = time.time()
35
  autotab.run()
 
51
  value='{"temperature": 0, "max_tokens": 128}',
52
  label="Generation Config in Dict",
53
  ),
54
+ gr.Slider(value=0.01, minimum=0, maximum=10, label="Request Interval in Seconds"),
55
  gr.Slider(value=100, minimum=1, maximum=1000, step=1, label="Save Every N Steps"),
56
  gr.Textbox(
57
  value="sk-exhahhjfqyanmwewndukcqtrpegfdbwszkjucvcpajdufiah", label="API Key"
autotab.py CHANGED
@@ -1,4 +1,5 @@
1
  import re
 
2
 
3
  import openai
4
  import pandas as pd
@@ -18,7 +19,6 @@ class AutoTab:
18
  save_every: int,
19
  api_key: str,
20
  base_url: str,
21
-
22
  ):
23
  self.in_file_path = in_file_path
24
  self.out_file_path = out_file_path
@@ -71,7 +71,6 @@ class AutoTab:
71
  for col in output_columns
72
  )
73
  in_context += "\n"
74
- self.in_context = in_context
75
  return in_context
76
 
77
  def predict_output(
@@ -104,19 +103,32 @@ class AutoTab:
104
 
105
  # ─── Engine ───────────────────────────────────────────────────────────
106
 
 
 
 
 
 
 
 
 
 
 
 
107
  def run(self):
108
- data, input_fields, output_fields = self.load_excel()
109
- in_context = self.derive_incontext(data, input_fields, output_fields)
110
-
111
- num_existed_examples = len(data.dropna(subset=output_fields))
112
-
113
- for i in tqdm(range(num_existed_examples, len(data))):
114
- prediction = self.predict_output(in_context, data.iloc[i], input_fields)
115
- extracted_fields = self.extract_fields(prediction, output_fields)
116
- for field_name in output_fields:
117
- data.at[i, field_name] = extracted_fields.get(field_name, "")
118
- if i % self.save_every == 0:
119
- data.to_excel(self.out_file_path, index=False)
120
- self.data = data
121
- data.to_excel(self.out_file_path, index=False)
 
 
122
  print(f"Results saved to {self.out_file_path}")
 
1
  import re
2
+ import time
3
 
4
  import openai
5
  import pandas as pd
 
19
  save_every: int,
20
  api_key: str,
21
  base_url: str,
 
22
  ):
23
  self.in_file_path = in_file_path
24
  self.out_file_path = out_file_path
 
71
  for col in output_columns
72
  )
73
  in_context += "\n"
 
74
  return in_context
75
 
76
  def predict_output(
 
103
 
104
  # ─── Engine ───────────────────────────────────────────────────────────
105
 
106
def batch_prediction(self, start_index: int, end_index: int):
    """Predict and store the output fields for rows ``start_index`` to ``end_index - 1``.

    For each row position in the half-open range, the row is sent to
    ``self.predict_output`` together with the in-context prompt, the reply
    is parsed by ``self.extract_fields``, and every output field is written
    back into ``self.data`` (an empty string when the parsed reply lacks
    that field). The method then sleeps ``self.request_interval`` seconds
    before handling the next row.
    """
    for row_pos in range(start_index, end_index):
        current_row = self.data.iloc[row_pos]
        raw_answer = self.predict_output(
            self.in_context, current_row, self.input_fields
        )
        parsed = self.extract_fields(raw_answer, self.output_fields)
        for column in self.output_fields:
            # Fields absent from the parsed reply are stored as "".
            cell_value = parsed.get(column, "")
            self.data.at[row_pos, column] = cell_value
        # Pause between consecutive requests.
        time.sleep(self.request_interval)
116
+
117
  def run(self):
118
+ self.data, self.input_fields, self.output_fields = self.load_excel()
119
+ self.in_context = self.derive_incontext(
120
+ self.data, self.input_fields, self.output_fields
121
+ )
122
+
123
+ self.num_data = len(self.data)
124
+ self.num_examples = len(self.data.dropna(subset=self.output_fields))
125
+
126
+ for start_index in tqdm(
127
+ range(self.num_examples, self.num_data, self.save_every),
128
+ description="Processing batches",
129
+ ):
130
+ end_index = min(start_index + self.save_every, self.num_data)
131
+ self.batch_prediction(start_index, end_index)
132
+ self.data.to_excel(self.out_file_path, index=False)
133
+ self.data.to_excel(self.out_file_path, index=False)
134
  print(f"Results saved to {self.out_file_path}")