chore: update
Browse files- app.py +6 -3
- autotab.py +28 -16
app.py
CHANGED
@@ -13,6 +13,7 @@ def auto_tabulator_completion(
|
|
13 |
max_examples: int,
|
14 |
model_name: str,
|
15 |
generation_config: dict,
|
|
|
16 |
save_every: int,
|
17 |
api_key: str,
|
18 |
base_url: str,
|
@@ -20,14 +21,15 @@ def auto_tabulator_completion(
|
|
20 |
output_file_name = "ouput.xlsx"
|
21 |
autotab = AutoTab(
|
22 |
in_file_path=in_file_path,
|
23 |
-
instruction=instruction,
|
24 |
out_file_path=output_file_name,
|
|
|
25 |
max_examples=max_examples,
|
26 |
model_name=model_name,
|
27 |
-
api_key=api_key,
|
28 |
-
base_url=base_url,
|
29 |
generation_config=json.loads(generation_config),
|
|
|
30 |
save_every=save_every,
|
|
|
|
|
31 |
)
|
32 |
start = time.time()
|
33 |
autotab.run()
|
@@ -49,6 +51,7 @@ inputs = [
|
|
49 |
value='{"temperature": 0, "max_tokens": 128}',
|
50 |
label="Generation Config in Dict",
|
51 |
),
|
|
|
52 |
gr.Slider(value=100, minimum=1, maximum=1000, step=1, label="Save Every N Steps"),
|
53 |
gr.Textbox(
|
54 |
value="sk-exhahhjfqyanmwewndukcqtrpegfdbwszkjucvcpajdufiah", label="API Key"
|
|
|
13 |
max_examples: int,
|
14 |
model_name: str,
|
15 |
generation_config: dict,
|
16 |
+
request_interval: float,
|
17 |
save_every: int,
|
18 |
api_key: str,
|
19 |
base_url: str,
|
|
|
21 |
output_file_name = "ouput.xlsx"
|
22 |
autotab = AutoTab(
|
23 |
in_file_path=in_file_path,
|
|
|
24 |
out_file_path=output_file_name,
|
25 |
+
instruction=instruction,
|
26 |
max_examples=max_examples,
|
27 |
model_name=model_name,
|
|
|
|
|
28 |
generation_config=json.loads(generation_config),
|
29 |
+
request_interval=request_interval,
|
30 |
save_every=save_every,
|
31 |
+
api_key=api_key,
|
32 |
+
base_url=base_url,
|
33 |
)
|
34 |
start = time.time()
|
35 |
autotab.run()
|
|
|
51 |
value='{"temperature": 0, "max_tokens": 128}',
|
52 |
label="Generation Config in Dict",
|
53 |
),
|
54 |
+
gr.Slider(value=0.01, minimum=0, maximum=10, label="Request Interval in Seconds"),
|
55 |
gr.Slider(value=100, minimum=1, maximum=1000, step=1, label="Save Every N Steps"),
|
56 |
gr.Textbox(
|
57 |
value="sk-exhahhjfqyanmwewndukcqtrpegfdbwszkjucvcpajdufiah", label="API Key"
|
autotab.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
import re
|
|
|
2 |
|
3 |
import openai
|
4 |
import pandas as pd
|
@@ -18,7 +19,6 @@ class AutoTab:
|
|
18 |
save_every: int,
|
19 |
api_key: str,
|
20 |
base_url: str,
|
21 |
-
|
22 |
):
|
23 |
self.in_file_path = in_file_path
|
24 |
self.out_file_path = out_file_path
|
@@ -71,7 +71,6 @@ class AutoTab:
|
|
71 |
for col in output_columns
|
72 |
)
|
73 |
in_context += "\n"
|
74 |
-
self.in_context = in_context
|
75 |
return in_context
|
76 |
|
77 |
def predict_output(
|
@@ -104,19 +103,32 @@ class AutoTab:
|
|
104 |
|
105 |
# ─── Engine ───────────────────────────────────────────────────────────
|
106 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
107 |
def run(self):
|
108 |
-
data, input_fields, output_fields = self.load_excel()
|
109 |
-
in_context = self.derive_incontext(
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
|
|
|
|
122 |
print(f"Results saved to {self.out_file_path}")
|
|
|
1 |
import re
|
2 |
+
import time
|
3 |
|
4 |
import openai
|
5 |
import pandas as pd
|
|
|
19 |
save_every: int,
|
20 |
api_key: str,
|
21 |
base_url: str,
|
|
|
22 |
):
|
23 |
self.in_file_path = in_file_path
|
24 |
self.out_file_path = out_file_path
|
|
|
71 |
for col in output_columns
|
72 |
)
|
73 |
in_context += "\n"
|
|
|
74 |
return in_context
|
75 |
|
76 |
def predict_output(
|
|
|
103 |
|
104 |
# ─── Engine ───────────────────────────────────────────────────────────
|
105 |
|
106 |
+
def batch_prediction(self, start_index: int, end_index: int):
    """Predict and store the output fields for a contiguous range of rows.

    For every row position ``i`` in ``[start_index, end_index)`` the row is
    passed to ``predict_output`` together with the in-context prompt, the
    returned text is parsed with ``extract_fields``, and each output field
    is written back into ``self.data``. A field missing from the parsed
    result is stored as an empty string.

    Args:
        start_index: Position of the first row to process (inclusive).
        end_index: Position one past the last row to process (exclusive).
    """
    for i in range(start_index, end_index):
        prediction = self.predict_output(
            self.in_context, self.data.iloc[i], self.input_fields
        )
        extracted_fields = self.extract_fields(prediction, self.output_fields)
        # The row was read by position (iloc), so resolve the matching index
        # label before writing with the label-based ``.at``. Using ``i``
        # directly as a label only works on a default 0..n-1 index and would
        # otherwise hit the wrong row or append a new one.
        row_label = self.data.index[i]
        for field_name in self.output_fields:
            self.data.at[row_label, field_name] = extracted_fields.get(
                field_name, ""
            )
        # Pause between requests to throttle calls to the model endpoint.
        time.sleep(self.request_interval)
|
116 |
+
|
117 |
def run(self):
    """Fill in the missing output fields of the spreadsheet, saving as it goes.

    Loads the data and field lists via ``load_excel``, builds the in-context
    prompt with ``derive_incontext``, then processes the remaining rows in
    batches of ``save_every`` rows. The DataFrame is written to
    ``out_file_path`` after every batch and once more at the end, so an
    interrupted run keeps the batches already completed.
    """
    self.data, self.input_fields, self.output_fields = self.load_excel()
    self.in_context = self.derive_incontext(
        self.data, self.input_fields, self.output_fields
    )

    self.num_data = len(self.data)
    # Rows with every output field present are counted as worked examples.
    # Prediction starts at that count, which assumes those rows are the
    # leading rows of the sheet — TODO confirm against load_excel.
    self.num_examples = len(self.data.dropna(subset=self.output_fields))

    for start_index in tqdm(
        range(self.num_examples, self.num_data, self.save_every),
        # tqdm's label keyword is ``desc``; an unknown keyword such as
        # ``description`` makes tqdm raise before the loop starts.
        desc="Processing batches",
    ):
        end_index = min(start_index + self.save_every, self.num_data)
        self.batch_prediction(start_index, end_index)
        # Checkpoint after each batch.
        self.data.to_excel(self.out_file_path, index=False)
    # Final write also covers the case where the loop body never ran.
    self.data.to_excel(self.out_file_path, index=False)
    print(f"Results saved to {self.out_file_path}")
|