File size: 2,877 Bytes
d6c98c4 fa48de8 d2c0e35 fa48de8 d2c0e35 d6c98c4 d2c0e35 0269e89 d2c0e35 652b4e6 d2c0e35 fa48de8 24fa1b6 d6c98c4 d2c0e35 d6c98c4 0269e89 d6c98c4 0269e89 d6c98c4 652b4e6 0269e89 d6c98c4 fa48de8 d6c98c4 24fa1b6 fa48de8 24fa1b6 d6c98c4 24fa1b6 d6c98c4 6e11e8c fa48de8 652b4e6 fa48de8 d6c98c4 24fa1b6 d6c98c4 fa48de8 d6c98c4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
import json
import os
import time

import gradio as gr
import pandas as pd

from autotab import AutoTab
def auto_tabulator_completion(
    in_file_path: str,
    instruction: str,
    max_examples: int,
    model_name: str,
    generation_config: str,
    request_interval: float,
    save_every: int,
    str_api_keys: str,
    base_url: str,
) -> tuple[str, str, str, pd.DataFrame]:
    """Run an AutoTab job on the uploaded file and summarise the outcome.

    Args:
        in_file_path: Value delivered by the ``gr.File`` input and forwarded
            to ``AutoTab`` unchanged.
            NOTE(review): whether this is a path string or a temp-file
            object depends on the Gradio version -- confirm.
        instruction: Forwarded to ``AutoTab`` as ``instruction``.
        max_examples: Forwarded to ``AutoTab`` as ``max_examples``.
        model_name: Forwarded to ``AutoTab`` as ``model_name``.
        generation_config: JSON text (for example
            ``'{"temperature": 0, "max_tokens": 128}'``); it is decoded with
            ``json.loads`` and the result is handed to ``AutoTab``.
        request_interval: Forwarded to ``AutoTab`` as ``request_interval``.
        save_every: Forwarded to ``AutoTab`` as ``save_every``.
        str_api_keys: One or more API keys separated by any whitespace
            (newlines or spaces); split into a list before being forwarded.
        base_url: Forwarded to ``AutoTab`` as ``base_url``.

    Returns:
        A 4-tuple ``(report, output_file_name, query_example, preview)``:
        the multi-line text report, the timestamped ``.xlsx`` file name
        given to ``AutoTab`` as its output path, ``autotab.query_example``
        (or ``"No queries made."`` when no request was issued), and
        ``autotab.data[:15]``.

    Raises:
        json.JSONDecodeError: If ``generation_config`` is not valid JSON.
    """
    output_file_name = f"output_{time.strftime('%Y%m%d%H%M%S')}.xlsx"
    autotab = AutoTab(
        in_file_path=in_file_path,
        out_file_path=output_file_name,
        instruction=instruction,
        max_examples=max_examples,
        model_name=model_name,
        generation_config=json.loads(generation_config),
        request_interval=request_interval,
        save_every=save_every,
        api_keys=str_api_keys.split(),
        base_url=base_url,
    )

    # A monotonic clock cannot jump backwards the way the wall clock can, and
    # reading it exactly once keeps every figure in the report consistent.
    start = time.perf_counter()
    autotab.run()
    time_taken = time.perf_counter() - start

    # Split whole seconds by hand: strftime('%H') on gmtime() wraps at 24 h.
    minutes, seconds = divmod(int(time_taken), 60)
    hours, minutes = divmod(minutes, 60)

    # Guard the rates so a zero-length measurement cannot raise
    # ZeroDivisionError after the work has already been done.
    if time_taken > 0:
        predictions_per_second = autotab.num_missing / time_taken
        queries_per_second = autotab.request_count / time_taken
    else:
        predictions_per_second = queries_per_second = 0.0

    report = "\n".join(
        [
            f"Total data points: {autotab.num_data}",
            f"Total missing (before): {autotab.num_missing}",
            f"Total missing (after): {autotab.failed_count}",
            f"Total queries made: {autotab.request_count}",
            f"Time taken: {hours:02d}:{minutes:02d}:{seconds:02d}",
            f"Prediction per second: {predictions_per_second:.2f}",
            f"Query per second: {queries_per_second:.2f}",
        ]
    )
    query_example = autotab.query_example if autotab.request_count > 0 else "No queries made."
    return report, output_file_name, query_example, autotab.data[:15]
# Gradio interface
# Input widgets, listed in the same order as the positional parameters of
# auto_tabulator_completion (Gradio passes their values positionally).
inputs = [
    gr.File(label="Input Excel File"),
    gr.Textbox(
        value="You are a helpful assistant. Help me finish the task.",
        label="Instruction",
    ),
    gr.Slider(value=4, minimum=1, maximum=50, step=1, label="Max Examples"),
    gr.Textbox(value="Qwen/Qwen2-7B-Instruct", label="Model Name"),
    gr.Textbox(
        value='{"temperature": 0, "max_tokens": 128}',
        label="Generation Config in Dict",
    ),
    gr.Slider(value=0.1, minimum=0, maximum=10, label="Request Interval in Seconds"),
    gr.Slider(value=100, minimum=1, maximum=1000, step=1, label="Save Every N Steps"),
    gr.Textbox(
        # SECURITY: an API key is committed in source and pre-filled for every
        # visitor. Deployments should supply their own key(s) through the
        # AUTOTAB_API_KEYS environment variable; the literal below is kept only
        # so existing setups keep working.
        # TODO(review): rotate this key and drop the hard-coded fallback.
        value=os.environ.get(
            "AUTOTAB_API_KEYS",
            "sk-exhahhjfqyanmwewndukcqtrpegfdbwszkjucvcpajdufiah",
        ),
        label="API Key(s). One per line.",
    ),
    gr.Textbox(value="https://public-beta-api.siliconflow.cn/v1", label="Base URL"),
]
# Output widgets; their order mirrors the 4-tuple returned by
# auto_tabulator_completion: report text, result file, sample query, preview.
_report_box = gr.Textbox(label="Report")
_result_file = gr.File(label="Output Excel File")
_query_box = gr.Textbox(label="Query Example")
_preview_table = gr.Dataframe(label="First 15 rows.")
outputs = [_report_box, _result_file, _query_box, _preview_table]
# Wire the callback to its input/output widgets, then start the web server.
_interface = gr.Interface(
    auto_tabulator_completion,
    inputs,
    outputs,
    title="Auto Tabulator Completion",
    description="Automatically complete missing output values in tabular data based on in-context learning.",
)
_interface.launch()
|