File size: 2,919 Bytes
d6c98c4
fa48de8
 
d2c0e35
 
fa48de8
d2c0e35
d6c98c4
 
 
d2c0e35
 
 
 
 
0269e89
d2c0e35
652b4e6
d2c0e35
fa48de8
24fa1b6
d6c98c4
d2c0e35
d6c98c4
0269e89
d6c98c4
 
 
0269e89
d6c98c4
652b4e6
0269e89
d6c98c4
fa48de8
d6c98c4
24fa1b6
fa48de8
24fa1b6
 
 
 
 
 
 
 
 
 
d6c98c4
 
 
 
 
 
 
 
 
24fa1b6
d6c98c4
 
 
 
 
6e11e8c
fa48de8
 
652b4e6
 
fa48de8
 
d6c98c4
 
 
24fa1b6
d6c98c4
fa48de8
d6c98c4
 
 
 
 
 
 
 
c2c0cff
d6c98c4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import json
import time

import gradio as gr
import pandas as pd

from autotab import AutoTab


def auto_tabulator_completion(
    in_file_path: str,
    instruction: str,
    max_examples: int,
    model_name: str,
    generation_config: str,
    request_interval: float,
    save_every: int,
    str_api_keys: str,
    base_url: str,
) -> tuple[str, str, str, pd.DataFrame]:
    """Run AutoTab over an input file and summarize the run.

    Args:
        in_file_path: Input file, forwarded to ``AutoTab`` unchanged.
        instruction: Instruction text, forwarded to ``AutoTab`` unchanged.
        max_examples: Forwarded to ``AutoTab`` as ``max_examples``.
        model_name: Forwarded to ``AutoTab`` as ``model_name``.
        generation_config: JSON text (e.g. ``'{"temperature": 0}'``); it is
            decoded with ``json.loads`` and the result is passed to
            ``AutoTab`` as ``generation_config``.
        request_interval: Forwarded to ``AutoTab`` as ``request_interval``.
        save_every: Forwarded to ``AutoTab`` as ``save_every``.
        str_api_keys: One or more API keys separated by any whitespace
            (spaces or newlines); split with ``str.split()``.
        base_url: Forwarded to ``AutoTab`` as ``base_url``.

    Returns:
        A 4-tuple ``(report, output_file_name, query_example, preview)``:
        the plain-text run report, the timestamped ``output_*.xlsx`` name
        given to ``AutoTab`` as ``out_file_path``, ``autotab.query_example``
        (or ``"No queries made."`` when no request was issued), and
        ``autotab.data[:15]``.

    Raises:
        json.JSONDecodeError: If ``generation_config`` is not valid JSON.
    """
    # Decode first so a malformed config fails before any work is started.
    parsed_generation_config = json.loads(generation_config)

    output_file_name = f"output_{time.strftime('%Y%m%d%H%M%S')}.xlsx"
    autotab = AutoTab(
        in_file_path=in_file_path,
        out_file_path=output_file_name,
        instruction=instruction,
        max_examples=max_examples,
        model_name=model_name,
        generation_config=parsed_generation_config,
        request_interval=request_interval,
        save_every=save_every,
        api_keys=str_api_keys.split(),
        base_url=base_url,
    )

    # perf_counter is monotonic and high-resolution; measure once so the
    # displayed duration and the per-second rates use the same value.
    start = time.perf_counter()
    autotab.run()
    time_taken = time.perf_counter() - start

    def per_second(count: float) -> float:
        # A run that finishes within the clock resolution has no meaningful
        # rate; report 0 rather than dividing by zero.
        return count / time_taken if time_taken > 0 else 0.0

    report = "\n".join(
        [
            f"Total data points: {autotab.num_data}",
            f"Total missing (before): {autotab.num_missing}",
            f"Total missing (after): {autotab.failed_count}",
            f"Total queries made: {autotab.request_count}",
            f"Time taken: {time.strftime('%H:%M:%S', time.gmtime(time_taken))}",
            f"Prediction per second: {per_second(autotab.num_missing):.2f}",
            f"Query per second: {per_second(autotab.request_count):.2f}",
        ]
    )

    query_example = autotab.query_example if autotab.request_count > 0 else "No queries made."
    return report, output_file_name, query_example, autotab.data[:15]


# Gradio interface
# Input widgets, created in the same order as the parameters of
# auto_tabulator_completion; the `inputs` list below preserves that order.
input_file = gr.File(label="Input Excel File")
instruction_box = gr.Textbox(
    label="Instruction",
    value="You are a helpful assistant. Help me finish the task.",
)
max_examples_slider = gr.Slider(
    label="Max Examples",
    minimum=1,
    maximum=50,
    step=1,
    value=4,
)
model_name_box = gr.Textbox(label="Model Name", value="Qwen/Qwen2-7B-Instruct")
generation_config_box = gr.Textbox(
    label="Generation Config in Dict",
    value='{"temperature": 0, "max_tokens": 128}',
)
request_interval_slider = gr.Slider(
    label="Request Interval in Seconds",
    minimum=0,
    maximum=10,
    value=0.1,
)
save_every_slider = gr.Slider(
    label="Save Every N Steps",
    minimum=1,
    maximum=1000,
    step=1,
    value=100,
)
# NOTE(review): this default is a literal API key committed to source;
# confirm it is an intentionally public demo credential.
api_keys_box = gr.Textbox(
    label="API Key(s). One per line.",
    value="sk-exhahhjfqyanmwewndukcqtrpegfdbwszkjucvcpajdufiah",
)
base_url_box = gr.Textbox(
    label="Base URL",
    value="https://public-beta-api.siliconflow.cn/v1",
)

inputs = [
    input_file,
    instruction_box,
    max_examples_slider,
    model_name_box,
    generation_config_box,
    request_interval_slider,
    save_every_slider,
    api_keys_box,
    base_url_box,
]

# Output widgets, in the order of the tuple returned by
# auto_tabulator_completion.
report_box = gr.Textbox(label="Report")
output_file = gr.File(label="Output Excel File")
query_example_box = gr.Textbox(label="Query Example")
preview_table = gr.Dataframe(label="First 15 rows.")

outputs = [report_box, output_file, query_example_box, preview_table]

demo = gr.Interface(
    fn=auto_tabulator_completion,
    inputs=inputs,
    outputs=outputs,
    title="Auto Tabulator Completion",
    description="Automatically complete missing output values in tabular data based on in-context learning. Check https://github.com/Ki-Seki/autotab.",
)
# Start the web server as soon as the module is executed.
demo.launch()