Spaces:
Running
Running
piotr-szleg-bards-ai
committed on
Commit
•
0b07a42
1
Parent(s):
38ee35d
Update
Browse files- app.py +162 -0
- data +1 -0
- pipeline/config.py +62 -0
- pipeline/models.py +150 -0
- requirements.txt +2 -0
app.py
ADDED
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
|
3 |
+
import gradio as gr
|
4 |
+
import pandas as pd
|
5 |
+
import plotly
|
6 |
+
from pandas.api.types import is_numeric_dtype
|
7 |
+
|
8 |
+
from pipeline.config import QueriesConfig, LLMBoardConfig
|
9 |
+
from pipeline.models import models_costs
|
10 |
+
|
# HTML fragment rendered at the top of the board; the {} placeholder is
# filled with the base query template in the UI section below.
README = """
<br/><h2>About this project</h2>
<p>
This project analyses different models and providers from the perspective of an application developer.
<br/>
Models are asked to summarize a text in different languages and using different output formats with following prompt:
</p>
<pre>
<code>{}</code>
</pre>
"""

# Pre-computed pipeline outputs bundled with the repository; loaded once at
# import time. File names embed the timestamp of the pipeline run that
# produced them.
summary_df: pd.DataFrame = pd.read_csv("data/2024-01-25 13:30:17.207984_summary.csv")
time_of_day_comparison_df = pd.read_csv("data/2024-01-25 13:30:20.959750_time_of_day_comparison.csv")
general_plots = pd.read_csv("data/2024-01-25 12:22:00.759762_general_plot.csv")


# Pre-rendered plotly figure (serialized JSON) for the time-of-day tab.
with open("data/time_of_day_plot.json", "r") as f:
    time_of_day_plot = plotly.io.from_json(f.read())

model_costs_df = models_costs()


# Mutable UI state shared by the gradio callbacks below (module-level
# globals: state is shared across all sessions of the app).
searched_model_name = ""
collapse_languages = False
collapse_output_method = False
+
def filter_dataframes(input: str):
|
40 |
+
global searched_model_name
|
41 |
+
input = input.lower()
|
42 |
+
searched_model_name = input
|
43 |
+
return dataframes()
|
44 |
+
|
45 |
+
|
46 |
+
def collapse_languages_toggle():
|
47 |
+
global collapse_languages
|
48 |
+
if collapse_languages:
|
49 |
+
collapse_languages = False
|
50 |
+
button_text = "Collapse languages"
|
51 |
+
else:
|
52 |
+
collapse_languages = True
|
53 |
+
button_text = "Un-collapse languages"
|
54 |
+
return dataframes()[0], button_text
|
55 |
+
|
56 |
+
|
57 |
+
def collapse_output_method_toggle():
|
58 |
+
global collapse_output_method
|
59 |
+
if collapse_output_method:
|
60 |
+
collapse_output_method = False
|
61 |
+
button_text = "Collapse output method"
|
62 |
+
else:
|
63 |
+
collapse_output_method = True
|
64 |
+
button_text = "Un-collapse output method"
|
65 |
+
return dataframes()[0], button_text
|
66 |
+
|
67 |
+
|
68 |
+
def dataframes():
|
69 |
+
global collapse_languages, collapse_output_method, searched_model_name, summary_df, time_of_day_comparison_df, model_costs_df
|
70 |
+
|
71 |
+
summary_df_columns = summary_df.columns.to_list()
|
72 |
+
group_columns = LLMBoardConfig().group_columns.copy()
|
73 |
+
if collapse_languages:
|
74 |
+
summary_df_columns.remove("language")
|
75 |
+
group_columns.remove("language")
|
76 |
+
if collapse_output_method:
|
77 |
+
summary_df_columns.remove("template_name")
|
78 |
+
group_columns.remove("template_name")
|
79 |
+
|
80 |
+
summary_df_processed = summary_df[summary_df_columns].groupby(by=group_columns).mean().reset_index()
|
81 |
+
return (
|
82 |
+
dataframe_style(summary_df_processed[summary_df_processed.model.str.lower().str.contains(searched_model_name)]),
|
83 |
+
dataframe_style(
|
84 |
+
time_of_day_comparison_df[time_of_day_comparison_df.model.str.lower().str.contains(searched_model_name)]
|
85 |
+
),
|
86 |
+
dataframe_style(model_costs_df[model_costs_df.Model.str.lower().str.contains(searched_model_name)]),
|
87 |
+
)
|
88 |
+
|
89 |
+
|
90 |
+
def dataframe_style(df: pd.DataFrame):
|
91 |
+
df = df.copy()
|
92 |
+
df.columns = [snake_case_to_title(column) for column in df.columns]
|
93 |
+
column_formats = {}
|
94 |
+
for column in df.columns:
|
95 |
+
if is_numeric_dtype(df[column]):
|
96 |
+
if column == "execution_time":
|
97 |
+
column_formats[column] = "{:.4f}"
|
98 |
+
else:
|
99 |
+
column_formats[column] = "{:.2f}"
|
100 |
+
df = df.style.format(column_formats, na_rep="")
|
101 |
+
return df
|
102 |
+
|
103 |
+
|
104 |
+
def snake_case_to_title(text):
|
105 |
+
# Convert snake_case to title-case
|
106 |
+
words = re.split(r"_", text)
|
107 |
+
title_words = [word.capitalize() for word in words]
|
108 |
+
return " ".join(title_words)
|
109 |
+
|
110 |
+
|
111 |
+
filter_textbox = gr.Textbox(label="Model name part")
|
112 |
+
filter_button = gr.Button("Filter dataframes by model name")
|
113 |
+
collapse_languages_button = gr.Button("Collapse languages")
|
114 |
+
collapse_output_method_button = gr.Button("Collapse output method")
|
115 |
+
last_textbox = 0
|
116 |
+
|
117 |
+
with gr.Blocks() as demo:
|
118 |
+
gr.HTML("<h1>LLM Board</h1>" + README.format(QueriesConfig().base_query_template))
|
119 |
+
|
120 |
+
with gr.Row():
|
121 |
+
filter_textbox.render()
|
122 |
+
filter_button.render()
|
123 |
+
with gr.Tab("Basic information"):
|
124 |
+
for index, row in general_plots.iterrows():
|
125 |
+
gr.Plot(plotly.io.from_json(row["plot_json"]), label=row["description"])
|
126 |
+
gr.Markdown(str(row["comment"]))
|
127 |
+
with gr.Tab("Output characteristics"):
|
128 |
+
with gr.Row():
|
129 |
+
collapse_languages_button.render()
|
130 |
+
collapse_output_method_button.render()
|
131 |
+
summary_ui = gr.DataFrame(dataframe_style(summary_df), label="Statistics")
|
132 |
+
|
133 |
+
with gr.Tab("Preformance by time of the day"):
|
134 |
+
time_of_day_comparison_ui = gr.DataFrame(time_of_day_comparison_df, label="Time of day")
|
135 |
+
time_of_day_plot_ui = gr.Plot(time_of_day_plot, label="Time of the day plot")
|
136 |
+
|
137 |
+
with gr.Tab("Costs comparison"):
|
138 |
+
models_costs_ui = gr.DataFrame(dataframe_style(models_costs()), label="Costs comparison")
|
139 |
+
filter_button.click(
|
140 |
+
fn=filter_dataframes,
|
141 |
+
inputs=filter_textbox,
|
142 |
+
outputs=[summary_ui, time_of_day_comparison_ui, models_costs_ui],
|
143 |
+
api_name="filter_dataframes",
|
144 |
+
)
|
145 |
+
filter_textbox.submit(
|
146 |
+
fn=filter_dataframes,
|
147 |
+
inputs=filter_textbox,
|
148 |
+
outputs=[summary_ui, time_of_day_comparison_ui, models_costs_ui],
|
149 |
+
api_name="filter_dataframes",
|
150 |
+
)
|
151 |
+
collapse_languages_button.click(
|
152 |
+
fn=collapse_languages_toggle,
|
153 |
+
outputs=[summary_ui, collapse_languages_button],
|
154 |
+
api_name="collapse_languages_toggle",
|
155 |
+
)
|
156 |
+
collapse_output_method_button.click(
|
157 |
+
fn=collapse_output_method_toggle,
|
158 |
+
outputs=[summary_ui, collapse_output_method_button],
|
159 |
+
api_name="collapse_output_method_toggle",
|
160 |
+
)
|
161 |
+
|
162 |
+
demo.launch()
|
data
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"data":[{"alignmentgroup":"True","hovertemplate":"Model=Mistral-7B-Instruct-v0.2\u003cbr\u003eTime of day=%{x}\u003cbr\u003eExecution Time (s)=%{y}\u003cextra\u003e\u003c\u002fextra\u003e","legendgroup":"Mistral-7B-Instruct-v0.2","marker":{"color":"#636efa","pattern":{"shape":""}},"name":"Mistral-7B-Instruct-v0.2","offsetgroup":"Mistral-7B-Instruct-v0.2","orientation":"v","showlegend":true,"textposition":"auto","x":["afternoon","evening","morning"],"xaxis":"x","y":[null,null,3.174540030956268],"yaxis":"y","type":"bar"},{"alignmentgroup":"True","hovertemplate":"Model=Mixtral-8x7B-Instruct-v0.1\u003cbr\u003eTime of day=%{x}\u003cbr\u003eExecution Time (s)=%{y}\u003cextra\u003e\u003c\u002fextra\u003e","legendgroup":"Mixtral-8x7B-Instruct-v0.1","marker":{"color":"#EF553B","pattern":{"shape":""}},"name":"Mixtral-8x7B-Instruct-v0.1","offsetgroup":"Mixtral-8x7B-Instruct-v0.1","orientation":"v","showlegend":true,"textposition":"auto","x":["morning"],"xaxis":"x","y":[7.142197625471814],"yaxis":"y","type":"bar"},{"alignmentgroup":"True","hovertemplate":"Model=TinyLlama\u002fTinyLlama-1.1B-Chat-v1.0\u003cbr\u003eTime of day=%{x}\u003cbr\u003eExecution Time (s)=%{y}\u003cextra\u003e\u003c\u002fextra\u003e","legendgroup":"TinyLlama\u002fTinyLlama-1.1B-Chat-v1.0","marker":{"color":"#00cc96","pattern":{"shape":""}},"name":"TinyLlama\u002fTinyLlama-1.1B-Chat-v1.0","offsetgroup":"TinyLlama\u002fTinyLlama-1.1B-Chat-v1.0","orientation":"v","showlegend":true,"textposition":"auto","x":["morning"],"xaxis":"x","y":[1.2902645373344421],"yaxis":"y","type":"bar"},{"alignmentgroup":"True","hovertemplate":"Model=chat-bison\u003cbr\u003eTime of day=%{x}\u003cbr\u003eExecution Time 
(s)=%{y}\u003cextra\u003e\u003c\u002fextra\u003e","legendgroup":"chat-bison","marker":{"color":"#ab63fa","pattern":{"shape":""}},"name":"chat-bison","offsetgroup":"chat-bison","orientation":"v","showlegend":true,"textposition":"auto","x":["morning"],"xaxis":"x","y":[3.890243631601334],"yaxis":"y","type":"bar"},{"alignmentgroup":"True","hovertemplate":"Model=chat-bison-32k\u003cbr\u003eTime of day=%{x}\u003cbr\u003eExecution Time (s)=%{y}\u003cextra\u003e\u003c\u002fextra\u003e","legendgroup":"chat-bison-32k","marker":{"color":"#FFA15A","pattern":{"shape":""}},"name":"chat-bison-32k","offsetgroup":"chat-bison-32k","orientation":"v","showlegend":true,"textposition":"auto","x":["morning"],"xaxis":"x","y":[4.128177767992019],"yaxis":"y","type":"bar"},{"alignmentgroup":"True","hovertemplate":"Model=gemini-pro\u003cbr\u003eTime of day=%{x}\u003cbr\u003eExecution Time (s)=%{y}\u003cextra\u003e\u003c\u002fextra\u003e","legendgroup":"gemini-pro","marker":{"color":"#19d3f3","pattern":{"shape":""}},"name":"gemini-pro","offsetgroup":"gemini-pro","orientation":"v","showlegend":true,"textposition":"auto","x":["morning"],"xaxis":"x","y":[4.72138064004937],"yaxis":"y","type":"bar"},{"alignmentgroup":"True","hovertemplate":"Model=gpt-3.5-turbo\u003cbr\u003eTime of day=%{x}\u003cbr\u003eExecution Time (s)=%{y}\u003cextra\u003e\u003c\u002fextra\u003e","legendgroup":"gpt-3.5-turbo","marker":{"color":"#FF6692","pattern":{"shape":""}},"name":"gpt-3.5-turbo","offsetgroup":"gpt-3.5-turbo","orientation":"v","showlegend":true,"textposition":"auto","x":["afternoon","evening","morning"],"xaxis":"x","y":[4.666725277900696,4.351089119911194,5.336839800789242],"yaxis":"y","type":"bar"},{"alignmentgroup":"True","hovertemplate":"Model=gpt-4\u003cbr\u003eTime of day=%{x}\u003cbr\u003eExecution Time 
(s)=%{y}\u003cextra\u003e\u003c\u002fextra\u003e","legendgroup":"gpt-4","marker":{"color":"#B6E880","pattern":{"shape":""}},"name":"gpt-4","offsetgroup":"gpt-4","orientation":"v","showlegend":true,"textposition":"auto","x":["morning"],"xaxis":"x","y":[15.5218456586202],"yaxis":"y","type":"bar"},{"alignmentgroup":"True","hovertemplate":"Model=gpt-4-turbo\u003cbr\u003eTime of day=%{x}\u003cbr\u003eExecution Time (s)=%{y}\u003cextra\u003e\u003c\u002fextra\u003e","legendgroup":"gpt-4-turbo","marker":{"color":"#FF97FF","pattern":{"shape":""}},"name":"gpt-4-turbo","offsetgroup":"gpt-4-turbo","orientation":"v","showlegend":true,"textposition":"auto","x":["morning"],"xaxis":"x","y":[14.008529031276703],"yaxis":"y","type":"bar"},{"alignmentgroup":"True","hovertemplate":"Model=llama-2-70b-chat\u003cbr\u003eTime of day=%{x}\u003cbr\u003eExecution Time (s)=%{y}\u003cextra\u003e\u003c\u002fextra\u003e","legendgroup":"llama-2-70b-chat","marker":{"color":"#FECB52","pattern":{"shape":""}},"name":"llama-2-70b-chat","offsetgroup":"llama-2-70b-chat","orientation":"v","showlegend":true,"textposition":"auto","x":["afternoon","evening","morning"],"xaxis":"x","y":[2.1692867279052734,5.552149415016174,6.175082007679371],"yaxis":"y","type":"bar"},{"alignmentgroup":"True","hovertemplate":"Model=zephyr-7b-beta\u003cbr\u003eTime of day=%{x}\u003cbr\u003eExecution Time 
(s)=%{y}\u003cextra\u003e\u003c\u002fextra\u003e","legendgroup":"zephyr-7b-beta","marker":{"color":"#636efa","pattern":{"shape":""}},"name":"zephyr-7b-beta","offsetgroup":"zephyr-7b-beta","orientation":"v","showlegend":true,"textposition":"auto","x":["afternoon","evening","morning"],"xaxis":"x","y":[null,null,3.2387993240356447],"yaxis":"y","type":"bar"}],"layout":{"template":{"data":{"histogram2dcontour":[{"type":"histogram2dcontour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"choropleth":[{"type":"choropleth","colorbar":{"outlinewidth":0,"ticks":""}}],"histogram2d":[{"type":"histogram2d","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmap":[{"type":"heatmap","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmapgl":[{"type":"heatmapgl","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"contourcarpet":[{"type":"contou
rcarpet","colorbar":{"outlinewidth":0,"ticks":""}}],"contour":[{"type":"contour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"surface":[{"type":"surface","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"mesh3d":[{"type":"mesh3d","colorbar":{"outlinewidth":0,"ticks":""}}],"scatter":[{"fillpattern":{"fillmode":"overlay","size":10,"solidity":0.2},"type":"scatter"}],"parcoords":[{"type":"parcoords","line":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolargl":[{"type":"scatterpolargl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"scattergeo":[{"type":"scattergeo","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolar":[{"type":"scatterpolar","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"scattergl":[{"type":"scattergl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatter3d":[{"type":"scatter3d","line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattermapbox":[{"type":"scattermapbox","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterternary":[{"type":"scatterternary","marker":{"colorbar":{"outline
width":0,"ticks":""}}}],"scattercarpet":[{"type":"scattercarpet","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"type":"carpet"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"pie":[{"automargin":true,"type":"pie"}]},"layout":{"autotypenumbers":"strict","colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"hovermode":"closest","hoverlabel":{"align":"left"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"bgcolor":"#E5ECF6","angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"ternary":{"bgcolor":"#E5ECF6","aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#
fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]]},"xaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"yaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"geo":{"bgcolor":"white","landcolor":"#E5ECF6","subunitcolor":"white","showland":true,"showlakes":true,"lakecolor":"white"},"title":{"x":0.05},"mapbox":{"style":"light"}}},"xaxis":{"anchor":"y","domain":[0.0,1.0],"title":{"text":"Time of day"}},"yaxis":{"anchor":"x","domain":[0.0,1.0],"title":{"text":"Execution Time (s)"}},"legend":{"title":{"text":"Model"},"tracegroupgap":0},"title":{"text":"Execution time for different times of the day"},"barmode":"group"}}
|
pipeline/config.py
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import List

# this try/except is important for publishing to Hugging Face
# (the deployed Space imports these config classes without installing
# dagster, so fall back to plain classes when dagster is absent; the
# annotated class attributes below work the same either way)
try:
    from dagster import Config
except ImportError:
    Config = object
|
8 |
+
|
9 |
+
class LLMBoardConfig(Config):
|
10 |
+
group_columns: List[str] = ["model", "language", "template_name"]
|
11 |
+
single_values_columns: List[str] = ["execution_time", "characters_count", "words_count"]
|
12 |
+
list_columns: List[str] = ["chunk_sizes", "chunk_generation_times", "chunk_generation_times_by_chunk_sizes"]
|
13 |
+
plot_dir: str = "./html/plots/"
|
14 |
+
plot_json_dir: str = "./data/"
|
15 |
+
saving_path: str = "data/"
|
16 |
+
|
17 |
+
|
18 |
+
class QueriesConfig(Config):
|
19 |
+
base_query_template: str = """Summarize me this text, the summary should be in {language}
|
20 |
+
```
|
21 |
+
{text}
|
22 |
+
```
|
23 |
+
"""
|
24 |
+
|
25 |
+
query_template: dict = {
|
26 |
+
"markdown": """Return output as markdown""",
|
27 |
+
"json": """Return output as json in format:
|
28 |
+
{
|
29 |
+
"summary": "<summary">
|
30 |
+
}""",
|
31 |
+
"call": """Return output by calling summary_result()""",
|
32 |
+
}
|
33 |
+
|
34 |
+
|
35 |
+
class OpenAIConfig(Config):
|
36 |
+
mock: bool = False
|
37 |
+
|
38 |
+
|
39 |
+
class QueriesDatasetConfig(Config):
|
40 |
+
dataset_name: str = "GEM/xlsum"
|
41 |
+
samples_per_measurement: int = 2
|
42 |
+
languages: List[str] = ["english", "japanese"]
|
43 |
+
query_config: QueriesConfig = QueriesConfig()
|
44 |
+
|
45 |
+
|
46 |
+
class SummaryConfig(Config):
|
47 |
+
saving_path: str = "data/"
|
48 |
+
|
49 |
+
|
50 |
+
class TimeOfDayComparisonConfig(Config):
|
51 |
+
saving_path: str = "data/"
|
52 |
+
|
53 |
+
|
54 |
+
class GeneralPlotConfig(Config):
|
55 |
+
plots_dir: str = "./html/plots/"
|
56 |
+
saving_path: str = "data/"
|
57 |
+
endpoint_startup_time_minutes: int = 2
|
58 |
+
endpoint_cleanup_time_minutes: int = 2
|
59 |
+
seconds_per_token: float = 184 / 6
|
60 |
+
input_size: int = 100
|
61 |
+
expected_output_size: int = 50
|
62 |
+
queries: int = 1000
|
pipeline/models.py
ADDED
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
from dataclasses import dataclass
from typing import Optional

import pandas as pd
|
5 |
+
|
6 |
+
|
7 |
+
@dataclass
|
8 |
+
class Model(object):
|
9 |
+
model_display_name: str
|
10 |
+
model_name: str
|
11 |
+
api_url: str
|
12 |
+
provider: str
|
13 |
+
hourly_cost: int = None
|
14 |
+
cost: str = None
|
15 |
+
supports_functions: str = False
|
16 |
+
size_billion_parameters: int = None # in billion paramters
|
17 |
+
cost_per_million_tokens: int = None
|
18 |
+
cost_per_million_input_tokens: int = None
|
19 |
+
cost_per_million_output_tokens: int = None
|
20 |
+
|
21 |
+
def __post_init__(self):
|
22 |
+
self.cost_per_million_input_tokens = self.cost_per_million_input_tokens or self.cost_per_million_tokens
|
23 |
+
self.cost_per_million_output_tokens = self.cost_per_million_output_tokens or self.cost_per_million_tokens
|
24 |
+
if not self.cost and self.hourly_cost:
|
25 |
+
self.cost = f"${self.hourly_cost} / hour"
|
26 |
+
if not self.cost and self.cost_per_million_tokens:
|
27 |
+
self.cost = f"${self.cost_per_million_tokens} / 1M tokens"
|
28 |
+
elif not self.cost and self.cost_per_million_input_tokens and self.cost_per_million_output_tokens:
|
29 |
+
self.cost = f"${self.cost_per_million_input_tokens} / 1M input tokens, ${self.cost_per_million_output_tokens} / 1M output tokens"
|
30 |
+
|
31 |
+
|
32 |
+
env = os.environ
|
33 |
+
|
34 |
+
MODELS = [
|
35 |
+
# source: https://openai.com/pricing
|
36 |
+
# converted costs from dollar/1K tokens to dollar/1M for readability and together_ai comparability
|
37 |
+
Model(
|
38 |
+
"gpt-3.5-turbo",
|
39 |
+
"gpt-3.5-turbo",
|
40 |
+
None,
|
41 |
+
"OpenAI",
|
42 |
+
supports_functions=True,
|
43 |
+
cost_per_million_input_tokens=1,
|
44 |
+
cost_per_million_output_tokens=2,
|
45 |
+
),
|
46 |
+
Model(
|
47 |
+
"gpt-4-turbo",
|
48 |
+
"gpt-4-1106-preview",
|
49 |
+
None,
|
50 |
+
"OpenAI",
|
51 |
+
supports_functions=True,
|
52 |
+
cost_per_million_input_tokens=10,
|
53 |
+
cost_per_million_output_tokens=30,
|
54 |
+
),
|
55 |
+
Model(
|
56 |
+
"gpt-4",
|
57 |
+
"gpt-4",
|
58 |
+
None,
|
59 |
+
"OpenAI",
|
60 |
+
supports_functions=True,
|
61 |
+
cost_per_million_input_tokens=30,
|
62 |
+
cost_per_million_output_tokens=60,
|
63 |
+
),
|
64 |
+
# we don't test gpt-4-32k because the tasks don't reach gpt-4 limitations
|
65 |
+
Model(
|
66 |
+
"gpt-3.5-turbo",
|
67 |
+
"gpt-3.5-turbo",
|
68 |
+
None,
|
69 |
+
"OpenAI",
|
70 |
+
supports_functions=True,
|
71 |
+
cost_per_million_input_tokens=1,
|
72 |
+
cost_per_million_output_tokens=2,
|
73 |
+
),
|
74 |
+
# source: https://www.together.ai/pricing
|
75 |
+
Model(
|
76 |
+
"llama-2-70b-chat",
|
77 |
+
"together_ai/togethercomputer/llama-2-70b-chat",
|
78 |
+
None,
|
79 |
+
"Together AI",
|
80 |
+
cost_per_million_tokens=0.2,
|
81 |
+
),
|
82 |
+
Model(
|
83 |
+
"Mixtral-8x7B-Instruct-v0.1",
|
84 |
+
"together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1",
|
85 |
+
None,
|
86 |
+
"Together AI",
|
87 |
+
size_billion_parameters=8 * 7,
|
88 |
+
cost_per_million_tokens=0.9,
|
89 |
+
),
|
90 |
+
# taken from endpoint pages
|
91 |
+
Model(
|
92 |
+
"zephyr-7b-beta",
|
93 |
+
"huggingface/HuggingFaceH4/zephyr-7b-beta",
|
94 |
+
env["ZEPHYR_7B_BETA_URL"],
|
95 |
+
"Hugging Face Inference Endpoint",
|
96 |
+
hourly_cost=1.30,
|
97 |
+
size_billion_parameters=7,
|
98 |
+
),
|
99 |
+
Model(
|
100 |
+
"Mistral-7B-Instruct-v0.2",
|
101 |
+
"huggingface/mistralai/Mistral-7B-Instruct-v0.2",
|
102 |
+
env["MISTRAL_7B_BETA_URL"],
|
103 |
+
"Hugging Face Inference Endpoint",
|
104 |
+
hourly_cost=1.30,
|
105 |
+
size_billion_parameters=7,
|
106 |
+
),
|
107 |
+
Model(
|
108 |
+
"TinyLlama/TinyLlama-1.1B-Chat-v1.0",
|
109 |
+
"huggingface/TinyLlama/TinyLlama-1.1B-Chat-v1.0",
|
110 |
+
env["TINY_LLAMA_URL"],
|
111 |
+
"Hugging Face Inference Endpoint",
|
112 |
+
hourly_cost=0.60,
|
113 |
+
size_billion_parameters=1.1,
|
114 |
+
),
|
115 |
+
Model(
|
116 |
+
"gemini-pro",
|
117 |
+
"gemini-pro",
|
118 |
+
None,
|
119 |
+
"Google VertexAI",
|
120 |
+
# https://ai.google.dev/pricing
|
121 |
+
cost="$0.25 / 1M input characters, $0.5 / 1K output characters (60 queries per minute are free)",
|
122 |
+
cost_per_million_input_tokens=0.25,
|
123 |
+
cost_per_million_output_tokens=0.5,
|
124 |
+
),
|
125 |
+
Model(
|
126 |
+
"chat-bison",
|
127 |
+
"chat-bison",
|
128 |
+
None,
|
129 |
+
"Google VertexAI",
|
130 |
+
# https://cloud.google.com/vertex-ai/docs/generative-ai/pricing
|
131 |
+
cost_per_million_input_tokens=0.25,
|
132 |
+
cost_per_million_output_tokens=0.5,
|
133 |
+
),
|
134 |
+
Model(
|
135 |
+
"chat-bison-32k",
|
136 |
+
"chat-bison-32k",
|
137 |
+
None,
|
138 |
+
"Google VertexAI",
|
139 |
+
# https://cloud.google.com/vertex-ai/docs/generative-ai/pricing
|
140 |
+
cost_per_million_input_tokens=0.25,
|
141 |
+
cost_per_million_output_tokens=0.5,
|
142 |
+
),
|
143 |
+
]
|
144 |
+
|
145 |
+
|
146 |
+
def models_costs():
|
147 |
+
return pd.DataFrame(
|
148 |
+
[(model.model_display_name, model.provider, model.cost) for model in MODELS],
|
149 |
+
columns=["Model", "Provider", "Cost"],
|
150 |
+
)
|
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
pandas
|
2 |
+
plotly
|