piotr-szleg-bards-ai commited on
Commit
0b07a42
1 Parent(s): 38ee35d
Files changed (5) hide show
  1. app.py +162 -0
  2. data +1 -0
  3. pipeline/config.py +62 -0
  4. pipeline/models.py +150 -0
  5. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import re

import gradio as gr
import pandas as pd
import plotly
from pandas.api.types import is_numeric_dtype

from pipeline.config import QueriesConfig, LLMBoardConfig
from pipeline.models import models_costs

# HTML fragment rendered at the top of the app; the {} placeholder is filled
# with QueriesConfig().base_query_template when the page is built.
README = """
<br/><h2>About this project</h2>
<p>
This project analyses different models and providers from the perspective of an application developer.
<br/>
Models are asked to summarize a text in different languages and using different output formats with following prompt:
</p>
<pre>
<code>{}</code>
</pre>
"""

# Pre-computed pipeline outputs, committed as timestamped CSV snapshots.
summary_df: pd.DataFrame = pd.read_csv("data/2024-01-25 13:30:17.207984_summary.csv")
time_of_day_comparison_df = pd.read_csv("data/2024-01-25 13:30:20.959750_time_of_day_comparison.csv")
general_plots = pd.read_csv("data/2024-01-25 12:22:00.759762_general_plot.csv")


# Plotly figure serialized to JSON by the pipeline and re-hydrated here.
with open("data/time_of_day_plot.json", "r") as f:
    time_of_day_plot = plotly.io.from_json(f.read())

model_costs_df = models_costs()


# Module-level UI state shared by the gradio callbacks below.
# NOTE(review): these globals are process-wide, so concurrent users share the
# same filter/collapse state — acceptable for a demo, confirm for multi-user use.
searched_model_name = ""
collapse_languages = False
collapse_output_method = False
37
+
38
+
39
def filter_dataframes(input: str):
    """Gradio handler: remember the model-name filter (lower-cased) and
    return the three re-filtered dataframes."""
    global searched_model_name
    searched_model_name = input.lower()
    return dataframes()
44
+
45
+
46
def collapse_languages_toggle():
    """Gradio handler: flip the language-collapse flag and return the
    refreshed summary table plus the new button label."""
    global collapse_languages
    collapse_languages = not collapse_languages
    button_text = "Un-collapse languages" if collapse_languages else "Collapse languages"
    return dataframes()[0], button_text
55
+
56
+
57
def collapse_output_method_toggle():
    """Gradio handler: flip the output-method-collapse flag and return the
    refreshed summary table plus the new button label."""
    global collapse_output_method
    collapse_output_method = not collapse_output_method
    button_text = "Un-collapse output method" if collapse_output_method else "Collapse output method"
    return dataframes()[0], button_text
66
+
67
+
68
def dataframes():
    """Build the three styled dataframes shown in the UI, applying the current
    global collapse/filter state.

    Returns a tuple of (summary, time-of-day comparison, model costs) Stylers,
    each filtered by the current ``searched_model_name`` substring.
    """
    global collapse_languages, collapse_output_method, searched_model_name, summary_df, time_of_day_comparison_df, model_costs_df

    summary_df_columns = summary_df.columns.to_list()
    group_columns = LLMBoardConfig().group_columns.copy()
    # Collapsing a dimension removes it both from the visible columns and from
    # the groupby key, so remaining rows are averaged over that dimension.
    if collapse_languages:
        summary_df_columns.remove("language")
        group_columns.remove("language")
    if collapse_output_method:
        summary_df_columns.remove("template_name")
        group_columns.remove("template_name")

    # NOTE(review): .mean() assumes every non-group column left here is numeric;
    # recent pandas raises for non-numeric columns — confirm against the CSV schema.
    summary_df_processed = summary_df[summary_df_columns].groupby(by=group_columns).mean().reset_index()
    # searched_model_name is already lower-cased by filter_dataframes().
    return (
        dataframe_style(summary_df_processed[summary_df_processed.model.str.lower().str.contains(searched_model_name)]),
        dataframe_style(
            time_of_day_comparison_df[time_of_day_comparison_df.model.str.lower().str.contains(searched_model_name)]
        ),
        dataframe_style(model_costs_df[model_costs_df.Model.str.lower().str.contains(searched_model_name)]),
    )
88
+
89
+
90
+ def dataframe_style(df: pd.DataFrame):
91
+ df = df.copy()
92
+ df.columns = [snake_case_to_title(column) for column in df.columns]
93
+ column_formats = {}
94
+ for column in df.columns:
95
+ if is_numeric_dtype(df[column]):
96
+ if column == "execution_time":
97
+ column_formats[column] = "{:.4f}"
98
+ else:
99
+ column_formats[column] = "{:.2f}"
100
+ df = df.style.format(column_formats, na_rep="")
101
+ return df
102
+
103
+
104
+ def snake_case_to_title(text):
105
+ # Convert snake_case to title-case
106
+ words = re.split(r"_", text)
107
+ title_words = [word.capitalize() for word in words]
108
+ return " ".join(title_words)
109
+
110
+
111
# UI components created up-front so they can be rendered inside specific
# layout containers below and referenced by the event wiring.
filter_textbox = gr.Textbox(label="Model name part")
filter_button = gr.Button("Filter dataframes by model name")
collapse_languages_button = gr.Button("Collapse languages")
collapse_output_method_button = gr.Button("Collapse output method")
# NOTE: removed unused module-level `last_textbox = 0` (never read anywhere).

with gr.Blocks() as demo:
    gr.HTML("<h1>LLM Board</h1>" + README.format(QueriesConfig().base_query_template))

    with gr.Row():
        filter_textbox.render()
        filter_button.render()
    with gr.Tab("Basic information"):
        # One plot (and its optional markdown comment) per row of the
        # general_plots CSV.
        for index, row in general_plots.iterrows():
            gr.Plot(plotly.io.from_json(row["plot_json"]), label=row["description"])
            gr.Markdown(str(row["comment"]))
    with gr.Tab("Output characteristics"):
        with gr.Row():
            collapse_languages_button.render()
            collapse_output_method_button.render()
        summary_ui = gr.DataFrame(dataframe_style(summary_df), label="Statistics")

    # BUG FIX: tab label typo "Preformance" -> "Performance".
    with gr.Tab("Performance by time of the day"):
        time_of_day_comparison_ui = gr.DataFrame(time_of_day_comparison_df, label="Time of day")
        time_of_day_plot_ui = gr.Plot(time_of_day_plot, label="Time of the day plot")

    with gr.Tab("Costs comparison"):
        models_costs_ui = gr.DataFrame(dataframe_style(models_costs()), label="Costs comparison")

    # Both clicking the button and pressing Enter in the textbox re-filter
    # all three tables with the same handler.
    filter_button.click(
        fn=filter_dataframes,
        inputs=filter_textbox,
        outputs=[summary_ui, time_of_day_comparison_ui, models_costs_ui],
        api_name="filter_dataframes",
    )
    filter_textbox.submit(
        fn=filter_dataframes,
        inputs=filter_textbox,
        outputs=[summary_ui, time_of_day_comparison_ui, models_costs_ui],
        api_name="filter_dataframes",
    )
    collapse_languages_button.click(
        fn=collapse_languages_toggle,
        outputs=[summary_ui, collapse_languages_button],
        api_name="collapse_languages_toggle",
    )
    collapse_output_method_button.click(
        fn=collapse_output_method_toggle,
        outputs=[summary_ui, collapse_output_method_button],
        api_name="collapse_output_method_toggle",
    )

demo.launch()
data ADDED
@@ -0,0 +1 @@
 
 
1
+ {"data":[{"alignmentgroup":"True","hovertemplate":"Model=Mistral-7B-Instruct-v0.2\u003cbr\u003eTime of day=%{x}\u003cbr\u003eExecution Time (s)=%{y}\u003cextra\u003e\u003c\u002fextra\u003e","legendgroup":"Mistral-7B-Instruct-v0.2","marker":{"color":"#636efa","pattern":{"shape":""}},"name":"Mistral-7B-Instruct-v0.2","offsetgroup":"Mistral-7B-Instruct-v0.2","orientation":"v","showlegend":true,"textposition":"auto","x":["afternoon","evening","morning"],"xaxis":"x","y":[null,null,3.174540030956268],"yaxis":"y","type":"bar"},{"alignmentgroup":"True","hovertemplate":"Model=Mixtral-8x7B-Instruct-v0.1\u003cbr\u003eTime of day=%{x}\u003cbr\u003eExecution Time (s)=%{y}\u003cextra\u003e\u003c\u002fextra\u003e","legendgroup":"Mixtral-8x7B-Instruct-v0.1","marker":{"color":"#EF553B","pattern":{"shape":""}},"name":"Mixtral-8x7B-Instruct-v0.1","offsetgroup":"Mixtral-8x7B-Instruct-v0.1","orientation":"v","showlegend":true,"textposition":"auto","x":["morning"],"xaxis":"x","y":[7.142197625471814],"yaxis":"y","type":"bar"},{"alignmentgroup":"True","hovertemplate":"Model=TinyLlama\u002fTinyLlama-1.1B-Chat-v1.0\u003cbr\u003eTime of day=%{x}\u003cbr\u003eExecution Time (s)=%{y}\u003cextra\u003e\u003c\u002fextra\u003e","legendgroup":"TinyLlama\u002fTinyLlama-1.1B-Chat-v1.0","marker":{"color":"#00cc96","pattern":{"shape":""}},"name":"TinyLlama\u002fTinyLlama-1.1B-Chat-v1.0","offsetgroup":"TinyLlama\u002fTinyLlama-1.1B-Chat-v1.0","orientation":"v","showlegend":true,"textposition":"auto","x":["morning"],"xaxis":"x","y":[1.2902645373344421],"yaxis":"y","type":"bar"},{"alignmentgroup":"True","hovertemplate":"Model=chat-bison\u003cbr\u003eTime of day=%{x}\u003cbr\u003eExecution Time 
(s)=%{y}\u003cextra\u003e\u003c\u002fextra\u003e","legendgroup":"chat-bison","marker":{"color":"#ab63fa","pattern":{"shape":""}},"name":"chat-bison","offsetgroup":"chat-bison","orientation":"v","showlegend":true,"textposition":"auto","x":["morning"],"xaxis":"x","y":[3.890243631601334],"yaxis":"y","type":"bar"},{"alignmentgroup":"True","hovertemplate":"Model=chat-bison-32k\u003cbr\u003eTime of day=%{x}\u003cbr\u003eExecution Time (s)=%{y}\u003cextra\u003e\u003c\u002fextra\u003e","legendgroup":"chat-bison-32k","marker":{"color":"#FFA15A","pattern":{"shape":""}},"name":"chat-bison-32k","offsetgroup":"chat-bison-32k","orientation":"v","showlegend":true,"textposition":"auto","x":["morning"],"xaxis":"x","y":[4.128177767992019],"yaxis":"y","type":"bar"},{"alignmentgroup":"True","hovertemplate":"Model=gemini-pro\u003cbr\u003eTime of day=%{x}\u003cbr\u003eExecution Time (s)=%{y}\u003cextra\u003e\u003c\u002fextra\u003e","legendgroup":"gemini-pro","marker":{"color":"#19d3f3","pattern":{"shape":""}},"name":"gemini-pro","offsetgroup":"gemini-pro","orientation":"v","showlegend":true,"textposition":"auto","x":["morning"],"xaxis":"x","y":[4.72138064004937],"yaxis":"y","type":"bar"},{"alignmentgroup":"True","hovertemplate":"Model=gpt-3.5-turbo\u003cbr\u003eTime of day=%{x}\u003cbr\u003eExecution Time (s)=%{y}\u003cextra\u003e\u003c\u002fextra\u003e","legendgroup":"gpt-3.5-turbo","marker":{"color":"#FF6692","pattern":{"shape":""}},"name":"gpt-3.5-turbo","offsetgroup":"gpt-3.5-turbo","orientation":"v","showlegend":true,"textposition":"auto","x":["afternoon","evening","morning"],"xaxis":"x","y":[4.666725277900696,4.351089119911194,5.336839800789242],"yaxis":"y","type":"bar"},{"alignmentgroup":"True","hovertemplate":"Model=gpt-4\u003cbr\u003eTime of day=%{x}\u003cbr\u003eExecution Time 
(s)=%{y}\u003cextra\u003e\u003c\u002fextra\u003e","legendgroup":"gpt-4","marker":{"color":"#B6E880","pattern":{"shape":""}},"name":"gpt-4","offsetgroup":"gpt-4","orientation":"v","showlegend":true,"textposition":"auto","x":["morning"],"xaxis":"x","y":[15.5218456586202],"yaxis":"y","type":"bar"},{"alignmentgroup":"True","hovertemplate":"Model=gpt-4-turbo\u003cbr\u003eTime of day=%{x}\u003cbr\u003eExecution Time (s)=%{y}\u003cextra\u003e\u003c\u002fextra\u003e","legendgroup":"gpt-4-turbo","marker":{"color":"#FF97FF","pattern":{"shape":""}},"name":"gpt-4-turbo","offsetgroup":"gpt-4-turbo","orientation":"v","showlegend":true,"textposition":"auto","x":["morning"],"xaxis":"x","y":[14.008529031276703],"yaxis":"y","type":"bar"},{"alignmentgroup":"True","hovertemplate":"Model=llama-2-70b-chat\u003cbr\u003eTime of day=%{x}\u003cbr\u003eExecution Time (s)=%{y}\u003cextra\u003e\u003c\u002fextra\u003e","legendgroup":"llama-2-70b-chat","marker":{"color":"#FECB52","pattern":{"shape":""}},"name":"llama-2-70b-chat","offsetgroup":"llama-2-70b-chat","orientation":"v","showlegend":true,"textposition":"auto","x":["afternoon","evening","morning"],"xaxis":"x","y":[2.1692867279052734,5.552149415016174,6.175082007679371],"yaxis":"y","type":"bar"},{"alignmentgroup":"True","hovertemplate":"Model=zephyr-7b-beta\u003cbr\u003eTime of day=%{x}\u003cbr\u003eExecution Time 
(s)=%{y}\u003cextra\u003e\u003c\u002fextra\u003e","legendgroup":"zephyr-7b-beta","marker":{"color":"#636efa","pattern":{"shape":""}},"name":"zephyr-7b-beta","offsetgroup":"zephyr-7b-beta","orientation":"v","showlegend":true,"textposition":"auto","x":["afternoon","evening","morning"],"xaxis":"x","y":[null,null,3.2387993240356447],"yaxis":"y","type":"bar"}],"layout":{"template":{"data":{"histogram2dcontour":[{"type":"histogram2dcontour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"choropleth":[{"type":"choropleth","colorbar":{"outlinewidth":0,"ticks":""}}],"histogram2d":[{"type":"histogram2d","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmap":[{"type":"heatmap","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmapgl":[{"type":"heatmapgl","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"contourcarpet":[{"type":"contou
rcarpet","colorbar":{"outlinewidth":0,"ticks":""}}],"contour":[{"type":"contour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"surface":[{"type":"surface","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"mesh3d":[{"type":"mesh3d","colorbar":{"outlinewidth":0,"ticks":""}}],"scatter":[{"fillpattern":{"fillmode":"overlay","size":10,"solidity":0.2},"type":"scatter"}],"parcoords":[{"type":"parcoords","line":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolargl":[{"type":"scatterpolargl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"scattergeo":[{"type":"scattergeo","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolar":[{"type":"scatterpolar","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"scattergl":[{"type":"scattergl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatter3d":[{"type":"scatter3d","line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattermapbox":[{"type":"scattermapbox","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterternary":[{"type":"scatterternary","marker":{"colorbar":{"outline
width":0,"ticks":""}}}],"scattercarpet":[{"type":"scattercarpet","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"type":"carpet"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"pie":[{"automargin":true,"type":"pie"}]},"layout":{"autotypenumbers":"strict","colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"hovermode":"closest","hoverlabel":{"align":"left"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"bgcolor":"#E5ECF6","angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"ternary":{"bgcolor":"#E5ECF6","aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#
fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]]},"xaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"yaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"geo":{"bgcolor":"white","landcolor":"#E5ECF6","subunitcolor":"white","showland":true,"showlakes":true,"lakecolor":"white"},"title":{"x":0.05},"mapbox":{"style":"light"}}},"xaxis":{"anchor":"y","domain":[0.0,1.0],"title":{"text":"Time of day"}},"yaxis":{"anchor":"x","domain":[0.0,1.0],"title":{"text":"Execution Time (s)"}},"legend":{"title":{"text":"Model"},"tracegroupgap":0},"title":{"text":"Execution time for different times of the day"},"barmode":"group"}}
pipeline/config.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+
3
+ # this try/except is important for publishing to Hugging Face
4
+ try:
5
+ from dagster import Config
6
+ except ImportError:
7
+ Config = object
8
+
9
class LLMBoardConfig(Config):
    """Column layout and output paths for building the LLM board tables."""

    # Columns used as the groupby key when aggregating measurements.
    group_columns: List[str] = ["model", "language", "template_name"]
    # Scalar measurement columns (one value per query).
    single_values_columns: List[str] = ["execution_time", "characters_count", "words_count"]
    # Columns whose cells hold lists of per-chunk measurements.
    list_columns: List[str] = ["chunk_sizes", "chunk_generation_times", "chunk_generation_times_by_chunk_sizes"]
    plot_dir: str = "./html/plots/"
    plot_json_dir: str = "./data/"
    saving_path: str = "data/"
16
+
17
+
18
class QueriesConfig(Config):
    """Prompt templates used to query the models."""

    # Base summarization prompt; {language} and {text} are filled per sample.
    base_query_template: str = """Summarize me this text, the summary should be in {language}
```
{text}
```
"""

    # Per-output-format suffix appended to the base prompt, keyed by the
    # template name recorded in the measurements.
    # NOTE(review): the json template contains `"<summary">` — the quote
    # placement looks like a typo for `"<summary>"`; confirm before changing,
    # since this string is part of the prompt actually sent to models.
    query_template: dict = {
        "markdown": """Return output as markdown""",
        "json": """Return output as json in format:
{
"summary": "<summary">
}""",
        "call": """Return output by calling summary_result()""",
    }
33
+
34
+
35
class OpenAIConfig(Config):
    """OpenAI client settings."""

    # When True, API calls are mocked instead of hitting the real endpoint.
    mock: bool = False
37
+
38
+
39
class QueriesDatasetConfig(Config):
    """Which dataset samples are summarized in each measurement run."""

    dataset_name: str = "GEM/xlsum"
    # Number of dataset samples summarized per measurement.
    samples_per_measurement: int = 2
    languages: List[str] = ["english", "japanese"]
    # Nested prompt configuration used when building the queries.
    query_config: QueriesConfig = QueriesConfig()
44
+
45
+
46
class SummaryConfig(Config):
    """Output location for the summary artifact."""

    saving_path: str = "data/"
48
+
49
+
50
class TimeOfDayComparisonConfig(Config):
    """Output location for the time-of-day comparison artifact."""

    saving_path: str = "data/"
52
+
53
+
54
class GeneralPlotConfig(Config):
    """Inputs for the general cost/latency projection plots."""

    plots_dir: str = "./html/plots/"
    saving_path: str = "data/"
    # Assumed dedicated-endpoint lifecycle overhead, in minutes.
    endpoint_startup_time_minutes: int = 2
    endpoint_cleanup_time_minutes: int = 2
    # NOTE(review): named seconds_per_token but 184 / 6 reads like
    # "184 tokens in 6 seconds", i.e. tokens-per-second (~30.7) — a rate of
    # ~30 s *per token* would be implausible; confirm which is intended.
    seconds_per_token: float = 184 / 6
    # Assumed workload for cost projections: tokens in/out and query count.
    input_size: int = 100
    expected_output_size: int = 50
    queries: int = 1000
pipeline/models.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from dataclasses import dataclass
from typing import Optional

import pandas as pd
5
+
6
+
7
@dataclass
class Model(object):
    """Static metadata about a benchmarked model: identifiers, provider and
    pricing information.

    Typically exactly one pricing scheme is supplied: an hourly endpoint
    cost, a flat per-million-token cost, or separate input/output token
    costs. ``__post_init__`` derives the human-readable ``cost`` string from
    whichever fields are set; an explicitly passed ``cost`` wins.
    """

    model_display_name: str  # name shown in the UI tables
    model_name: str  # identifier passed to the API client (may carry a provider prefix)
    api_url: Optional[str]  # dedicated-endpoint URL, None for hosted APIs
    provider: str
    hourly_cost: Optional[float] = None  # dedicated-endpoint price, $/hour
    cost: Optional[str] = None  # human-readable cost summary, derived if not given
    supports_functions: bool = False  # BUG FIX: was mis-annotated as `str`
    size_billion_parameters: Optional[float] = None  # model size, in billion parameters
    cost_per_million_tokens: Optional[float] = None  # flat $/1M tokens (input and output)
    cost_per_million_input_tokens: Optional[float] = None  # $/1M input tokens
    cost_per_million_output_tokens: Optional[float] = None  # $/1M output tokens

    def __post_init__(self):
        # A flat token price doubles as both the input and the output price.
        self.cost_per_million_input_tokens = self.cost_per_million_input_tokens or self.cost_per_million_tokens
        self.cost_per_million_output_tokens = self.cost_per_million_output_tokens or self.cost_per_million_tokens
        # Derive a display string only when one was not passed explicitly,
        # preferring hourly cost, then flat, then split input/output pricing.
        if not self.cost and self.hourly_cost:
            self.cost = f"${self.hourly_cost} / hour"
        if not self.cost and self.cost_per_million_tokens:
            self.cost = f"${self.cost_per_million_tokens} / 1M tokens"
        elif not self.cost and self.cost_per_million_input_tokens and self.cost_per_million_output_tokens:
            self.cost = f"${self.cost_per_million_input_tokens} / 1M input tokens, ${self.cost_per_million_output_tokens} / 1M output tokens"
30
+
31
+
32
# Environment mapping used to read Hugging Face endpoint URLs.
env = os.environ

# Registry of all benchmarked models with their pricing metadata.
MODELS = [
    # source: https://openai.com/pricing
    # converted costs from dollar/1K tokens to dollar/1M for readability and together_ai comparability
    Model(
        "gpt-3.5-turbo",
        "gpt-3.5-turbo",
        None,
        "OpenAI",
        supports_functions=True,
        cost_per_million_input_tokens=1,
        cost_per_million_output_tokens=2,
    ),
    Model(
        "gpt-4-turbo",
        "gpt-4-1106-preview",
        None,
        "OpenAI",
        supports_functions=True,
        cost_per_million_input_tokens=10,
        cost_per_million_output_tokens=30,
    ),
    Model(
        "gpt-4",
        "gpt-4",
        None,
        "OpenAI",
        supports_functions=True,
        cost_per_million_input_tokens=30,
        cost_per_million_output_tokens=60,
    ),
    # we don't test gpt-4-32k because the tasks don't reach gpt-4 limitations
    # BUG FIX: a second, byte-identical gpt-3.5-turbo entry used to follow
    # here, producing a duplicate row in the costs table; it was removed.
    # source: https://www.together.ai/pricing
    Model(
        "llama-2-70b-chat",
        "together_ai/togethercomputer/llama-2-70b-chat",
        None,
        "Together AI",
        cost_per_million_tokens=0.2,
    ),
    Model(
        "Mixtral-8x7B-Instruct-v0.1",
        "together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1",
        None,
        "Together AI",
        size_billion_parameters=8 * 7,
        cost_per_million_tokens=0.9,
    ),
    # Hourly costs taken from the endpoint pages. Endpoint URLs come from the
    # environment; env.get() (instead of env[...]) lets the module be imported
    # for the costs table even when the endpoints are not configured —
    # previously a missing variable raised KeyError at import time.
    Model(
        "zephyr-7b-beta",
        "huggingface/HuggingFaceH4/zephyr-7b-beta",
        env.get("ZEPHYR_7B_BETA_URL"),
        "Hugging Face Inference Endpoint",
        hourly_cost=1.30,
        size_billion_parameters=7,
    ),
    Model(
        "Mistral-7B-Instruct-v0.2",
        "huggingface/mistralai/Mistral-7B-Instruct-v0.2",
        env.get("MISTRAL_7B_BETA_URL"),
        "Hugging Face Inference Endpoint",
        hourly_cost=1.30,
        size_billion_parameters=7,
    ),
    Model(
        "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        "huggingface/TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        env.get("TINY_LLAMA_URL"),
        "Hugging Face Inference Endpoint",
        hourly_cost=0.60,
        size_billion_parameters=1.1,
    ),
    Model(
        "gemini-pro",
        "gemini-pro",
        None,
        "Google VertexAI",
        # https://ai.google.dev/pricing
        cost="$0.25 / 1M input characters, $0.5 / 1K output characters (60 queries per minute are free)",
        cost_per_million_input_tokens=0.25,
        cost_per_million_output_tokens=0.5,
    ),
    Model(
        "chat-bison",
        "chat-bison",
        None,
        "Google VertexAI",
        # https://cloud.google.com/vertex-ai/docs/generative-ai/pricing
        cost_per_million_input_tokens=0.25,
        cost_per_million_output_tokens=0.5,
    ),
    Model(
        "chat-bison-32k",
        "chat-bison-32k",
        None,
        "Google VertexAI",
        # https://cloud.google.com/vertex-ai/docs/generative-ai/pricing
        cost_per_million_input_tokens=0.25,
        cost_per_million_output_tokens=0.5,
    ),
]
144
+
145
+
146
def models_costs():
    """Return a DataFrame with one (Model, Provider, Cost) row per entry in MODELS."""
    records = [
        {"Model": m.model_display_name, "Provider": m.provider, "Cost": m.cost}
        for m in MODELS
    ]
    return pd.DataFrame(records, columns=["Model", "Provider", "Cost"])
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ pandas
2
+ plotly