noumanjavaid commited on
Commit
7945c9d
·
verified ·
1 Parent(s): dcbcd99

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +732 -0
app.py ADDED
@@ -0,0 +1,732 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import plotly.express as px
4
+ import plotly.graph_objects as go
5
+ import numpy as np
6
+ from datetime import datetime
7
+
8
# ---------------------------------------------------------------------------
# Page configuration
# ---------------------------------------------------------------------------
st.set_page_config(
    page_title="GPT-4o mini Pricing Calculator",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded",
)

# ---------------------------------------------------------------------------
# Custom styling
# ---------------------------------------------------------------------------
# Raw CSS injected once per run; the class names below are referenced by the
# HTML fragments the pages emit via st.markdown(..., unsafe_allow_html=True).
_CUSTOM_CSS = """
<style>
    .main {
        background-color: #f9f9f9;
        padding: 1rem;
    }
    .title-container {
        background-color: #f0f2f6;
        padding: 1rem;
        border-radius: 10px;
        margin-bottom: 1rem;
    }
    .metric-container {
        background-color: white;
        padding: 1rem;
        border-radius: 10px;
        box-shadow: 0 2px 5px rgba(0,0,0,0.1);
        margin-bottom: 1rem;
    }
    .sub-header {
        font-weight: bold;
        color: #3366CC;
        margin-bottom: 0.5rem;
    }
    .footer {
        text-align: center;
        margin-top: 2rem;
        font-size: 0.8rem;
        color: #666;
    }
    .stTabs [data-baseweb="tab-list"] {
        gap: 24px;
    }
    .stTabs [data-baseweb="tab"] {
        height: 50px;
        white-space: pre-wrap;
        background-color: #f9f9f9;
        border-radius: 4px 4px 0px 0px;
        padding: 10px;
    }
    .stTabs [aria-selected="true"] {
        background-color: #3366CC;
        color: white;
    }
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# ---------------------------------------------------------------------------
# Title section
# ---------------------------------------------------------------------------
st.markdown('<div class="title-container">', unsafe_allow_html=True)
st.title("GPT-4o mini Pricing Calculator")
st.markdown("Interactive cost analysis for text and audio processing using GPT-4o mini")
st.markdown('</div>', unsafe_allow_html=True)

# ---------------------------------------------------------------------------
# Sidebar navigation
# ---------------------------------------------------------------------------
# `page` holds the label of the selected entry and drives the page dispatch
# further down the script.
_NAV_PAGES = [
    "Dashboard",
    "Text Analysis",
    "Audio Analysis",
    "Comparative Analysis",
    "Cost Calculator",
    "Documentation",
]
st.sidebar.title("Navigation")
page = st.sidebar.radio("Select Page", _NAV_PAGES)

# ---------------------------------------------------------------------------
# GPT-4o mini pricing constants (all values are US dollars per 1M tokens)
# ---------------------------------------------------------------------------
TEXT_INPUT_PRICE = 0.60    # text input
TEXT_OUTPUT_PRICE = 2.40   # text output
TEXT_CACHED_PRICE = 0.30   # cached text input

AUDIO_INPUT_PRICE = 10.00   # audio input
AUDIO_OUTPUT_PRICE = 20.00  # audio output
AUDIO_CACHED_PRICE = 0.30   # cached audio input
82
+
83
+ # Helper functions for calculations
84
+ def calculate_text_costs(users, msgs_per_user, input_tokens, output_tokens, cached_pct=0):
85
+ # Calculate total token counts
86
+ total_input_tokens = users * msgs_per_user * input_tokens
87
+ total_output_tokens = users * msgs_per_user * output_tokens
88
+
89
+ # Apply caching
90
+ cached_input_tokens = total_input_tokens * (cached_pct / 100)
91
+ standard_input_tokens = total_input_tokens - cached_input_tokens
92
+
93
+ # Calculate costs
94
+ input_cost = (standard_input_tokens * TEXT_INPUT_PRICE / 1000000) + (cached_input_tokens * TEXT_CACHED_PRICE / 1000000)
95
+ output_cost = total_output_tokens * TEXT_OUTPUT_PRICE / 1000000
96
+ total_cost = input_cost + output_cost
97
+
98
+ return {
99
+ 'input_tokens': total_input_tokens,
100
+ 'output_tokens': total_output_tokens,
101
+ 'input_cost': input_cost,
102
+ 'output_cost': output_cost,
103
+ 'total_cost': total_cost,
104
+ 'cost_per_message': total_cost / (users * msgs_per_user) if users * msgs_per_user > 0 else 0
105
+ }
106
+
107
+ def calculate_audio_costs(users, audio_minutes, tokens_per_sec=600, output_ratio=0.05, cached_pct=0):
108
+ # Calculate token counts
109
+ seconds = audio_minutes * 60
110
+ total_input_tokens = users * seconds * tokens_per_sec
111
+ total_output_tokens = total_input_tokens * output_ratio
112
+
113
+ # Apply caching
114
+ cached_input_tokens = total_input_tokens * (cached_pct / 100)
115
+ standard_input_tokens = total_input_tokens - cached_input_tokens
116
+
117
+ # Calculate costs
118
+ input_cost = (standard_input_tokens * AUDIO_INPUT_PRICE / 1000000) + (cached_input_tokens * AUDIO_CACHED_PRICE / 1000000)
119
+ output_cost = total_output_tokens * AUDIO_OUTPUT_PRICE / 1000000
120
+ total_cost = input_cost + output_cost
121
+
122
+ return {
123
+ 'input_tokens': total_input_tokens,
124
+ 'output_tokens': total_output_tokens,
125
+ 'input_cost': input_cost,
126
+ 'output_cost': output_cost,
127
+ 'total_cost': total_cost,
128
+ 'cost_per_minute': total_cost / audio_minutes if audio_minutes > 0 else 0
129
+ }
130
+
131
+ # Dashboard page
132
if page == "Dashboard":
    # Landing page: static price cards, one price chart and a scenario table.
    st.header("GPT-4o mini Pricing Overview")
    text_col, audio_col, averages_col = st.columns(3)

    # The text and audio cards share one layout: a heading followed by the
    # input / output / cached-input price per 1M tokens.
    metric_labels = ("Input Cost", "Output Cost", "Cached Input")
    price_cards = (
        (text_col, "Text Processing",
         (TEXT_INPUT_PRICE, TEXT_OUTPUT_PRICE, TEXT_CACHED_PRICE)),
        (audio_col, "Audio Processing",
         (AUDIO_INPUT_PRICE, AUDIO_OUTPUT_PRICE, AUDIO_CACHED_PRICE)),
    )
    for column, heading, prices in price_cards:
        with column:
            st.markdown('<div class="metric-container">', unsafe_allow_html=True)
            st.markdown(f'<p class="sub-header">{heading}</p>', unsafe_allow_html=True)
            for label, price in zip(metric_labels, prices):
                st.metric(label, f"${price:.2f}/1M tokens")
            st.markdown('</div>', unsafe_allow_html=True)

    with averages_col:
        st.markdown('<div class="metric-container">', unsafe_allow_html=True)
        st.markdown('<p class="sub-header">Average Costs</p>', unsafe_allow_html=True)

        # Example workloads: one user sending 100 short messages, and one
        # user processing 10 minutes of audio.
        text_example = calculate_text_costs(1, 100, 15, 20)
        audio_example = calculate_audio_costs(1, 10)
        text_per_message = text_example['cost_per_message']
        audio_per_minute = audio_example['cost_per_minute']
        audio_text_ratio = audio_per_minute / (text_per_message * 60)

        st.metric("Avg Text Cost/Message", f"${text_per_message:.6f}")
        st.metric("Avg Audio Cost/Minute", f"${audio_per_minute:.4f}")
        st.metric("Audio/Text Cost Ratio", f"{audio_text_ratio:.1f}x")
        st.markdown('</div>', unsafe_allow_html=True)

    # Quick comparison chart
    st.subheader("Cost Comparison: Text vs. Audio")

    price_rows = [
        ('Text', 'Input', TEXT_INPUT_PRICE),
        ('Text', 'Output', TEXT_OUTPUT_PRICE),
        ('Audio', 'Input', AUDIO_INPUT_PRICE),
        ('Audio', 'Output', AUDIO_OUTPUT_PRICE),
    ]
    price_table = pd.DataFrame(
        price_rows,
        columns=['Model Type', 'Cost Component', 'Cost per 1M Tokens'],
    )

    price_chart = px.bar(
        price_table,
        x='Model Type',
        y='Cost per 1M Tokens',
        color='Cost Component',
        barmode='group',
        title="Cost Comparison per 1M Tokens",
        color_discrete_sequence=["#3366CC", "#FF9900"],
    )
    price_chart.update_layout(yaxis_title="Cost ($)")
    st.plotly_chart(price_chart, use_container_width=True)

    # Usage scenarios (illustrative, hard-coded figures)
    st.subheader("Common Usage Scenarios")
    scenario_rows = [
        ('Customer Support Chat', 'Text', 10.50, 'Basic'),
        ('Document Analysis', 'Text', 25.75, 'Basic'),
        ('Meeting Transcription', 'Audio', 185.00, 'Premium'),
        ('Podcast Analysis', 'Audio', 740.00, 'Enterprise'),
        ('Phone Call Analysis', 'Audio', 370.00, 'Premium'),
    ]
    scenario_table = pd.DataFrame(
        scenario_rows,
        columns=['Scenario', 'Type', 'Avg Monthly Cost', 'Suitable Plan'],
    )

    st.dataframe(scenario_table, use_container_width=True)
191
+
192
+ # Text Model Analysis
193
+ elif page == "Text Analysis":
194
+ st.header("GPT-4o mini Text Model Analysis")
195
+
196
+ st.info(f"""
197
+ **Text Model Pricing**:
198
+ - Input: ${TEXT_INPUT_PRICE:.2f} per 1M tokens
199
+ - Output: ${TEXT_OUTPUT_PRICE:.2f} per 1M tokens
200
+ - Cached Input: ${TEXT_CACHED_PRICE:.2f} per 1M tokens
201
+ """)
202
+
203
+ # Parameters section with input widgets
204
+ st.subheader("Usage Parameters")
205
+
206
+ col1, col2 = st.columns(2)
207
+ with col1:
208
+ users = st.number_input("Number of Users", min_value=100, value=5000, step=100)
209
+ free_pct = st.slider("% Free Tier Users", min_value=0, max_value=100, value=80)
210
+ basic_pct = st.slider("% Basic Tier Users (\$12.99)", min_value=0, max_value=100, value=15)
211
+ pro_pct = st.slider("% Pro Tier Users (\$24.99)", min_value=0, max_value=100, value=5)
212
+
213
+ with col2:
214
+ msgs_per_user_free = st.number_input("Free Tier Messages/Month", min_value=10, value=100, step=10)
215
+ msgs_per_user_basic = st.number_input("Basic Tier Messages/Month", min_value=10, value=300, step=10)
216
+ msgs_per_user_pro = st.number_input("Pro Tier Messages/Month", min_value=10, value=500, step=10)
217
+ input_tokens = st.slider("Input Tokens per Message", min_value=5, max_value=100, value=15)
218
+ output_tokens = st.slider("Output Tokens per Message", min_value=5, max_value=100, value=20)
219
+ cached_pct = st.slider("% Cached Input Tokens", min_value=0, max_value=100, value=0)
220
+
221
+ # Calculate user distribution
222
+ total_pct = free_pct + basic_pct + pro_pct
223
+
224
+ if total_pct != 100:
225
+ st.warning(f"Tier percentages sum to {total_pct}%. Please adjust to equal 100%.")
226
+
227
+ free_users = int(users * free_pct / 100)
228
+ basic_users = int(users * basic_pct / 100)
229
+ pro_users = int(users * pro_pct / 100)
230
+
231
+ # Token cost calculations
232
+ free_costs = calculate_text_costs(free_users, msgs_per_user_free, input_tokens, output_tokens, cached_pct)
233
+ basic_costs = calculate_text_costs(basic_users, msgs_per_user_basic, input_tokens, output_tokens, cached_pct)
234
+ pro_costs = calculate_text_costs(pro_users, msgs_per_user_pro, input_tokens, output_tokens, cached_pct)
235
+
236
+ # Calculate revenue
237
+ free_revenue = 0
238
+ basic_revenue = basic_users * 12.99
239
+ pro_revenue = pro_users * 24.99
240
+ total_revenue = free_revenue + basic_revenue + pro_revenue
241
+ total_cost = free_costs['total_cost'] + basic_costs['total_cost'] + pro_costs['total_cost']
242
+
243
+ # Display metrics
244
+ st.subheader("Cost Analysis")
245
+
246
+ col1, col2, col3 = st.columns(3)
247
+
248
+ with col1:
249
+ st.metric("Total Monthly Cost", f"${total_cost:.2f}")
250
+ st.metric("Total Monthly Revenue", f"${total_revenue:.2f}")
251
+
252
+ with col2:
253
+ profit = total_revenue - total_cost
254
+ margin = (profit / total_revenue * 100) if total_revenue > 0 else 0
255
+ st.metric("Monthly Profit", f"${profit:.2f}")
256
+ st.metric("Profit Margin", f"{margin:.1f}%")
257
+
258
+ with col3:
259
+ avg_cost_per_user = total_cost / users if users > 0 else 0
260
+ st.metric("Avg. Cost per User", f"${avg_cost_per_user:.4f}")
261
+ st.metric("Total Messages/Month", f"{free_users * msgs_per_user_free + basic_users * msgs_per_user_basic + pro_users * msgs_per_user_pro:,}")
262
+
263
+ # Create visualizations
264
+ st.subheader("Cost Distribution")
265
+
266
+ # Cost breakdown by tier
267
+ tier_costs = pd.DataFrame({
268
+ 'Tier': ['Free', 'Basic', 'Pro'],
269
+ 'Cost': [free_costs['total_cost'], basic_costs['total_cost'], pro_costs['total_cost']],
270
+ 'Users': [free_users, basic_users, pro_users]
271
+ })
272
+
273
+ col1, col2 = st.columns(2)
274
+
275
+ with col1:
276
+ fig = px.pie(tier_costs, values='Cost', names='Tier', title="Cost Distribution by Tier",
277
+ color_discrete_sequence=px.colors.qualitative.Plotly)
278
+ st.plotly_chart(fig, use_container_width=True)
279
+
280
+ with col2:
281
+ # Create revenue vs cost comparison
282
+ comparison_data = pd.DataFrame({
283
+ 'Tier': ['Free', 'Basic', 'Pro'],
284
+ 'Revenue': [free_revenue, basic_revenue, pro_revenue],
285
+ 'Cost': [free_costs['total_cost'], basic_costs['total_cost'], pro_costs['total_cost']]
286
+ })
287
+
288
+ fig = px.bar(comparison_data, x='Tier', y=['Revenue', 'Cost'], barmode='group',
289
+ title="Revenue vs Cost by Tier",
290
+ color_discrete_sequence=["#3366CC", "#FF9900"])
291
+ st.plotly_chart(fig, use_container_width=True)
292
+
293
+ # Token usage breakdown
294
+ st.subheader("Token Usage Analysis")
295
+
296
+ token_data = pd.DataFrame({
297
+ 'Tier': ['Free', 'Basic', 'Pro'],
298
+ 'Input Tokens (M)': [free_costs['input_tokens']/1000000, basic_costs['input_tokens']/1000000, pro_costs['input_tokens']/1000000],
299
+ 'Output Tokens (M)': [free_costs['output_tokens']/1000000, basic_costs['output_tokens']/1000000, pro_costs['output_tokens']/1000000]
300
+ })
301
+
302
+ fig = px.bar(token_data, x='Tier', y=['Input Tokens (M)', 'Output Tokens (M)'], barmode='group',
303
+ title="Monthly Token Usage by Tier (Millions)",
304
+ color_discrete_sequence=["#4CAF50", "#2196F3"])
305
+ st.plotly_chart(fig, use_container_width=True)
306
+
307
+ # Break-even analysis
308
+ st.subheader("Break-even Analysis")
309
+
310
+ # Calculate fixed costs (assumed)
311
+ fixed_costs = 2000
312
+
313
+ # Calculate contribution margin per user type
314
+ cm_basic = 12.99 - (basic_costs['total_cost'] / basic_users if basic_users > 0 else 0)
315
+ cm_pro = 24.99 - (pro_costs['total_cost'] / pro_users if pro_users > 0 else 0)
316
+
317
+ # Calculate break-even point
318
+ total_cm = (cm_basic * basic_users) + (cm_pro * pro_users)
319
+ break_even_users = int(fixed_costs / (total_cm / (basic_users + pro_users))) if basic_users + pro_users > 0 else 0
320
+
321
+ col1, col2 = st.columns(2)
322
+
323
+ with col1:
324
+ st.metric("Fixed Monthly Costs", f"${fixed_costs:.2f}")
325
+ st.metric("Contribution Margin (Basic)", f"${cm_basic:.2f}/user")
326
+ st.metric("Contribution Margin (Pro)", f"${cm_pro:.2f}/user")
327
+
328
+ with col2:
329
+ st.metric("Break-even Point", f"{break_even_users:,} paid users")
330
+ be_conversion = break_even_users / (users * (basic_pct + pro_pct) / 100) if users * (basic_pct + pro_pct) / 100 > 0 else 0
331
+ st.metric("Required Conversion Rate", f"{be_conversion:.1%}")
332
+
333
+ # Audio Model Analysis
334
+ elif page == "Audio Analysis":
335
+ st.header("GPT-4o mini Audio Model Analysis")
336
+
337
+ st.info(f"""
338
+ **Audio Model Pricing**:
339
+ - Input: ${AUDIO_INPUT_PRICE:.2f} per 1M tokens
340
+ - Output: ${AUDIO_OUTPUT_PRICE:.2f} per 1M tokens
341
+ - Cached Input: ${AUDIO_CACHED_PRICE:.2f} per 1M tokens
342
+ """)
343
+
344
+ # Audio model parameters
345
+ st.subheader("Audio Processing Parameters")
346
+
347
+ col1, col2 = st.columns(2)
348
+ with col1:
349
+ audio_minutes = st.number_input("Average Minutes of Audio/Month/User", min_value=1, value=10, step=1)
350
+ tokens_per_sec = st.number_input("Audio Tokens per Second", min_value=100, value=600, step=10)
351
+ users = st.number_input("Number of Users", min_value=10, value=1000, step=10)
352
+
353
+ with col2:
354
+ output_tokens_ratio = st.slider("Output:Input Token Ratio", min_value=0.01, max_value=0.20, value=0.05, step=0.01)
355
+ cached_ratio = st.slider("% Input Tokens Cached", min_value=0, max_value=100, value=20)
356
+ pricing_tier = st.selectbox("Pricing Model", ["B2C App (\$12.99/month)",
357
+ "B2B Service (\$299/month)",
358
+ "Enterprise (\$2500/month)"])
359
+
360
+ # Calculate costs
361
+ audio_costs = calculate_audio_costs(users, audio_minutes, tokens_per_sec, output_tokens_ratio, cached_ratio)
362
+
363
+ # Pricing model revenue
364
+ if pricing_tier == "B2C App (\$12.99/month)":
365
+ price_per_user = 12.99
366
+ elif pricing_tier == "B2B Service (\$299/month)":
367
+ price_per_user = 299
368
+ else: # Enterprise
369
+ price_per_user = 2500
370
+
371
+ revenue = users * price_per_user
372
+ cost_per_user = audio_costs['total_cost'] / users if users > 0 else 0
373
+
374
+ profit = revenue - audio_costs['total_cost']
375
+ margin = (profit / revenue) * 100 if revenue > 0 else 0
376
+
377
+ # Display metrics and charts
378
+ st.subheader("Cost Metrics")
379
+
380
+ col1, col2, col3 = st.columns(3)
381
+ with col1:
382
+ st.metric("Cost per Minute", f"${(audio_costs['total_cost']/audio_minutes/users):.4f}")
383
+ st.metric("Total Monthly Cost", f"${audio_costs['total_cost']:.2f}")
384
+ with col2:
385
+ st.metric("Monthly Revenue", f"${revenue:.2f}")
386
+ st.metric("Monthly Profit", f"${profit:.2f}")
387
+ with col3:
388
+ st.metric("Profit Margin", f"{margin:.1f}%")
389
+ st.metric("Cost per User", f"${cost_per_user:.2f}")
390
+
391
+ # Visualization - Cost breakdown
392
+ st.subheader("Cost Breakdown")
393
+
394
+ # Calculate components
395
+ standard_input_cost = audio_costs['input_tokens'] * (1 - cached_ratio/100) * AUDIO_INPUT_PRICE / 1000000
396
+ cached_input_cost = audio_costs['input_tokens'] * (cached_ratio/100) * AUDIO_CACHED_PRICE / 1000000
397
+ output_cost = audio_costs['output_cost']
398
+
399
+ cost_components = pd.DataFrame({
400
+ 'Component': ['Standard Input Cost', 'Cached Input Cost', 'Output Cost'],
401
+ 'Cost': [standard_input_cost, cached_input_cost, output_cost]
402
+ })
403
+
404
+ col1, col2 = st.columns(2)
405
+
406
+ with col1:
407
+ fig = px.pie(cost_components, values='Cost', names='Component', title="Audio Processing Cost Distribution",
408
+ color_discrete_sequence=px.colors.qualitative.Pastel)
409
+ st.plotly_chart(fig, use_container_width=True)
410
+
411
+ with col2:
412
+ fig = px.bar(cost_components, x='Component', y='Cost', title="Cost Component Comparison",
413
+ color_discrete_sequence=["#4CAF50", "#2196F3", "#FF9800"])
414
+ st.plotly_chart(fig, use_container_width=True)
415
+
416
+ # Caching impact analysis
417
+ st.subheader("Impact of Caching on Costs")
418
+
419
+ cache_options = [0, 20, 40, 60, 80, 100]
420
+ cache_costs = []
421
+
422
+ for cache_pct in cache_options:
423
+ cache_result = calculate_audio_costs(users, audio_minutes, tokens_per_sec, output_tokens_ratio, cache_pct)
424
+ cache_costs.append(cache_result['total_cost'])
425
+
426
+ cache_data = pd.DataFrame({
427
+ 'Cache Percentage': cache_options,
428
+ 'Total Cost': cache_costs,
429
+ 'Savings': [audio_costs['total_cost'] - cost for cost in cache_costs],
430
+ 'Savings Percentage': [(audio_costs['total_cost'] - cost) / audio_costs['total_cost'] * 100 if audio_costs['total_cost'] > 0 else 0 for cost in cache_costs]
431
+ })
432
+
433
+ fig = px.line(cache_data, x='Cache Percentage', y='Total Cost', markers=True,
434
+ title="Effect of Caching on Total Cost",
435
+ labels={'Cache Percentage': 'Cached Input Tokens (%)', 'Total Cost': 'Total Cost ($)'},
436
+ color_discrete_sequence=["#FF5722"])
437
+ st.plotly_chart(fig, use_container_width=True)
438
+
439
+ # Optimization recommendations
440
+ if margin < 50:
441
+ st.warning("Warning: Low profit margin detected. Consider optimization strategies below.")
442
+
443
+ with st.expander("📈 Cost Optimization Strategies"):
444
+ st.markdown("""
445
+ 1. **Increase Caching**: Boost cached input ratio to reduce costs by up to 97%
446
+ 2. **Hybrid Processing**: Use specialized audio services for initial transcription
447
+ 3. **Input Token Optimization**: Filter silence and implement smart chunking
448
+ 4. **Tiered Processing**: Apply different processing depths based on user needs
449
+ """)
450
+
451
+ # Calculate hybrid model savings
452
+ hybrid_cost = (audio_costs['input_tokens'] * 0.006 / 1000000) + (audio_costs['output_tokens'] * TEXT_OUTPUT_PRICE / 1000000)
453
+ hybrid_savings = audio_costs['total_cost'] - hybrid_cost
454
+ hybrid_savings_pct = (hybrid_savings / audio_costs['total_cost']) * 100 if audio_costs['total_cost'] > 0 else 0
455
+
456
+ st.info(f"""
457
+ **Hybrid Model Potential Savings**: ${hybrid_savings:.2f} ({hybrid_savings_pct:.1f}%)
458
+
459
+ By using specialized transcription services (like Whisper) at \$0.006/min and processing the resulting text with GPT-4o mini text pricing.
460
+ """)
461
+
462
+ # Comparative Analysis
463
+ elif page == "Comparative Analysis":
464
+ st.header("Text vs. Audio Comparative Analysis")
465
+
466
+ # Cost comparison by use case
467
+ st.subheader("Cost Analysis by Use Case")
468
+
469
+ use_cases = pd.DataFrame({
470
+ 'Use Case': ['Customer Service', 'Content Creation', 'Data Analysis', 'Meeting Transcription'],
471
+ 'Text Cost ($)': [0.05, 0.12, 0.08, 0.15],
472
+ 'Audio Cost ($)': [1.85, 4.20, 2.10, 11.10],
473
+ 'Cost Ratio': [37, 35, 26, 74],
474
+ 'Recommended Model': ['Text', 'Text', 'Text', 'Hybrid']
475
+ })
476
+
477
+ st.dataframe(use_cases, use_container_width=True)
478
+
479
+ # Cost scaling visualization
480
+ st.subheader("Cost Scaling with User Count")
481
+
482
+ # Toggle for linear/log scale
483
+ scale_type = st.radio("Scale Type", ["Linear", "Logarithmic"], horizontal=True)
484
+
485
+ # Generate data for comparison
486
+ users_range = [100, 500, 1000, 5000, 10000, 50000, 100000]
487
+
488
+ text_costs = [users * 0.0001 * 300 for users in users_range] # 300 msgs avg
489
+ audio_costs = [users * 0.37 * 10 for users in users_range] # 10 minutes avg
490
+
491
+ scaling_data = pd.DataFrame({
492
+ 'Users': users_range,
493
+ 'Text Processing Cost': text_costs,
494
+ 'Audio Processing Cost': audio_costs
495
+ })
496
+
497
+ # Create the chart
498
+ fig = px.line(scaling_data, x='Users', y=['Text Processing Cost', 'Audio Processing Cost'],
499
+ markers=True, title="Cost Scaling by User Count",
500
+ color_discrete_sequence=["#3366CC", "#FF9900"])
501
+
502
+ if scale_type == "Logarithmic":
503
+ fig.update_layout(yaxis_type="log")
504
+
505
+ st.plotly_chart(fig, use_container_width=True)
506
+
507
+ # Break-even analysis
508
+ st.subheader("Break-even Analysis")
509
+
510
+ col1, col2 = st.columns(2)
511
+
512
+ with col1:
513
+ monthly_subscription = st.slider("Monthly Subscription ($)",
514
+ min_value=5.0, max_value=50.0, value=12.99, step=0.99)
515
+ text_usage = st.slider("Avg. Messages per User",
516
+ min_value=50, max_value=1000, value=300, step=50)
517
+
518
+ with col2:
519
+ audio_mins = st.slider("Avg. Audio Minutes per User",
520
+ min_value=1, max_value=60, value=10, step=1)
521
+ fixed_costs = st.number_input("Monthly Fixed Costs ($)",
522
+ min_value=0, value=2000, step=100)
523
+
524
+ # Calculate break-even points
525
+ text_cost_per_user = calculate_text_costs(1, text_usage, 15, 20)['total_cost']
526
+ audio_cost_per_user = calculate_audio_costs(1, audio_mins)['total_cost']
527
+
528
+ text_contribution = monthly_subscription - text_cost_per_user
529
+ audio_contribution = monthly_subscription - audio_cost_per_user
530
+
531
+ text_break_even = fixed_costs / text_contribution if text_contribution > 0 else float('inf')
532
+ audio_break_even = fixed_costs / audio_contribution if audio_contribution > 0 else float('inf')
533
+
534
+ # Display break-even metrics
535
+ col1, col2 = st.columns(2)
536
+ with col1:
537
+ st.metric("Text Break-even Users", f"{int(text_break_even)}")
538
+ st.metric("Text Margin per User",
539
+ f"${text_contribution:.2f} ({text_contribution/monthly_subscription*100:.1f}%)")
540
+
541
+ with col2:
542
+ st.metric("Audio Break-even Users", f"{int(audio_break_even)}")
543
+ st.metric("Audio Margin per User",
544
+ f"${audio_contribution:.2f} ({audio_contribution/monthly_subscription*100:.1f}%)")
545
+
546
+ # Create a combined visualization
547
+ st.subheader("Profit Analysis")
548
+
549
+ user_counts = list(range(0, 10001, 500))
550
+ text_profits = [(monthly_subscription - text_cost_per_user) * users - fixed_costs for users in user_counts]
551
+ audio_profits = [(monthly_subscription - audio_cost_per_user) * users - fixed_costs for users in user_counts]
552
+
553
+ profit_data = pd.DataFrame({
554
+ 'Users': user_counts,
555
+ 'Text Profit': text_profits,
556
+ 'Audio Profit': audio_profits
557
+ })
558
+
559
+ fig = px.line(profit_data, x='Users', y=['Text Profit', 'Audio Profit'],
560
+ title="Profit by User Count",
561
+ labels={'value': 'Profit ($)', 'Users': 'Number of Users'},
562
+ color_discrete_sequence=["#3366CC", "#FF9900"])
563
+
564
+ fig.add_hline(y=0, line_dash="dash", line_color="red")
565
+ st.plotly_chart(fig, use_container_width=True)
566
+
567
+ # Business model recommendations
568
+ st.subheader("Business Model Recommendations")
569
+
570
+ if audio_cost_per_user > monthly_subscription:
571
+ st.warning(f"""
572
+ ⚠️ Audio processing costs (${audio_cost_per_user:.2f}/user) exceed subscription price (${monthly_subscription:.2f}).
573
+ Consider increasing subscription price or implementing usage limits for audio features.
574
+ """)
575
+
576
+ recommended_model = "Text-Only" if text_contribution > audio_contribution else "Hybrid"
577
+
578
+ st.success(f"""
579
+ ✅ Recommended Business Model: **{recommended_model}**
580
+
581
+ Based on your inputs, a {'text-focused approach' if recommended_model == 'Text-Only' else 'hybrid approach with limited audio processing'}
582
+ would maximize profitability while maintaining competitive pricing.
583
+ """)
584
+
585
+ # Cost Calculator
586
+ elif page == "Cost Calculator":
587
+ st.header("Interactive Cost Calculator")
588
+
589
+ # Choose model type
590
+ model_type = st.selectbox("Select Model Type", ["Text", "Audio", "Hybrid"])
591
+
592
+ if model_type == "Text":
593
+ st.subheader("GPT-4o mini Text Model Calculator")
594
+
595
+ # Input parameters
596
+ col1, col2 = st.columns(2)
597
+ with col1:
598
+ total_users = st.number_input("Total Users", min_value=1, value=1000, step=100)
599
+ msgs_per_user = st.number_input("Monthly Messages per User", min_value=1, value=300, step=10)
600
+ input_tokens = st.number_input("Avg. Input Tokens per Message", min_value=1, value=15, step=1)
601
+ output_tokens = st.number_input("Avg. Output Tokens per Message", min_value=1, value=20, step=1)
602
+
603
+ with col2:
604
+ subscription_price = st.number_input("Monthly Subscription Price ($)",
605
+ min_value=0.0, value=12.99, step=0.99)
606
+ free_tier_ratio = st.slider("Free:Paid User Ratio",
607
+ min_value=0.0, max_value=20.0, value=9.0, step=0.1)
608
+ cached_pct = st.slider("% Cached Input",
609
+ min_value=0, max_value=100, value=0, step=5)
610
+
611
+ # Calculate values
612
+ free_users = int(total_users * (free_tier_ratio / (free_tier_ratio + 1)))
613
+ paid_users = total_users - free_users
614
+
615
+ # Free tier calculations
616
+ free_msgs_limit = 100 # Free tier message limit
617
+ free_total_msgs = free_users * min(msgs_per_user, free_msgs_limit)
618
+
619
+ # Token calculations
620
+ free_costs = calculate_text_costs(free_users, min(msgs_per_user, free_msgs_limit), input_tokens, output_tokens, cached_pct)
621
+ paid_costs = calculate_text_costs(paid_users, msgs_per_user, input_tokens, output_tokens, cached_pct)
622
+
623
+ total_cost = free_costs['total_cost'] + paid_costs['total_cost']
624
+
625
+ # Calculate revenue and profit
626
+ revenue = paid_users * subscription_price
627
+ profit = revenue - total_cost
628
+ margin = (profit / revenue) * 100 if revenue > 0 else 0
629
+
630
+ # Display results
631
+ st.subheader("Results")
632
+
633
+ col1, col2, col3, col4 = st.columns(4)
634
+ with col1:
635
+ st.metric("Total Cost", f"${total_cost:.2f}")
636
+ with col2:
637
+ st.metric("Revenue", f"${revenue:.2f}")
638
+ with col3:
639
+ st.metric("Monthly Profit", f"${profit:.2f}")
640
+ with col4:
641
+ st.metric("Profit Margin", f"{margin:.1f}%")
642
+
643
+ # Detailed breakdown
644
+ with st.expander("See Detailed Cost Breakdown"):
645
+ st.markdown(f"""
646
+ ### User Distribution
647
+ - Total Users: {total_users:,}
648
+ - Free Tier Users: {free_users:,} ({free_users/total_users*100:.1f}%)
649
+ - Paid Tier Users: {paid_users:,} ({paid_users/total_users*100:.1f}%)
650
+
651
+ ### Token Usage
652
+ - Total Input Tokens: {free_costs['input_tokens'] + paid_costs['input_tokens']:,.0f}
653
+ - Total Output Tokens: {free_costs['output_tokens'] + paid_costs['output_tokens']:,.0f}
654
+
655
+ ### Token Cost Breakdown
656
+ - Input Cost: ${free_costs['input_cost'] + paid_costs['input_cost']:.2f}
657
+ - Output Cost: ${free_costs['output_cost'] + paid_costs['output_cost']:.2f}
658
+
659
+ ### Per User Economics
660
+ - Cost per Paid User: ${total_cost/paid_users:.4f} (if all costs allocated to paid users)
661
+ - Revenue per Paid User: ${subscription_price:.2f}
662
+ - Profit per Paid User: ${(revenue-total_cost)/paid_users:.2f}
663
+ """)
664
+
665
+ # Visualization
666
+ st.subheader("Cost vs Revenue")
667
+
668
+ fig = go.Figure()
669
+ fig.add_trace(go.Bar(
670
+ name='Free Tier Cost',
671
+ x=['Cost'],
672
+ y=[free_costs['total_cost']],
673
+ marker_color='#FF9900'
674
+ ))
675
+ fig.add_trace(go.Bar(
676
+ name='Paid Tier Cost',
677
+ x=['Cost'],
678
+ y=[paid_costs['total_cost']],
679
+ marker_color='#FF5733'
680
+ ))
681
+ fig.add_trace(go.Bar(
682
+ name='Revenue',
683
+ x=['Revenue'],
684
+ y=[revenue],
685
+ marker_color='#3366CC'
686
+ ))
687
+
688
+ fig.update_layout(barmode='stack', title="Cost vs Revenue Breakdown")
689
+ st.plotly_chart(fig, use_container_width=True)
690
+
691
+ elif model_type == "Audio":
692
+ st.subheader("GPT-4o mini Audio Model Calculator")
693
+
694
+ # Input parameters
695
+ col1, col2 = st.columns(2)
696
+ with col1:
697
+ users = st.number_input("Number of Users", min_value=1, value=100, step=10)
698
+ audio_minutes = st.number_input("Minutes of Audio per User per Month", min_value=1, value=10, step=1)
699
+ tokens_per_second = st.number_input("Audio Tokens per Second", min_value=100, value=600, step=10)
700
+ cached_pct = st.slider("% Cached Input", min_value=0, max_value=100, value=20, step=5)
701
+
702
+ with col2:
703
+ output_ratio = st.slider("Output:Input Token Ratio", min_value=0.01, max_value=0.2, value=0.05, step=0.01)
704
+ subscription = st.number_input("Monthly Subscription ($)", min_value=0.0, value=29.99, step=0.99)
705
+ silence_reduction = st.slider("Silence Reduction %", min_value=0, max_value=50, value=20, step=5)
706
+
707
+ # Apply silence reduction to effective minutes
708
+ effective_minutes = audio_minutes * (1 - silence_reduction/100)
709
+
710
+ # Calculate costs
711
+ costs = calculate_audio_costs(users, effective_minutes, tokens_per_second, output_ratio, cached_pct)
712
+
713
+ # Calculate revenue and profit
714
+ revenue = users * subscription
715
+ profit = revenue - costs['total_cost']
716
+ margin = (profit / revenue) * 100 if revenue > 0 else 0
717
+
718
+ # Display results
719
+ st.subheader("Results")
720
+
721
+ col1, col2 = st.columns(2)
722
+ with col1:
723
+ st.metric("Cost per Audio Minute", f"${costs['cost_per_minute']:.4f}")
724
+ st.metric("Total Monthly Cost", f"${costs['total_cost']:.2f}")
725
+ st.metric("Cost per User", f"${costs['total_cost']/users:.2f}")
726
+
727
+ with col2:
728
+ st.metric("Monthly Revenue", f"${revenue:.2f}")
729
+ st.metric("Monthly Profit", f"${profit:.2f}")
730
+ st.metric("Profit Margin", f"{margin:.1f}%")
731
+
732
+