PILL
Browse files
app.py
CHANGED
|
@@ -44,7 +44,7 @@ def provider_change(provider, selected_type, all_types=["text", "vision", "video
|
|
| 44 |
return new_models if new_models else all_models
|
| 45 |
|
| 46 |
# --------------------------
|
| 47 |
-
# Estimate Cost Function
|
| 48 |
# --------------------------
|
| 49 |
def estimate_cost(num_alerts, input_size, output_size, model_id):
|
| 50 |
pricing = st.session_state.get("pricing", {})
|
|
@@ -80,35 +80,68 @@ if "data_loaded" not in st.session_state:
|
|
| 80 |
with st.sidebar:
|
| 81 |
st.image("https://cdn.prod.website-files.com/630f558f2a15ca1e88a2f774/631f1436ad7a0605fecc5e15_Logo.svg",
|
| 82 |
use_container_width=True)
|
| 83 |
-
st.markdown(
|
| 84 |
-
""" Visit: [https://www.priam.ai](https://www.priam.ai)
|
| 85 |
-
"""
|
| 86 |
-
)
|
| 87 |
st.divider()
|
| 88 |
st.sidebar.title("LLM Pricing Calculator")
|
| 89 |
|
| 90 |
-
# Track active tab in session state
|
| 91 |
-
if "active_tab" not in st.session_state:
|
| 92 |
-
st.session_state.active_tab = "Model Selection"
|
| 93 |
-
|
| 94 |
-
def switch_tab(tab_name):
|
| 95 |
-
st.session_state.active_tab = tab_name
|
| 96 |
-
st.rerun()
|
| 97 |
-
|
| 98 |
-
|
| 99 |
# --------------------------
|
| 100 |
-
#
|
| 101 |
# --------------------------
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
|
|
|
| 106 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
-
#
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
| 110 |
st.header("LLM Pricing App")
|
| 111 |
-
|
| 112 |
# --- Row 1: Provider/Type and Model Selection ---
|
| 113 |
col_left, col_right = st.columns(2)
|
| 114 |
with col_left:
|
|
@@ -118,50 +151,27 @@ with tab1:
|
|
| 118 |
index=st.session_state["providers"].index("azure") if "azure" in st.session_state["providers"] else 0
|
| 119 |
)
|
| 120 |
selected_type = st.radio("Select type", options=["text", "image"], index=0)
|
| 121 |
-
|
| 122 |
with col_right:
|
| 123 |
-
# Filter models based on the selected provider and type
|
| 124 |
filtered_models = provider_change(selected_provider, selected_type)
|
| 125 |
-
|
| 126 |
if filtered_models:
|
| 127 |
-
# Use "o1" as the default if available; otherwise, default to the first model.
|
| 128 |
default_model = "o1" if "o1" in filtered_models else filtered_models[0]
|
| 129 |
-
selected_model = st.selectbox(
|
| 130 |
-
"Select a model",
|
| 131 |
-
options=filtered_models,
|
| 132 |
-
index=filtered_models.index(default_model)
|
| 133 |
-
)
|
| 134 |
else:
|
| 135 |
selected_model = None
|
| 136 |
st.write("No models available")
|
| 137 |
-
|
| 138 |
# --- Row 2: Alert Stats ---
|
| 139 |
col1, col2, col3 = st.columns(3)
|
| 140 |
with col1:
|
| 141 |
-
num_alerts = st.number_input(
|
| 142 |
-
|
| 143 |
-
value=100,
|
| 144 |
-
min_value=1,
|
| 145 |
-
step=1,
|
| 146 |
-
help="Number of security alerts to analyze daily"
|
| 147 |
-
)
|
| 148 |
with col2:
|
| 149 |
-
input_size = st.number_input(
|
| 150 |
-
|
| 151 |
-
value=1000,
|
| 152 |
-
min_value=1,
|
| 153 |
-
step=1,
|
| 154 |
-
help="Include logs, metadata, and context per alert"
|
| 155 |
-
)
|
| 156 |
with col3:
|
| 157 |
-
output_size = st.number_input(
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
min_value=1,
|
| 161 |
-
step=1,
|
| 162 |
-
help="Expected length of security analysis and recommendations"
|
| 163 |
-
)
|
| 164 |
-
|
| 165 |
# --- Row 3: Buttons ---
|
| 166 |
btn_col1, btn_col2 = st.columns(2)
|
| 167 |
with btn_col1:
|
|
@@ -178,99 +188,34 @@ with tab1:
|
|
| 178 |
st.session_state["pricing"] = pricing
|
| 179 |
st.session_state["providers"] = providers
|
| 180 |
st.success("Pricing data refreshed!")
|
| 181 |
-
|
| 182 |
st.divider()
|
| 183 |
-
# --- Display Results ---
|
| 184 |
st.markdown("### Results")
|
| 185 |
if "result" in st.session_state:
|
| 186 |
st.write(st.session_state["result"])
|
| 187 |
else:
|
| 188 |
st.write("Use the buttons above to estimate costs.")
|
| 189 |
-
|
| 190 |
-
# --- Clear Button Below Results ---
|
| 191 |
if st.button("Clear"):
|
| 192 |
st.session_state.pop("result", None)
|
| 193 |
-
st.rerun()
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
# ----- Tab 2: On Premise Estimator -----
|
| 197 |
-
def _metric(values: dict, key: str) -> float:
    """Return ``values[key]``, or 0 when the key is missing or set to ``None``."""
    value = values.get(key)
    return 0 if value is None else value


def format_analysis_report(analysis_result: dict) -> str:
    """Convert the raw analysis_result dict into a human-readable report.

    Args:
        analysis_result: Mapping that is read for the keys ``error``,
            ``model_id``, ``parameter_size``, ``precision``,
            ``vram_requirements``, ``compatible_gpus``,
            ``largest_compatible_gpu`` and ``gpu_performance``.

    Returns:
        A Markdown string. When ``analysis_result`` contains an ``error``
        key, only that error message is returned.
    """
    if "error" in analysis_result:
        return f"**Error:** {analysis_result['error']}"

    # ``dict.get(key, default)`` only falls back when the key is absent, so a
    # section that is present but ``None`` is normalised to an empty container.
    vram = analysis_result.get("vram_requirements") or {}
    compatible_gpus = analysis_result.get("compatible_gpus") or []
    gpu_perf = analysis_result.get("gpu_performance") or {}

    lines = [
        f"### Model Analysis Report for `{analysis_result.get('model_id', 'Unknown Model')}`\n",
        f"**Parameter Size:** {analysis_result.get('parameter_size', 'N/A')} Billion parameters\n",
        f"**Precision:** {analysis_result.get('precision', 'N/A')}\n",
        "#### VRAM Requirements:",
        f"- Model Size: {_metric(vram, 'model_size_gb'):.2f} GB",
        f"- KV Cache: {_metric(vram, 'kv_cache_gb'):.2f} GB",
        f"- Activations: {_metric(vram, 'activations_gb'):.2f} GB",
        f"- Overhead: {_metric(vram, 'overhead_gb'):.2f} GB",
        f"- **Total VRAM:** {_metric(vram, 'total_vram_gb'):.2f} GB\n",
        "#### Compatible GPUs:",
    ]

    if compatible_gpus:
        lines.extend(f"- {gpu}" for gpu in compatible_gpus)
    else:
        lines.append("- None found")
    lines.append(f"\n**Largest Compatible GPU:** {analysis_result.get('largest_compatible_gpu', 'N/A')}\n")

    if gpu_perf:
        lines.append("#### GPU Performance:")
        for gpu, perf in gpu_perf.items():
            perf = perf or {}  # a GPU entry without figures is reported as zeros
            lines.append(f"**{gpu}:**")
            lines.append(f" - Tokens per Second: {_metric(perf, 'tokens_per_second'):.2f}")
            lines.append(f" - FLOPs per Token: {_metric(perf, 'flops_per_token'):.2f}")
            lines.append(f" - Effective TFLOPS: {_metric(perf, 'effective_tflops'):.2f}\n")
    else:
        lines.append("#### GPU Performance: N/A\n")

    return "\n".join(lines)
|
| 250 |
-
|
| 251 |
|
| 252 |
-
|
| 253 |
-
|
| 254 |
st.header("On Premise Estimator")
|
| 255 |
st.markdown("Enter a Hugging Face model ID to perform an on premise analysis using the provided estimator.")
|
| 256 |
-
|
| 257 |
-
# Input for model ID with a default value
|
| 258 |
hf_model_id = st.text_input("Hugging Face Model ID", value="facebook/opt-1.3b")
|
| 259 |
|
| 260 |
if st.button("Analyze Model"):
|
| 261 |
-
st.session_state.active_tab = "On Premise Estimator"
|
| 262 |
with st.spinner("Analyzing model..."):
|
| 263 |
analysis_result = analyze_hf_model(hf_model_id)
|
| 264 |
-
st.session_state
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
# Render if analysis result exists
|
| 268 |
if "analysis_result" in st.session_state:
|
| 269 |
-
report = format_analysis_report(st.session_state
|
| 270 |
st.markdown(report)
|
| 271 |
|
| 272 |
-
|
| 273 |
-
|
| 274 |
st.markdown(
|
| 275 |
"""
|
| 276 |
## About This App
|
|
@@ -279,7 +224,7 @@ with tab3:
|
|
| 279 |
|
| 280 |
- The app downloads the latest pricing from the LiteLLM repository.
|
| 281 |
- Using simple maths to estimate the total tokens.
|
| 282 |
-
-
|
| 283 |
- Version 0.1
|
| 284 |
|
| 285 |
---
|
|
@@ -288,12 +233,10 @@ with tab3:
|
|
| 288 |
|
| 289 |
| Version | Release Date | Key Feature Updates |
|
| 290 |
|--------|--------------|---------------------|
|
| 291 |
-
| `v1.0` | 2025-03-26 | Initial release with basic total tokens estimation|
|
| 292 |
-
| `v1.1` | 2025-04-06
|
| 293 |
-
|
| 294 |
|
| 295 |
---
|
| 296 |
-
|
| 297 |
|
| 298 |
Website: [https://www.priam.ai](https://www.priam.ai)
|
| 299 |
"""
|
|
@@ -304,4 +247,4 @@ with tab3:
|
|
| 304 |
|
| 305 |
This app is for demonstration purposes only. Actual costs may vary based on usage patterns and other factors.
|
| 306 |
"""
|
| 307 |
-
)
|
|
|
|
| 44 |
return new_models if new_models else all_models
|
| 45 |
|
| 46 |
# --------------------------
|
| 47 |
+
# Estimate Cost Function
|
| 48 |
# --------------------------
|
| 49 |
def estimate_cost(num_alerts, input_size, output_size, model_id):
|
| 50 |
pricing = st.session_state.get("pricing", {})
|
|
|
|
| 80 |
with st.sidebar:
|
| 81 |
st.image("https://cdn.prod.website-files.com/630f558f2a15ca1e88a2f774/631f1436ad7a0605fecc5e15_Logo.svg",
|
| 82 |
use_container_width=True)
|
| 83 |
+
st.markdown("Visit: [https://www.priam.ai](https://www.priam.ai)")
|
|
|
|
|
|
|
|
|
|
| 84 |
st.divider()
|
| 85 |
st.sidebar.title("LLM Pricing Calculator")
|
| 86 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
# --------------------------
|
| 88 |
+
# Pills Navigation (Using st.pills)
|
| 89 |
# --------------------------
|
| 90 |
+
# st.pills creates a pill-style selection widget.
|
| 91 |
+
# NOTE(review): in single-selection mode st.pills is documented to return None
# when no pill is selected. The if/elif chain on `page` has no fallback branch,
# so fall back to the default page instead of rendering nothing.
page = st.pills(
    "Head",
    options=["Model Selection", "On Premise Estimator", "About"],
    selection_mode="single",
    default="Model Selection",
    label_visibility="hidden",
) or "Model Selection"
|
| 95 |
|
| 96 |
+
# --------------------------
|
| 97 |
+
# Helper: Format Analysis Report
|
| 98 |
+
# --------------------------
|
| 99 |
+
def _metric(values: dict, key: str) -> float:
    """Return ``values[key]``, or 0 when the key is missing or set to ``None``."""
    value = values.get(key)
    return 0 if value is None else value


def format_analysis_report(analysis_result: dict) -> str:
    """Convert the raw analysis_result dict into a human-readable report.

    Args:
        analysis_result: Mapping that is read for the keys ``error``,
            ``model_id``, ``parameter_size``, ``precision``,
            ``vram_requirements``, ``compatible_gpus``,
            ``largest_compatible_gpu`` and ``gpu_performance``.

    Returns:
        A Markdown string. When ``analysis_result`` contains an ``error``
        key, only that error message is returned.
    """
    if "error" in analysis_result:
        return f"**Error:** {analysis_result['error']}"

    # ``dict.get(key, default)`` only falls back when the key is absent, so a
    # section that is present but ``None`` is normalised to an empty container.
    vram = analysis_result.get("vram_requirements") or {}
    compatible_gpus = analysis_result.get("compatible_gpus") or []
    gpu_perf = analysis_result.get("gpu_performance") or {}

    lines = [
        f"### Model Analysis Report for `{analysis_result.get('model_id', 'Unknown Model')}`\n",
        f"**Parameter Size:** {analysis_result.get('parameter_size', 'N/A')} Billion parameters\n",
        f"**Precision:** {analysis_result.get('precision', 'N/A')}\n",
        "#### VRAM Requirements:",
        f"- Model Size: {_metric(vram, 'model_size_gb'):.2f} GB",
        f"- KV Cache: {_metric(vram, 'kv_cache_gb'):.2f} GB",
        f"- Activations: {_metric(vram, 'activations_gb'):.2f} GB",
        f"- Overhead: {_metric(vram, 'overhead_gb'):.2f} GB",
        f"- **Total VRAM:** {_metric(vram, 'total_vram_gb'):.2f} GB\n",
        "#### Compatible GPUs:",
    ]

    if compatible_gpus:
        lines.extend(f"- {gpu}" for gpu in compatible_gpus)
    else:
        lines.append("- None found")
    lines.append(f"\n**Largest Compatible GPU:** {analysis_result.get('largest_compatible_gpu', 'N/A')}\n")

    if gpu_perf:
        lines.append("#### GPU Performance:")
        for gpu, perf in gpu_perf.items():
            perf = perf or {}  # a GPU entry without figures is reported as zeros
            lines.append(f"**{gpu}:**")
            lines.append(f" - Tokens per Second: {_metric(perf, 'tokens_per_second'):.2f}")
            lines.append(f" - FLOPs per Token: {_metric(perf, 'flops_per_token'):.2f}")
            lines.append(f" - Effective TFLOPS: {_metric(perf, 'effective_tflops'):.2f}\n")
    else:
        lines.append("#### GPU Performance: N/A\n")

    return "\n".join(lines)
|
| 138 |
|
| 139 |
+
# --------------------------
|
| 140 |
+
# Render Content Based on Selected Pill
|
| 141 |
+
# --------------------------
|
| 142 |
+
if page == "Model Selection":
|
| 143 |
+
st.divider()
|
| 144 |
st.header("LLM Pricing App")
|
|
|
|
| 145 |
# --- Row 1: Provider/Type and Model Selection ---
|
| 146 |
col_left, col_right = st.columns(2)
|
| 147 |
with col_left:
|
|
|
|
| 151 |
index=st.session_state["providers"].index("azure") if "azure" in st.session_state["providers"] else 0
|
| 152 |
)
|
| 153 |
selected_type = st.radio("Select type", options=["text", "image"], index=0)
|
|
|
|
| 154 |
with col_right:
|
|
|
|
| 155 |
filtered_models = provider_change(selected_provider, selected_type)
|
|
|
|
| 156 |
if filtered_models:
|
|
|
|
| 157 |
default_model = "o1" if "o1" in filtered_models else filtered_models[0]
|
| 158 |
+
selected_model = st.selectbox("Select a model", options=filtered_models, index=filtered_models.index(default_model))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
else:
|
| 160 |
selected_model = None
|
| 161 |
st.write("No models available")
|
| 162 |
+
|
| 163 |
# --- Row 2: Alert Stats ---
|
| 164 |
col1, col2, col3 = st.columns(3)
|
| 165 |
with col1:
|
| 166 |
+
num_alerts = st.number_input("Security Alerts Per Day", value=100, min_value=1, step=1,
|
| 167 |
+
help="Number of security alerts to analyze daily")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
with col2:
|
| 169 |
+
input_size = st.number_input("Alert Content Size (characters)", value=1000, min_value=1, step=1,
|
| 170 |
+
help="Include logs, metadata, and context per alert")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
with col3:
|
| 172 |
+
output_size = st.number_input("Analysis Output Size (characters)", value=500, min_value=1, step=1,
|
| 173 |
+
help="Expected length of security analysis and recommendations")
|
| 174 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
# --- Row 3: Buttons ---
|
| 176 |
btn_col1, btn_col2 = st.columns(2)
|
| 177 |
with btn_col1:
|
|
|
|
| 188 |
st.session_state["pricing"] = pricing
|
| 189 |
st.session_state["providers"] = providers
|
| 190 |
st.success("Pricing data refreshed!")
|
| 191 |
+
|
| 192 |
st.divider()
|
|
|
|
| 193 |
st.markdown("### Results")
|
| 194 |
if "result" in st.session_state:
|
| 195 |
st.write(st.session_state["result"])
|
| 196 |
else:
|
| 197 |
st.write("Use the buttons above to estimate costs.")
|
| 198 |
+
|
|
|
|
| 199 |
if st.button("Clear"):
|
| 200 |
st.session_state.pop("result", None)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
|
| 202 |
+
elif page == "On Premise Estimator":
|
| 203 |
+
st.divider()
|
| 204 |
st.header("On Premise Estimator")
|
| 205 |
st.markdown("Enter a Hugging Face model ID to perform an on premise analysis using the provided estimator.")
|
|
|
|
|
|
|
| 206 |
hf_model_id = st.text_input("Hugging Face Model ID", value="facebook/opt-1.3b")
|
| 207 |
|
| 208 |
if st.button("Analyze Model"):
|
|
|
|
| 209 |
with st.spinner("Analyzing model..."):
|
| 210 |
analysis_result = analyze_hf_model(hf_model_id)
|
| 211 |
+
st.session_state["analysis_result"] = analysis_result
|
| 212 |
+
|
|
|
|
|
|
|
| 213 |
if "analysis_result" in st.session_state:
|
| 214 |
+
report = format_analysis_report(st.session_state["analysis_result"])
|
| 215 |
st.markdown(report)
|
| 216 |
|
| 217 |
+
elif page == "About":
|
| 218 |
+
st.divider()
|
| 219 |
st.markdown(
|
| 220 |
"""
|
| 221 |
## About This App
|
|
|
|
| 224 |
|
| 225 |
- The app downloads the latest pricing from the LiteLLM repository.
|
| 226 |
- Using simple maths to estimate the total tokens.
|
| 227 |
+
- Helps you estimate hardware requirements for running open-source large language models (LLMs) on-premise using only the model ID from Hugging Face.
|
| 228 |
- Version 0.1
|
| 229 |
|
| 230 |
---
|
|
|
|
| 233 |
|
| 234 |
| Version | Release Date | Key Feature Updates |
|
| 235 |
|--------|--------------|---------------------|
|
| 236 |
+
| `v1.0` | 2025-03-26 | Initial release with basic total tokens estimation |
|
| 237 |
+
| `v1.1` | 2025-04-06 | Added On Premise Estimator Tab |
|
|
|
|
| 238 |
|
| 239 |
---
|
|
|
|
| 240 |
|
| 241 |
Website: [https://www.priam.ai](https://www.priam.ai)
|
| 242 |
"""
|
|
|
|
| 247 |
|
| 248 |
This app is for demonstration purposes only. Actual costs may vary based on usage patterns and other factors.
|
| 249 |
"""
|
| 250 |
+
)
|