first commit
This commit is contained in:
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
.venv/
|
||||||
|
.csv
|
||||||
|
.png
|
||||||
21
LICENSE
Normal file
21
LICENSE
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2026 Eugene Rakhmatulin
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
28
README.md
Normal file
28
README.md
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
# Benchy-Graph
|
||||||
|
|
||||||
|
Benchy-Graph is a tool that generates performance dashboards from llama-benchy CSV benchmark data. It visualizes key metrics like throughput, latency, and performance across different phases and concurrency levels for language model inference.
|
||||||
|
|
||||||
|
## Generating the CSV File
|
||||||
|
|
||||||
|
To generate the required CSV file, use [llama-benchy](https://github.com/eugr/llama-benchy), a benchmarking tool for llama.cpp servers.
|
||||||
|
|
||||||
|
Example command to generate the CSV:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
uvx llama-benchy --base-url http://127.0.0.1:8000/v1 --model Qwen/Qwen3.6-27B --served-model-name unsloth/Qwen3.6-27B-GGUF --concurrency 1 2 4 8 16 32 --pp 128 --tg 128 --format csv
|
||||||
|
```
|
||||||
|
|
||||||
|
This will produce a CSV file with benchmark results that can be used as input for Benchy-Graph.
|
||||||
|
|
||||||
|
## Running the App
|
||||||
|
|
||||||
|
To generate a performance dashboard image from a CSV file:
|
||||||
|
|
||||||
|
1. Ensure dependencies are installed: `pip install -r requirement.txt`
|
||||||
|
2. Run the script: `python app.py <input.csv> <output.png>`
|
||||||
|
|
||||||
|
Replace `<input.csv>` with the path to your llama-benchy CSV file and `<output.png>` with the desired output image path.
|
||||||
|
|
||||||
|
## Running the Notebook
|
||||||
|
|
||||||
|
For an interactive experience, open `notebook.ipynb` in Jupyter Notebook or JupyterLab and execute the cells. The notebook contains all the necessary code and explanations for generating visualizations.
|
||||||
254
app.py
Normal file
254
app.py
Normal file
@@ -0,0 +1,254 @@
|
|||||||
|
"""
|
||||||
|
Benchy-Graph
|
||||||
|
|
||||||
|
This script generates a performance dashboard from llama-benchy CSV benchmark data,
|
||||||
|
visualizing throughput, latency, and other metrics for different phases
|
||||||
|
and concurrency levels.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python app.py <input.csv> <output.png>
|
||||||
|
|
||||||
|
Dependencies:
|
||||||
|
- pandas
|
||||||
|
- matplotlib
|
||||||
|
- seaborn
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import pandas as pd
|
||||||
|
import seaborn as sns
|
||||||
|
|
||||||
|
# Hex colours shared by the dashboard panels: a base colour per phase, a
# softer variant for the per-request ("tsr") series, and a light tint used
# for striping the summary table rows.
COLORS = dict(
    pp='#2E86AB',
    tg='#A23B72',
    pp_tsr='#7FB3D5',
    tg_tsr='#D98880',
    pp_light='#A7C7E7',
    tg_light='#E7B8D6',
)

# matplotlib rcParams overrides, plus the seaborn-only 'font_scale' entry
# which setup_plot_style() filters out before updating rcParams.
PLOT_STYLE = {
    'figure.figsize': [14, 10],
    'axes.linewidth': 1.2,
    'font_scale': 1.1,
}

# Prefix of the figure title; the model name is appended when it is known.
DASHBOARD_TITLE = 'Benchy-Graph | '
|
||||||
|
|
||||||
|
|
||||||
|
def parse_arguments(argv=None):
    """
    Parse command line arguments.

    Args:
        argv (list[str] | None): Argument strings to parse. When None
            (the default) argparse reads ``sys.argv[1:]``, so existing
            callers are unaffected; passing a list lets the parser be
            driven programmatically.

    Returns:
        argparse.Namespace: Namespace with ``csv_path`` and ``image_path``.
    """
    parser = argparse.ArgumentParser(
        description='Generate Benchy-Graph from CSV data.'
    )
    parser.add_argument('csv_path', help='Path to input CSV file')
    parser.add_argument('image_path', help='Path to output image file')
    return parser.parse_args(argv)
|
||||||
|
|
||||||
|
|
||||||
|
def load_and_process_data(csv_path):
    """
    Load CSV data and extract phase, param, and concurrency information.

    Each ``test_name`` is expected to look like ``pp128 (c4)`` or
    ``tg 128 (c16)``: the phase (``pp``/``tg``), a numeric parameter, and
    the concurrency in parentheses prefixed by ``c``. Rows whose name does
    not match are dropped.

    Args:
        csv_path (str): Path to the CSV file

    Returns:
        tuple: ``(df, model_name)`` where ``df`` is the processed
        pd.DataFrame with added ``phase``, ``param`` and ``concurrency``
        columns, and ``model_name`` is the first non-empty value of the
        ``model`` column as a stripped string (None when the column is
        absent or entirely empty).

    Raises:
        FileNotFoundError: If CSV file doesn't exist
        ValueError: If required columns are missing
    """
    if not Path(csv_path).exists():
        raise FileNotFoundError(f"CSV file not found: {csv_path}")

    df = pd.read_csv(csv_path)

    required_columns = ['test_name']
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        raise ValueError(f"Missing required columns: {missing_columns}")

    # Use the first *non-null* model value; taking row 0 blindly would turn a
    # blank leading cell into the literal title "nan".
    model_name = None
    if 'model' in df.columns:
        named = df['model'].dropna()
        if not named.empty:
            model_name = str(named.iloc[0]).strip()

    # Extract phase, param, concurrency from test_name
    pattern = r'(pp|tg)\s*(\d+)\s*\(\s*c(\d+)\s*\)'
    extracted = df['test_name'].str.extract(pattern)
    df = df.assign(
        phase=extracted[0],
        param=pd.to_numeric(extracted[1], errors='coerce'),
        concurrency=pd.to_numeric(extracted[2], errors='coerce')
    )

    # Drop rows with missing phase or concurrency
    df = df.dropna(subset=['phase', 'concurrency']).copy()

    # Unmatched rows force the column to float while they are present; once
    # they are gone, restore integers so labels read "4" rather than "4.0".
    df['concurrency'] = df['concurrency'].astype(int)
    return df, model_name
|
||||||
|
|
||||||
|
|
||||||
|
def setup_plot_style():
    """Apply the seaborn theme/context and the rcParams from PLOT_STYLE."""
    sns.set_style("whitegrid")
    sns.set_context("notebook", font_scale=PLOT_STYLE['font_scale'])

    # 'font_scale' is a seaborn argument, not an rcParams key, so it must be
    # left out of the mapping handed to matplotlib.
    rc_overrides = dict(PLOT_STYLE)
    del rc_overrides['font_scale']
    plt.rcParams.update(rc_overrides)
|
||||||
|
|
||||||
|
|
||||||
|
def plot_throughput_subplot(ax, df):
    """
    Plot average throughput and request throughput on the given axis.

    For each phase (pp, tg) two series are drawn against concurrency: the
    mean of ``t_s_mean`` (solid) and the mean of ``t_s_req_mean`` (dashed),
    each with the per-concurrency standard deviation as error bars. A metric
    column that is missing from ``df`` is skipped rather than raising.

    Args:
        ax (matplotlib.axes.Axes): Axis to draw on
        df (pd.DataFrame): Processed dataframe with ``phase`` and
            ``concurrency`` columns
    """
    unique_concurrency = sorted(df['concurrency'].dropna().unique())

    # With a single concurrency level the PP and TG markers would sit on the
    # same x position, so nudge them apart; otherwise plot them un-shifted.
    if len(unique_concurrency) <= 1:
        phase_offsets = {'pp': -0.08, 'tg': 0.08}
    else:
        phase_offsets = {'pp': 0.0, 'tg': 0.0}

    for phase, marker in (('pp', 'o'), ('tg', 's')):
        series_specs = (
            # (column, legend label, style overrides)
            ('t_s_mean', f'{phase.upper()} avg speed',
             {'marker': marker, 'color': COLORS[phase]}),
            ('t_s_req_mean', f'{phase.upper()} t/s/r',
             {'marker': '^', 'color': COLORS[f'{phase}_tsr'], 'linestyle': '--'}),
        )
        for column, label, style in series_specs:
            if column not in df.columns:
                continue
            subset = df[(df['phase'] == phase) & (df[column].notna())]
            if subset.empty:
                continue
            agg = subset.groupby('concurrency')[column].agg(['mean', 'std']).reset_index()
            x = agg['concurrency'] + phase_offsets[phase]
            ax.errorbar(x, agg['mean'], yerr=agg['std'], label=label,
                        capsize=4, linewidth=2, markersize=8, **style)

    ax.set_xlabel('Concurrency (number of requests)', fontsize=12)
    ax.set_ylabel('Tokens/sec', fontsize=12)
    ax.set_title('Avg throughput and request throughput', fontsize=13, fontweight='bold')

    if unique_concurrency:
        ax.set_xticks(unique_concurrency)
        # The list is sorted, so first/last are min/max; a single level
        # yields the same +/-0.5 window around that one value.
        ax.set_xlim(unique_concurrency[0] - 0.5, unique_concurrency[-1] + 0.5)

    # Only build a legend when something was drawn; matplotlib warns about
    # an empty legend otherwise.
    handles, _ = ax.get_legend_handles_labels()
    if handles:
        ax.legend(frameon=True, fancybox=True)
    ax.grid(axis='y', alpha=0.4)
|
||||||
|
|
||||||
|
|
||||||
|
def plot_latency_subplot(ax, df):
    """
    Plot start latency (TTFR) for prefill phase on the given axis.

    Only rows whose phase is ``pp`` are used. The mean of ``ttfr_mean`` per
    concurrency level is drawn with its standard deviation as error bars.
    If the ``ttfr_mean`` column is missing or holds no values, the axis is
    still labelled but left without a series or legend.

    Args:
        ax (matplotlib.axes.Axes): Axis to draw on
        df (pd.DataFrame): Processed dataframe with ``phase`` and
            ``concurrency`` columns
    """
    df_pp = df[df['phase'] == 'pp']

    has_ttfr = 'ttfr_mean' in df_pp.columns and df_pp['ttfr_mean'].notna().any()
    if has_ttfr:
        agg_ttfr = (
            df_pp.dropna(subset=['ttfr_mean'])
            .groupby('concurrency')['ttfr_mean']
            .agg(['mean', 'std'])
            .reset_index()
        )
        ax.errorbar(agg_ttfr['concurrency'], agg_ttfr['mean'], yerr=agg_ttfr['std'],
                    marker='o', label='TTFR (Time To First Response)',
                    color=COLORS['pp'], capsize=4)

    ax.set_xlabel('Concurrency', fontsize=12)
    ax.set_ylabel('Latency (ms)', fontsize=12)
    ax.set_title('Start Latency (Prefill phase)', fontsize=13, fontweight='bold')

    unique_concurrency = sorted(df_pp['concurrency'].dropna().unique())
    if unique_concurrency:
        ax.set_xticks(unique_concurrency)
        # Sorted list: first/last are min/max; a single level gives the
        # same +/-0.5 window around that value.
        ax.set_xlim(unique_concurrency[0] - 0.5, unique_concurrency[-1] + 0.5)

    # An empty legend triggers a matplotlib warning, so only add one when
    # the TTFR series was actually plotted.
    if has_ttfr:
        ax.legend(frameon=True, fancybox=True)
    ax.grid(axis='y', alpha=0.4)
|
||||||
|
|
||||||
|
|
||||||
|
def plot_heatmap_subplot(ax, df):
    """
    Plot throughput heatmap on the given axis.

    The heatmap shows the mean of ``t_s_mean`` for every phase (rows) and
    concurrency level (columns). When the column is missing or there is no
    data to pivot, a short placeholder message is shown instead of raising
    or leaving an unexplained blank panel.

    Args:
        ax (matplotlib.axes.Axes): Axis to draw on
        df (pd.DataFrame): Processed dataframe with ``phase`` and
            ``concurrency`` columns
    """
    if 't_s_mean' in df.columns:
        pivot_throughput = df.dropna(subset=['t_s_mean']).pivot_table(
            values='t_s_mean', index='phase', columns='concurrency', aggfunc='mean'
        )
    else:
        pivot_throughput = pd.DataFrame()

    if pivot_throughput.empty:
        ax.set_axis_off()
        ax.text(0.5, 0.5, 'No throughput data', ha='center', va='center',
                transform=ax.transAxes)
    else:
        sns.heatmap(pivot_throughput, annot=True, fmt='.1f', cmap='YlOrRd', ax=ax,
                    cbar_kws={'label': 'tokens/sec'}, linewidths=0.5)
        # Replace the lower-case column names that seaborn uses as labels.
        ax.set_xlabel('Concurrency')
        ax.set_ylabel('Phase')

    ax.set_title('Avg Throughput (tokens/sec)', fontsize=13, fontweight='bold')
|
||||||
|
|
||||||
|
|
||||||
|
def _summary_metric(subset, column):
    """Return the non-null values of ``column``, or an empty Series if absent."""
    if column not in subset.columns:
        return pd.Series(dtype=float)
    return subset[column].dropna()


def _format_mean_std(values):
    """Format as 'mean ± std'; omit the spread when it is undefined."""
    if values.empty:
        return 'N/A'
    mean = values.mean()
    std = values.std()
    # The sample standard deviation of a single observation is NaN; showing
    # "± nan" in the table is noise, so fall back to the mean alone.
    if pd.isna(std):
        return f"{mean:.1f}"
    return f"{mean:.1f} ± {std:.1f}"


def _format_mean_ms(values, decimals):
    """Format the mean with ``decimals`` places and an 'ms' suffix, or 'N/A'."""
    if values.empty:
        return 'N/A'
    return f"{values.mean():.{decimals}f}ms"


def create_summary_table(ax, df):
    """
    Create summary table on the given axis.

    One row is produced per (phase, concurrency) pair that has at least one
    ``t_s_mean`` value. Optional metric columns (``t_s_req_mean``,
    ``ttfr_mean``, ``est_ppt_mean``) that are missing or empty are shown as
    'N/A' instead of raising.

    Args:
        ax (matplotlib.axes.Axes): Axis to draw on (its frame is hidden)
        df (pd.DataFrame): Processed dataframe with ``phase`` and
            ``concurrency`` columns
    """
    ax.axis('off')

    concurrency_levels = sorted(df['concurrency'].dropna().unique())
    summary_rows = []
    for phase in ('pp', 'tg'):
        for conc in concurrency_levels:
            subset = df[(df['phase'] == phase) & (df['concurrency'] == conc)]
            speed = _summary_metric(subset, 't_s_mean')
            if speed.empty:
                continue
            summary_rows.append({
                'Phase': phase.upper(),
                'Conc.': conc,
                'Avg Speed': _format_mean_std(speed),
                't/s/r': _format_mean_std(_summary_metric(subset, 't_s_req_mean')),
                'TTFR': _format_mean_ms(_summary_metric(subset, 'ttfr_mean'), 1),
                'PPT': _format_mean_ms(_summary_metric(subset, 'est_ppt_mean'), 2),
            })

    summary_df = pd.DataFrame(summary_rows)
    if not summary_df.empty:
        n_cols = len(summary_df.columns)
        table = ax.table(cellText=summary_df.values, colLabels=summary_df.columns,
                         cellLoc='center', loc='center',
                         colColours=[COLORS['pp']] * n_cols)
        table.auto_set_font_size(False)
        table.set_fontsize(9)
        table.scale(1, 1.8)
        # Table row 0 is the header, so data rows start at index 1; stripe
        # them with alternating light colours.
        for row_idx in range(len(summary_df)):
            color = COLORS['pp_light'] if row_idx % 2 == 0 else COLORS['tg_light']
            for col_idx in range(n_cols):
                table[(row_idx + 1, col_idx)].set_facecolor(color)

    ax.set_title('Summary', fontsize=13, fontweight='bold', pad=20)
|
||||||
|
|
||||||
|
|
||||||
|
def generate_dashboard(df, image_path, model_name=None):
    """
    Generate the complete dashboard plot and save to file.

    Builds a 2x2 figure (throughput lines, prefill latency, throughput
    heatmap, summary table), writes it to ``image_path`` and, unless the
    active matplotlib backend is 'agg', also shows it on screen.

    Args:
        df (pd.DataFrame): Processed dataframe
        image_path (str): Path to save the output image
        model_name (str | None): Model name appended to the figure title;
            when empty or None the title is just the tool name.
    """
    setup_plot_style()

    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    try:
        if model_name:
            title = f"{DASHBOARD_TITLE}{model_name}"
        else:
            # Without a model name the prefix would end in a dangling
            # " | " separator, so trim it.
            title = DASHBOARD_TITLE.rstrip(' |')
        fig.suptitle(title, fontsize=18, fontweight='bold', y=1.02)

        plot_throughput_subplot(axes[0, 0], df)
        plot_latency_subplot(axes[0, 1], df)
        plot_heatmap_subplot(axes[1, 0], df)
        create_summary_table(axes[1, 1], df)

        fig.tight_layout()
        fig.savefig(image_path, dpi=300, bbox_inches='tight', facecolor='white')
        if plt.get_backend().lower() != 'agg':
            plt.show()
    finally:
        # pyplot keeps every figure alive until it is closed; release it so
        # repeated calls (or a failed render) do not accumulate figures.
        plt.close(fig)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run the CLI: parse arguments, load the CSV, render the dashboard.

    Any exception raised along the way is reported on stderr as
    ``Error: <message>`` and the process exits with status 1.
    """
    try:
        cli_args = parse_arguments()
        frame, detected_model = load_and_process_data(cli_args.csv_path)
        generate_dashboard(frame, cli_args.image_path, model_name=detected_model)
    except Exception as exc:
        print(f"Error: {exc}", file=sys.stderr)
        sys.exit(1)


if __name__ == '__main__':
    main()
|
||||||
540
notebook.ipynb
Normal file
540
notebook.ipynb
Normal file
File diff suppressed because one or more lines are too long
42
requirement.txt
Normal file
42
requirement.txt
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
asttokens==3.0.1
|
||||||
|
comm==0.2.3
|
||||||
|
contourpy==1.3.2
|
||||||
|
cycler==0.12.1
|
||||||
|
debugpy==1.8.20
|
||||||
|
decorator==5.2.1
|
||||||
|
exceptiongroup==1.3.1
|
||||||
|
executing==2.2.1
|
||||||
|
fonttools==4.62.1
|
||||||
|
ipykernel==7.2.0
|
||||||
|
ipython==8.39.0
|
||||||
|
jedi==0.19.2
|
||||||
|
jupyter_client==8.8.0
|
||||||
|
jupyter_core==5.9.1
|
||||||
|
kiwisolver==1.5.0
|
||||||
|
matplotlib==3.10.9
|
||||||
|
matplotlib-inline==0.2.1
|
||||||
|
nest-asyncio==1.6.0
|
||||||
|
numpy==2.2.6
|
||||||
|
packaging==26.2
|
||||||
|
pandas==2.3.3
|
||||||
|
parso==0.8.6
|
||||||
|
pexpect==4.9.0
|
||||||
|
pillow==12.2.0
|
||||||
|
platformdirs==4.9.6
|
||||||
|
prompt_toolkit==3.0.52
|
||||||
|
psutil==7.2.2
|
||||||
|
ptyprocess==0.7.0
|
||||||
|
pure_eval==0.2.3
|
||||||
|
Pygments==2.20.0
|
||||||
|
pyparsing==3.3.2
|
||||||
|
python-dateutil==2.9.0.post0
|
||||||
|
pytz==2026.1.post1
|
||||||
|
pyzmq==27.1.0
|
||||||
|
seaborn==0.13.2
|
||||||
|
six==1.17.0
|
||||||
|
stack-data==0.6.3
|
||||||
|
tornado==6.5.5
|
||||||
|
traitlets==5.14.3
|
||||||
|
typing_extensions==4.15.0
|
||||||
|
tzdata==2026.2
|
||||||
|
wcwidth==0.6.0
|
||||||
Reference in New Issue
Block a user