first commit

This commit is contained in:
l-nmch
2026-05-09 21:31:34 +02:00
commit 2a9aca0c1b
6 changed files with 888 additions and 0 deletions

3
.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
.venv/
*.csv
*.png

21
LICENSE Normal file
View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2026 Eugene Rakhmatulin
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

28
README.md Normal file
View File

@@ -0,0 +1,28 @@
# Benchy-Graph
Benchy-Graph is a tool that generates performance dashboards from llama-benchy CSV benchmark data. It visualizes key metrics like throughput, latency, and performance across different phases and concurrency levels for language model inference.
## Generating the CSV File
To generate the required CSV file, use [llama-benchy](https://github.com/eugr/llama-benchy), a benchmarking tool for llama.cpp servers.
Example command to generate the CSV:
```bash
uvx llama-benchy --base-url http://127.0.0.1:8000/v1 --model Qwen/Qwen3.6-27B --served-model-name unsloth/Qwen3.6-27B-GGUF --concurrency 1 2 4 8 16 32 --pp 128 --tg 128 --format csv
```
This will produce a CSV file with benchmark results that can be used as input for Benchy-Graph.
## Running the App
To generate a performance dashboard image from a CSV file:
1. Ensure dependencies are installed: `pip install -r requirement.txt`
2. Run the script: `python app.py <input.csv> <output.png>`
Replace `<input.csv>` with the path to your llama-benchy CSV file and `<output.png>` with the desired output image path.
## Running the Notebook
For an interactive experience, open `notebook.ipynb` in Jupyter Notebook or JupyterLab and execute the cells. The notebook contains all the necessary code and explanations for generating visualizations.

254
app.py Normal file
View File

@@ -0,0 +1,254 @@
"""
Benchy-Graph
This script generates a performance dashboard from llama-benchy CSV benchmark data,
visualizing throughput, latency, and other metrics for different phases
and concurrency levels.
Usage:
python app.py <input.csv> <output.png>
Dependencies:
- pandas
- matplotlib
- seaborn
"""
import argparse
import re
import sys
from pathlib import Path
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# Hex colours used throughout the dashboard. Plain 'pp' / 'tg' keys colour the
# per-phase series, the '_tsr' variants colour the "t/s/r" (per-request
# throughput) series, and the '_light' variants stripe the summary-table rows.
COLORS = dict(
    pp="#2E86AB",
    tg="#A23B72",
    pp_tsr="#7FB3D5",
    tg_tsr="#D98880",
    pp_light="#A7C7E7",
    tg_light="#E7B8D6",
)

# Plot styling. 'font_scale' is handed to seaborn's set_context(); every other
# entry is applied as a matplotlib rcParam by setup_plot_style().
PLOT_STYLE = {
    "figure.figsize": [14, 10],
    "axes.linewidth": 1.2,
    "font_scale": 1.1,
}

# Title prefix; generate_dashboard() appends the model name after the separator.
DASHBOARD_TITLE = "Benchy-Graph | "
def parse_arguments():
    """Read the command line.

    Returns:
        argparse.Namespace: with ``csv_path`` (input CSV) and ``image_path``
        (output image), both positional.
    """
    cli = argparse.ArgumentParser(description='Generate Benchy-Graph from CSV data.')
    positionals = (
        ('csv_path', 'Path to input CSV file'),
        ('image_path', 'Path to output image file'),
    )
    for dest, help_text in positionals:
        cli.add_argument(dest, help=help_text)
    return cli.parse_args()
def load_and_process_data(csv_path):
    """
    Load a benchmark CSV and derive phase, param and concurrency columns.

    Each ``test_name`` is matched (whitespace-tolerant) against
    ``(pp|tg)<param> (c<concurrency>)``. Rows whose ``test_name`` does not
    match are dropped.

    Args:
        csv_path (str): Path to the CSV file

    Returns:
        tuple: ``(df, model_name)``. ``df`` is the filtered dataframe with the
        added columns ``phase`` ('pp' or 'tg'), ``param`` (int) and
        ``concurrency`` (int). ``model_name`` is the first non-empty value of
        the ``model`` column, or ``None`` when the column is absent or empty.

    Raises:
        FileNotFoundError: If CSV file doesn't exist
        ValueError: If required columns are missing
    """
    if not Path(csv_path).exists():
        raise FileNotFoundError(f"CSV file not found: {csv_path}")
    df = pd.read_csv(csv_path)

    model_name = None
    if 'model' in df.columns:
        # Use the first *non-null, non-blank* entry. Reading iloc[0] blindly
        # would turn a leading empty cell into the literal string 'nan'.
        models = df['model'].dropna().astype(str).str.strip()
        models = models[models != '']
        if not models.empty:
            model_name = models.iloc[0]

    required_columns = ['test_name']
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        raise ValueError(f"Missing required columns: {missing_columns}")

    # Extract phase, param, concurrency from test_name.
    # astype(str) keeps .str usable even if the column was parsed as numeric
    # (e.g. an entirely empty column); such values simply fail to match.
    pattern = r'(pp|tg)\s*(\d+)\s*\(\s*c(\d+)\s*\)'
    extracted = df['test_name'].astype(str).str.extract(pattern)
    df = df.assign(
        phase=extracted[0],
        param=pd.to_numeric(extracted[1], errors='coerce'),
        concurrency=pd.to_numeric(extracted[2], errors='coerce'),
    )

    # Drop rows with missing phase or concurrency. The regex captures are all
    # mandatory, so every surviving row also has a param.
    df = df.dropna(subset=['phase', 'concurrency'])

    # Unmatched rows force the numeric columns to float (NaN); now that they
    # are gone, restore integers so labels read "4" rather than "4.0".
    df = df.astype({'param': 'int64', 'concurrency': 'int64'})
    return df, model_name
def setup_plot_style():
    """Apply the seaborn theme/context and the matplotlib rcParams from PLOT_STYLE."""
    sns.set_style("whitegrid")
    sns.set_context("notebook", font_scale=PLOT_STYLE['font_scale'])

    # 'font_scale' was consumed by seaborn above and is not an rcParams key,
    # so it is removed from the copy before updating matplotlib.
    overrides = dict(PLOT_STYLE)
    del overrides['font_scale']
    plt.rcParams.update(overrides)
def plot_throughput_subplot(ax, df):
    """Plot average throughput and request throughput on the given axis.

    For each phase ('pp', 'tg') up to two series are drawn against
    concurrency: the mean of ``t_s_mean`` and the mean of ``t_s_req_mean``
    (dashed), each with the per-concurrency standard deviation as error bars.
    A series is skipped when its column is missing from ``df`` or holds no
    values for that phase.

    Args:
        ax (matplotlib.axes.Axes): Axis to draw on
        df (pd.DataFrame): Processed dataframe with ``phase`` and
            ``concurrency`` columns
    """
    unique_concurrency = sorted(df['concurrency'].dropna().unique())

    # With a single concurrency level, nudge the two phases apart so they are
    # not drawn at the same x position.
    if len(unique_concurrency) <= 1:
        phase_offsets = {'pp': -0.08, 'tg': 0.08}
    else:
        phase_offsets = {'pp': 0.0, 'tg': 0.0}

    for phase, marker, color in [('pp', 'o', COLORS['pp']), ('tg', 's', COLORS['tg'])]:
        series_specs = [
            # (column, legend label, series-specific errorbar kwargs)
            ('t_s_mean', f'{phase.upper()} avg speed',
             {'marker': marker, 'color': color}),
            ('t_s_req_mean', f'{phase.upper()} t/s/r',
             {'marker': '^', 'color': COLORS[f'{phase}_tsr'], 'linestyle': '--'}),
        ]
        for column, label, style in series_specs:
            # Only 'test_name' is validated at load time, so metric columns
            # are optional: skip the series rather than raise KeyError.
            if column not in df.columns:
                continue
            subset = df[(df['phase'] == phase) & (df[column].notna())]
            if subset.empty:
                continue
            agg = subset.groupby('concurrency')[column].agg(['mean', 'std']).reset_index()
            x = agg['concurrency'] + phase_offsets[phase]
            ax.errorbar(x, agg['mean'], yerr=agg['std'], label=label,
                        capsize=4, linewidth=2, markersize=8, **style)

    ax.set_xlabel('Concurrency (number of requests)', fontsize=12)
    ax.set_ylabel('Tokens/sec', fontsize=12)
    ax.set_title('Avg throughput and request throughput', fontsize=13, fontweight='bold')

    if unique_concurrency:
        ax.set_xticks(unique_concurrency)
        if len(unique_concurrency) == 1:
            x = unique_concurrency[0]
            ax.set_xlim(x - 0.5, x + 0.5)
        else:
            ax.set_xlim(min(unique_concurrency) - 0.5, max(unique_concurrency) + 0.5)

    # An empty legend only produces a matplotlib warning; draw it only when
    # at least one labelled series exists.
    handles, _ = ax.get_legend_handles_labels()
    if handles:
        ax.legend(frameon=True, fancybox=True)
    ax.grid(axis='y', alpha=0.4)
def plot_latency_subplot(ax, df):
    """Plot start latency (TTFR) for prefill phase on the given axis.

    Draws the per-concurrency mean of ``ttfr_mean`` over the 'pp' rows, with
    the standard deviation as error bars. Nothing is plotted when the column
    is missing or holds no values for that phase.

    Args:
        ax (matplotlib.axes.Axes): Axis to draw on
        df (pd.DataFrame): Processed dataframe with ``phase`` and
            ``concurrency`` columns
    """
    df_pp = df[df['phase'] == 'pp']

    # 'ttfr_mean' is not validated at load time, so treat it as optional
    # instead of raising KeyError when it is absent.
    if 'ttfr_mean' in df_pp.columns:
        ttfr_rows = df_pp.dropna(subset=['ttfr_mean'])
        if not ttfr_rows.empty:
            agg_ttfr = ttfr_rows.groupby('concurrency')['ttfr_mean'].agg(['mean', 'std']).reset_index()
            ax.errorbar(agg_ttfr['concurrency'], agg_ttfr['mean'], yerr=agg_ttfr['std'],
                        marker='o', label='TTFR (Time To First Response)',
                        color=COLORS['pp'], capsize=4)

    ax.set_xlabel('Concurrency', fontsize=12)
    ax.set_ylabel('Latency (ms)', fontsize=12)
    ax.set_title('Start Latency (Prefill phase)', fontsize=13, fontweight='bold')

    unique_concurrency = sorted(df_pp['concurrency'].dropna().unique())
    if unique_concurrency:
        ax.set_xticks(unique_concurrency)
        if len(unique_concurrency) == 1:
            x = unique_concurrency[0]
            ax.set_xlim(x - 0.5, x + 0.5)
        else:
            ax.set_xlim(min(unique_concurrency) - 0.5, max(unique_concurrency) + 0.5)

    # An empty legend only produces a matplotlib warning; draw it only when
    # the TTFR series was actually plotted.
    handles, _ = ax.get_legend_handles_labels()
    if handles:
        ax.legend(frameon=True, fancybox=True)
    ax.grid(axis='y', alpha=0.4)
def plot_heatmap_subplot(ax, df):
    """Plot throughput heatmap on the given axis.

    Cells hold the mean of ``t_s_mean`` for each (phase, concurrency) pair.
    The axis is left untouched when ``t_s_mean`` is missing from ``df`` or
    there is no data to pivot.

    Args:
        ax (matplotlib.axes.Axes): Axis to draw on
        df (pd.DataFrame): Processed dataframe with ``phase`` and
            ``concurrency`` columns
    """
    # 't_s_mean' is not validated at load time; skip instead of raising KeyError.
    if 't_s_mean' not in df.columns:
        return

    pivot_throughput = df.dropna(subset=['t_s_mean']).pivot_table(
        values='t_s_mean', index='phase', columns='concurrency', aggfunc='mean'
    )
    if pivot_throughput.empty:
        return

    # Concurrency may arrive as float (e.g. 4.0); show whole numbers as "4".
    # Left as-is if any level is genuinely fractional.
    if all(float(level).is_integer() for level in pivot_throughput.columns):
        pivot_throughput.columns = pivot_throughput.columns.astype('int64')

    sns.heatmap(pivot_throughput, annot=True, fmt='.1f', cmap='YlOrRd', ax=ax,
                cbar_kws={'label': 'tokens/sec'}, linewidths=0.5)
    ax.set_title('Avg Throughput (tokens/sec)', fontsize=13, fontweight='bold')
    ax.set_xlabel('Concurrency')
    ax.set_ylabel('Phase')
def _metric_values(subset, column):
    """Return the non-null values of ``column``, or an empty Series if it is absent."""
    if column not in subset.columns:
        return pd.Series(dtype=float)
    return subset[column].dropna()


def _format_mean_std(values):
    """Format values as 'mean ± std', omitting the spread when it is undefined."""
    if values.empty:
        return 'N/A'
    text = f"{values.mean():.1f}"
    spread = values.std()
    # std() is NaN for a single sample; printing "± nan" is just noise.
    if pd.notna(spread):
        text += f" ± {spread:.1f}"
    return text


def _format_mean_ms(values, precision):
    """Format the mean of values with an 'ms' suffix, or 'N/A' when there are none."""
    if values.empty:
        return 'N/A'
    return f"{values.mean():.{precision}f}ms"


def create_summary_table(ax, df):
    """Create summary table on the given axis.

    One row is emitted per (phase, concurrency) pair that has at least one
    ``t_s_mean`` value. The other metrics (``t_s_req_mean``, ``ttfr_mean``,
    ``est_ppt_mean``) are optional: a missing column or an all-null group is
    shown as 'N/A'.

    Args:
        ax (matplotlib.axes.Axes): Axis to draw on; its frame is hidden
        df (pd.DataFrame): Processed dataframe with ``phase`` and
            ``concurrency`` columns
    """
    ax.axis('off')

    summary_rows = []
    for phase in ['pp', 'tg']:
        for conc in sorted(df['concurrency'].dropna().unique()):
            subset = df[(df['phase'] == phase) & (df['concurrency'] == conc)]
            speed = _metric_values(subset, 't_s_mean')
            if speed.empty:
                continue
            summary_rows.append({
                'Phase': phase.upper(),
                # Show whole-number concurrency as "4" even if stored as 4.0.
                'Conc.': int(conc) if float(conc).is_integer() else conc,
                'Avg Speed': _format_mean_std(speed),
                't/s/r': _format_mean_std(_metric_values(subset, 't_s_req_mean')),
                'TTFR': _format_mean_ms(_metric_values(subset, 'ttfr_mean'), 1),
                'PPT': _format_mean_ms(_metric_values(subset, 'est_ppt_mean'), 2),
            })

    summary_df = pd.DataFrame(summary_rows)
    if not summary_df.empty:
        n_cols = len(summary_df.columns)
        table = ax.table(cellText=summary_df.values, colLabels=summary_df.columns,
                         cellLoc='center', loc='center',
                         colColours=[COLORS['pp']] * n_cols)
        table.auto_set_font_size(False)
        table.set_fontsize(9)
        table.scale(1, 1.8)

        # Stripe the body rows; table row 0 is the header, so data row i
        # lives at table row i + 1.
        for i in range(len(summary_df)):
            color = COLORS['pp_light'] if i % 2 == 0 else COLORS['tg_light']
            for j in range(n_cols):
                table[(i + 1, j)].set_facecolor(color)

    ax.set_title('Summary', fontsize=13, fontweight='bold', pad=20)
def generate_dashboard(df, image_path, model_name=None):
    """
    Generate the complete dashboard plot and save to file.

    The figure is a 2x2 grid: throughput (top-left), prefill latency
    (top-right), throughput heatmap (bottom-left) and summary table
    (bottom-right). After saving, the figure is shown via ``plt.show()``
    unless the active matplotlib backend is Agg.

    Args:
        df (pd.DataFrame): Processed dataframe
        image_path (str): Path to save the output image
        model_name (str, optional): Model name appended to the dashboard
            title; when falsy the title is just the product name
    """
    setup_plot_style()
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))

    if model_name:
        title = f"{DASHBOARD_TITLE}{model_name}"
    else:
        # DASHBOARD_TITLE ends with a " | " separator meant to precede the
        # model name; strip it so the title does not end in a dangling bar.
        title = DASHBOARD_TITLE.rstrip(' |')
    fig.suptitle(title, fontsize=18, fontweight='bold', y=1.02)

    plot_throughput_subplot(axes[0, 0], df)
    plot_latency_subplot(axes[0, 1], df)
    plot_heatmap_subplot(axes[1, 0], df)
    create_summary_table(axes[1, 1], df)

    fig.tight_layout()
    fig.savefig(image_path, dpi=300, bbox_inches='tight', facecolor='white')

    if plt.get_backend().lower() != 'agg':
        plt.show()
    else:
        # Nothing displays the figure on a headless backend, and pyplot keeps
        # every open figure alive; close it so repeated calls do not pile up.
        plt.close(fig)
def main():
    """Command-line entry point.

    Parses the arguments, loads the CSV and writes the dashboard image. Any
    exception raised along the way is reported on stderr as ``Error: <msg>``
    and the process exits with status 1.
    """
    try:
        cli_args = parse_arguments()
        frame, model = load_and_process_data(cli_args.csv_path)
        generate_dashboard(frame, cli_args.image_path, model_name=model)
    except Exception as exc:
        print(f"Error: {exc}", file=sys.stderr)
        raise SystemExit(1)


if __name__ == '__main__':
    main()

540
notebook.ipynb Normal file

File diff suppressed because one or more lines are too long

42
requirement.txt Normal file
View File

@@ -0,0 +1,42 @@
asttokens==3.0.1
comm==0.2.3
contourpy==1.3.2
cycler==0.12.1
debugpy==1.8.20
decorator==5.2.1
exceptiongroup==1.3.1
executing==2.2.1
fonttools==4.62.1
ipykernel==7.2.0
ipython==8.39.0
jedi==0.19.2
jupyter_client==8.8.0
jupyter_core==5.9.1
kiwisolver==1.5.0
matplotlib==3.10.9
matplotlib-inline==0.2.1
nest-asyncio==1.6.0
numpy==2.2.6
packaging==26.2
pandas==2.3.3
parso==0.8.6
pexpect==4.9.0
pillow==12.2.0
platformdirs==4.9.6
prompt_toolkit==3.0.52
psutil==7.2.2
ptyprocess==0.7.0
pure_eval==0.2.3
Pygments==2.20.0
pyparsing==3.3.2
python-dateutil==2.9.0.post0
pytz==2026.1.post1
pyzmq==27.1.0
seaborn==0.13.2
six==1.17.0
stack-data==0.6.3
tornado==6.5.5
traitlets==5.14.3
typing_extensions==4.15.0
tzdata==2026.2
wcwidth==0.6.0