first commit

2026-05-09 21:31:34 +02:00
commit 2a9aca0c1b
6 changed files with 888 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,3 @@
 .venv/
 *.csv
 *.png
--- a/21
+++ b/21
@@ -0,0 +1,21 @@
 MIT License
 Copyright (c) 2026 Eugene Rakhmatulin
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
--- a/README.md
+++ b/README.md
@@ -0,0 +1,28 @@
 # Benchy-Graph
 Benchy-Graph is a tool that generates performance dashboards from llama-benchy CSV benchmark data. It visualizes key metrics like throughput, latency, and performance across different phases and concurrency levels for language model inference.
 ## Generating the CSV File
 To generate the required CSV file, use [llama-benchy](https://github.com/eugr/llama-benchy), a benchmarking tool for llama.cpp servers.
 Example command to generate the CSV:
 ```bash
 uvx llama-benchy --base-url http://127.0.0.1:8000/v1 --model Qwen/Qwen3.6-27B --served-model-name unsloth/Qwen3.6-27B-GGUF --concurrency 1 2 4 8 16 32 --pp 128 --tg 128 --format csv
 ```
 This will produce a CSV file with benchmark results that can be used as input for Benchy-Graph.
 ## Running the App
 To generate a performance dashboard image from a CSV file:
 1. Ensure dependencies are installed: `pip install -r requirement.txt`
 2. Run the script: `python app.py <input.csv> <output.png>`
 Replace `<input.csv>` with the path to your llama-benchy CSV file and `<output.png>` with the desired output image path.
 ## Running the Notebook
 For an interactive experience, open `notebook.ipynb` in Jupyter Notebook or JupyterLab and execute the cells. The notebook contains all the necessary code and explanations for generating visualizations.
--- a/app.py
+++ b/app.py
@@ -0,0 +1,254 @@
 """
 Benchy-Graph
 This script generates a performance dashboard from llama-benchy CSV benchmark data,
 visualizing throughput, latency, and other metrics for different phases
 and concurrency levels.
 Usage:
    python app.py <input.csv> <output.png>
 Dependencies:
    - pandas
    - matplotlib
    - seaborn
 """
 import argparse
 import re
 import sys
 from pathlib import Path
 import matplotlib.pyplot as plt
 import pandas as pd
 import seaborn as sns
 # Hex colours shared by all dashboard panels. 'pp' and 'tg' are the two phase
 # labels parsed out of test_name; the '_tsr' variants colour the dashed t/s/r
 # series and the '_light' variants stripe the summary-table rows.
 COLORS = {
    'pp': '#2E86AB',
    'tg': '#A23B72',
    'pp_tsr': '#7FB3D5',
    'tg_tsr': '#D98880',
    'pp_light': '#A7C7E7',
    'tg_light': '#E7B8D6'
 }
 # Plot defaults consumed by setup_plot_style(): 'font_scale' is handed to
 # seaborn's set_context, every other key is applied as a matplotlib rcParam.
 # NOTE: generate_dashboard() passes its own figsize=(16, 12) to plt.subplots,
 # which takes precedence over 'figure.figsize' for the dashboard figure.
 PLOT_STYLE = {
    'figure.figsize': [14, 10],
    'axes.linewidth': 1.2,
    'font_scale': 1.1
 }
 # Prefix of the figure title; generate_dashboard() appends the model name to it.
 DASHBOARD_TITLE = ('Benchy-Graph | ')
 def parse_arguments():
    """Read the two positional command line arguments.
    Returns:
        argparse.Namespace: Namespace with ``csv_path`` (input CSV) and
        ``image_path`` (output image).
    """
    cli = argparse.ArgumentParser(description='Generate Benchy-Graph from CSV data.')
    positionals = (
        ('csv_path', 'Path to input CSV file'),
        ('image_path', 'Path to output image file'),
    )
    for dest, help_text in positionals:
        cli.add_argument(dest, help=help_text)
    return cli.parse_args()
 def load_and_process_data(csv_path):
    """
    Load CSV data and extract phase, param, and concurrency information.
    The ``test_name`` column is matched against ``<pp|tg><param> (c<concurrency>)``;
    rows whose name does not match are dropped.
    Args:
        csv_path (str): Path to the CSV file
    Returns:
        tuple[pd.DataFrame, str | None]: The processed dataframe with added
        ``phase``, ``param`` and ``concurrency`` columns (the latter two as
        integers), and the model name taken from the first non-empty cell of
        the ``model`` column (``None`` if the column is absent or empty).
    Raises:
        FileNotFoundError: If CSV file doesn't exist
        ValueError: If required columns are missing
    """
    if not Path(csv_path).exists():
        raise FileNotFoundError(f"CSV file not found: {csv_path}")
    df = pd.read_csv(csv_path)
    model_name = None
    if 'model' in df.columns:
        # Take the first non-null entry: the first row may have an empty model
        # cell, which would otherwise turn into the literal string "nan".
        known_models = df['model'].dropna()
        if not known_models.empty:
            model_name = str(known_models.iloc[0]).strip()
    required_columns = ['test_name']
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        raise ValueError(f"Missing required columns: {missing_columns}")
    # Extract phase, param, concurrency from test_name
    pattern = r'(pp|tg)\s*(\d+)\s*\(\s*c(\d+)\s*\)'
    # astype(str) keeps .str usable even when the column was parsed as a
    # non-string dtype (e.g. entirely empty); such rows simply do not match.
    extracted = df['test_name'].astype(str).str.extract(pattern)
    df = df.assign(
        phase=extracted[0],
        param=pd.to_numeric(extracted[1], errors='coerce'),
        concurrency=pd.to_numeric(extracted[2], errors='coerce')
    )
    # Drop rows with missing phase or concurrency
    df = df.dropna(subset=['phase', 'concurrency']).copy()
    # Unmatched rows force a float dtype during extraction; once they are gone
    # restore integers so axis ticks and table cells read "2", not "2.0".
    df['param'] = df['param'].astype(int)
    df['concurrency'] = df['concurrency'].astype(int)
    return df, model_name
 def setup_plot_style():
    """Apply the seaborn theme and the matplotlib rcParams held in PLOT_STYLE."""
    sns.set_style("whitegrid")
    overrides = dict(PLOT_STYLE)
    # font_scale is a seaborn context option rather than an rcParam, so it is
    # split off before the remaining entries are pushed into rcParams.
    scale = overrides.pop('font_scale')
    sns.set_context("notebook", font_scale=scale)
    plt.rcParams.update(overrides)
 def plot_throughput_subplot(ax, df):
    """Draw average and per-request throughput versus concurrency for both phases.
    For each phase ('pp', then 'tg') the per-concurrency mean of ``t_s_mean``
    and of ``t_s_req_mean`` is plotted, with the standard deviation as error bars.
    Args:
        ax: Matplotlib axes to draw on
        df (pd.DataFrame): Frame with ``phase``, ``concurrency``, ``t_s_mean``
            and ``t_s_req_mean`` columns
    """
    levels = sorted(df['concurrency'].dropna().unique())
    # With at most one concurrency level the PP and TG markers would sit on the
    # same x position, so they are nudged apart; otherwise no shift is applied.
    if len(levels) <= 1:
        shift = {'pp': -0.08, 'tg': 0.08}
    else:
        shift = {'pp': 0.0, 'tg': 0.0}
    for phase, shape in (('pp', 'o'), ('tg', 's')):
        tag = phase.upper()
        in_phase = df['phase'] == phase
        # Average throughput: solid line in the phase colour
        speed_rows = df[in_phase & df['t_s_mean'].notna()]
        if not speed_rows.empty:
            speed = speed_rows.groupby('concurrency')['t_s_mean'].agg(['mean', 'std']).reset_index()
            ax.errorbar(
                speed['concurrency'] + shift[phase], speed['mean'], yerr=speed['std'],
                marker=shape, label=f'{tag} avg speed', color=COLORS[phase],
                capsize=4, linewidth=2, markersize=8,
            )
        # Request throughput: dashed line in the lighter '_tsr' colour
        req_rows = df[in_phase & df['t_s_req_mean'].notna()]
        if not req_rows.empty:
            per_req = req_rows.groupby('concurrency')['t_s_req_mean'].agg(['mean', 'std']).reset_index()
            ax.errorbar(
                per_req['concurrency'] + shift[phase], per_req['mean'], yerr=per_req['std'],
                marker='^', label=f'{tag} t/s/r', color=COLORS[f'{phase}_tsr'],
                linestyle='--', capsize=4, linewidth=2, markersize=8,
            )
    ax.set_xlabel('Concurrency (number of requests)', fontsize=12)
    ax.set_ylabel('Tokens/sec', fontsize=12)
    ax.set_title('Avg throughput and request throughput', fontsize=13, fontweight='bold')
    if levels:
        # One tick per measured level, with half a unit of padding on each side.
        ax.set_xticks(levels)
        ax.set_xlim(levels[0] - 0.5, levels[-1] + 0.5)
    ax.legend(frameon=True, fancybox=True)
    ax.grid(axis='y', alpha=0.4)
 def plot_latency_subplot(ax, df):
    """Draw the mean ``ttfr_mean`` of the 'pp' phase against concurrency.
    Args:
        ax: Matplotlib axes to draw on
        df (pd.DataFrame): Frame with ``phase``, ``concurrency`` and
            ``ttfr_mean`` columns
    """
    prefill = df[df['phase'] == 'pp']
    timed = prefill.dropna(subset=['ttfr_mean'])
    if not timed.empty:
        stats = timed.groupby('concurrency')['ttfr_mean'].agg(['mean', 'std']).reset_index()
        ax.errorbar(
            stats['concurrency'], stats['mean'], yerr=stats['std'],
            marker='o', label='TTFR (Time To First Response)',
            color=COLORS['pp'], capsize=4,
        )
    ax.set_xlabel('Concurrency', fontsize=12)
    ax.set_ylabel('Latency (ms)', fontsize=12)
    ax.set_title('Start Latency (Prefill phase)', fontsize=13, fontweight='bold')
    levels = sorted(prefill['concurrency'].dropna().unique())
    if levels:
        # One tick per measured level, with half a unit of padding on each side.
        ax.set_xticks(levels)
        ax.set_xlim(levels[0] - 0.5, levels[-1] + 0.5)
    ax.legend(frameon=True, fancybox=True)
    ax.grid(axis='y', alpha=0.4)
 def plot_heatmap_subplot(ax, df):
    """Draw a phase-by-concurrency heatmap of the mean ``t_s_mean``.
    Nothing is drawn when there is no throughput data to pivot.
    Args:
        ax: Matplotlib axes to draw on
        df (pd.DataFrame): Frame with ``phase``, ``concurrency`` and
            ``t_s_mean`` columns
    """
    measured = df[df['t_s_mean'].notna()]
    grid = measured.pivot_table(
        values='t_s_mean', index='phase', columns='concurrency', aggfunc='mean'
    )
    if grid.empty:
        return
    sns.heatmap(
        grid, annot=True, fmt='.1f', cmap='YlOrRd', ax=ax,
        cbar_kws={'label': 'tokens/sec'}, linewidths=0.5,
    )
    ax.set_title('Avg Throughput (tokens/sec)', fontsize=13, fontweight='bold')
    ax.set_xlabel('Concurrency')
    ax.set_ylabel('Phase')
 def _format_mean_std(series):
    """Format a numeric column as 'mean ± std', 'mean' alone, or 'N/A'."""
    if not series.notna().any():
        return 'N/A'
    mean, std = series.mean(), series.std()
    if pd.isna(std):
        # A single sample has no sample standard deviation; show only the mean
        # instead of printing "± nan".
        return f"{mean:.1f}"
    return f"{mean:.1f} ± {std:.1f}"
 def create_summary_table(ax, df):
    """
    Create summary table on the given axis.
    One row is emitted per (phase, concurrency) pair that has at least one
    ``t_s_mean`` value, listing throughput, per-request throughput, TTFR and PPT.
    Args:
        ax: Matplotlib axes to draw the table on (its axis frame is hidden)
        df (pd.DataFrame): Frame with ``phase``, ``concurrency``, ``t_s_mean``,
            ``t_s_req_mean``, ``ttfr_mean`` and ``est_ppt_mean`` columns
    """
    ax.axis('off')
    summary_rows = []
    for phase in ['pp', 'tg']:
        for conc in sorted(df['concurrency'].unique()):
            subset = df[(df['phase'] == phase) & (df['concurrency'] == conc)]
            if subset.empty or not subset['t_s_mean'].notna().any():
                continue
            summary_rows.append({
                'Phase': phase.upper(),
                # Show whole-number levels as "2" even if the column is float.
                'Conc.': int(conc) if float(conc).is_integer() else conc,
                'Avg Speed': _format_mean_std(subset['t_s_mean']),
                't/s/r': _format_mean_std(subset['t_s_req_mean']),
                'TTFR': (f"{subset['ttfr_mean'].mean():.1f}ms"
                         if subset['ttfr_mean'].notna().any() else "N/A"),
                'PPT': (f"{subset['est_ppt_mean'].mean():.2f}ms"
                        if subset['est_ppt_mean'].notna().any() else "N/A")
            })
    summary_df = pd.DataFrame(summary_rows)
    if not summary_df.empty:
        n_cols = len(summary_df.columns)
        table = ax.table(cellText=summary_df.values, colLabels=summary_df.columns,
                         cellLoc='center', loc='center', colColours=[COLORS['pp']] * n_cols)
        table.auto_set_font_size(False)
        table.set_fontsize(9)
        table.scale(1, 1.8)
        # Alternate the two light colours row by row; table row 0 is the header,
        # so data rows start at index 1.
        for row_idx in range(1, len(summary_df) + 1):
            color = COLORS['pp_light'] if row_idx % 2 == 1 else COLORS['tg_light']
            for col_idx in range(n_cols):
                table[(row_idx, col_idx)].set_facecolor(color)
        ax.set_title('Summary', fontsize=13, fontweight='bold', pad=20)
 def generate_dashboard(df, image_path, model_name=None):
    """
    Generate the complete dashboard plot and save to file.
    The figure is a 2x2 grid: throughput, start latency, throughput heatmap
    and summary table. After saving, the figure is shown unless the active
    matplotlib backend is Agg, in which case it is closed instead.
    Args:
        df (pd.DataFrame): Processed dataframe
        image_path (str): Path to save the output image
        model_name (str | None): Model name appended to the figure title;
            when falsy the title is the bare dashboard name
    """
    setup_plot_style()
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    try:
        if model_name:
            title = f"{DASHBOARD_TITLE}{model_name}"
        else:
            # No model to show: drop the dangling " | " separator.
            title = DASHBOARD_TITLE.rstrip(' |')
        fig.suptitle(title, fontsize=18, fontweight='bold', y=1.02)
        plot_throughput_subplot(axes[0, 0], df)
        plot_latency_subplot(axes[0, 1], df)
        plot_heatmap_subplot(axes[1, 0], df)
        create_summary_table(axes[1, 1], df)
        fig.tight_layout()
        fig.savefig(image_path, dpi=300, bbox_inches='tight', facecolor='white')
    except Exception:
        # Do not leave a half-built figure registered with pyplot on failure.
        plt.close(fig)
        raise
    if plt.get_backend().lower() == 'agg':
        # Headless backend: nothing will ever display the figure, so release it
        # rather than letting figures pile up across repeated calls.
        plt.close(fig)
    else:
        plt.show()
 def main():
    """Run the command line workflow: parse arguments, load the CSV, render.
    Any exception is reported on stderr as ``Error: <message>`` and the
    process exits with status 1.
    """
    try:
        cli = parse_arguments()
        frame, model = load_and_process_data(cli.csv_path)
        generate_dashboard(frame, cli.image_path, model_name=model)
    except Exception as exc:
        print(f"Error: {exc}", file=sys.stderr)
        sys.exit(1)
 if __name__ == '__main__':
    main()
--- a/notebook.ipynb
+++ b/notebook.ipynb
--- a/requirement.txt
+++ b/requirement.txt
@@ -0,0 +1,42 @@
 asttokens==3.0.1
 comm==0.2.3
 contourpy==1.3.2
 cycler==0.12.1
 debugpy==1.8.20
 decorator==5.2.1
 exceptiongroup==1.3.1
 executing==2.2.1
 fonttools==4.62.1
 ipykernel==7.2.0
 ipython==8.39.0
 jedi==0.19.2
 jupyter_client==8.8.0
 jupyter_core==5.9.1
 kiwisolver==1.5.0
 matplotlib==3.10.9
 matplotlib-inline==0.2.1
 nest-asyncio==1.6.0
 numpy==2.2.6
 packaging==26.2
 pandas==2.3.3
 parso==0.8.6
 pexpect==4.9.0
 pillow==12.2.0
 platformdirs==4.9.6
 prompt_toolkit==3.0.52
 psutil==7.2.2
 ptyprocess==0.7.0
 pure_eval==0.2.3
 Pygments==2.20.0
 pyparsing==3.3.2
 python-dateutil==2.9.0.post0
 pytz==2026.1.post1
 pyzmq==27.1.0
 seaborn==0.13.2
 six==1.17.0
 stack-data==0.6.3
 tornado==6.5.5
 traitlets==5.14.3
 typing_extensions==4.15.0
 tzdata==2026.2
 wcwidth==0.6.0