
Metrics API

This page documents the quantum embedding quality metrics and analysis functions.

Core Metrics Functions

expressibility

expressibility(embedding, n_samples=1000, n_bins=50, random_seed=None)

Compute the expressibility of a quantum embedding.

Expressibility measures how well the embedding can generate diverse quantum states across the Hilbert space. It compares the distribution of fidelities from randomly sampled states with the uniform (Haar) distribution.

Parameters:

- embedding (BaseEmbedding): Quantum embedding to evaluate. Required.
- n_samples (int): Number of random samples for evaluation. Default: 1000.
- n_bins (int): Number of bins for histogram comparison. Default: 50.
- random_seed (int, optional): Random seed for reproducibility. Default: None.

Returns:

- expressibility (float): Expressibility score (0 = poor, 1 = excellent).

Source code in quantum_data_embedding_suite\metrics.py
@requires_license()
def expressibility(
    embedding: Any,
    n_samples: int = 1000,
    n_bins: int = 50,
    random_seed: Optional[int] = None
) -> float:
    """
    Compute the expressibility of a quantum embedding.

    Expressibility measures how well the embedding can generate
    diverse quantum states across the Hilbert space. It compares
    the distribution of fidelities from randomly sampled states
    with the uniform (Haar) distribution.

    Parameters
    ----------
    embedding : BaseEmbedding
        Quantum embedding to evaluate
    n_samples : int, default=1000
        Number of random samples for evaluation
    n_bins : int, default=50
        Number of bins for histogram comparison
    random_seed : int, optional
        Random seed for reproducibility

    Returns
    -------
    expressibility : float
        Expressibility score (0 = poor, 1 = excellent)
    """
    if random_seed is not None:
        np.random.seed(random_seed)

    n_qubits = embedding.n_qubits
    n_features = embedding.get_feature_dimension()

    # Generate random parameter sets
    params1 = np.random.uniform(-2*np.pi, 2*np.pi, (n_samples, n_features))
    params2 = np.random.uniform(-2*np.pi, 2*np.pi, (n_samples, n_features))

    # Compute fidelities between random states
    fidelities = []
    for i in range(n_samples):
        try:
            circuit1 = embedding.create_circuit(params1[i])
            circuit2 = embedding.create_circuit(params2[i])

            # Get statevectors
            psi1 = embedding.backend.get_statevector(circuit1)
            psi2 = embedding.backend.get_statevector(circuit2)

            # Compute fidelity
            fidelity = np.abs(np.vdot(psi1, psi2)) ** 2
            fidelities.append(fidelity)

        except Exception:
            # Skip failed computations
            continue

    if len(fidelities) < 10:
        return 0.0  # Not enough successful computations

    fidelities = np.array(fidelities)

    # Generate reference Haar distribution
    haar_fidelities = _generate_haar_fidelities(n_qubits, len(fidelities), random_seed)

    # Compare distributions using Kolmogorov-Smirnov statistic
    try:
        from scipy.stats import ks_2samp
        ks_stat, _ = ks_2samp(fidelities, haar_fidelities)
        # Convert to expressibility score (lower KS distance = higher expressibility)
        expressibility_score = 1.0 - ks_stat
        return max(0.0, min(1.0, expressibility_score))
    except ImportError:
        # Fallback: use histogram comparison
        return _histogram_expressibility(fidelities, haar_fidelities, n_bins)
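
The helper `_generate_haar_fidelities` is referenced above but not listed on this page. For Haar-random states in dimension N = 2**n_qubits, pairwise fidelities follow the analytic density P(F) = (N - 1)(1 - F)**(N - 2), which can be sampled directly by inverse-transform sampling. The sketch below is a plausible implementation under that assumption, not the library's actual source:

def _generate_haar_fidelities(n_qubits, n_samples, random_seed=None):
    """Sketch (assumed behavior): sample fidelities of Haar-random state pairs."""
    rng = np.random.default_rng(random_seed)
    N = 2 ** n_qubits
    # Inverse CDF of P(F) = (N - 1) * (1 - F)**(N - 2):
    # CDF(F) = 1 - (1 - F)**(N - 1), so F = 1 - u**(1 / (N - 1)) for u ~ U(0, 1)
    u = rng.uniform(0.0, 1.0, size=n_samples)
    return 1.0 - u ** (1.0 / (N - 1))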

trainability

trainability(embedding, data, n_samples=100, epsilon=0.0001, random_seed=None)

Compute the trainability of a quantum embedding.

Trainability measures the variance of gradients, which indicates whether the embedding is trainable or suffers from barren plateaus.

Parameters:

- embedding (BaseEmbedding): Quantum embedding to evaluate. Required.
- data (array-like): Sample data points for evaluation. Required.
- n_samples (int): Number of samples for gradient estimation. Default: 100.
- epsilon (float): Finite difference step size. Default: 1e-4.
- random_seed (int, optional): Random seed for reproducibility. Default: None.

Returns:

- trainability (float): Trainability score (higher = more trainable).

Source code in quantum_data_embedding_suite\metrics.py
@requires_license()
def trainability(
    embedding: Any,
    data: np.ndarray,
    n_samples: int = 100,
    epsilon: float = 1e-4,
    random_seed: Optional[int] = None
) -> float:
    """
    Compute the trainability of a quantum embedding.

    Trainability measures the variance of gradients, which indicates
    whether the embedding is trainable or suffers from barren plateaus.

    Parameters
    ----------
    embedding : BaseEmbedding
        Quantum embedding to evaluate
    data : array-like
        Sample data points for evaluation
    n_samples : int, default=100
        Number of samples for gradient estimation
    epsilon : float, default=1e-4
        Finite difference step size
    random_seed : int, optional
        Random seed for reproducibility

    Returns
    -------
    trainability : float
        Trainability score (higher = more trainable)
    """
    if random_seed is not None:
        np.random.seed(random_seed)

    # Check if embedding has trainable parameters
    if not hasattr(embedding, 'get_parameters') or embedding.n_parameters == 0:
        # Data-dependent embeddings: analyze sensitivity to data
        return _data_sensitivity_trainability(embedding, data, n_samples, epsilon)

    # Parameter-dependent embeddings: analyze gradient variance
    return _parameter_gradient_trainability(embedding, data, n_samples, epsilon)
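
Neither branch helper (`_data_sensitivity_trainability`, `_parameter_gradient_trainability`) is listed on this page. As a rough illustration of the data-sensitivity branch, here is a minimal sketch of what such a helper could look like; it is an assumption, not the library's source, and reuses only `create_circuit` and `backend.get_statevector` from the code shown above:

def _data_sensitivity_trainability(embedding, data, n_samples, epsilon):
    """Sketch (assumed behavior): state sensitivity to small input perturbations."""
    data = np.asarray(data)
    sensitivities = []
    for _ in range(min(n_samples, len(data))):
        x = data[np.random.randint(len(data))]
        try:
            # Perturb all features by epsilon and measure the fidelity drop
            psi = embedding.backend.get_statevector(embedding.create_circuit(x))
            psi_eps = embedding.backend.get_statevector(
                embedding.create_circuit(x + epsilon))
            fid = np.abs(np.vdot(psi, psi_eps)) ** 2
            sensitivities.append((1.0 - fid) / epsilon)
        except Exception:
            continue
    return float(np.var(sensitivities)) if sensitivities else 0.0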

gradient_variance

gradient_variance(embedding, data, observable=None, n_samples=100, epsilon=0.0001)

Compute gradient variance for barren plateau analysis.

Parameters:

- embedding (BaseEmbedding): Quantum embedding to analyze. Required.
- data (array-like): Sample data points. Required.
- observable (optional): Observable to measure (defaults to Z on first qubit). Default: None.
- n_samples (int): Number of samples for gradient estimation. Default: 100.
- epsilon (float): Finite difference step size. Default: 1e-4.

Returns:

- grad_var (float): Gradient variance.

Source code in quantum_data_embedding_suite\metrics.py
@requires_license()
def gradient_variance(
    embedding: Any,
    data: np.ndarray,
    observable: Optional[Any] = None,
    n_samples: int = 100,
    epsilon: float = 1e-4
) -> float:
    """
    Compute gradient variance for barren plateau analysis.

    Parameters
    ----------
    embedding : BaseEmbedding
        Quantum embedding to analyze
    data : array-like
        Sample data points
    observable : observable, optional
        Observable to measure (defaults to Z on first qubit)
    n_samples : int, default=100
        Number of samples for gradient estimation
    epsilon : float, default=1e-4
        Finite difference step size

    Returns
    -------
    grad_var : float
        Gradient variance
    """
    if observable is None:
        # Default observable: Z measurement on first qubit
        observable = embedding.backend.create_observable("Z", [0])

    data = np.asarray(data)
    gradients = []

    for _ in range(min(n_samples, len(data))):
        idx = np.random.randint(len(data))
        x = data[idx]

        try:
            # Compute gradient using finite differences
            grad = _compute_finite_difference_gradient(
                embedding, x, observable, epsilon
            )
            gradients.extend(grad)
        except Exception:
            continue

    if len(gradients) == 0:
        return 0.0

    return float(np.var(gradients))
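
The helper `_compute_finite_difference_gradient` is referenced but not listed. A central-difference sketch consistent with its usage is shown below; the `backend.expectation_value` call is an assumption about the backend API, not a documented method:

def _compute_finite_difference_gradient(embedding, x, observable, epsilon):
    """Sketch (assumed behavior): central finite-difference gradient of <O> w.r.t. x."""
    x = np.asarray(x, dtype=float)
    grad = np.zeros_like(x)
    for k in range(len(x)):
        shift = np.zeros_like(x)
        shift[k] = epsilon
        circ_plus = embedding.create_circuit(x + shift)
        circ_minus = embedding.create_circuit(x - shift)
        e_plus = embedding.backend.expectation_value(circ_plus, observable)
        e_minus = embedding.backend.expectation_value(circ_minus, observable)
        grad[k] = (e_plus - e_minus) / (2 * epsilon)
    return grad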

effective_dimension

effective_dimension(kernel_matrix, threshold=0.95)

Compute effective dimension of a kernel matrix.

Parameters:

- kernel_matrix (array-like): Kernel matrix. Required.
- threshold (float): Variance threshold for determining effective dimension. Default: 0.95.

Returns:

- eff_dim (int): Effective dimension.

Source code in quantum_data_embedding_suite\metrics.py
def effective_dimension(kernel_matrix: np.ndarray, threshold: float = 0.95) -> int:
    """
    Compute effective dimension of a kernel matrix.

    Parameters
    ----------
    kernel_matrix : array-like
        Kernel matrix
    threshold : float, default=0.95
        Variance threshold for determining effective dimension

    Returns
    -------
    eff_dim : int
        Effective dimension
    """
    try:
        eigenvals = np.linalg.eigvals(kernel_matrix)
        eigenvals = np.real(eigenvals)
        eigenvals = np.sort(eigenvals)[::-1]
        eigenvals = np.maximum(eigenvals, 0)  # Remove negative eigenvalues

        if np.sum(eigenvals) == 0:
            return 0

        # Cumulative variance explained
        cumvar = np.cumsum(eigenvals) / np.sum(eigenvals)

        # Find effective dimension
        eff_dim = np.searchsorted(cumvar, threshold) + 1

        return min(eff_dim, len(eigenvals))
    except np.linalg.LinAlgError:
        return len(kernel_matrix)
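
A quick self-contained check of the eigenvalue-threshold logic:

import numpy as np

# A Gram matrix built from 2-dimensional data has rank at most 2, so at
# most 2 components are needed to explain 95% of the variance
X_toy = np.random.randn(10, 2)
K_toy = X_toy @ X_toy.T
print(effective_dimension(K_toy, threshold=0.95))  # prints 1 or 2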

Composite Metrics

compute_all_metrics

compute_all_metrics(embedding, data, n_samples=1000)

Compute all embedding quality metrics.

Parameters:

- embedding (BaseEmbedding): Quantum embedding to evaluate. Required.
- data (array-like): Sample data for evaluation. Required.
- n_samples (int): Number of samples for stochastic metrics. Default: 1000.

Returns:

- metrics (dict): Dictionary containing all computed metrics.

Source code in quantum_data_embedding_suite\metrics.py
@requires_license()
def compute_all_metrics(
    embedding: Any,
    data: np.ndarray,
    n_samples: int = 1000
) -> Dict[str, float]:
    """
    Compute all embedding quality metrics.

    Parameters
    ----------
    embedding : BaseEmbedding
        Quantum embedding to evaluate
    data : array-like
        Sample data for evaluation
    n_samples : int, default=1000
        Number of samples for stochastic metrics

    Returns
    -------
    metrics : dict
        Dictionary containing all computed metrics
    """
    data = np.asarray(data)

    metrics = {}

    # Expressibility
    try:
        expr = expressibility(embedding, n_samples=n_samples)
        metrics['expressibility'] = expr
    except Exception as e:
        metrics['expressibility'] = 0.0
        metrics['expressibility_error'] = str(e)

    # Trainability
    try:
        train = trainability(embedding, data, n_samples=min(100, n_samples))
        metrics['trainability'] = train
    except Exception as e:
        metrics['trainability'] = 0.0
        metrics['trainability_error'] = str(e)

    # Gradient variance
    try:
        grad_var = gradient_variance(embedding, data, n_samples=min(100, n_samples))
        metrics['gradient_variance'] = grad_var
    except Exception as e:
        metrics['gradient_variance'] = 0.0
        metrics['gradient_variance_error'] = str(e)

    return metrics

quantum_advantage_score

quantum_advantage_score(embedding, data, classical_baseline=None, n_samples=1000)

Compute quantum advantage score by comparing to classical baselines.

This function evaluates the potential quantum advantage of an embedding by comparing its performance metrics against classical feature engineering techniques and traditional machine learning preprocessing methods.

Parameters:

- embedding (BaseEmbedding): Quantum embedding to evaluate. Required.
- data (array-like): Sample data for evaluation. Required.
- classical_baseline (dict, optional): Classical baseline metrics for comparison. If None, standard classical baselines (PCA, polynomial features, etc.) are computed. Default: None.
- n_samples (int): Number of samples for stochastic metrics. Default: 1000.

Returns:

- advantage_metrics (dict): Dictionary containing quantum advantage scores and comparisons.

Examples:

>>> from quantum_data_embedding_suite import AngleEmbedding
>>> import numpy as np
>>> data = np.random.randn(100, 4)
>>> embedding = AngleEmbedding(n_qubits=4)
>>> scores = quantum_advantage_score(embedding, data)
>>> print(f"Quantum advantage: {scores['overall_advantage']:.3f}")
Source code in quantum_data_embedding_suite\metrics.py
def quantum_advantage_score(
    embedding: Any,
    data: np.ndarray,
    classical_baseline: Optional[Dict[str, float]] = None,
    n_samples: int = 1000
) -> Dict[str, float]:
    """
    Compute quantum advantage score by comparing to classical baselines.

    This function evaluates the potential quantum advantage of an embedding
    by comparing its performance metrics against classical feature engineering
    techniques and traditional machine learning preprocessing methods.

    Parameters
    ----------
    embedding : BaseEmbedding
        Quantum embedding to evaluate
    data : array-like
        Sample data for evaluation
    classical_baseline : dict, optional
        Classical baseline metrics for comparison. If None, will compute
        standard classical baselines (PCA, polynomial features, etc.)
    n_samples : int, default=1000
        Number of samples for stochastic metrics

    Returns
    -------
    advantage_metrics : dict
        Dictionary containing quantum advantage scores and comparisons

    Examples
    --------
    >>> from quantum_data_embedding_suite import AngleEmbedding
    >>> import numpy as np
    >>> data = np.random.randn(100, 4)
    >>> embedding = AngleEmbedding(n_qubits=4)
    >>> scores = quantum_advantage_score(embedding, data)
    >>> print(f"Quantum advantage: {scores['overall_advantage']:.3f}")
    """
    data = np.asarray(data)

    # Compute quantum metrics
    quantum_metrics = compute_all_metrics(embedding, data, n_samples=n_samples)

    # Compute or use provided classical baselines
    if classical_baseline is None:
        classical_baseline = _compute_classical_baselines(data)

    # Calculate advantage scores
    advantage_scores = {}

    # Expressibility advantage
    classical_expr = classical_baseline.get('expressibility', 0.5)
    quantum_expr = quantum_metrics.get('expressibility', 0.0)
    advantage_scores['expressibility_advantage'] = quantum_expr - classical_expr

    # Trainability advantage
    classical_train = classical_baseline.get('trainability', 0.3)
    quantum_train = quantum_metrics.get('trainability', 0.0)
    advantage_scores['trainability_advantage'] = quantum_train - classical_train

    # Feature richness (based on gradient variance)
    classical_features = classical_baseline.get('feature_variance', 0.1)
    quantum_features = quantum_metrics.get('gradient_variance', 0.0)
    if classical_features > 0:
        advantage_scores['feature_richness_ratio'] = quantum_features / classical_features
    else:
        advantage_scores['feature_richness_ratio'] = 1.0

    # Overall quantum advantage score
    # Weighted combination of individual advantages
    weights = {
        'expressibility': 0.4,
        'trainability': 0.4,
        'feature_richness': 0.2
    }

    overall_advantage = (
        weights['expressibility'] * max(0, advantage_scores['expressibility_advantage']) +
        weights['trainability'] * max(0, advantage_scores['trainability_advantage']) +
        weights['feature_richness'] * min(2.0, advantage_scores['feature_richness_ratio'])
    )

    advantage_scores['overall_advantage'] = overall_advantage

    # Add baseline and quantum metrics for reference
    advantage_scores['quantum_metrics'] = quantum_metrics
    advantage_scores['classical_baseline'] = classical_baseline

    # Interpretation
    if overall_advantage > 0.7:
        advantage_scores['interpretation'] = 'Strong quantum advantage'
    elif overall_advantage > 0.4:
        advantage_scores['interpretation'] = 'Moderate quantum advantage'
    elif overall_advantage > 0.1:
        advantage_scores['interpretation'] = 'Weak quantum advantage'
    else:
        advantage_scores['interpretation'] = 'No clear quantum advantage'

    return advantage_scores
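
If you already have classical reference numbers, you can skip the internal `_compute_classical_baselines` call by passing them explicitly. The keys below mirror the `.get(...)` lookups in the source, and the numeric values are simply the fallback defaults the source uses:

baseline = {
    'expressibility': 0.5,     # fallback used by the source when absent
    'trainability': 0.3,
    'feature_variance': 0.1,
}
scores = quantum_advantage_score(embedding, data, classical_baseline=baseline)
print(scores['interpretation'])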

Kernel Metrics

kernel_alignment

kernel_alignment(kernel_matrix1, kernel_matrix2)

Compute kernel alignment between two kernel matrices.

Kernel alignment measures the similarity between two kernel matrices, which is useful for comparing quantum and classical kernels.

Parameters:

- kernel_matrix1 (array-like): First kernel matrix. Required.
- kernel_matrix2 (array-like): Second kernel matrix. Required.

Returns:

- alignment (float): Kernel alignment score (0 = no alignment, 1 = perfect alignment).

Source code in quantum_data_embedding_suite\metrics.py
def kernel_alignment(
    kernel_matrix1: np.ndarray,
    kernel_matrix2: np.ndarray
) -> float:
    """
    Compute kernel alignment between two kernel matrices.

    Kernel alignment measures the similarity between two kernel matrices,
    which is useful for comparing quantum and classical kernels.

    Parameters
    ----------
    kernel_matrix1 : array-like
        First kernel matrix
    kernel_matrix2 : array-like
        Second kernel matrix

    Returns
    -------
    alignment : float
        Kernel alignment score (0 = no alignment, 1 = perfect alignment)
    """
    K1 = np.asarray(kernel_matrix1)
    K2 = np.asarray(kernel_matrix2)

    if K1.shape != K2.shape:
        raise ValueError("Kernel matrices must have the same shape")

    # Compute Frobenius inner product
    numerator = np.sum(K1 * K2)

    # Compute norms
    norm_K1 = np.sqrt(np.sum(K1 * K1))
    norm_K2 = np.sqrt(np.sum(K2 * K2))

    if norm_K1 == 0 or norm_K2 == 0:
        return 0.0

    # Kernel alignment (uncentered Frobenius cosine similarity; note that
    # no centering is applied here)
    alignment = numerator / (norm_K1 * norm_K2)

    return float(alignment)
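
Because this implementation does not center the kernel matrices, it differs from the centered kernel alignment common in the literature (Cortes et al. style). A supplementary sketch of the centered variant, not part of the library API, first projects out the mean with H = I - (1/n) 11^T:

def centered_kernel_alignment(K1, K2):
    """Sketch: centered kernel alignment between two kernel matrices."""
    n = K1.shape[0]
    H = np.eye(n) - np.ones((n, n)) / n  # centering matrix
    K1c, K2c = H @ K1 @ H, H @ K2 @ H
    num = np.sum(K1c * K2c)
    denom = np.sqrt(np.sum(K1c * K1c)) * np.sqrt(np.sum(K2c * K2c))
    return float(num / denom) if denom > 0 else 0.0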

kernel_expressivity

kernel_expressivity(kernel_matrix, n_bins=50)

Compute expressivity of a kernel matrix.

Kernel expressivity measures how well the kernel can distinguish between different data points based on the distribution of kernel values.

Parameters:

- kernel_matrix (array-like): Kernel matrix to analyze. Required.
- n_bins (int): Number of bins for histogram analysis. Default: 50.

Returns:

- expressivity (float): Kernel expressivity score (higher = more expressive).

Source code in quantum_data_embedding_suite\metrics.py
def kernel_expressivity(
    kernel_matrix: np.ndarray,
    n_bins: int = 50
) -> float:
    """
    Compute expressivity of a kernel matrix.

    Kernel expressivity measures how well the kernel can distinguish
    between different data points based on the distribution of kernel values.

    Parameters
    ----------
    kernel_matrix : array-like
        Kernel matrix to analyze
    n_bins : int, default=50
        Number of bins for histogram analysis

    Returns
    -------
    expressivity : float
        Kernel expressivity score (higher = more expressive)
    """
    K = np.asarray(kernel_matrix)

    # Extract upper triangular values (excluding diagonal)
    n = K.shape[0]
    triu_indices = np.triu_indices(n, k=1)
    kernel_values = K[triu_indices]

    if len(kernel_values) == 0:
        return 0.0

    # Compute histogram
    hist, bins = np.histogram(kernel_values, bins=n_bins, density=True)

    # Compute entropy as measure of expressivity
    # Higher entropy = more uniform distribution = better expressivity
    hist_normalized = hist / np.sum(hist)

    # Add small epsilon to avoid log(0)
    epsilon = 1e-10
    hist_normalized = hist_normalized + epsilon

    # Shannon entropy
    entropy = -np.sum(hist_normalized * np.log2(hist_normalized))

    # Normalize by maximum possible entropy
    max_entropy = np.log2(n_bins)
    expressivity = entropy / max_entropy if max_entropy > 0 else 0.0

    return float(expressivity)
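
A small sanity check of the entropy-based score: a kernel whose off-diagonal values are all identical lands in a single histogram bin (near-zero entropy), while widely spread values push the score toward 1:

import numpy as np

K_flat = np.full((20, 20), 0.5)
np.fill_diagonal(K_flat, 1.0)

rng = np.random.default_rng(0)
K_spread = rng.uniform(0, 1, (20, 20))
K_spread = (K_spread + K_spread.T) / 2  # symmetrize

print(kernel_expressivity(K_flat))    # close to 0
print(kernel_expressivity(K_spread))  # much higher, approaching 1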

kernel_matrix_rank

kernel_matrix_rank(kernel_matrix, threshold=1e-10)

Compute the numerical rank of a kernel matrix.

Parameters:

- kernel_matrix (array-like): Kernel matrix to analyze. Required.
- threshold (float): Threshold for determining rank. Default: 1e-10.

Returns:

- rank (int): Numerical rank of the kernel matrix.

Source code in quantum_data_embedding_suite\metrics.py
def kernel_matrix_rank(kernel_matrix: np.ndarray, threshold: float = 1e-10) -> int:
    """
    Compute the numerical rank of a kernel matrix.

    Parameters
    ----------
    kernel_matrix : array-like
        Kernel matrix to analyze
    threshold : float, default=1e-10
        Threshold for determining rank

    Returns
    -------
    rank : int
        Numerical rank of the kernel matrix
    """
    K = np.asarray(kernel_matrix)

    try:
        # Compute singular values
        singular_values = np.linalg.svd(K, compute_uv=False)

        # Count singular values above threshold
        rank = np.sum(singular_values > threshold)

        return int(rank)

    except np.linalg.LinAlgError:
        # Return matrix size as fallback
        return K.shape[0]

kernel_condition_number

kernel_condition_number(kernel_matrix)

Compute the condition number of a kernel matrix.

Parameters:

- kernel_matrix (array-like): Kernel matrix to analyze. Required.

Returns:

- condition_number (float): Condition number of the kernel matrix.

Source code in quantum_data_embedding_suite\metrics.py
def kernel_condition_number(kernel_matrix: np.ndarray) -> float:
    """
    Compute the condition number of a kernel matrix.

    Parameters
    ----------
    kernel_matrix : array-like
        Kernel matrix to analyze

    Returns
    -------
    condition_number : float
        Condition number of the kernel matrix
    """
    K = np.asarray(kernel_matrix)

    try:
        # Compute condition number
        cond_num = np.linalg.cond(K)

        return float(cond_num)

    except np.linalg.LinAlgError:
        # Return large number as fallback for singular matrices
        return 1e12
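
Rank and condition number are typically read together: a rank well below the matrix size, or a very large condition number, both signal redundant or numerically unstable kernel features. Assuming a kernel instance as constructed in the usage examples below:

K = kernel.compute_kernel_matrix(X[:20])
print(f"Rank: {kernel_matrix_rank(K)} / {K.shape[0]}")
print(f"Condition number: {kernel_condition_number(K):.2e}")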

Advanced Metrics

embedding_capacity

embedding_capacity(embedding, data, n_samples=100)

Compute the capacity of a quantum embedding.

Capacity measures how much information the embedding can encode about the input data.

Parameters:

- embedding (BaseEmbedding): Quantum embedding to evaluate. Required.
- data (array-like): Sample data points. Required.
- n_samples (int): Number of samples for evaluation. Default: 100.

Returns:

- capacity (float): Embedding capacity score.

Source code in quantum_data_embedding_suite\metrics.py
def embedding_capacity(
    embedding: Any,
    data: np.ndarray,
    n_samples: int = 100
) -> float:
    """
    Compute the capacity of a quantum embedding.

    Capacity measures how much information the embedding can encode
    about the input data.

    Parameters
    ----------
    embedding : BaseEmbedding
        Quantum embedding to evaluate
    data : array-like
        Sample data points
    n_samples : int, default=100
        Number of samples for evaluation

    Returns
    -------
    capacity : float
        Embedding capacity score
    """
    data = np.asarray(data)

    if len(data) < 2:
        return 0.0

    # Sample random pairs of data points
    n_pairs = min(n_samples, len(data) * (len(data) - 1) // 2)
    pairs = []

    for _ in range(n_pairs):
        i, j = np.random.choice(len(data), size=2, replace=False)
        pairs.append((i, j))

    # Compute distances in input space and quantum state space
    input_distances = []
    quantum_distances = []

    for i, j in pairs:
        # Input space distance
        input_dist = np.linalg.norm(data[i] - data[j])
        input_distances.append(input_dist)

        try:
            # Quantum state distance (fidelity)
            circuit_i = embedding.create_circuit(data[i])
            circuit_j = embedding.create_circuit(data[j])

            # Compute state fidelity (placeholder - would need actual implementation)
            fidelity = 0.5 + 0.5 * np.exp(-input_dist)  # Approximate relationship
            quantum_dist = 1 - fidelity
            quantum_distances.append(quantum_dist)

        except Exception:
            quantum_distances.append(0.0)

    if len(input_distances) == 0 or len(quantum_distances) == 0:
        return 0.0

    # Compute correlation between input and quantum distances
    try:
        correlation = np.corrcoef(input_distances, quantum_distances)[0, 1]

        # Convert to capacity score (higher correlation = better capacity)
        capacity = abs(correlation) if not np.isnan(correlation) else 0.0

        return float(capacity)

    except Exception:
        return 0.0

separability_measure

separability_measure(embedding, data, labels, n_samples=100)

Compute how well the embedding separates different classes.

Parameters:

- embedding (BaseEmbedding): Quantum embedding to evaluate. Required.
- data (array-like): Sample data points. Required.
- labels (array-like): Class labels for data points. Required.
- n_samples (int): Number of samples for evaluation. Default: 100.

Returns:

- separability (float): Class separability score (higher = better separation).

Source code in quantum_data_embedding_suite\metrics.py
def separability_measure(
    embedding: Any,
    data: np.ndarray,
    labels: np.ndarray,
    n_samples: int = 100
) -> float:
    """
    Compute how well the embedding separates different classes.

    Parameters
    ----------
    embedding : BaseEmbedding
        Quantum embedding to evaluate
    data : array-like
        Sample data points
    labels : array-like
        Class labels for data points
    n_samples : int, default=100
        Number of samples for evaluation

    Returns
    -------
    separability : float
        Class separability score (higher = better separation)
    """
    data = np.asarray(data)
    labels = np.asarray(labels)

    if len(data) != len(labels):
        raise ValueError("Data and labels must have same length")

    unique_labels = np.unique(labels)

    if len(unique_labels) < 2:
        return 0.0

    # Compute within-class and between-class distances
    within_distances = []
    between_distances = []

    for _ in range(n_samples):
        # Sample two points from same class
        label = np.random.choice(unique_labels)
        same_class_indices = np.where(labels == label)[0]

        if len(same_class_indices) >= 2:
            i, j = np.random.choice(same_class_indices, size=2, replace=False)

            try:
                # Compute quantum distance
                circuit_i = embedding.create_circuit(data[i])
                circuit_j = embedding.create_circuit(data[j])

                # Placeholder distance computation
                dist = np.linalg.norm(data[i] - data[j])
                within_distances.append(dist)

            except Exception:
                continue

        # Sample two points from different classes
        if len(unique_labels) >= 2:
            label1, label2 = np.random.choice(unique_labels, size=2, replace=False)

            class1_indices = np.where(labels == label1)[0]
            class2_indices = np.where(labels == label2)[0]

            if len(class1_indices) > 0 and len(class2_indices) > 0:
                i = np.random.choice(class1_indices)
                j = np.random.choice(class2_indices)

                try:
                    # Compute quantum distance
                    circuit_i = embedding.create_circuit(data[i])
                    circuit_j = embedding.create_circuit(data[j])

                    # Placeholder distance computation
                    dist = np.linalg.norm(data[i] - data[j])
                    between_distances.append(dist)

                except Exception:
                    continue

    if len(within_distances) == 0 or len(between_distances) == 0:
        return 0.0

    # Separability is ratio of between-class to within-class distances
    mean_within = np.mean(within_distances)
    mean_between = np.mean(between_distances)

    if mean_within == 0:
        return 1.0 if mean_between > 0 else 0.0

    separability = mean_between / mean_within

    return float(min(separability, 1.0))  # Cap at 1.0
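
A minimal usage sketch with two synthetic classes, assuming an embedding instance as constructed in the usage examples below:

import numpy as np

rng = np.random.default_rng(0)
data = np.vstack([rng.normal(-2, 1, (50, 4)),   # class 0
                  rng.normal(+2, 1, (50, 4))])  # class 1
labels = np.array([0] * 50 + [1] * 50)

sep = separability_measure(embedding, data, labels, n_samples=100)
print(f"Separability: {sep:.3f}")  # well-separated classes score near the 1.0 cap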

Usage Examples

Basic Metrics Computation

from quantum_data_embedding_suite.metrics import (
    expressibility, trainability, gradient_variance, effective_dimension
)
from quantum_data_embedding_suite.embeddings import AngleEmbedding
import numpy as np

# Create embedding and sample data
embedding = AngleEmbedding(n_qubits=4)
X = np.random.randn(100, 4)

# Compute individual metrics
expr_score = expressibility(
    embedding=embedding,
    n_samples=1000,
    random_seed=42
)

train_score = trainability(
    embedding=embedding,
    data=X,
    n_samples=100
)

grad_var = gradient_variance(
    embedding=embedding,
    data=X,
    n_samples=100
)

# For effective dimension, we need a kernel matrix
from quantum_data_embedding_suite.kernels import FidelityKernel
kernel = FidelityKernel(embedding=embedding)
kernel_matrix = kernel.compute_kernel_matrix(X[:20])  # Use subset for speed
eff_dim = effective_dimension(kernel_matrix=kernel_matrix)

print(f"Expressibility: {expr_score:.4f}")
print(f"Trainability: {train_score:.4f}")
print(f"Gradient Variance: {grad_var:.6f}")
print(f"Effective Dimension: {eff_dim:.1f}")

Comprehensive Metrics Analysis

from quantum_data_embedding_suite.metrics import compute_all_metrics

# Compute all metrics at once
all_metrics = compute_all_metrics(
    embedding=embedding,
    data=X,
    n_samples=500
)

print("Complete Metrics Report:")
print("=" * 40)
for metric_name, value in all_metrics.items():
    if isinstance(value, float):
        print(f"  {metric_name}: {value:.6f}")
    else:
        print(f"  {metric_name}: {value}")

Embedding Comparison

from quantum_data_embedding_suite.embeddings import (
    AngleEmbedding, IQPEmbedding, AmplitudeEmbedding
)

# Create different embeddings
embeddings = {
    'angle': AngleEmbedding(n_qubits=4),
    'iqp': IQPEmbedding(n_qubits=4, depth=2),
    'amplitude': AmplitudeEmbedding(n_qubits=4)
}

# Compare metrics across embeddings
comparison_results = {}

for name, emb in embeddings.items():
    try:
        metrics = compute_all_metrics(
            embedding=emb,
            data=X[:50],  # Use smaller subset for speed
            n_samples=200
        )

        comparison_results[name] = {
            'expressibility': metrics['expressibility'],
            'trainability': metrics['trainability'],
            'gradient_variance': metrics['gradient_variance']
        }

    except Exception as e:
        print(f"Error computing metrics for {name}: {e}")
        comparison_results[name] = None

# Display comparison
import pandas as pd
df = pd.DataFrame(comparison_results).T
print("\nEmbedding Comparison:")
print(df.round(4))

# Find best embedding for each metric
for metric in df.columns:
    best_embedding = df[metric].idxmax()
    best_value = df[metric].max()
    print(f"Best {metric}: {best_embedding} ({best_value:.4f})")

Advanced Metrics Analysis

Statistical Significance Testing

from scipy import stats
import numpy as np

def compare_embeddings_statistically(embedding1, embedding2, X, n_trials=20):
    """Compare two embeddings with statistical significance testing"""

    # Collect metrics over multiple trials
    metrics1 = []
    metrics2 = []

    for trial in range(n_trials):
        # expressibility is data-independent; each call resamples random
        # states, so repeated trials give different estimates. (X is kept in
        # the signature for data-dependent metrics such as trainability.)
        expr1 = expressibility(embedding1, n_samples=500)
        expr2 = expressibility(embedding2, n_samples=500)

        metrics1.append(expr1)
        metrics2.append(expr2)

    metrics1 = np.array(metrics1)
    metrics2 = np.array(metrics2)

    # Perform statistical tests
    t_stat, p_value = stats.ttest_ind(metrics1, metrics2)
    mannwhitney_stat, mannwhitney_p = stats.mannwhitneyu(metrics1, metrics2)

    results = {
        'embedding1_mean': np.mean(metrics1),
        'embedding1_std': np.std(metrics1),
        'embedding2_mean': np.mean(metrics2),
        'embedding2_std': np.std(metrics2),
        't_test_p_value': p_value,
        'mann_whitney_p_value': mannwhitney_p,
        'significant_difference': p_value < 0.05
    }

    return results

# Compare embeddings statistically
angle_emb = AngleEmbedding(n_qubits=4)
iqp_emb = IQPEmbedding(n_qubits=4, depth=2)

stat_results = compare_embeddings_statistically(angle_emb, iqp_emb, X)

print("Statistical Comparison Results:")
for key, value in stat_results.items():
    print(f"{key}: {value}")

Hyperparameter Sensitivity Analysis

def analyze_hyperparameter_sensitivity(embedding_class, param_ranges, X, metric_func):
    """Analyze sensitivity of metrics to hyperparameter changes"""

    results = {}

    for param_name, param_values in param_ranges.items():
        param_results = []

        for param_value in param_values:
            try:
                # Create embedding with specific parameter
                kwargs = {param_name: param_value}
                embedding = embedding_class(n_qubits=4, **kwargs)

                # Compute metric
                metric_value = metric_func(embedding, X)
                param_results.append(metric_value)

            except Exception as e:
                print(f"Error with {param_name}={param_value}: {e}")
                param_results.append(np.nan)

        results[param_name] = {
            'values': param_values,
            'metrics': param_results,
            'sensitivity': np.nanstd(param_results) if not np.all(np.isnan(param_results)) else 0
        }

    return results

# Analyze sensitivity for IQP embedding
param_ranges = {
    'depth': [1, 2, 3, 4],
    # Add other parameters as needed
}

def expr_metric(embedding, X):
    return expressibility(embedding, n_samples=500)

sensitivity_results = analyze_hyperparameter_sensitivity(
    IQPEmbedding, param_ranges, X, expr_metric
)

print("Hyperparameter Sensitivity Analysis:")
for param, results in sensitivity_results.items():
    print(f"{param}: sensitivity = {results['sensitivity']:.4f}")

Temporal Metrics Evolution

class MetricsTracker:
    """Track metrics evolution during training/optimization"""

    def __init__(self):
        self.history = {
            'expressibility': [],
            'trainability': [],
            'gradient_variance': [],
            'iteration': []
        }

    def record_metrics(self, embedding, X, iteration):
        """Record metrics at current iteration"""
        try:
            expr = expressibility(embedding, n_samples=200)  # Reduced for speed
            train = trainability(embedding, data=X[:20])  # Small subset
            grad_var = gradient_variance(embedding, data=X[:20], n_samples=50)

            self.history['expressibility'].append(expr)
            self.history['trainability'].append(train)
            self.history['gradient_variance'].append(grad_var)
            self.history['iteration'].append(iteration)

        except Exception as e:
            print(f"Error recording metrics at iteration {iteration}: {e}")

    def plot_evolution(self):
        """Plot metrics evolution"""
        import matplotlib.pyplot as plt

        fig, axes = plt.subplots(2, 2, figsize=(12, 8))
        axes = axes.ravel()

        metrics = ['expressibility', 'trainability', 'gradient_variance']

        for i, metric in enumerate(metrics):
            axes[i].plot(self.history['iteration'], self.history[metric], 'o-')
            axes[i].set_title(f'{metric.capitalize()} Evolution')
            axes[i].set_xlabel('Iteration')
            axes[i].set_ylabel(metric.replace('_', ' ').title())
            axes[i].grid(True)

        # Summary plot
        normalized_metrics = {}
        for metric in metrics:
            values = np.array(self.history[metric])
            if np.std(values) > 0:
                normalized_metrics[metric] = (values - np.mean(values)) / np.std(values)
            else:
                normalized_metrics[metric] = values

        for metric in metrics:
            axes[3].plot(self.history['iteration'], normalized_metrics[metric], 
                        'o-', label=metric)

        axes[3].set_title('Normalized Metrics Evolution')
        axes[3].set_xlabel('Iteration')
        axes[3].set_ylabel('Normalized Value')
        axes[3].legend()
        axes[3].grid(True)

        plt.tight_layout()
        plt.show()

    def get_summary_statistics(self):
        """Get summary statistics of metrics evolution"""
        summary = {}

        for metric in ['expressibility', 'trainability', 'gradient_variance']:
            values = np.array(self.history[metric])
            if len(values) > 0:
                summary[metric] = {
                    'initial': values[0],
                    'final': values[-1],
                    'mean': np.mean(values),
                    'std': np.std(values),
                    'min': np.min(values),
                    'max': np.max(values),
                    'trend': 'increasing' if values[-1] > values[0] else 'decreasing'
                }

        return summary

# Example usage during optimization
tracker = MetricsTracker()

# Simulate optimization process
embedding = AngleEmbedding(n_qubits=4)
for iteration in range(10):
    # Simulate parameter updates
    # embedding.update_parameters(...)

    # Record metrics
    tracker.record_metrics(embedding, X, iteration)

# Analyze evolution
summary = tracker.get_summary_statistics()
print("Metrics Evolution Summary:")
for metric, stats in summary.items():
    print(f"\n{metric.upper()}:")
    for stat_name, value in stats.items():
        if isinstance(value, float):
            print(f"  {stat_name}: {value:.6f}")
        else:
            print(f"  {stat_name}: {value}")

Kernel-Specific Metrics

Kernel Quality Assessment

from quantum_data_embedding_suite.kernels import FidelityKernel
from quantum_data_embedding_suite.metrics import kernel_alignment, kernel_expressivity

def assess_kernel_quality(kernel, X, y=None):
    """Comprehensive kernel quality assessment"""

    # Compute kernel matrix
    K = kernel.compute_kernel_matrix(X)

    # Basic kernel properties
    from quantum_data_embedding_suite.api.kernels import analyze_kernel_properties
    properties = analyze_kernel_properties(K)

    # Kernel-specific metrics (kernel_expressivity operates on the matrix itself)
    expressivity = kernel_expressivity(K, n_bins=50)

    # Compare with classical kernels
    from sklearn.metrics.pairwise import rbf_kernel, polynomial_kernel, linear_kernel

    classical_kernels = {
        'rbf': rbf_kernel(X),
        'polynomial': polynomial_kernel(X),
        'linear': linear_kernel(X)
    }

    alignments = {}
    for name, K_classical in classical_kernels.items():
        alignment = kernel_alignment(K, K_classical)
        alignments[f'{name}_alignment'] = alignment

    # Combine results
    quality_report = {
        'properties': properties,
        'expressivity': expressivity,
        'classical_alignments': alignments
    }

    # Add classification performance if labels available
    if y is not None:
        from sklearn.svm import SVC
        from sklearn.model_selection import cross_val_score

        svm = SVC(kernel='precomputed')
        cv_scores = cross_val_score(svm, K, y, cv=5)

        quality_report['classification_performance'] = {
            'cv_mean': np.mean(cv_scores),
            'cv_std': np.std(cv_scores),
            'cv_scores': cv_scores
        }

    return quality_report

# Assess kernel quality
kernel = FidelityKernel(embedding)
quality_report = assess_kernel_quality(kernel, X[:50], y[:50] if 'y' in locals() else None)

print("Kernel Quality Assessment:")
for category, metrics in quality_report.items():
    print(f"\n{category.upper()}:")
    if isinstance(metrics, dict):
        for metric_name, value in metrics.items():
            if isinstance(value, (int, float)):
                print(f"  {metric_name}: {value:.6f}")
            else:
                print(f"  {metric_name}: {value}")
    else:
        print(f"  {metrics}")

Metrics Visualization

Comprehensive Metrics Dashboard

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

def create_metrics_dashboard(embeddings_dict, X, save_path=None):
    """Create comprehensive metrics visualization dashboard"""

    # Collect metrics for all embeddings
    all_metrics = {}
    for name, embedding in embeddings_dict.items():
        try:
            metrics = compute_all_metrics(embedding, X[:30])  # Small subset for speed
            # compute_all_metrics returns a flat dict that may also contain
            # *_error strings; keep only the numeric values
            all_metrics[name] = {k: v for k, v in metrics.items()
                                 if isinstance(v, (int, float))}
        except Exception as e:
            print(f"Error computing metrics for {name}: {e}")
            continue

    if not all_metrics:
        print("No metrics computed successfully")
        return

    # Create dashboard
    fig = plt.figure(figsize=(16, 12))

    # 1. Metrics comparison bar plot
    ax1 = plt.subplot(3, 3, 1)
    metrics_df = pd.DataFrame(all_metrics).T
    metrics_df.plot(kind='bar', ax=ax1)
    ax1.set_title('Metrics Comparison')
    ax1.set_ylabel('Metric Value')
    ax1.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

    # 2. Expressibility vs Trainability scatter
    ax2 = plt.subplot(3, 3, 2)
    expr_vals = [metrics['expressibility'] for metrics in all_metrics.values()]
    train_vals = [metrics['trainability'] for metrics in all_metrics.values()]
    names = list(all_metrics.keys())

    scatter = ax2.scatter(expr_vals, train_vals, s=100, alpha=0.7)
    for i, name in enumerate(names):
        ax2.annotate(name, (expr_vals[i], train_vals[i]), 
                    xytext=(5, 5), textcoords='offset points')
    ax2.set_xlabel('Expressibility')
    ax2.set_ylabel('Trainability')
    ax2.set_title('Expressibility vs Trainability')
    ax2.grid(True, alpha=0.3)

    # 3. Radar chart
    ax3 = plt.subplot(3, 3, 3, projection='polar')

    metric_names = list(metrics_df.columns)
    angles = np.linspace(0, 2 * np.pi, len(metric_names), endpoint=False)
    angles = np.concatenate((angles, [angles[0]]))

    for name in names:
        values = metrics_df.loc[name].values
        values = np.concatenate((values, [values[0]]))
        ax3.plot(angles, values, 'o-', linewidth=2, label=name)
        ax3.fill(angles, values, alpha=0.25)

    ax3.set_xticks(angles[:-1])
    ax3.set_xticklabels(metric_names)
    ax3.set_title('Metrics Radar Chart')
    ax3.legend(bbox_to_anchor=(1.3, 1.1))

    # 4. Metrics correlation heatmap
    ax4 = plt.subplot(3, 3, 4)
    correlation_matrix = metrics_df.corr()
    sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', 
                center=0, ax=ax4)
    ax4.set_title('Metrics Correlation')

    # 5. Individual metric distributions
    ax5 = plt.subplot(3, 3, 5)
    for metric in metric_names:
        ax5.hist(metrics_df[metric], alpha=0.5, label=metric, bins=10)
    ax5.set_xlabel('Metric Value')
    ax5.set_ylabel('Frequency')
    ax5.set_title('Metric Distributions')
    ax5.legend()

    # 6. Embedding ranking
    ax6 = plt.subplot(3, 3, 6)

    # Simple ranking based on average normalized metrics
    normalized_metrics = metrics_df.apply(lambda x: (x - x.min()) / (x.max() - x.min()) if x.max() > x.min() else x)
    rankings = normalized_metrics.mean(axis=1).sort_values(ascending=False)

    bars = ax6.bar(range(len(rankings)), rankings.values)
    ax6.set_xticks(range(len(rankings)))
    ax6.set_xticklabels(rankings.index, rotation=45)
    ax6.set_ylabel('Average Normalized Score')
    ax6.set_title('Embedding Rankings')

    # Color bars by rank
    colors = plt.cm.RdYlGn(np.linspace(0.3, 1, len(bars)))
    for bar, color in zip(bars, colors):
        bar.set_color(color)

    # 7-9. Individual embedding analysis
    for i, (name, embedding) in enumerate(list(embeddings_dict.items())[:3]):
        ax = plt.subplot(3, 3, 7 + i)

        # Plot circuit depth over different data points
        # (uses create_circuit, consistent with the API shown elsewhere on this page)
        depths = []
        for j in range(min(20, len(X))):
            circ = embedding.create_circuit(X[j])
            depths.append(circ.depth())

        ax.plot(depths, 'o-')
        ax.set_xlabel('Data Point Index')
        ax.set_ylabel('Circuit Depth')
        ax.set_title(f'{name} - Circuit Depth Variation')
        ax.grid(True, alpha=0.3)

    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')

    plt.show()

    return fig

# Create dashboard
embeddings_for_dashboard = {
    'Angle': AngleEmbedding(n_qubits=4),
    'IQP': IQPEmbedding(n_qubits=4, depth=2),
    'Amplitude': AmplitudeEmbedding(n_qubits=4)
}

dashboard_fig = create_metrics_dashboard(embeddings_for_dashboard, X)

Performance Optimization

Efficient Metrics Computation

class MetricsCache:
    """Cache for expensive metrics computations"""

    def __init__(self, max_size=100):
        self.cache = {}
        self.max_size = max_size
        self.access_count = {}

    def _get_key(self, embedding, X, metric_name, **kwargs):
        """Generate cache key"""
        import hashlib

        # Create key from embedding type, data hash, and parameters
        embedding_str = f"{type(embedding).__name__}_{embedding.n_qubits}"
        data_hash = hashlib.md5(X.tobytes()).hexdigest()[:8]
        params_str = "_".join([f"{k}_{v}" for k, v in sorted(kwargs.items())])

        return f"{embedding_str}_{data_hash}_{metric_name}_{params_str}"

    def get(self, embedding, X, metric_name, **kwargs):
        """Get cached result"""
        key = self._get_key(embedding, X, metric_name, **kwargs)

        if key in self.cache:
            self.access_count[key] = self.access_count.get(key, 0) + 1
            return self.cache[key]

        return None

    def set(self, embedding, X, metric_name, result, **kwargs):
        """Cache result"""
        key = self._get_key(embedding, X, metric_name, **kwargs)

        # Remove least accessed item if cache is full
        if len(self.cache) >= self.max_size:
            least_accessed = min(self.access_count.items(), key=lambda x: x[1])
            del self.cache[least_accessed[0]]
            del self.access_count[least_accessed[0]]

        self.cache[key] = result
        self.access_count[key] = 1

# Global cache instance
_metrics_cache = MetricsCache()

def cached_expressibility(embedding, X=None, n_samples=1000, **kwargs):
    """Cached version of expressibility computation"""

    # `X or np.array([])` would raise for non-empty arrays (ambiguous truth
    # value), so test against None explicitly
    data = X if X is not None else np.array([])

    # Check cache
    cached_result = _metrics_cache.get(embedding, data,
                                       'expressibility',
                                       n_samples=n_samples, **kwargs)
    if cached_result is not None:
        return cached_result

    # Compute if not cached
    result = expressibility(embedding, n_samples=n_samples, **kwargs)

    # Cache result
    _metrics_cache.set(embedding, data,
                       'expressibility', result,
                       n_samples=n_samples, **kwargs)

    return result

# Use cached function
expr_score = cached_expressibility(embedding, n_samples=1000)

Parallel Metrics Computation

from multiprocessing import Pool

from quantum_data_embedding_suite.kernels import FidelityKernel

def _single_embedding_metrics(args):
    """Compute metrics for one embedding (module-level so Pool can pickle it)"""
    embedding, X = args
    try:
        # effective_dimension takes a kernel matrix, so build one first
        kernel = FidelityKernel(embedding=embedding)
        K = kernel.compute_kernel_matrix(X[:20])
        return {
            'embedding': type(embedding).__name__,
            'expressibility': expressibility(embedding, n_samples=500),
            'trainability': trainability(embedding, X[:20]),
            'effective_dimension': effective_dimension(K)
        }
    except Exception as e:
        return {
            'embedding': type(embedding).__name__,
            'error': str(e)
        }

def compute_metrics_parallel(embeddings_list, X, n_jobs=4):
    """Compute metrics for multiple embeddings in parallel"""
    # Nested functions cannot be pickled by multiprocessing, hence the
    # module-level worker above
    with Pool(n_jobs) as pool:
        results = pool.map(_single_embedding_metrics,
                           [(emb, X) for emb in embeddings_list])

    return results

# Use parallel computation
embeddings_list = [
    AngleEmbedding(n_qubits=4),
    IQPEmbedding(n_qubits=4, depth=2),
    AmplitudeEmbedding(n_qubits=4)
]

parallel_results = compute_metrics_parallel(embeddings_list, X, n_jobs=2)

print("Parallel Metrics Results:")
for result in parallel_results:
    print(f"  {result}")

Best Practices

Metrics Interpretation Guidelines

def interpret_metrics(metrics, context=None):
    """Provide interpretation guidelines for metrics"""

    interpretations = {}

    # Expressibility interpretation
    expr = metrics.get('expressibility', 0)
    if expr > 0.8:
        interpretations['expressibility'] = "Excellent - covers state space uniformly"
    elif expr > 0.6:
        interpretations['expressibility'] = "Good - adequate state space coverage"
    elif expr > 0.4:
        interpretations['expressibility'] = "Fair - limited state space coverage"
    else:
        interpretations['expressibility'] = "Poor - very limited state space coverage"

    # Trainability interpretation
    train = metrics.get('trainability', 0)
    if train > 0.01:
        interpretations['trainability'] = "Good - strong gradient signals"
    elif train > 0.001:
        interpretations['trainability'] = "Moderate - weak but usable gradients"
    else:
        interpretations['trainability'] = "Poor - potential barren plateau"

    # Effective dimension interpretation
    eff_dim = metrics.get('effective_dimension', 0)
    data_dim = context.get('data_dimension', 0) if context else 0

    if data_dim > 0:
        if eff_dim > data_dim * 0.8:
            interpretations['effective_dimension'] = "High - preserves most data information"
        elif eff_dim > data_dim * 0.5:
            interpretations['effective_dimension'] = "Moderate - reasonable compression"
        else:
            interpretations['effective_dimension'] = "Low - significant information loss"

    return interpretations

# Example usage
sample_metrics = {
    'expressibility': 0.75,
    'trainability': 0.005,
    'effective_dimension': 3.2
}

context_info = {'data_dimension': 4}
interpretations = interpret_metrics(sample_metrics, context_info)

print("Metrics Interpretations:")
for metric, interpretation in interpretations.items():
    print(f"  {metric}: {interpretation}")

Further Reading