Generating Data: Synthesis and Sampling
Master the art of generating high-quality synthetic data using trained quantum GANs with advanced sampling techniques, post-processing, and quality control.
🎨 Basic Data Generation
Simple Generation
from qgans_pro import QuantumGAN
# Restore a previously trained model from its checkpoint file
qgan = QuantumGAN.load_from_checkpoint("path/to/checkpoint.pth")
# Ask the restored model for a fixed number of synthetic samples
n_samples = 100
synthetic_data = qgan.generate_samples(n_samples)
# Report how many samples came back and the shape of the returned data
print(f"Generated {len(synthetic_data)} samples")
print(f"Sample shape: {synthetic_data.shape}")
Batch Generation
def generate_large_dataset(qgan, total_samples=10000, batch_size=100):
    """Generate a large dataset by requesting samples in bounded batches.

    Args:
        qgan: Object exposing ``generate_samples(n)``; each call is expected
            to return a tensor that can be concatenated along dimension 0.
        total_samples: Total number of samples to produce (at least 1).
        batch_size: Largest number of samples requested per call (at least 1).

    Returns:
        All generated batches concatenated along dimension 0.

    Raises:
        ValueError: If ``total_samples`` or ``batch_size`` is smaller than 1.
    """
    if total_samples < 1:
        raise ValueError(f"total_samples must be >= 1, got {total_samples}")
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    # The progress bar is a convenience only: fall back to a plain iterator
    # when tqdm is not installed instead of failing the whole generation run.
    try:
        from tqdm import tqdm
    except ImportError:
        tqdm = None

    batch_starts = range(0, total_samples, batch_size)
    if tqdm is not None:
        progress = tqdm(batch_starts, desc="Generating samples")
    else:
        progress = batch_starts

    all_samples = []
    for batch, start in enumerate(progress):
        # The final batch is shorter when batch_size does not divide total_samples
        current_size = min(batch_size, total_samples - start)
        all_samples.append(qgan.generate_samples(current_size))
        # Memory management: release cached GPU blocks every tenth batch
        if batch % 10 == 0:
            torch.cuda.empty_cache()
    return torch.cat(all_samples, dim=0)
🎛️ Advanced Sampling Techniques
Conditional Generation
Generate samples conditioned on specific attributes:
class ConditionalQuantumGAN(QuantumGAN):
    """QuantumGAN whose generator input is random noise joined with a one-hot label."""

    def __init__(self, *args, n_classes=10, **kwargs):
        """Forward all arguments to the base class, then set up conditioning.

        Args:
            n_classes: Width of the one-hot condition vector.
        """
        super().__init__(*args, **kwargs)
        self.n_classes = n_classes
        self.setup_conditional_layers()

    def generate_conditional_samples(self, n_samples, condition):
        """Run the generator on noise paired with the requested condition.

        Args:
            n_samples: Number of rows of noise (and of labels) to build.
            condition: Column index (or per-row indices) set to 1 in the
                one-hot label matrix.

        Returns:
            Whatever ``self.generator`` returns for the combined input.
        """
        latent = torch.randn(n_samples, self.noise_dim)
        # Build the label matrix row by row: exactly one active class per sample
        labels = torch.zeros(n_samples, self.n_classes)
        labels[range(n_samples), condition] = 1
        # Noise columns first, label columns last
        generator_input = torch.cat([latent, labels], dim=1)
        return self.generator(generator_input)
# Usage: build a conditional model configured for ten classes
conditional_qgan = ConditionalQuantumGAN(n_classes=10)
# Request 50 samples, all conditioned on class index 3
class_3_samples = conditional_qgan.generate_conditional_samples(50, 3)
Latent Space Interpolation
Explore the quantum latent space:
def interpolate_in_latent_space(qgan, start_noise, end_noise, steps=10):
    """Generate samples along a straight line between two latent points.

    Args:
        qgan: Object exposing ``generate_samples_from_noise(noise)``.
        start_noise: Latent point at the start of the path, either a bare
            1-D vector or an already batched 2-D tensor.
        end_noise: Latent point at the end of the path, same shape as
            ``start_noise``.
        steps: Number of points on the path, endpoints included. With
            ``steps=1`` only the start point is used.

    Returns:
        The per-step generator outputs concatenated along dimension 0.

    Raises:
        ValueError: If ``steps`` is smaller than 1.
    """
    if steps < 1:
        raise ValueError(f"steps must be >= 1, got {steps}")

    interpolated_samples = []
    for i in range(steps):
        # A single step has no interval to divide, so stay on the start point
        alpha = i / (steps - 1) if steps > 1 else 0.0
        interpolated_noise = (1 - alpha) * start_noise + alpha * end_noise
        # Add the batch dimension only for bare vectors; batched input such as
        # (1, noise_dim) must not be wrapped a second time.
        if interpolated_noise.dim() == 1:
            interpolated_noise = interpolated_noise.unsqueeze(0)
        sample = qgan.generate_samples_from_noise(interpolated_noise)
        interpolated_samples.append(sample)
    return torch.cat(interpolated_samples, dim=0)
# Create smooth transitions: draw two random latent points of shape
# (1, noise_dim) to serve as the endpoints of the path
start_point = torch.randn(1, qgan.noise_dim)
end_point = torch.randn(1, qgan.noise_dim)
# Generate 20 samples spaced along the path between the two endpoints
transition = interpolate_in_latent_space(qgan, start_point, end_point, steps=20)
Quantum-Enhanced Sampling
Leverage quantum properties for better sampling:
class QuantumEnhancedSampler:
    """Sampler that feeds specially prepared noise to a QGAN generator.

    NOTE(review): ``create_entangled_states`` and ``get_hadamard_matrix`` are
    called below but are not defined in this snippet; they must be supplied
    elsewhere before the corresponding methods can run.
    """

    def __init__(self, qgan, enhancement_type='superposition'):
        """Store the wrapped model and the selected enhancement label."""
        self.qgan = qgan
        self.enhancement_type = enhancement_type

    def superposition_sampling(self, n_samples):
        """Generate samples from noise prepared by ``create_superposition_states``."""
        prepared_noise = self.create_superposition_states(n_samples)
        # The generator's dedicated superposition entry point consumes the noise
        return self.qgan.generator.generate_with_superposition(prepared_noise)

    def entanglement_sampling(self, n_samples, correlation_strength=0.5):
        """Generate samples from noise prepared by ``create_entangled_states``."""
        paired_noise = self.create_entangled_states(n_samples, correlation_strength)
        # Plain generator call: the correlation lives entirely in the input noise
        return self.qgan.generator(paired_noise)

    def create_superposition_states(self, n_samples):
        """Return Gaussian noise multiplied by the matrix from ``get_hadamard_matrix``."""
        dim = self.qgan.noise_dim
        gaussian = torch.randn(n_samples, dim)
        # Mix the noise coordinates with the (noise_dim-sized) transformation matrix
        mixing = self.get_hadamard_matrix(dim)
        return torch.matmul(gaussian, mixing)
🎯 Quality-Controlled Generation
Rejection Sampling
Generate high-quality samples by rejecting low-quality ones:
def quality_controlled_generation(qgan, n_samples, quality_threshold=0.8):
    """Generate samples and keep only those scoring above a quality threshold.

    Candidates are produced in rounds; each round is scored with
    ``evaluate_sample_quality`` and only candidates whose score exceeds
    ``quality_threshold`` are kept. Generation stops once enough samples are
    accepted or the attempt budget (five candidates per requested sample) is
    used up, so fewer than ``n_samples`` rows may be returned.

    Args:
        qgan: Model passed to ``generate_samples`` and to the quality scorer.
        n_samples: Number of accepted samples wanted (at least 1).
        quality_threshold: Minimum score (exclusive) for acceptance.

    Returns:
        Accepted samples concatenated along dimension 0, truncated to at most
        ``n_samples`` rows.

    Raises:
        ValueError: If ``n_samples`` is smaller than 1.
    """
    if n_samples < 1:
        raise ValueError(f"n_samples must be >= 1, got {n_samples}")

    accepted_samples = []
    # Count accepted rows explicitly: len(accepted_samples) is the number of
    # rounds, not the number of samples, and must not drive the loop.
    n_accepted = 0
    attempts = 0
    max_attempts = n_samples * 5  # Prevent infinite loops
    candidates_per_round = min(100, n_samples * 2)

    while n_accepted < n_samples and attempts < max_attempts:
        # Generate candidate samples
        candidates = qgan.generate_samples(candidates_per_round)
        # Evaluate quality
        quality_scores = evaluate_sample_quality(qgan, candidates)
        # Accept samples above threshold
        accepted = candidates[quality_scores > quality_threshold]
        accepted_samples.append(accepted)
        n_accepted += len(accepted)
        # Always advance the budget so an empty candidate batch cannot stall the loop
        attempts += max(len(candidates), 1)
        print(f"Accepted {len(accepted)}/{len(candidates)} samples")

    # Combine and truncate to desired number
    all_accepted = torch.cat(accepted_samples, dim=0)
    return all_accepted[:n_samples]
def evaluate_sample_quality(qgan, samples):
    """Score samples with the discriminator, squashed through a sigmoid.

    Args:
        qgan: Object exposing a callable ``discriminator``.
        samples: Batch handed to the discriminator unchanged.

    Returns:
        A 1-D tensor of scores in (0, 1). Assumes the discriminator emits one
        value per sample (confirm against the model definition).
    """
    # Use discriminator as quality metric; no gradients are needed for scoring
    with torch.no_grad():
        quality_scores = torch.sigmoid(qgan.discriminator(samples))
    # Flatten instead of squeeze(): squeeze() would collapse a batch of one to
    # a 0-d tensor, which cannot be used as a per-sample mask by callers.
    return quality_scores.reshape(-1)
Progressive Quality Enhancement
Iteratively improve sample quality:
class ProgressiveQualityEnhancer:
    """Repeatedly replace low-scoring samples with freshly generated ones."""

    def __init__(self, qgan, enhancement_steps=5, quality_threshold=0.7):
        """Configure the enhancer.

        Args:
            qgan: Model used both for scoring and for regenerating samples.
            enhancement_steps: Maximum number of regenerate-and-rescore rounds.
            quality_threshold: Samples scoring strictly below this value are
                regenerated.
        """
        self.qgan = qgan
        self.enhancement_steps = enhancement_steps
        self.quality_threshold = quality_threshold

    def enhance_samples(self, initial_samples):
        """Return a copy of ``initial_samples`` with low-quality rows regenerated.

        The input tensor is never modified. Rounds stop early as soon as no
        sample scores below the threshold.
        """
        enhanced_samples = initial_samples.clone()
        for step in range(self.enhancement_steps):
            print(f"Enhancement step {step + 1}/{self.enhancement_steps}")
            # Evaluate current quality; flatten so that a single-sample batch
            # still yields a one-element mask rather than a 0-d tensor.
            quality_scores = evaluate_sample_quality(self.qgan, enhanced_samples).reshape(-1)
            # Identify low-quality samples
            low_quality_mask = quality_scores < self.quality_threshold
            n_low_quality = int(low_quality_mask.sum().item())
            if n_low_quality == 0:
                break
            # Regenerate exactly as many samples as were rejected
            new_samples = self.qgan.generate_samples(n_low_quality)
            enhanced_samples[low_quality_mask] = new_samples
            print(f"Regenerated {n_low_quality} low-quality samples")
        return enhanced_samples
📊 Post-Processing and Refinement
Data Normalization and Scaling
def post_process_generated_data(samples, processing_type='normalize'):
    """Apply one post-processing step to generated samples.

    Args:
        samples: Tensor of generated values.
        processing_type: One of ``'normalize'`` (min-max to [0, 1]),
            ``'standardize'`` (zero mean, unit std), ``'clip'`` (clamp to
            [0, 1]) or ``'quantize'`` (round to multiples of 1/255). Any other
            value returns ``samples`` unchanged.

    Returns:
        The processed tensor. Degenerate inputs never produce NaN or inf: a
        constant tensor normalizes to all zeros, and a tensor whose standard
        deviation is zero or undefined is only mean-centered.
    """
    # Reductions such as min()/max() are undefined on empty tensors
    if samples.numel() == 0:
        return samples

    if processing_type == 'normalize':
        # Normalize to [0, 1]
        low = samples.min()
        span = samples.max() - low
        if span == 0:
            # Constant input: dividing by the zero range would yield NaN
            return torch.zeros_like(samples)
        return (samples - low) / span

    if processing_type == 'standardize':
        # Standardize to mean=0, std=1
        centered = samples - samples.mean()
        std = samples.std()
        if not torch.isfinite(std) or std == 0:
            # Single-element or constant input has no usable spread
            return centered
        return centered / std

    if processing_type == 'clip':
        # Clip to valid range
        return torch.clamp(samples, 0, 1)

    if processing_type == 'quantize':
        # Quantize to discrete values (e.g., for images)
        return torch.round(samples * 255) / 255

    return samples
Noise Reduction
Remove quantum noise artifacts:
class QuantumNoiseReducer:
    """Dispatch generated samples to a configurable denoising method.

    NOTE(review): ``median_filter_denoising`` and ``wavelet_denoising`` are
    dispatched to below but are not defined in this snippet; selecting those
    methods requires them to be provided elsewhere.
    """

    def __init__(self, method='gaussian_filter'):
        """Store the name of the denoising method to apply."""
        self.method = method

    def reduce_noise(self, samples):
        """Denoise ``samples`` with the configured method.

        Unknown method names are a deliberate pass-through: the input is
        returned unchanged.
        """
        if self.method == 'gaussian_filter':
            return self.gaussian_filter_denoising(samples)
        elif self.method == 'median_filter':
            return self.median_filter_denoising(samples)
        elif self.method == 'wavelet':
            return self.wavelet_denoising(samples)
        else:
            return samples

    def gaussian_filter_denoising(self, samples):
        """Smooth every sample with a Gaussian filter.

        3-D samples are treated as (C, H, W) images and filtered per channel
        with sigma 0.5; all other samples are filtered with sigma 0.3. Each
        output keeps the device and dtype of its input sample.

        Args:
            samples: Batch of tensors, iterated along its first dimension.

        Returns:
            The filtered samples stacked into one tensor; an empty batch is
            returned unchanged.
        """
        from scipy.ndimage import gaussian_filter

        # torch.stack cannot build a tensor from zero samples
        if len(samples) == 0:
            return samples

        denoised = []
        for sample in samples:
            if sample.dim() == 3:  # Image data (C, H, W)
                # Sigma 0 on the channel axis disables filtering along it, so
                # every channel is smoothed independently in H and W.
                sigma = (0, 0.5, 0.5)
            else:
                # For other data types, apply simpler smoothing
                sigma = 0.3
            # detach() allows tensors that still track gradients to be exported
            filtered = gaussian_filter(sample.detach().cpu().numpy(), sigma=sigma)
            denoised.append(
                torch.from_numpy(filtered).to(device=sample.device, dtype=sample.dtype)
            )
        return torch.stack(denoised)
Artifact Removal
Remove quantum circuit artifacts:
def remove_quantum_artifacts(samples, artifact_types=('boundary_effects', 'discretization')):
    """Apply the selected artifact-removal passes to a copy of ``samples``.

    Args:
        samples: Tensor of generated samples; it is cloned, never modified.
        artifact_types: Collection of pass names to run. Recognized names are
            ``'boundary_effects'``, ``'discretization'`` and ``'phase_errors'``;
            they always run in that order. Other names are ignored.

    Returns:
        The cleaned tensor.
    """
    # The default is an immutable tuple so it can never be altered between calls
    cleaned_samples = samples.clone()
    if 'boundary_effects' in artifact_types:
        # Remove boundary artifacts from periodic quantum gates
        cleaned_samples = remove_boundary_effects(cleaned_samples)
    if 'discretization' in artifact_types:
        # Smooth discretization artifacts
        cleaned_samples = smooth_discretization(cleaned_samples)
    if 'phase_errors' in artifact_types:
        # Correct phase-related artifacts
        cleaned_samples = correct_phase_errors(cleaned_samples)
    return cleaned_samples
def remove_boundary_effects(samples, padding=2):
    """Replace the outer border of each image with a reflection of its interior.

    Padding an image and then cropping the same margin returns the input
    unchanged, so border values would never be touched. Instead the outer
    ``padding`` pixels are cropped away and rebuilt by reflect-padding the
    remaining interior, which keeps the original height and width.

    Args:
        samples: Tensor of samples. Only 4-D input (batch of images, spatial
            dimensions last) is processed.
        padding: Width in pixels of the border to replace.

    Returns:
        A tensor of the same shape with a rebuilt border, or ``samples``
        itself when it is not 4-D, ``padding`` is not positive, or the images
        are too small for the reflection.
    """
    if samples.dim() != 4 or padding <= 0:
        return samples
    height, width = samples.shape[-2:]
    # Reflect padding needs the interior to be strictly larger than the pad width
    if height - 2 * padding <= padding or width - 2 * padding <= padding:
        return samples
    interior = samples[:, :, padding:-padding, padding:-padding]
    return F.pad(interior, (padding, padding, padding, padding), mode='reflect')
🔄 Sampling Strategies
Importance Sampling
Focus on important regions of the data distribution:
def importance_sampling(qgan, n_samples, importance_weights=None):
    """Generate ``n_samples`` outputs from noise drawn via importance weights.

    Args:
        qgan: Model exposing ``generate_samples_from_noise``.
        n_samples: Number of noise draws (one generator call per draw).
        importance_weights: Weights handed to
            ``sample_from_importance_distribution``; when omitted they are
            obtained from ``compute_importance_weights(qgan)``.

    Returns:
        The individual generator outputs concatenated along dimension 0.
    """
    weights = importance_weights
    if weights is None:
        # Fall back to weights derived from the discriminator's confidence
        weights = compute_importance_weights(qgan)

    # Lazily draw one noise input per requested sample ...
    noise_draws = (
        sample_from_importance_distribution(weights) for _ in range(n_samples)
    )
    # ... and push each draw through the generator as soon as it is produced
    generated = [qgan.generate_samples_from_noise(noise) for noise in noise_draws]
    return torch.cat(generated, dim=0)
def compute_importance_weights(qgan, n_probe_samples=1000):
    """Derive normalized importance weights from discriminator confidence.

    Args:
        qgan: Model exposing ``generate_samples`` and ``discriminator``.
        n_probe_samples: Number of probe samples to generate and score.

    Returns:
        Weights proportional to the inverse of each probe's sigmoid
        confidence, scaled so that they sum to 1.
    """
    probes = qgan.generate_samples(n_probe_samples)
    # Scoring only: gradients are not needed for the discriminator pass
    with torch.no_grad():
        scores = torch.sigmoid(qgan.discriminator(probes))
    # Low confidence -> large weight; the epsilon guards against division by zero
    inverse_confidence = 1.0 / (scores + 1e-8)
    normalizer = inverse_confidence.sum()
    return inverse_confidence / normalizer
Metropolis-Hastings Sampling
Use MCMC for better sampling:
class QuantumMCMCSampler:
    """Metropolis-Hastings sampler over the generator's noise space."""

    def __init__(self, qgan, step_size=0.1, n_chains=4):
        """Store the model, the proposal step size and the number of chains."""
        self.qgan = qgan
        self.step_size = step_size
        self.n_chains = n_chains

    def sample(self, n_samples, burn_in=100):
        """Run all chains and collect generator outputs after burn-in.

        Every chain contributes one output per post-burn-in iteration, so the
        result holds ``n_samples * n_chains`` generator outputs concatenated
        along dimension 0.

        Args:
            n_samples: Number of post-burn-in iterations.
            burn_in: Number of initial iterations whose states are discarded.
        """
        # One independent Gaussian starting point per chain
        chains = [
            torch.randn(1, self.qgan.noise_dim) for _ in range(self.n_chains)
        ]
        collected = []
        total_iterations = burn_in + n_samples

        for step in range(total_iterations):
            keep = step >= burn_in
            for idx in range(self.n_chains):
                state = chains[idx]
                # Random-walk proposal around the current state
                candidate = state + self.step_size * torch.randn_like(state)
                # Score the current state first, then the candidate
                state_logp = self.log_probability(state)
                candidate_logp = self.log_probability(candidate)
                acceptance = torch.exp(candidate_logp - state_logp)
                # Accept with probability min(1, acceptance)
                if torch.rand(1) < acceptance:
                    chains[idx] = candidate
                # Burn-in iterations only move the chain; later ones also emit
                if keep:
                    collected.append(
                        self.qgan.generate_samples_from_noise(chains[idx])
                    )
        return torch.cat(collected, dim=0)

    def log_probability(self, noise):
        """Return the log of the discriminator's sigmoid score for ``noise``.

        The discriminator score of the generated sample stands in for the
        (unknown) probability of the noise vector.
        """
        generated = self.qgan.generate_samples_from_noise(noise)
        with torch.no_grad():
            score = torch.sigmoid(self.qgan.discriminator(generated))
            # Epsilon keeps the logarithm finite when the score reaches zero
            log_score = torch.log(score + 1e-8)
        return log_score
📈 Generation Monitoring and Validation
Real-time Quality Monitoring
class GenerationMonitor:
    """Collect per-batch generation metrics and flag suspicious trends.

    NOTE(review): ``compute_fid`` and ``compute_mmd`` are called when
    reference data is present but are not defined in this snippet.
    """

    def __init__(self, qgan, reference_data=None):
        """Store the model, optional reference data and an empty history."""
        self.qgan = qgan
        self.reference_data = reference_data
        self.metrics_history = []

    def monitor_generation_batch(self, generated_samples):
        """Compute metrics for one batch, record them and return them.

        The returned dict holds basic statistics (``mean``, ``std``, ``min``,
        ``max``), discriminator confidence (``avg_confidence``,
        ``min_confidence``), ``diversity`` and, only when reference data was
        supplied, ``fid`` and ``mmd``.
        """
        # Basic statistics over every value in the batch
        metrics = {
            'mean': generated_samples.mean().item(),
            'std': generated_samples.std().item(),
            'min': generated_samples.min().item(),
            'max': generated_samples.max().item(),
        }

        # Discriminator confidence, computed without tracking gradients
        with torch.no_grad():
            confidence = torch.sigmoid(self.qgan.discriminator(generated_samples))
        metrics['avg_confidence'] = confidence.mean().item()
        metrics['min_confidence'] = confidence.min().item()

        # Diversity of the batch
        metrics['diversity'] = self.compute_diversity(generated_samples)

        # Comparison metrics need reference data to compare against
        if self.reference_data is not None:
            metrics['fid'] = self.compute_fid(generated_samples)
            metrics['mmd'] = self.compute_mmd(generated_samples)

        self.metrics_history.append(metrics)
        return metrics

    def compute_diversity(self, samples):
        """Return the mean pairwise distance among the first (up to 100) samples."""
        # Cap the subset size to keep the pairwise computation cheap
        count = min(100, len(samples))
        flattened = samples[:count].view(count, -1)
        pairwise = torch.cdist(flattened, flattened)
        return pairwise.mean().item()

    def detect_generation_issues(self):
        """Inspect the ten most recent batches and return a list of warnings.

        Returns an empty list until at least ten batches have been recorded.
        """
        if len(self.metrics_history) < 10:
            return []

        window = self.metrics_history[-10:]
        found = []

        # A clearly negative linear trend in confidence signals falling quality
        confidence_series = [entry['avg_confidence'] for entry in window]
        trend = np.polyfit(range(10), confidence_series, 1)[0]
        if trend < -0.01:
            found.append("Decreasing generation quality detected")

        # Very small average pairwise distance suggests mode collapse
        diversity_series = [entry['diversity'] for entry in window]
        if np.mean(diversity_series) < 0.1:
            found.append("Potential mode collapse in generation")

        # Almost no variation of the batch std across batches is flagged as saturation
        std_series = [entry['std'] for entry in window]
        if np.std(std_series) < 0.01:
            found.append("Output saturation detected")

        return found
A/B Testing for Generation Quality
def ab_test_generation_methods(qgan, method_a, method_b, n_samples=1000, n_tests=10):
    """Compare two generation methods over repeated trials with a t-test.

    Args:
        qgan: Model passed as first argument to both methods.
        method_a: Callable ``(qgan, n_samples)`` returning generated samples.
        method_b: Second callable with the same signature.
        n_samples: Number of samples requested from each method per trial.
        n_tests: Number of trials.

    Returns:
        ``(results_a, results_b, p_value)``: the per-trial quality scores of
        each method and the p-value of an independent two-sample t-test.
    """
    results_a, results_b = [], []

    for trial in range(n_tests):
        print(f"A/B Test {trial + 1}/{n_tests}")
        # Method A is always generated and scored before method B
        results_a.append(evaluate_generation_quality(method_a(qgan, n_samples)))
        results_b.append(evaluate_generation_quality(method_b(qgan, n_samples)))

    # Statistical comparison of the two score lists
    from scipy.stats import ttest_ind
    _, p_value = ttest_ind(results_a, results_b)

    for label, scores in (("A", results_a), ("B", results_b)):
        print(f"Method {label} mean score: {np.mean(scores):.4f} ± {np.std(scores):.4f}")
    print(f"Statistical significance (p-value): {p_value:.4f}")

    # Written as a negated "<" so an undefined (NaN) p-value counts as "not significant"
    if not (p_value < 0.05):
        print("No significant difference between methods")
    else:
        if np.mean(results_a) > np.mean(results_b):
            winner = "A"
        else:
            winner = "B"
        print(f"Method {winner} is significantly better")

    return results_a, results_b, p_value
🎨 Specialized Generation Techniques
Style Transfer Generation
Apply specific styles to generated content:
class StyleTransferGenerator:
    """Generate samples and restyle each one using features of a style image."""

    def __init__(self, qgan, style_encoder):
        """Store the generating model and the callable that encodes a style image."""
        self.qgan = qgan
        self.style_encoder = style_encoder

    def generate_with_style(self, n_samples, style_image):
        """Generate ``n_samples`` styled samples.

        Args:
            n_samples: Number of samples to generate, one at a time.
            style_image: Input handed to ``style_encoder`` once per call.

        Returns:
            The styled samples concatenated along dimension 0.
        """
        # Extract style features once; they are shared by every sample
        style_features = self.style_encoder(style_image)
        styled_samples = []
        for _ in range(n_samples):
            # Generate base sample
            base_sample = self.qgan.generate_samples(1)
            # Apply style transfer
            styled_sample = self.apply_style_transfer(base_sample, style_features)
            styled_samples.append(styled_sample)
        return torch.cat(styled_samples, dim=0)

    def apply_style_transfer(self, content, style_features):
        """Apply neural style transfer to ``content``.

        This is an extension point: the implementation depends on the chosen
        style transfer method and must be supplied by a subclass.

        Raises:
            NotImplementedError: Always, until overridden. Returning ``None``
                silently would only fail later, inside ``torch.cat``.
        """
        raise NotImplementedError(
            "apply_style_transfer must be implemented for the chosen style transfer method"
        )
Multi-Scale Generation
Generate at multiple resolutions:
class MultiScaleGenerator:
    """Generate or upscale samples across a list of resolutions.

    NOTE(review): ``adapt_generator_for_scale`` and ``upscale_samples`` are
    called below but are not defined in this snippet.
    """

    def __init__(self, qgan, scales=(64, 128, 256)):
        """Store the model and a private copy of the target scales.

        Args:
            qgan: The generating model.
            scales: Iterable of target resolutions, smallest first.
        """
        self.qgan = qgan
        # Copy into a fresh list so instances never share (or mutate) the
        # default or a caller-owned sequence.
        self.scales = list(scales)

    def generate_multiscale(self, n_samples):
        """Return a dict mapping each scale to ``n_samples`` generated samples."""
        multiscale_samples = {}
        for scale in self.scales:
            # Adjust generator for this scale
            scaled_generator = self.adapt_generator_for_scale(scale)
            # Generate samples
            multiscale_samples[scale] = scaled_generator.generate_samples(n_samples)
        return multiscale_samples

    def progressive_upscaling(self, base_samples):
        """Upscale ``base_samples`` step by step through every scale after the first."""
        current_samples = base_samples
        # The first scale is taken to be the resolution of base_samples itself
        for target_scale in self.scales[1:]:
            current_samples = self.upscale_samples(current_samples, target_scale)
        return current_samples
🎯 Generation Best Practices
Performance Optimization
# Efficient batch generation
def efficient_batch_generation(qgan, total_samples, batch_size=100):
    """Fill a pre-allocated tensor with samples generated batch by batch.

    Args:
        qgan: Model exposing ``sample_shape``, ``device`` and
            ``generate_samples(n)``.
        total_samples: Number of rows in the result.
        batch_size: Largest number of samples requested per call.

    Returns:
        A float32 tensor of shape ``(total_samples, *qgan.sample_shape)`` on
        ``qgan.device``.
    """
    # Allocate the full output once instead of concatenating batches later
    output = torch.empty(
        (total_samples, *qgan.sample_shape),
        dtype=torch.float32,
        device=qgan.device,
    )

    for batch_no, lo in enumerate(range(0, total_samples, batch_size)):
        # The last slice is shorter when batch_size does not divide total_samples
        hi = min(lo + batch_size, total_samples)
        output[lo:hi] = qgan.generate_samples(hi - lo)
        # Release cached GPU memory on every tenth batch
        if batch_no % 10 == 0:
            torch.cuda.empty_cache()

    return output
Quality Assurance
def quality_assurance_pipeline(samples):
    """Clean generated samples and return only those scoring above 0.7.

    The stages run in a fixed order: corrupted-sample removal, noise
    reduction, normalization, then a final score-based filter.
    """
    # Stages 1-3: each stage consumes the output of the previous one
    batch = remove_corrupted_samples(samples)
    batch = apply_noise_reduction(batch)
    batch = normalize_samples(batch)

    # Stage 4: score the fully processed batch and keep the high scorers
    scores = evaluate_sample_quality_scores(batch)
    keep = scores > 0.7
    return batch[keep]
Generation Guidelines
Do's:
- ✅ Use batch generation for efficiency
- ✅ Apply post-processing for quality improvement
- ✅ Monitor generation quality in real-time
- ✅ Use appropriate sampling techniques for your use case
- ✅ Validate generated samples against real data
- ✅ Save generation parameters for reproducibility
Don'ts:
- ❌ Generate too many samples at once (memory issues)
- ❌ Ignore quantum noise artifacts
- ❌ Skip quality validation
- ❌ Use the same noise patterns repeatedly
- ❌ Forget to set random seeds for reproducibility
Memory Management
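For large-scale generation, produce and process samples in batches (for example, with a Python generator function that yields one batch at a time) so that the full dataset never has to fit in memory at once.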
Quantum Noise
Generated samples may contain quantum noise artifacts. Apply appropriate post-processing techniques to improve quality.
Computational Cost
Quantum circuit simulation for generation can be computationally expensive. Plan your resources accordingly.