Stability-First Neural Agent: Robust Training, Adaptive Learning and Intelligent Decisions
Overview
This tutorial walks through building an Advanced Neural Agent that prioritizes stability at every stage: initialization, activations, backward passes, and training loop controls. The design combines classic neural techniques (Xavier init, momentum, L2 weight decay) with modern stability measures (clipping, normalization, adaptive LR, resets) and agent-like features (experience replay, exploratory decisions). The full Python implementation is included below so you can run demos and adapt configurations.
Key imports and setup
We rely on NumPy, Matplotlib and scikit-learn for data generation, preprocessing and splitting. Warnings are suppressed to keep outputs focused.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification, make_regression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')
Core architecture and stable building blocks
The agent is implemented as a lightweight feedforward network built with Xavier-style limits for balanced gradients. Activations include leaky ReLU, sigmoid and tanh, and all pre-activation values are clipped to avoid overflow. Momentum buffers and simple weight decay stabilize updates.
class AdvancedNeuralAgent:
def __init__(self, input_size, hidden_layers=[64, 32], output_size=1, learning_rate=0.001):
"""Advanced AI Agent with stable training and decision making capabilities"""
self.lr = learning_rate
self.initial_lr = learning_rate
self.layers = []
self.memory = []
self.performance_history = []
self.epsilon = 1e-8
layer_sizes = [input_size] + hidden_layers + [output_size]
for i in range(len(layer_sizes) - 1):
fan_in, fan_out = layer_sizes[i], layer_sizes[i+1]
limit = np.sqrt(6.0 / (fan_in + fan_out))
layer = {
'weights': np.random.uniform(-limit, limit, (layer_sizes[i], layer_sizes[i+1])),
'bias': np.zeros((1, layer_sizes[i+1])),
'momentum_w': np.zeros((layer_sizes[i], layer_sizes[i+1])),
'momentum_b': np.zeros((1, layer_sizes[i+1]))
}
self.layers.append(layer)
def activation(self, x, func='relu'):
"""Stable activation functions with clipping"""
x = np.clip(x, -50, 50)
if func == 'relu':
return np.maximum(0, x)
elif func == 'sigmoid':
return 1 / (1 + np.exp(-x))
elif func == 'tanh':
return np.tanh(x)
elif func == 'leaky_relu':
return np.where(x > 0, x, x * 0.01)
elif func == 'linear':
return x
def activation_derivative(self, x, func='relu'):
"""Stable derivatives"""
x = np.clip(x, -50, 50)
if func == 'relu':
return (x > 0).astype(float)
elif func == 'sigmoid':
s = self.activation(x, 'sigmoid')
return s * (1 - s)
elif func == 'tanh':
return 1 - np.tanh(x)**2
elif func == 'leaky_relu':
return np.where(x > 0, 1, 0.01)
elif func == 'linear':
return np.ones_like(x)
def forward(self, X):
"""Forward pass with gradient clipping"""
self.activations = [X]
self.z_values = []
current_input = X
for i, layer in enumerate(self.layers):
z = np.dot(current_input, layer['weights']) + layer['bias']
z = np.clip(z, -50, 50)
self.z_values.append(z)
if i < len(self.layers) - 1:
a = self.activation(z, 'leaky_relu')
else:
a = self.activation(z, 'linear')
self.activations.append(a)
current_input = a
return current_input
def clip_gradients(self, gradients, max_norm=1.0):
"""Gradient clipping to prevent explosion"""
grad_norm = np.linalg.norm(gradients)
if grad_norm > max_norm:
gradients = gradients * (max_norm / (grad_norm + self.epsilon))
return gradients
def backward(self, X, y, output):
"""Stable backpropagation with gradient clipping"""
m = X.shape[0]
dz = (output - y.reshape(-1, 1)) / m
dz = np.clip(dz, -10, 10)
for i in reversed(range(len(self.layers))):
layer = self.layers[i]
dw = np.dot(self.activations[i].T, dz)
db = np.sum(dz, axis=0, keepdims=True)
dw = self.clip_gradients(dw, max_norm=1.0)
db = self.clip_gradients(db, max_norm=1.0)
momentum = 0.9
layer['momentum_w'] = momentum * layer['momentum_w'] + (1 - momentum) * dw
layer['momentum_b'] = momentum * layer['momentum_b'] + (1 - momentum) * db
weight_decay = 0.0001
layer['weights'] -= self.lr * (layer['momentum_w'] + weight_decay * layer['weights'])
layer['bias'] -= self.lr * layer['momentum_b']
if i > 0:
activation_func = 'leaky_relu' if i > 1 else 'leaky_relu'
dz = np.dot(dz, layer['weights'].T) * self.activation_derivative(
self.z_values[i-1], activation_func)
dz = np.clip(dz, -10, 10)
def adapt_learning_rate(self, epoch, performance_history):
"""Adaptive learning rate with performance-based adjustment"""
if epoch > 10:
recent_performance = performance_history[-10:]
if len(recent_performance) >= 5:
if recent_performance[-1] >= recent_performance[-5]:
self.lr = max(self.lr * 0.95, self.initial_lr * 0.01)
elif recent_performance[-1] < recent_performance[-5] * 0.98:
self.lr = min(self.lr * 1.02, self.initial_lr * 2)
def calculate_loss(self, y_true, y_pred):
"""Stable loss calculation"""
y_true = y_true.reshape(-1, 1)
y_pred = np.clip(y_pred, -1e6, 1e6)
mse = np.mean((y_true - y_pred) ** 2)
mae = np.mean(np.abs(y_true - y_pred))
if not np.isfinite(mse):
mse = 1e6
if not np.isfinite(mae):
mae = 1e6
return mse, mae
def store_experience(self, state, action, reward, next_state):
"""Experience replay for RL aspects"""
experience = {
'state': state,
'action': action,
'reward': reward,
'next_state': next_state,
'timestamp': len(self.memory)
}
self.memory.append(experience)
if len(self.memory) > 1000:
self.memory.pop(0)
def make_decision(self, X, exploration_rate=0.1):
"""Stable decision making"""
prediction = self.forward(X)
if np.random.random() < exploration_rate:
noise_scale = np.std(prediction) * 0.1 if np.std(prediction) > 0 else 0.1
noise = np.random.normal(0, noise_scale, prediction.shape)
prediction += noise
return np.clip(prediction, -1e6, 1e6)
def reset_if_unstable(self):
"""Reset network if training becomes unstable"""
print(" Resetting network due to instability...")
for i, layer in enumerate(self.layers):
fan_in, fan_out = layer['weights'].shape
limit = np.sqrt(6.0 / (fan_in + fan_out))
layer['weights'] = np.random.uniform(-limit, limit, (fan_in, fan_out))
layer['bias'] = np.zeros((1, fan_out))
layer['momentum_w'] = np.zeros((fan_in, fan_out))
layer['momentum_b'] = np.zeros((1, fan_out))
self.lr = self.initial_lr
def train(self, X, y, epochs=500, batch_size=32, validation_split=0.2, verbose=True):
"""Robust training with stability checks"""
y_mean, y_std = np.mean(y), np.std(y)
y_normalized = (y - y_mean) / (y_std + self.epsilon)
X_trn, X_val, y_trn, y_val = train_test_split(
X, y_normalized, test_size=validation_split, random_state=42)
best_val_loss = float('inf')
patience = 30
patience_counter = 0
train_losses, val_losses = [], []
reset_count = 0
for epoch in range(epochs):
if epoch > 0 and (not np.isfinite(train_losses[-1]) or train_losses[-1] > 1e6):
if reset_count < 2:
self.reset_if_unstable()
reset_count += 1
continue
else:
print(" Training unstable, stopping...")
break
indices = np.random.permutation(len(X_train))
X_train_shuffled = X_train[indices]
y_train_shuffled = y_train[indices]
epoch_loss = 0
batches = 0
for i in range(0, len(X_trn), batch_size):
batch_X = X_train_shuffled[i:i+batch_size]
batch_y = y_train_shuffled[i:i+batch_size]
if len(batch_X) == 0:
continue
output = self.forward(batch_X)
self.backward(batch_X, batch_y, output)
loss, _ = self.calculate_loss(batch_y, output)
epoch_loss += loss
batches += 1
avg_train_loss = epoch_loss / max(batches, 1)
val_output = self.forward(X_val)
val_loss, val_mae = self.calculate_loss(y_val, val_output)
train_losses.append(avg_train_loss)
val_losses.append(val_loss)
self.performance_history.append(val_loss)
if val_loss < best_val_loss:
best_val_loss = val_loss
patience_counter = 0
else:
patience_counter += 1
if patience_counter >= patience:
if verbose:
print(f" Early stopping at epoch {epoch}")
break
if epoch > 0:
self.adapt_learning_rate(epoch, self.performance_history)
if verbose and (epoch % 50 == 0 or epoch < 10):
print(f"Epoch {epoch:3d}: Train Loss = {avg_train_loss:.4f}, "
f"Val Loss = {val_loss:.4f}, LR = {self.lr:.6f}")
self.y_mean, self.y_std = y_mean, y_std
return train_losses, val_losses
def predict(self, X):
"""Make predictions with denormalization"""
normalized_pred = self.forward(X)
if hasattr(self, 'y_mean') and hasattr(self, 'y_std'):
return normalized_pred * self.y_std + self.y_mean
return normalized_pred
def evaluate_performance(self, X, y):
"""Comprehensive performance evaluation"""
predictions = self.predict(X)
mse, mae = self.calculate_loss(y, predictions)
y_mean = np.mean(y)
ss_tot = np.sum((y - y_mean) ** 2)
ss_res = np.sum((y.reshape(-1, 1) - predictions) ** 2)
r2 = 1 - (ss_res / (ss_tot + self.epsilon))
return {
'mse': float(mse) if np.isfinite(mse) else float('inf'),
'mae': float(mae) if np.isfinite(mae) else float('inf'),
'r2': float(r2) if np.isfinite(r2) else -float('inf'),
'predictions': predictions.flatten()
}
def visualize_training(self, train_losses, val_losses):
"""Visualize training progress"""
plt.figure(figsize=(15, 5))
plt.subplot(1, 3, 1)
plt.plot(train_losses, label='Training Loss', alpha=0.8)
plt.plot(val_losses, label='Validation Loss', alpha=0.8)
plt.title('Training Progress')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True, alpha=0.3)
plt.yscale('log')
plt.subplot(1, 3, 2)
if len(self.performance_history) > 0:
plt.plot(self.performance_history)
plt.title('Performance History')
plt.xlabel('Epoch')
plt.ylabel('Validation Loss')
plt.grid(True, alpha=0.3)
plt.yscale('log')
plt.subplot(1, 3, 3)
if hasattr(self, 'lr_history'):
plt.plot(self.lr_history)
plt.title('Learning Rate Schedule')
plt.xlabel('Epoch')
plt.ylabel('Learning Rate')
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
Training practices and robustness features
The training loop normalizes targets, uses mini-batches, applies gradient clipping and momentum-based updates, includes L2 regularization, and features adaptive LR and early stopping. If the training signals blow up, the agent can reset its weights and retry, limiting the number of resets to avoid endless loops.
Losses and metrics
The implementation computes MSE and MAE while guarding against numerical issues (clipping predictions and substituting large finite placeholders). R² is computed in evaluation to provide a familiar regression metric. These choices ensure reliable logging and comparisons between different runs.
Decision-making and agent behavior
Experience replay is implemented via a simple memory buffer with a cap. make_decision adds controlled Gaussian noise when exploring. This turns the model from a pure regressor into an RL-style agent capable of exploration and exploitation trade-offs.
Demo utilities and experiment harness
AIAgentDemo orchestrates dataset generation (regression, complex regression, classification-to-regression), parameter sweeps across ’lightweight’, ‘standard’, ‘deep’ and ‘wide’ configurations, and records performances to rank best performers. The demo prints summary metrics and can probe memory, exploration vs exploitation behavior, and network complexity.
class AIAgentDemo:
"""Demo class for testing the AI Agent with various scenarios"""
def __init__(self):
self.agents = {}
self.results = {}
def generate_datasets(self):
"""Generate multiple test datasets"""
datasets = {}
X1, y1 = make_regression(n_samples=600, n_features=5, n_informative=4,
noise=0.1, random_state=42)
datasets['simple'] = (X1, y1, "Simple Regression")
X2, y2 = make_regression(n_samples=800, n_features=10, n_informative=8,
noise=0.2, random_state=123)
datasets['complex'] = (X2, y2, "Complex Regression")
X3, y3 = make_classification(n_samples=700, n_features=8, n_informative=6,
n_classes=2, random_state=456)
y3 = y3.astype(float) + np.random.normal(0, 0.1, len(y3))
datasets['classification'] = (X3, y3, "Classification-to-Regression")
return datasets
def test_agent_configuration(self, config_name, X, y, **agent_params):
"""Test agent with specific configuration"""
print(f"\n Testing {config_name}...")
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
default_params = {
'input_size': X_scaled.shape[1],
'hidden_layers': [32, 16],
'output_size': 1,
'learning_rate': 0.005
}
default_params.update(agent_params)
agent = AdvancedNeuralAgent(**default_params)
try:
train_losses, val_losses = agent.train(
X_scaled, y, epochs=150, batch_size=32, verbose=False)
X_trn, X_test, y_trn, y_test = train_test_split(
X_scaled, y, test_size=0.2, random_state=42)
performance = agent.evaluate_performance(X_test, y_test)
self.agents[config_name] = agent
self.results[config_name] = {
'performance': performance,
'train_losses': train_losses,
'val_losses': val_losses,
'data_shape': X_scaled.shape
}
print(f" {config_name}: R²={performance['r2']:.3f}, MSE={performance['mse']:.3f}")
return True
except Exception as e:
print(f" {config_name} failed: {str(e)[:50]}...")
return False
def run_comprehensive_demo(self):
"""Run comprehensive testing of the AI agent"""
print(" COMPREHENSIVE AI AGENT DEMO")
print("=" * 60)
datasets = self.generate_datasets()
configs = {
'lightweight': {'hidden_layers': [16, 8], 'learning_rate': 0.01},
'standard': {'hidden_layers': [32, 16], 'learning_rate': 0.005},
'deep': {'hidden_layers': [64, 32, 16], 'learning_rate': 0.003},
'wide': {'hidden_layers': [128, 64], 'learning_rate': 0.002}
}
success_count = 0
total_tests = len(datasets) * len(configs)
for dataset_name, (X, y, desc) in datasets.items():
print(f"\n Dataset: {desc} - Shape: {X.shape}")
print(f"Target range: [{np.min(y):.2f}, {np.max(y):.2f}]")
for config_name, config_params in configs.items():
test_name = f"{dataset_name}_{config_name}"
if self.test_agent_configuration(test_name, X, y, **config_params):
success_count += 1
print(f"\n OVERALL RESULTS: {success_count}/{total_tests} tests successful")
if self.results:
self.show_best_performers()
self.demonstrate_agent_intelligence()
def show_best_performers(self):
"""Show top performing configurations"""
print(f"\n TOP PERFORMERS:")
sorted_results = sorted(self.results.items(),
key=lambda x: x[1]['performance']['r2'],
reverse=True)
for i, (name, result) in enumerate(sorted_results[:5]):
perf = result['performance']
print(f"{i+1}. {name}: R²={perf['r2']:.3f}, MSE={perf['mse']:.3f}, MAE={perf['mae']:.3f}")
def demonstrate_agent_intelligence(self):
"""Demonstrate advanced AI capabilities"""
if not self.agents:
return
print(f"\n INTELLIGENCE DEMONSTRATION:")
best_name = max(self.results.keys(),
key=lambda x: self.results[x]['performance']['r2'])
best_agent = self.agents[best_name]
print(f"Using best agent: {best_name}")
print(f" Memory capacity: {len(best_agent.memory)} experiences")
dummy_input = np.random.randn(3, best_agent.layers[0]['weights'].shape[0])
conservative_decisions = best_agent.make_decision(dummy_input, exploration_rate=0.0)
exploratory_decisions = best_agent.make_decision(dummy_input, exploration_rate=0.3)
print(f" Decision making:")
print(f" Conservative: {conservative_decisions.flatten()[:3]}")
print(f" Exploratory: {exploratory_decisions.flatten()[:3]}")
if len(best_agent.performance_history) > 10:
initial_perf = np.mean(best_agent.performance_history[:5])
final_perf = np.mean(best_agent.performance_history[-5:])
improvement = ((initial_perf - final_perf) / initial_perf) * 100
print(f" Learning improvement: {improvement:.1f}%")
total_params = sum(layer['weights'].size + layer['bias'].size
for layer in best_agent.layers)
print(f" Network complexity: {total_params} parameters")
return best_agent
Quick demo and main entry
A quick demo utility builds a small regression dataset, trains a compact agent, evaluates R²/MSE/MAE and visualizes training curves for rapid feedback.
def run_quick_demo():
"""Quick demo for immediate testing"""
print(" QUICK AI AGENT DEMO")
print("=" * 40)
X, y = make_regression(n_samples=500, n_features=6, noise=0.15, random_state=42)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
print(f"Dataset: {X_scaled.shape[0]} samples, {X_scaled.shape[1]} features")
agent = AdvancedNeuralAgent(
input_size=X_scaled.shape[1],
hidden_layers=[24, 12],
output_size=1,
learning_rate=0.008
)
print("Training agent...")
train_losses, val_losses = agent.train(X_scaled, y, epochs=100, verbose=False)
X_trn, X_test, y_trn, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
performance = agent.evaluate_performance(X_test, y_test)
print(f"\n RESULTS:")
print(f"R² Score: {performance['r2']:.3f}")
print(f"MSE: {performance['mse']:.3f}")
print(f"MAE: {performance['mae']:.3f}")
agent.visualize_training(train_losses, val_losses)
return agent
if __name__ == "__main__":
print("Choose demo type:")
print("1. Quick Demo (fast)")
print("2. Comprehensive Demo (detailed)")
demo = AIAgentDemo()
best_agent = demo.run_comprehensive_demo()
Takeaways
Following simple but consistent stability practices—Xavier-style initialization, clipping of activations and gradients, momentum, weight decay, target normalization, and adaptive LR—makes the training process far more robust across datasets and model sizes. The code demonstrates how to turn a stable regressor into an agent with memory and exploration mechanics, and includes demo harnesses to evaluate different architectures and tasks.