// Source listing metadata: 340 lines, 10 KiB, plaintext.
// Last modified: 2025-02-05 15:54:00 -05:00
// To use this enhanced version, you'll need to:
// 1. Install the dependencies:
//      npm install pm2 winston nodemailer
//    (The health check calls the global fetch that ships with Node.js 18+.
//    node-fetch is only needed on older runtimes, where it would also have
//    to be imported explicitly.)
// 2. Set up environment variables:
//      export NOTIFICATION_EMAIL=alerts@yourdomain.com
//      export ADMIN_EMAIL=admin@yourdomain.com
//      export SMTP_HOST=smtp.yourdomain.com
//      export SMTP_PORT=587
//      export SMTP_USER=your-user
//      export SMTP_PASS=your-password
const { spawn, exec } = require('child_process');
const pm2 = require('pm2');
const fs = require('fs');
const path = require('path');
const winston = require('winston');
const nodemailer = require('nodemailer');
// Configuration file
const config = {
app: {
name: 'server',
script: 'server.js',
instances: 4,
max_memory_restart: '300M',
max_restarts: 10,
exp_backoff_restart_delay: 100
},
monitoring: {
healthCheckInterval: 30000,
healthCheckEndpoint: 'http://localhost:3000/health',
metricsInterval: 60000,
maxRestartAttempts: 5,
criticalErrors: ['page timed out', 'parse html timed out', 'Timed out waiting for']
},
notifications: {
email: {
enabled: true,
from: process.env.NOTIFICATION_EMAIL,
to: process.env.ADMIN_EMAIL,
smtp: {
host: process.env.SMTP_HOST,
port: process.env.SMTP_PORT,
auth: {
user: process.env.SMTP_USER,
pass: process.env.SMTP_PASS
}
}
}
},
logging: {
dir: 'logs',
maxSize: '10m',
maxFiles: '7d'
}
};
// Save config to file
fs.writeFileSync('pm2-config.json', JSON.stringify(config, null, 2));
// Initialize logger
const logger = winston.createLogger({
level: 'info',
format: winston.format.combine(
winston.format.timestamp(),
winston.format.json()
),
transports: [
new winston.transports.Console(),
new winston.transports.File({
filename: path.join(config.logging.dir, 'error.log'),
level: 'error',
maxsize: 5242880, // 5MB
maxFiles: 5
}),
new winston.transports.File({
filename: path.join(config.logging.dir, 'combined.log'),
maxsize: 5242880,
maxFiles: 5
})
]
});
// Metrics tracking
class MetricsTracker {
constructor() {
this.metrics = {
startTime: Date.now(),
restarts: 0,
lastRestart: null,
uptimePercentage: 100,
memoryUsage: [],
healthCheckFailures: 0,
criticalErrors: 0
};
// Create metrics directory if it doesn't exist
if (!fs.existsSync('metrics')) {
fs.mkdirSync('metrics');
}
}
updateMetrics(type, value) {
switch(type) {
case 'restart':
this.metrics.restarts++;
this.metrics.lastRestart = new Date().toISOString();
break;
case 'memory':
this.metrics.memoryUsage.push({
timestamp: new Date().toISOString(),
value: value
});
// Keep only last 100 measurements
if (this.metrics.memoryUsage.length > 100) {
this.metrics.memoryUsage.shift();
}
break;
case 'healthCheck':
if (!value) this.metrics.healthCheckFailures++;
break;
case 'criticalError':
this.metrics.criticalErrors++;
break;
}
// Calculate uptime percentage
const totalTime = Date.now() - this.metrics.startTime;
const downtime = this.metrics.restarts * 10000; // Assuming 10s downtime per restart
this.metrics.uptimePercentage = ((totalTime - downtime) / totalTime * 100).toFixed(2);
// Save metrics to file
fs.writeFileSync(
path.join('metrics', 'metrics.json'),
JSON.stringify(this.metrics, null, 2)
);
}
}
const metrics = new MetricsTracker();
// Notification system
const notifier = nodemailer.createTransport(config.notifications.email.smtp);
async function sendNotification(subject, message, critical = false) {
if (!config.notifications.email.enabled) return;
try {
await notifier.sendMail({
from: config.notifications.email.from,
to: config.notifications.email.to,
subject: `[${critical ? 'CRITICAL' : 'INFO'}] ${subject}`,
text: message
});
logger.info(`Notification sent: ${subject}`);
} catch (error) {
logger.error('Failed to send notification:', error);
}
}
// Health check function
async function performHealthCheck() {
try {
const response = await fetch(config.monitoring.healthCheckEndpoint);
const healthy = response.status === 200;
metrics.updateMetrics('healthCheck', healthy);
if (!healthy) {
logger.warn('Health check failed');
sendNotification('Health Check Failed', 'Application health check failed. Investigating...');
return false;
}
return true;
} catch (error) {
logger.error('Health check error:', error);
metrics.updateMetrics('healthCheck', false);
return false;
}
}
let yourCommand;
let isRestarting = false;
async function cleanup() {
return new Promise(async (resolve) => {
try {
if (process.env.CHROME_PID) {
exec(`kill ${process.env.CHROME_PID}`);
}
if (yourCommand) {
yourCommand.stdout.removeAllListeners('data');
yourCommand.kill();
}
await new Promise(resolve => pm2.disconnect(resolve));
logger.info('Cleanup completed successfully');
} catch (error) {
logger.error('Cleanup error:', error);
}
resolve();
});
}
async function pm2Start() {
if (isRestarting) {
logger.info('Restart already in progress, skipping...');
return;
}
try {
isRestarting = true;
await cleanup();
await new Promise((resolve, reject) => {
pm2.connect((err) => {
if (err) reject(err);
else resolve();
});
});
// Start the application with config
await new Promise((resolve, reject) => {
pm2.start(config.app, (err, apps) => {
if (err) reject(err);
else resolve(apps);
});
});
// Start monitoring processes
yourCommand = spawn('pm2', ['log']);
yourCommand.on('error', (error) => {
logger.error('PM2 log process error:', error);
metrics.updateMetrics('criticalError');
restartWithDelay();
});
yourCommand.on('exit', (code, signal) => {
if (code !== 0) {
logger.error(`PM2 log process exited with code ${code}, signal: ${signal}`);
restartWithDelay();
}
});
yourCommand.stdout.on('data', (data) => {
const dataStr = data.toString();
if (config.monitoring.criticalErrors.some(err => dataStr.includes(err))) {
logger.error("Critical error detected in logs:", dataStr);
metrics.updateMetrics('criticalError');
sendNotification(
'Critical Error Detected',
`Error in application logs: ${dataStr}`,
true
);
restartWithDelay();
}
});
// Start health check interval
setInterval(async () => {
const isHealthy = await performHealthCheck();
if (!isHealthy) restartWithDelay();
}, config.monitoring.healthCheckInterval);
// Start metrics collection interval
setInterval(() => {
pm2.describe(config.app.name, (err, processDescription) => {
if (!err && processDescription[0]) {
metrics.updateMetrics('memory', processDescription[0].monit.memory);
}
});
}, config.monitoring.metricsInterval);
logger.info('Application started successfully');
sendNotification('Application Started', 'The application has been started successfully');
} catch (error) {
logger.error('PM2 start error:', error);
metrics.updateMetrics('criticalError');
sendNotification(
'Application Start Failed',
`Failed to start application: ${error.message}`,
true
);
await cleanup();
restartWithDelay();
} finally {
isRestarting = false;
}
}
let restartAttempts = 0;
function restartWithDelay() {
if (restartAttempts >= config.monitoring.maxRestartAttempts) {
logger.error('Maximum restart attempts reached. Exiting...');
sendNotification(
'Maximum Restart Attempts Reached',
'Application has reached maximum restart attempts and will now exit.',
true
);
process.exit(1);
}
const delay = Math.min(1000 * Math.pow(2, restartAttempts), 30000);
restartAttempts++;
metrics.updateMetrics('restart');
logger.info(`Scheduling restart in ${delay}ms. Attempt ${restartAttempts}/${config.monitoring.maxRestartAttempts}`);
setTimeout(pm2Start, delay);
}
// Process termination handlers
process.on('SIGTERM', async () => {
logger.info('Received SIGTERM. Cleaning up...');
await cleanup();
process.exit(0);
});
process.on('SIGINT', async () => {
logger.info('Received SIGINT. Cleaning up...');
await cleanup();
process.exit(0);
});
// Create necessary directories
if (!fs.existsSync(config.logging.dir)) {
fs.mkdirSync(config.logging.dir);
}
// Start the application
pm2Start().catch(error => {
logger.error('Fatal error:', error);
sendNotification('Fatal Error', `Fatal error occurred: ${error.message}`, true);
process.exit(1);
});