340 lines
10 KiB
Plaintext
340 lines
10 KiB
Plaintext
|
// To use this enhanced version, you'll need to:
//
// Install additional dependencies:
// npm install winston nodemailer node-fetch
//
// Set up environment variables:
// export NOTIFICATION_EMAIL=alerts@yourdomain.com
// export ADMIN_EMAIL=admin@yourdomain.com
// export SMTP_HOST=smtp.yourdomain.com
// export SMTP_PORT=587
// export SMTP_USER=your-user
// export SMTP_PASS=your-password
|
||
|
|
||
|
|
||
|
const { spawn, exec } = require('child_process');
|
||
|
const pm2 = require('pm2');
|
||
|
const fs = require('fs');
|
||
|
const path = require('path');
|
||
|
const winston = require('winston');
|
||
|
const nodemailer = require('nodemailer');
|
||
|
|
||
|
// Central configuration: the options handed to pm2.start(), the monitor's
// timing and error patterns, e-mail notification settings and log settings.
const config = {
  // Passed as-is to pm2.start().
  app: {
    name: 'server',
    script: 'server.js',
    instances: 4,
    max_memory_restart: '300M',
    max_restarts: 10,
    exp_backoff_restart_delay: 100,
  },
  monitoring: {
    healthCheckInterval: 30000, // ms between health probes
    healthCheckEndpoint: 'http://localhost:3000/health',
    metricsInterval: 60000, // ms between memory samples
    maxRestartAttempts: 5,
    // Substrings that, when seen in the PM2 log stream, trigger a restart.
    criticalErrors: ['page timed out', 'parse html timed out', 'Timed out waiting for'],
  },
  notifications: {
    email: {
      enabled: true,
      from: process.env.NOTIFICATION_EMAIL,
      to: process.env.ADMIN_EMAIL,
      // Handed to nodemailer.createTransport().
      smtp: {
        host: process.env.SMTP_HOST,
        port: process.env.SMTP_PORT,
        auth: {
          user: process.env.SMTP_USER,
          pass: process.env.SMTP_PASS,
        },
      },
    },
  },
  logging: {
    dir: 'logs',
    maxSize: '10m',
    maxFiles: '7d',
  },
};

/**
 * Serialize a configuration object for writing to disk.
 *
 * Any defined `pass` property is replaced by a placeholder so the SMTP
 * password read from the environment is never persisted in plaintext.
 * Undefined values are omitted, exactly as plain JSON.stringify does.
 *
 * @param {object} cfg - Configuration object to serialize.
 * @returns {string} Pretty-printed JSON with secrets masked.
 */
function serializeConfigForDisk(cfg) {
  const maskSecrets = (key, value) => (key === 'pass' && value != null ? '[REDACTED]' : value);
  return JSON.stringify(cfg, maskSecrets, 2);
}

// Save config to file (the in-memory config keeps the real password).
fs.writeFileSync('pm2-config.json', serializeConfigForDisk(config));
|
||
|
|
||
|
// Shared logger: timestamped JSON entries sent to the console, to an
// error-only file and to a combined file inside config.logging.dir.
const logger = (() => {
  const { combine, timestamp, json } = winston.format;
  const { Console, File } = winston.transports;

  // Both log files use the same size/count limits.
  const fileLimits = { maxsize: 5242880, maxFiles: 5 }; // 5MB
  const inLogDir = (fileName) => path.join(config.logging.dir, fileName);

  return winston.createLogger({
    level: 'info',
    format: combine(timestamp(), json()),
    transports: [
      new Console(),
      new File({ filename: inLogDir('error.log'), level: 'error', ...fileLimits }),
      new File({ filename: inLogDir('combined.log'), ...fileLimits }),
    ],
  });
})();
|
||
|
|
||
|
// Metrics tracking
/**
 * Collects supervisor metrics in memory and rewrites them to
 * metrics/metrics.json after every update.
 */
class MetricsTracker {
  constructor() {
    this.metrics = {
      startTime: Date.now(),
      restarts: 0,
      lastRestart: null,
      uptimePercentage: 100,
      memoryUsage: [],
      healthCheckFailures: 0,
      criticalErrors: 0,
    };

    // Create the metrics directory if it doesn't exist; `recursive` makes
    // this a no-op when it is already there (no check-then-create race).
    fs.mkdirSync('metrics', { recursive: true });
  }

  /**
   * Record one event, recompute the uptime estimate and persist the metrics.
   *
   * @param {string} type - 'restart', 'memory', 'healthCheck' or
   *   'criticalError'; any other value records nothing but still refreshes
   *   the uptime estimate and rewrites the file.
   * @param {*} [value] - For 'memory': the sample to store. For
   *   'healthCheck': a falsy value counts as a failure. Ignored otherwise.
   */
  updateMetrics(type, value) {
    switch (type) {
      case 'restart':
        this.metrics.restarts++;
        this.metrics.lastRestart = new Date().toISOString();
        break;
      case 'memory':
        this.metrics.memoryUsage.push({
          timestamp: new Date().toISOString(),
          value,
        });
        // Keep only last 100 measurements
        if (this.metrics.memoryUsage.length > 100) {
          this.metrics.memoryUsage.shift();
        }
        break;
      case 'healthCheck':
        if (!value) this.metrics.healthCheckFailures++;
        break;
      case 'criticalError':
        this.metrics.criticalErrors++;
        break;
    }

    // Estimate uptime, assuming 10s downtime per restart. The assumed
    // downtime can exceed the elapsed time (e.g. a restart within the first
    // 10 seconds) and the elapsed time can be 0, so clamp to 0-100 instead of
    // reporting a negative percentage or NaN.
    const totalTime = Date.now() - this.metrics.startTime;
    const downtime = this.metrics.restarts * 10000;
    let uptimePercent;
    if (totalTime > 0) {
      uptimePercent = (Math.max(0, totalTime - downtime) / totalTime) * 100;
    } else {
      uptimePercent = downtime > 0 ? 0 : 100;
    }
    // Kept as a two-decimal string so the persisted format does not change.
    this.metrics.uptimePercentage = uptimePercent.toFixed(2);

    // Save metrics to file
    fs.writeFileSync(
      path.join('metrics', 'metrics.json'),
      JSON.stringify(this.metrics, null, 2),
    );
  }
}

const metrics = new MetricsTracker();
|
||
|
|
||
|
// Notification system: one mail transport, built from the SMTP settings in
// config, is reused for every outgoing message.
const notifier = nodemailer.createTransport(config.notifications.email.smtp);

/**
 * Send a plain-text e-mail through the shared transport.
 *
 * Does nothing when e-mail notifications are disabled in config. A delivery
 * failure is logged and not rethrown.
 *
 * @param {string} subject - Subject text; it is prefixed with "[CRITICAL]"
 *   or "[INFO]".
 * @param {string} message - Plain-text body.
 * @param {boolean} [critical=false] - Selects the "[CRITICAL]" prefix.
 * @returns {Promise<void>}
 */
async function sendNotification(subject, message, critical = false) {
  const { enabled, from, to } = config.notifications.email;
  if (!enabled) {
    return;
  }

  const severity = critical ? 'CRITICAL' : 'INFO';
  const mail = {
    from,
    to,
    subject: `[${severity}] ${subject}`,
    text: message,
  };

  try {
    await notifier.sendMail(mail);
    logger.info(`Notification sent: ${subject}`);
  } catch (error) {
    logger.error('Failed to send notification:', error);
  }
}
|
||
|
|
||
|
// Health check function
/**
 * Probe the configured health endpoint once and record the outcome.
 *
 * The probe is healthy only when the endpoint answers with HTTP 200. The
 * request is aborted after `config.monitoring.healthCheckTimeout` ms
 * (10000 when that key is absent); without a deadline a hung server would
 * leave the probe pending forever and never be reported as unhealthy.
 *
 * @returns {Promise<boolean>} true when healthy; false on a non-200 status,
 *   a network error or a timeout. Never rejects.
 */
async function performHealthCheck() {
  const timeoutMs = config.monitoring.healthCheckTimeout ?? 10000;

  try {
    const response = await fetch(config.monitoring.healthCheckEndpoint, {
      signal: AbortSignal.timeout(timeoutMs),
    });
    const healthy = response.status === 200;
    metrics.updateMetrics('healthCheck', healthy);

    if (!healthy) {
      logger.warn('Health check failed');
      // Not awaited: sendNotification logs its own failures, and the probe
      // result should not wait on mail delivery.
      sendNotification('Health Check Failed', 'Application health check failed. Investigating...');
      return false;
    }
    return true;
  } catch (error) {
    // Connection errors and the timeout abort both land here.
    logger.error('Health check error:', error);
    metrics.updateMetrics('healthCheck', false);
    return false;
  }
}
|
||
|
|
||
|
// Child process streaming the PM2 logs (set by pm2Start), if any.
let yourCommand;
// True while pm2Start is running, so overlapping restarts are skipped.
let isRestarting = false;

/**
 * Send the default termination signal to every PID listed in the CHROME_PID
 * environment variable (whitespace-separated).
 *
 * Uses process.kill instead of interpolating the variable into a shell
 * command, so a malformed value cannot run arbitrary commands. Entries that
 * are not positive integers are skipped; a failure to signal one PID is
 * logged and does not stop the rest of the cleanup.
 */
function terminateChromeFromEnv() {
  const rawPids = process.env.CHROME_PID;
  if (!rawPids) return;

  for (const token of rawPids.split(/\s+/).filter(Boolean)) {
    if (!/^[1-9]\d*$/.test(token)) {
      logger.warn(`Ignoring invalid CHROME_PID entry: ${token}`);
      continue;
    }
    try {
      process.kill(Number(token));
    } catch (error) {
      logger.warn(`Could not signal process ${token}: ${error.message}`);
    }
  }
}

/**
 * Release everything a previous start may have left behind: the Chrome
 * process(es) named by CHROME_PID, the PM2 log-streaming child and this
 * process's PM2 connection.
 *
 * Errors are logged and swallowed so callers can always continue.
 *
 * @returns {Promise<void>} Resolves once cleanup has finished or failed.
 */
async function cleanup() {
  try {
    terminateChromeFromEnv();

    if (yourCommand) {
      const logTail = yourCommand;
      yourCommand = undefined;

      // Detach the supervisor's handlers BEFORE killing. A killed child
      // reports exit code null, which the exit handler would otherwise treat
      // as a crash and answer with another restart.
      logTail.stdout.removeAllListeners('data');
      logTail.removeAllListeners('exit');
      logTail.removeAllListeners('error');
      // Keep a log-only error handler so a failed kill cannot surface as an
      // unhandled 'error' event.
      logTail.on('error', (error) => {
        logger.warn(`PM2 log process error during cleanup: ${error.message}`);
      });
      logTail.kill();
    }

    await new Promise((done) => pm2.disconnect(done));
    logger.info('Cleanup completed successfully');
  } catch (error) {
    logger.error('Cleanup error:', error);
  }
}
|
||
|
|
||
|
// Handles of the monitoring intervals armed by the latest successful start.
let healthCheckTimer = null;
let metricsTimer = null;

/** Stop the health-check and metrics intervals, if they are running. */
function stopMonitoringTimers() {
  clearInterval(healthCheckTimer);
  clearInterval(metricsTimer);
  healthCheckTimer = null;
  metricsTimer = null;
}

/** (Re)arm the health-check and metrics intervals, replacing any old ones. */
function startMonitoringTimers() {
  stopMonitoringTimers();

  healthCheckTimer = setInterval(async () => {
    const isHealthy = await performHealthCheck();
    if (!isHealthy) restartWithDelay();
  }, config.monitoring.healthCheckInterval);

  metricsTimer = setInterval(() => {
    pm2.describe(config.app.name, (err, processDescription) => {
      // Guard every level: a missing entry or `monit` field must not throw
      // inside this callback.
      const memory = processDescription?.[0]?.monit?.memory;
      if (!err && memory !== undefined) {
        metrics.updateMetrics('memory', memory);
      }
    });
  }, config.monitoring.metricsInterval);
}

/**
 * Spawn `pm2 log` and watch its output for the configured critical errors.
 *
 * @returns {ChildProcess} The spawned log-streaming child.
 */
function watchPm2Logs() {
  const logTail = spawn('pm2', ['log']);

  logTail.on('error', (error) => {
    logger.error('PM2 log process error:', error);
    metrics.updateMetrics('criticalError');
    restartWithDelay();
  });

  logTail.on('exit', (code, signal) => {
    // `killed` is set once kill() has been called on this child, i.e. the
    // supervisor ended it on purpose; that is not a crash and must not
    // schedule another restart.
    if (code === 0 || logTail.killed) return;
    logger.error(`PM2 log process exited with code ${code}, signal: ${signal}`);
    restartWithDelay();
  });

  logTail.stdout.on('data', (data) => {
    const dataStr = data.toString();
    if (!config.monitoring.criticalErrors.some((err) => dataStr.includes(err))) return;

    logger.error("Critical error detected in logs:", dataStr);
    metrics.updateMetrics('criticalError');
    sendNotification(
      'Critical Error Detected',
      `Error in application logs: ${dataStr}`,
      true,
    );
    restartWithDelay();
  });

  return logTail;
}

/**
 * (Re)start the application under PM2 and arm log and health monitoring.
 *
 * Steps: clean up the previous run, connect to PM2, start `config.app`,
 * spawn the log watcher, then arm the monitoring intervals. The intervals
 * from the previous run are stopped first; without that every restart added
 * another pair of timers, multiplying health checks and the restarts they
 * trigger.
 *
 * A call made while another start is in progress returns immediately. Start
 * failures are logged, notified and retried through restartWithDelay.
 *
 * @returns {Promise<void>}
 */
async function pm2Start() {
  if (isRestarting) {
    logger.info('Restart already in progress, skipping...');
    return;
  }

  try {
    isRestarting = true;
    stopMonitoringTimers();
    await cleanup();

    await new Promise((resolve, reject) => {
      pm2.connect((err) => {
        if (err) reject(err);
        else resolve();
      });
    });

    // Start the application with config
    await new Promise((resolve, reject) => {
      pm2.start(config.app, (err, apps) => {
        if (err) reject(err);
        else resolve(apps);
      });
    });

    // Start monitoring processes
    yourCommand = watchPm2Logs();
    startMonitoringTimers();

    logger.info('Application started successfully');
    sendNotification('Application Started', 'The application has been started successfully');
  } catch (error) {
    logger.error('PM2 start error:', error);
    metrics.updateMetrics('criticalError');
    sendNotification(
      'Application Start Failed',
      `Failed to start application: ${error.message}`,
      true,
    );
    await cleanup();
    restartWithDelay();
  } finally {
    isRestarting = false;
  }
}
|
||
|
|
||
|
// Number of restarts scheduled so far.
let restartAttempts = 0;
// Set once the give-up path has begun, so it runs only once and no further
// restarts are scheduled or executed while the process is shutting down.
let isGivingUp = false;
// Longest time to wait for the final notification before exiting anyway.
const GIVE_UP_EXIT_GRACE_MS = 10000;

/**
 * Schedule a restart with exponential backoff (1s, 2s, 4s, ... capped at
 * 30s), or give up and exit with code 1 once
 * `config.monitoring.maxRestartAttempts` restarts have been scheduled.
 *
 * On giving up, the process waits for the "maximum attempts" notification to
 * finish (bounded by GIVE_UP_EXIT_GRACE_MS) before exiting. Exiting right
 * after starting the asynchronous send would terminate the process before
 * the e-mail could be delivered.
 */
function restartWithDelay() {
  if (isGivingUp) return;

  if (restartAttempts >= config.monitoring.maxRestartAttempts) {
    isGivingUp = true;
    logger.error('Maximum restart attempts reached. Exiting...');

    const exitNow = () => process.exit(1);
    // Safety net in case the mail transport hangs.
    setTimeout(exitNow, GIVE_UP_EXIT_GRACE_MS);
    sendNotification(
      'Maximum Restart Attempts Reached',
      'Application has reached maximum restart attempts and will now exit.',
      true,
    ).then(exitNow, exitNow);
    return;
  }

  const delay = Math.min(1000 * 2 ** restartAttempts, 30000);
  restartAttempts++;
  metrics.updateMetrics('restart');

  logger.info(`Scheduling restart in ${delay}ms. Attempt ${restartAttempts}/${config.monitoring.maxRestartAttempts}`);
  setTimeout(() => {
    // A restart scheduled before the limit was hit must not run while the
    // process is waiting to exit.
    if (!isGivingUp) pm2Start();
  }, delay);
}
|
||
|
|
||
|
// Process termination handlers: on SIGTERM or SIGINT, log the signal, run
// cleanup() and exit with status 0.
for (const signalName of ['SIGTERM', 'SIGINT']) {
  process.on(signalName, async () => {
    logger.info(`Received ${signalName}. Cleaning up...`);
    await cleanup();
    process.exit(0);
  });
}
|
||
|
|
||
|
// Create necessary directories. `recursive: true` makes this a no-op when the
// directory already exists (no check-then-create race) and also creates any
// missing parent directories if config.logging.dir is a nested path.
fs.mkdirSync(config.logging.dir, { recursive: true });
|
||
|
|
||
|
// Start the application. If pm2Start rejects, log the error, wait for the
// fatal-error notification to finish, then exit with status 1. Exiting
// without awaiting the send would terminate the process before the e-mail
// could be delivered.
pm2Start().catch(async (error) => {
  logger.error('Fatal error:', error);
  try {
    await sendNotification('Fatal Error', `Fatal error occurred: ${error.message}`, true);
  } finally {
    process.exit(1);
  }
});
|