Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions client/src/Hooks/useMonitorForm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ const getBaseDefaults = (data?: Monitor | null) => ({
description: data?.description || "",
interval: data?.interval || 60000,
notifications: data?.notifications || [],
escalationDelayMinutes: data?.escalationDelayMinutes ?? undefined,
escalationNotificationId: data?.escalationNotificationId ?? undefined,
statusWindowSize: data?.statusWindowSize || 5,
statusWindowThreshold: data?.statusWindowThreshold || 60,
geoCheckEnabled: data?.geoCheckEnabled ?? false,
Expand Down
71 changes: 66 additions & 5 deletions client/src/Pages/CreateMonitor/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,14 @@ const CreateMonitorPage = () => {
}, [defaults, form]);

const watchedType = watch("type") as MonitorType;
// Notification channels reshaped for the form controls: each entry keeps all of
// its original fields and gains a `name` alias of `notificationName`.
// Recomputed only when `notifications` changes; a missing list yields [].
const notificationOptions = useMemo(() => {
	const channels = notifications ?? [];
	return channels.map((channel) => ({ ...channel, name: channel.notificationName }));
}, [notifications]);

const watchedUseAdvancedMatching = watch("useAdvancedMatching") as boolean;
const watchGeoCheckEnabled = watch("geoCheckEnabled") as boolean;
Expand Down Expand Up @@ -705,11 +713,6 @@ const CreateMonitorPage = () => {
name="notifications"
control={control}
render={({ field }) => {
// Map notifications to have 'name' property for Autocomplete
const notificationOptions = (notifications ?? []).map((n) => ({
...n,
name: n.notificationName,
}));
const selectedNotifications = notificationOptions.filter((n) =>
(field.value ?? []).includes(n.id)
);
Expand Down Expand Up @@ -765,6 +768,64 @@ const CreateMonitorPage = () => {
}
/>

<ConfigBox
title={t("pages.createMonitor.form.escalation.title")}
subtitle={t("pages.createMonitor.form.escalation.description")}
rightContent={
<Stack spacing={theme.spacing(LAYOUT.MD)}>
<Controller
name="escalationDelayMinutes"
control={control}
render={({ field, fieldState }) => (
<TextField
{...field}
value={field.value ?? ""}
onChange={(e) => {
const value = e.target.value;
field.onChange(value === "" ? undefined : Number(value));
}}
type="number"
fieldLabel={t("pages.createMonitor.form.escalation.option.delay.label")}
placeholder={t("pages.createMonitor.form.escalation.option.delay.placeholder")}
fullWidth
inputProps={{ min: 1, step: 1 }}
error={!!fieldState.error}
helperText={fieldState.error?.message ?? ""}
/>
)}
/>
<Controller
name="escalationNotificationId"
control={control}
render={({ field, fieldState }) => (
<Select
{...field}
value={field.value ?? ""}
onChange={(e) => {
const value = e.target.value;
field.onChange(value === "" ? undefined : value);
}}
fieldLabel={t("pages.createMonitor.form.escalation.option.channel.label")}
error={!!fieldState.error}
>
<MenuItem value="">
{t("pages.createMonitor.form.escalation.option.channel.placeholder")}
</MenuItem>
{notificationOptions.map((notification) => (
<MenuItem
key={notification.id}
value={notification.id}
>
{notification.notificationName}
</MenuItem>
))}
</Select>
)}
/>
</Stack>
}
/>

{(watchedType === "http" ||
watchedType === "grpc" ||
watchedType === "websocket") && (
Expand Down
2 changes: 2 additions & 0 deletions client/src/Types/Monitor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ export interface Monitor {
interval: number;
uptimePercentage?: number;
notifications: string[];
escalationDelayMinutes?: number;
escalationNotificationId?: string;
secret?: string;
cpuAlertThreshold: number;
cpuAlertCounter: number;
Expand Down
23 changes: 22 additions & 1 deletion client/src/Validation/monitor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ const baseSchema = z.object({
.number({ message: "Threshold percentage is required" })
.min(1, "Incident percentage must be at least 1")
.max(100, "Incident percentage must be at most 100"),
escalationDelayMinutes: z.number().int().min(1, "Escalation delay must be at least 1 minute").optional(),
escalationNotificationId: z.string().min(1, "Escalation notification channel is required").optional(),
geoCheckEnabled: z.boolean().optional(),
geoCheckLocations: z.array(z.enum(GeoContinents)).optional(),
geoCheckInterval: z
Expand Down Expand Up @@ -133,7 +135,26 @@ export const monitorSchema = z.discriminatedUnion("type", [
pagespeedSchema,
hardwareSchema,
websocketSchema,
]);
]).superRefine((data, ctx) => {
const hasDelay = data.escalationDelayMinutes !== undefined && data.escalationDelayMinutes !== null;
const hasChannel = Boolean(data.escalationNotificationId);

if (hasDelay && !hasChannel) {
ctx.addIssue({
code: z.ZodIssueCode.custom,
message: "Escalation notification channel is required",
path: ["escalationNotificationId"],
});
}

if (hasChannel && !hasDelay) {
ctx.addIssue({
code: z.ZodIssueCode.custom,
message: "Escalation delay must be at least 1 minute",
path: ["escalationDelayMinutes"],
});
}
});

export type MonitorFormData = z.infer<typeof monitorSchema>;

Expand Down
14 changes: 14 additions & 0 deletions client/src/locales/en.json
Original file line number Diff line number Diff line change
Expand Up @@ -543,6 +543,20 @@
"description": "Select the notification channels you want to use",
"title": "Notifications"
},
"escalation": {
"description": "Send a second notification to a chosen channel after the first alert has been sent.",
"option": {
"delay": {
"label": "Escalation delay (minutes)",
"placeholder": "e.g. 15"
},
"channel": {
"label": "Escalation channel",
"placeholder": "Select a notification channel"
}
},
"title": "Escalation"
},
"type": {
"description": "Select the type of check to perform",
"optionDockerDescription": "Use Docker to monitor if a container is running.",
Expand Down
5 changes: 5 additions & 0 deletions server/src/db/models/Incident.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ type IncidentDocumentBase = Omit<Incident, "id" | "monitorId" | "teamId" | "reso
resolvedBy?: Types.ObjectId | null;
startTime: Date;
endTime: Date | null;
escalationSentAt: Date | null;
createdAt: Date;
updatedAt: Date;
};
Expand Down Expand Up @@ -72,6 +73,10 @@ const IncidentSchema = new Schema<IncidentDocument>(
type: String,
default: null,
},
escalationSentAt: {
type: Date,
default: null,
},
},
{ timestamps: true }
);
Expand Down
9 changes: 9 additions & 0 deletions server/src/db/models/Monitor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,15 @@ const MonitorSchema = new Schema<MonitorDocument>(
ref: "Notification",
},
],
escalationDelayMinutes: {
type: Number,
default: null,
},
escalationNotificationId: {
type: Schema.Types.ObjectId,
ref: "Notification",
default: null,
},
secret: {
type: String,
},
Expand Down
1 change: 1 addition & 0 deletions server/src/repositories/incidents/IIncidentsRepository.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ export interface IIncidentsRepository {
findById(incidentId: string, teamId: string): Promise<Incident>;
findActiveByIncidentId(incidentId: string, teamId: string): Promise<Incident | null>;
findActiveByMonitorId(monitorId: string, teamId: string): Promise<Incident | null>;
findActive(): Promise<Incident[]>;
findByTeamId(
teamId: string,
startDate: Date | undefined,
Expand Down
6 changes: 6 additions & 0 deletions server/src/repositories/incidents/MongoIncidentRepository.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ class MongoIncidentRepository implements IIncidentsRepository {
resolvedBy: doc.resolvedBy ? this.toStringId(doc.resolvedBy) : null,
resolvedByEmail: doc.resolvedByEmail ?? null,
comment: doc.comment ?? null,
escalationSentAt: doc.escalationSentAt ? this.toDateString(doc.escalationSentAt) : null,
createdAt: this.toDateString(doc.createdAt),
updatedAt: this.toDateString(doc.updatedAt),
};
Expand Down Expand Up @@ -115,6 +116,11 @@ class MongoIncidentRepository implements IIncidentsRepository {
return this.toEntity(incident);
};

/**
 * Returns incidents that have `status: true` and no escalation recorded
 * (`escalationSentAt: null`). The query carries no team or monitor filter,
 * so the result is narrower than the name suggests in one way (escalated
 * incidents are excluded) and broader in another (it is not team-scoped).
 *
 * @returns The matching incidents mapped through `mapDocuments`.
 */
findActive = async (): Promise<Incident[]> => {
	return this.mapDocuments(await IncidentModel.find({ status: true, escalationSentAt: null }));
};

findByTeamId = async (
teamId: string,
startDate: Date | undefined,
Expand Down
12 changes: 11 additions & 1 deletion server/src/repositories/monitors/MongoMonitorsRepository.ts
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,13 @@ class MongoMonitorsRepository implements IMonitorsRepository {
};

/**
 * Detaches a notification channel from every monitor that references it.
 *
 * Two independent references are cleaned up:
 *  1. the id is pulled from each monitor's `notifications` array;
 *  2. `escalationNotificationId` is cleared on monitors whose escalation
 *     channel is this notification.
 *
 * The updates are deliberately separate. A single update filtered with
 * `$or` would apply the `$unset` to every matched monitor, which would also
 * wipe an unrelated escalation channel on monitors that merely list this
 * notification in `notifications`.
 *
 * @param notificationId - Id of the notification being removed.
 * @throws If `notificationId` cannot be converted to an ObjectId, or if
 *         either database update rejects.
 */
removeNotificationFromMonitors = async (notificationId: string): Promise<void> => {
	// 1) Regular notification list: only touch monitors that actually contain the id.
	await MonitorModel.updateMany({ notifications: notificationId }, { $pull: { notifications: notificationId } });

	// 2) Escalation channel: only clear it where it points at the removed notification.
	await MonitorModel.updateMany(
		{ escalationNotificationId: new mongoose.Types.ObjectId(notificationId) },
		{ $unset: { escalationNotificationId: "" } }
	);
};

updateNotifications = async (
Expand Down Expand Up @@ -374,6 +380,8 @@ class MongoMonitorsRepository implements IMonitorsRepository {
interval: doc.interval,
uptimePercentage: doc.uptimePercentage ?? undefined,
notifications: notificationIds,
escalationDelayMinutes: doc.escalationDelayMinutes ?? undefined,
escalationNotificationId: doc.escalationNotificationId ? toStringId(doc.escalationNotificationId) : undefined,
secret: doc.secret ?? undefined,
cpuAlertThreshold: doc.cpuAlertThreshold,
cpuAlertCounter: doc.cpuAlertCounter,
Expand Down Expand Up @@ -433,6 +441,8 @@ class MongoMonitorsRepository implements IMonitorsRepository {
interval: doc.interval,
uptimePercentage: doc.uptimePercentage ?? undefined,
notifications: notificationIds,
escalationDelayMinutes: doc.escalationDelayMinutes ?? undefined,
escalationNotificationId: doc.escalationNotificationId ? toStringId(doc.escalationNotificationId) : undefined,
secret: doc.secret ?? undefined,
cpuAlertThreshold: doc.cpuAlertThreshold,
cpuAlertCounter: doc.cpuAlertCounter,
Expand Down
1 change: 1 addition & 0 deletions server/src/service/business/incidentService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ export class IncidentService implements IIncidentService {
status: true,
statusCode,
message,
escalationSentAt: null,
};
return await this.incidentsRepository.create(incident);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ export class SuperSimpleQueue implements ISuperSimpleQueue {

this.scheduler.addTemplate("monitor-job", this.helper.getHeartbeatJob());
this.scheduler.addTemplate("geo-check-job", this.helper.getHeartbeatGeoJob());
this.scheduler.addTemplate("escalation-sweep-job", this.helper.getEscalationSweepJob());
this.scheduler.addTemplate("cleanup-orphaned", this.helper.getCleanupOrphanedJob());
this.scheduler.addTemplate("cleanup-retention-job", this.helper.getCleanupRetentionJob());
const monitors = await this.monitorsRepository.findAll();
Expand All @@ -105,6 +106,7 @@ export class SuperSimpleQueue implements ISuperSimpleQueue {
}

this.scheduler.addJob({ id: "cleanup-orphaned", template: "cleanup-orphaned", active: true });
this.scheduler.addJob({ id: "escalation-sweep", template: "escalation-sweep-job", active: true, repeat: 60 * 1000 });
this.scheduler.addJob({ id: "cleanup-retention", template: "cleanup-retention-job", active: true, repeat: 24 * 60 * 60 * 1000 });

return true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ export interface ISuperSimpleQueueHelper {
readonly serviceName: string;
getHeartbeatJob(): (monitor: Monitor) => Promise<void>;
getHeartbeatGeoJob(): (monitor: Monitor) => Promise<void>;
getEscalationSweepJob(): () => Promise<void>;
getCleanupOrphanedJob(): () => Promise<void>;
getCleanupRetentionJob(): () => Promise<void>;
isInMaintenanceWindow(monitorId: string, teamId: string): Promise<boolean>;
Expand Down Expand Up @@ -354,6 +355,58 @@ export class SuperSimpleQueueHelper implements ISuperSimpleQueueHelper {
};
};

/**
 * Builds the job function for the escalation sweep.
 *
 * Each run loads incidents via `incidentsRepository.findActive()` and, for
 * every incident that has no `escalationSentAt`, looks up its monitor. When the
 * monitor has both an escalation channel and a delay, and at least that many
 * minutes have elapsed since `incident.startTime`, the escalation notification
 * is sent and the incident is stamped with the send time.
 *
 * Error handling: a failure while processing one incident is logged at warn
 * level and the loop moves on to the next incident. A failure outside the
 * per-incident try (e.g. loading the incident list) is logged and rethrown.
 *
 * @returns An async function with no parameters that performs one sweep.
 */
getEscalationSweepJob = () => {
return async () => {
try {
const activeIncidents = await this.incidentsRepository.findActive();
if (!activeIncidents.length) {
return;
}

for (const incident of activeIncidents) {
try {
// Skip incidents that already have an escalation timestamp.
if (incident.escalationSentAt) {
continue;
}

const monitor = await this.monitorsRepository.findById(incident.monitorId, incident.teamId);
// Escalation needs both a channel and a delay; the falsy check also skips a delay of 0.
if (!monitor.escalationNotificationId || !monitor.escalationDelayMinutes) {
continue;
}

// NOTE(review): if `incident.startTime` is unparsable, getTime() yields NaN and the
// `<` comparison below is false, so the incident is NOT skipped — confirm startTime
// is always a valid date string.
const incidentStartTime = new Date(incident.startTime).getTime();
const delayInMs = monitor.escalationDelayMinutes * 60 * 1000;
// Not enough time has elapsed since the incident started.
if (Date.now() - incidentStartTime < delayInMs) {
continue;
}

const sent = await this.notificationsService.sendEscalationNotification(monitor, incident);
// Only stamp the incident when the send reported a truthy result.
// NOTE(review): if this update throws after a successful send, `escalationSentAt`
// stays unset and the incident would presumably be escalated again on a later sweep.
if (sent) {
await this.incidentsRepository.updateById(incident.id, incident.teamId, {
escalationSentAt: new Date().toISOString(),
});
}
} catch (error: unknown) {
// Per-incident failure: log and continue with the remaining incidents.
this.logger.warn({
message: error instanceof Error ? error.message : "Unknown error",
service: SERVICE_NAME,
method: "getEscalationSweepJob",
stack: error instanceof Error ? error.stack : undefined,
});
}
}
} catch (error: unknown) {
// Failure outside the per-incident handling: log, then propagate to the caller.
this.logger.warn({
message: error instanceof Error ? error.message : "Unknown error",
service: SERVICE_NAME,
method: "getEscalationSweepJob",
stack: error instanceof Error ? error.stack : undefined,
});
throw error;
}
};
};

async isInMaintenanceWindow(monitorId: string, teamId: string) {
const maintenanceWindows = await this.maintenanceWindowsRepository.findByMonitorId(monitorId, teamId);
// Check for active maintenance window:
Expand Down
Loading