# Saved searches and alerts dispatched in the distinct_windows_performance app.
# NOTE(review): presumably this is the app's savedsearches.conf - confirm the file name.

# Hourly alert: lists hosts whose newest winmetrics data point in the last hour
# is more than 900 seconds old. Results are appended to distinct_alerts.csv.
# checkin_age is the age in seconds of the newest data point per host; status
# is derived from checkin_age and the final filter keeps only inactive hosts.
[Host Status Alert (mstats)]
action.lookup = 1
action.lookup.append = 1
action.lookup.filename = distinct_alerts.csv
alert.suppress = 0
alert.track = 1
counttype = number of events
cron_schedule = 0 * * * *
description = Alerts when Host Status is inactive
dispatch.earliest_time = -1h
dispatch.latest_time = now
display.general.timeRangePicker.show = 0
display.general.type = statistics
display.page.search.mode = fast
display.page.search.tab = statistics
display.visualizations.show = 0
enableSched = 1
quantity = 0
relation = greater than
request.ui_dispatch_app = distinct_windows_performance
request.ui_dispatch_view = search
search = | mstats count(*) where index=winmetrics BY host span=1m\
| stats latest(_time) as _time by host\
| eval checkin_age=now()-_time\
| eval status=if(checkin_age>900,"inactive","active")\
| search status="inactive"\
| table host _time checkin_age status

# Hourly alert: same inactive-host check as the mstats variant, but driven from
# host_status.csv (as the description states) instead of re-scanning metrics.
# The lookup may hold several rows per host, so the newest _time per host is
# taken before the age (seconds) and status are computed.
[Host Status Alert (Lookup)]
action.lookup = 1
action.lookup.append = 1
action.lookup.filename = distinct_alerts.csv
alert.suppress = 0
alert.track = 1
counttype = number of events
cron_schedule = 0 * * * *
description = Alerts when Host Status is inactive\
This alert will utilize the host_status.csv for faster runtime
dispatch.earliest_time = -1h
dispatch.latest_time = now
display.general.timeRangePicker.show = 0
display.general.type = statistics
display.page.search.mode = fast
display.page.search.tab = statistics
display.visualizations.show = 0
enableSched = 1
quantity = 0
relation = greater than
request.ui_dispatch_app = distinct_windows_performance
request.ui_dispatch_view = search
search = | inputlookup host_status.csv\
| stats max(_time) as _time by host\
| eval checkin_age=now()-_time\
| eval status=if(checkin_age>900,"inactive","active")\
| search status="inactive"\
| table host _time checkin_age status

# Scheduled at minute 30 of every hour: merges hosts seen in winmetrics over the
# last 60 minutes with the hosts already in host_list.csv and writes the
# de-duplicated host column back to host_list.csv.
[Create Host_List]
alert.track = 0
cron_schedule = 30 * * * *
description = creates a list of the hosts in the environment
dispatch.earliest_time = -60m@m
dispatch.latest_time = now
display.general.timeRangePicker.show = 0
display.general.type = statistics
display.page.search.mode = fast
display.page.search.tab = statistics
display.visualizations.show = 0
enableSched = 1
request.ui_dispatch_app = distinct_windows_performance
request.ui_dispatch_view = search
search = | mstats count(*) where index=winmetrics BY host \
| stats count by host\
| append \
[ | inputlookup host_list.csv]\
| stats count by host\
| fields - count\
| outputlookup host_list.csv

# Rebuilds host_memory_thresholds.csv: one row per host seen in the metrics or
# already in the lookup. Existing threshold values are kept; missing ones are
# filled with the defaults in the eval steps below.
# No cron_schedule/enableSched is set in this stanza.
[Host Memory Threshold Lookup Create]
action.email.useNSSubject = 1
alert.track = 0
dispatch.earliest_time = -24h@h
dispatch.latest_time = now
display.general.timeRangePicker.show = 0
display.general.type = statistics
display.page.search.tab = statistics
display.visualizations.charting.chart = line
display.visualizations.show = 0
request.ui_dispatch_app = distinct_windows_performance
request.ui_dispatch_view = search
search = | mstats avg("% Committed Bytes In Use") as perc_mem_used where `winmetrics_index` BY host instance \
| fields - instance\
| append [|inputlookup host_memory_thresholds.csv | stats count by host]\
| stats count by host\
| fields - count\
| lookup host_memory_thresholds.csv host\
| eval high_memory_utilisation_free_MBytes=if(isnull(high_memory_utilisation_free_MBytes),1000,high_memory_utilisation_free_MBytes)\
| eval high_memory_utilisation_perc_mem_used=if(isnull(high_memory_utilisation_perc_mem_used),95,high_memory_utilisation_perc_mem_used)\
| eval high_hard_faults=if(isnull(high_hard_faults),1000,high_hard_faults)\
| eval active=if(isnull(active),"true",active)\
| fields - perc_mem_used\
| outputlookup host_memory_thresholds.csv

# Rebuilds host_cpu_thresholds.csv: one row per host seen in the metrics or
# already in the lookup. Existing threshold values are kept; missing ones are
# filled with the defaults in the eval steps below.
# Field names are case-sensitive: every isnull() check must read the same
# lower-case "high_..." column that the eval writes, otherwise the stored value
# is overwritten with the default on every run.
# NOTE(review): action.lookup.*, quantity and relation are set here without
# action.lookup = 1, counttype or enableSched - looks like leftover alert
# settings; confirm whether they are needed.
[Host CPU Threshold Lookup Create]
action.email.useNSSubject = 1
action.lookup.append = 1
action.lookup.filename = distinct_alerts.csv
alert.track = 0
cron_schedule = 0 * * * *
dispatch.earliest_time = -1h
dispatch.latest_time = now
display.general.timeRangePicker.show = 0
display.general.type = statistics
display.page.search.mode = fast
display.page.search.tab = statistics
display.visualizations.show = 0
quantity = 0
relation = greater than
request.ui_dispatch_app = distinct_windows_performance
request.ui_dispatch_view = search
search = | mstats avg("% Processor Time") as processor_time where index=winmetrics BY host \
| append [|inputlookup host_cpu_thresholds.csv | stats count by host]\
| stats count by host \
| fields - count\
| lookup host_cpu_thresholds.csv host\
| eval high_system_usage_Privileged_time=if(isnull(high_system_usage_Privileged_time),30,high_system_usage_Privileged_time)\
| eval high_system_usage_Processor_time=if(isnull(high_system_usage_Processor_time),80,high_system_usage_Processor_time)\
| eval high_interrupt_time=if(isnull(high_interrupt_time),20,high_interrupt_time)\
| eval high_DPC_time_with_processor_bottleneck_dpc_time=if(isnull(high_DPC_time_with_processor_bottleneck_dpc_time),15,high_DPC_time_with_processor_bottleneck_dpc_time)\
| eval high_DPC_time_with_processor_bottleneck_Processor_time=if(isnull(high_DPC_time_with_processor_bottleneck_Processor_time),80,high_DPC_time_with_processor_bottleneck_Processor_time)\
| eval high_DPC_Time=if(isnull(high_DPC_Time),20,high_DPC_Time)\
| eval high_processor_usage=if(isnull(high_processor_usage),80,high_processor_usage)\
| eval high_processor_queue=if(isnull(high_processor_queue),5,high_processor_queue)\
| eval active=if(isnull(active),"true",active)\
| fields - processor_time\
| outputlookup host_cpu_thresholds.csv

# Rebuilds host_pagefile_thresholds.csv: one row per host seen in the metrics or
# already in the lookup. An existing pagefile_high/active value is kept; missing
# ones default to 90 and "true".
# No cron_schedule/enableSched is set in this stanza.
[Host PageFile Threshold Lookup Create]
action.email.useNSSubject = 1
alert.track = 0
dispatch.earliest_time = -24h@h
dispatch.latest_time = now
display.general.timeRangePicker.show = 0
display.general.type = statistics
display.page.search.tab = statistics
display.visualizations.charting.chart = line
display.visualizations.show = 0
request.ui_dispatch_app = distinct_windows_performance
request.ui_dispatch_view = search
search = | mstats avg("% Usage") as Pagefile_Usage where index=winmetrics BY host instance span=15m \
| append [|inputlookup host_pagefile_thresholds.csv | stats count by host]\
| stats count by host \
| fields - count\
| lookup host_pagefile_thresholds.csv host\
| eval pagefile_high=if(isnull(pagefile_high),90,pagefile_high)\
| eval active=if(isnull(active),"true",active)\
| outputlookup host_pagefile_thresholds.csv

# Rebuilds host_network_thresholds.csv: one row per host/instance pair seen in
# the metrics or already in the lookup. Existing threshold values are kept;
# missing ones are filled with the defaults in the eval steps below.
# The appended lookup rows keep both host and instance so that they survive the
# following "stats count by host instance" (rows lacking a BY field are dropped).
# No cron_schedule/enableSched is set in this stanza.
[Host Network Threshold Lookup Create]
action.email.useNSSubject = 1
alert.track = 0
dispatch.earliest_time = -24h@h
dispatch.latest_time = now
display.general.timeRangePicker.show = 0
display.general.type = statistics
display.page.search.tab = statistics
display.visualizations.charting.chart = line
display.visualizations.show = 0
request.ui_dispatch_app = distinct_windows_performance
request.ui_dispatch_view = search
search = | mstats avg("Bytes Sent/sec") as Bytes_Sent where index=winmetrics BY host instance \
| append [|inputlookup host_network_thresholds.csv | stats count by host instance]\
| stats count by host instance\
| fields - count\
| lookup host_network_thresholds.csv host instance\
| eval send_utilisation_high=if(isnull(send_utilisation_high),90,send_utilisation_high)\
| eval receive_utilisation_high=if(isnull(receive_utilisation_high),90,receive_utilisation_high)\
| eval packet_issues_packets_outbound_discarded=if(isnull(packet_issues_packets_outbound_discarded),0,packet_issues_packets_outbound_discarded)\
| eval packet_issues_packets_outbound_errors=if(isnull(packet_issues_packets_outbound_errors),0,packet_issues_packets_outbound_errors)\
| eval packet_issues_packets_received_discarded=if(isnull(packet_issues_packets_received_discarded),0,packet_issues_packets_received_discarded)\
| eval packet_issues_packets_received_errors=if(isnull(packet_issues_packets_received_errors),0,packet_issues_packets_received_errors)\
| eval active=if(isnull(active),"true",active)\
| outputlookup host_network_thresholds.csv

# Rebuilds host_disk_thresholds.csv: one row per host/instance pair (instances
# ending in ":" only) seen in the metrics or already in the lookup. Existing
# threshold values are kept; missing ones are filled with the defaults in the
# eval steps below.
# The append reads host_disk_thresholds.csv (the file this search maintains),
# so rows for disks with no data in the search window are carried over.
# NOTE(review): action.lookup.*, quantity and relation are set here without
# action.lookup = 1, counttype or enableSched - looks like leftover alert
# settings; confirm whether they are needed.
[Host Disk Threshold Lookup Create]
action.email.useNSSubject = 1
action.lookup.append = 1
action.lookup.filename = distinct_alerts.csv
alert.track = 0
cron_schedule = 0 * * * *
dispatch.earliest_time = -1h
dispatch.latest_time = now
display.general.timeRangePicker.show = 0
display.general.type = statistics
display.page.search.mode = fast
display.page.search.tab = statistics
display.visualizations.show = 0
quantity = 0
relation = greater than
request.ui_dispatch_app = distinct_windows_performance
request.ui_dispatch_view = search
search = | mstats avg("% Free Space") as perc_free_space where index=winmetrics BY host instance\
| append [|inputlookup host_disk_thresholds.csv | stats count by host instance]\
| stats count by host instance\
| fields - count\
| lookup host_disk_thresholds.csv host instance\
| search instance=*: \
| eval low_disk_space_perc_free_space=if(isnull(low_disk_space_perc_free_space),10,low_disk_space_perc_free_space)\
| eval low_disk_space_free_MB=if(isnull(low_disk_space_free_MB),5000,low_disk_space_free_MB)\
| eval high_disk_queue=if(isnull(high_disk_queue),20,high_disk_queue)\
| eval high_latency=if(isnull(high_latency),50,high_latency)\
| eval active=if(isnull(active),"true",active)\
| fields - perc_free_space\
| outputlookup host_disk_thresholds.csv

# Hourly alert: for the _Total pagefile instance, raises "Pagefile High" when
# the 15-minute average "% Usage" exceeds the host's pagefile_high threshold
# (default 90 when the host has no row in host_pagefile_thresholds.csv).
# Hosts whose lookup row has active != true are skipped.
# Results are appended to distinct_alerts.csv.
[PageFile Alert]
action.lookup = 1
action.lookup.append = 1
action.lookup.filename = distinct_alerts.csv
alert.digest_mode = 0
alert.suppress = 0
alert.suppress.fields = host
alert.track = 1
counttype = number of events
cron_schedule = 0 * * * *
dispatch.earliest_time = -1h
dispatch.latest_time = now
display.general.type = statistics
display.page.search.mode = fast
display.page.search.tab = statistics
enableSched = 1
quantity = 0
relation = greater than
request.ui_dispatch_app = distinct_windows_performance
request.ui_dispatch_view = search
search = | mstats avg("% Usage") as Pagefile_Usage where index=winmetrics BY host instance span=15m \
| search instance=_Total \
| fields - instance\
| lookup host_pagefile_thresholds.csv host\
| eval pagefile_high=if(isnull(pagefile_high),90,pagefile_high)\
| eval alert_type=case(Pagefile_Usage > pagefile_high,"Pagefile High")\
| eval active=if(isnull(active),"true",active)\
| search alert_type=* active=true\
| mvexpand alert_type

# Hourly alert on per-interface network metrics (15-minute averages).
# alert_type is built up as a multivalue field from four checks - send
# utilisation, receive utilisation, discarded packets, packet errors - and then
# expanded to one result row per alert type. Results are appended to
# distinct_alerts.csv.
# Thresholds come from host_network_thresholds.csv, matched on host AND
# instance; missing values fall back to the defaults in the eval steps below.
# Field names are case-sensitive, so the utilisation evals must reference
# Bytes_Sent / Bytes_Received exactly as named in the mstats clause.
# NOTE(review): utilisation divides a bytes/sec average by Current_Bandwidth;
# if Current Bandwidth is reported in bits/sec the percentage is understated by
# a factor of 8 - confirm the unit before relying on the 90 default.
[Network Alerts]
action.lookup = 1
action.lookup.append = 1
action.lookup.filename = distinct_alerts.csv
alert.digest_mode = 0
alert.suppress = 0
alert.suppress.fields = host
alert.track = 1
counttype = number of events
cron_schedule = 0 * * * *
dispatch.earliest_time = -1h
dispatch.latest_time = now
display.general.type = statistics
display.page.search.mode = fast
display.page.search.tab = statistics
enableSched = 1
quantity = 0
relation = greater than
request.ui_dispatch_app = distinct_windows_performance
request.ui_dispatch_view = search
search = | mstats avg("Bytes Sent/sec") as Bytes_Sent avg("Bytes Received/sec") as Bytes_Received avg("Current Bandwidth") as Current_Bandwidth avg("Packets Outbound Discarded") as Packets_Outbound_Discarded avg("Packets Outbound Errors") as Packets_Outbound_Errors avg("Packets Received Discarded") as Packets_Received_Discarded avg("Packets Received Errors") as Packets_Received_Errors where index=winmetrics BY host instance span=15m\
| eval send_utilisation=round((Bytes_Sent/Current_Bandwidth)*100,5)\
| eval receive_utilisation=round((Bytes_Received/Current_Bandwidth)*100,5)\
| eval Packets_Outbound_Discarded=if(isnull(Packets_Outbound_Discarded),0,Packets_Outbound_Discarded)\
| eval Packets_Outbound_Errors=if(isnull(Packets_Outbound_Errors),0,Packets_Outbound_Errors)\
| eval Packets_Received_Discarded=if(isnull(Packets_Received_Discarded),0,Packets_Received_Discarded)\
| eval Packets_Received_Errors=if(isnull(Packets_Received_Errors),0,Packets_Received_Errors)\
| eval Bytes_Sent=if(isnull(Bytes_Sent),0,Bytes_Sent)\
| eval Bytes_Received=if(isnull(Bytes_Received),0,Bytes_Received)\
| lookup host_network_thresholds.csv host instance\
| eval send_utilisation_high=if(isnull(send_utilisation_high),90,send_utilisation_high)\
| eval receive_utilisation_high=if(isnull(receive_utilisation_high),90,receive_utilisation_high)\
| eval packet_issues_packets_outbound_discarded=if(isnull(packet_issues_packets_outbound_discarded),0,packet_issues_packets_outbound_discarded)\
| eval packet_issues_packets_outbound_errors=if(isnull(packet_issues_packets_outbound_errors),0,packet_issues_packets_outbound_errors)\
| eval packet_issues_packets_received_discarded=if(isnull(packet_issues_packets_received_discarded),0,packet_issues_packets_received_discarded)\
| eval packet_issues_packets_received_errors=if(isnull(packet_issues_packets_received_errors),0,packet_issues_packets_received_errors)\
| eval alert_type=case(send_utilisation > send_utilisation_high,"Send Utilisation High")\
| eval alert_type=if(isnull(alert_type),case(receive_utilisation > receive_utilisation_high,"Receive Utilisation High"),mvappend(case(receive_utilisation > receive_utilisation_high,"Receive Utilisation High"),alert_type))\
| eval alert_type=if(isnull(alert_type),case(Packets_Outbound_Discarded> packet_issues_packets_outbound_discarded OR Packets_Received_Discarded > packet_issues_packets_received_discarded,"Packet Discarded"),mvappend(case(Packets_Outbound_Discarded> packet_issues_packets_outbound_discarded OR Packets_Received_Discarded > packet_issues_packets_received_discarded,"Packet Discarded"),alert_type))\
| eval alert_type=if(isnull(alert_type),case(Packets_Outbound_Errors > packet_issues_packets_outbound_errors OR Packets_Received_Errors> packet_issues_packets_received_errors,"Packet Errors"),mvappend(case(Packets_Outbound_Errors > packet_issues_packets_outbound_errors OR Packets_Received_Errors> packet_issues_packets_received_errors,"Packet Errors"),alert_type))\
| eval active=if(isnull(active),"true",active)\
| search alert_type=* active=true\
| mvexpand alert_type

# Scheduled at minute 30 of every hour: maintains host_status.csv with one row
# per host (host, _time of newest data point, checkin_age in seconds, status).
# Fresh per-host timestamps from winmetrics are merged with the timestamps
# already stored in the lookup, keeping the newest _time per host. checkin_age
# and status are then recomputed for every host, so hosts that have stopped
# reporting age into "inactive" instead of keeping their last stored status.
# Hosts not seen for 864000 seconds (10 days) are dropped from the lookup.
[Update Host_Status]
alert.track = 0
cron_schedule = 30 * * * *
description = updates the host_status csv, monitors the last time the host updated metric data in Splunk
dispatch.earliest_time = -60m@m
dispatch.latest_time = now
display.general.timeRangePicker.show = 0
display.general.type = statistics
display.page.search.mode = fast
display.page.search.tab = statistics
display.visualizations.show = 0
enableSched = 1
request.ui_dispatch_app = distinct_windows_performance
request.ui_dispatch_view = search
search = | mstats count(*) where index=winmetrics BY host span=1m\
| stats latest(_time) as _time by host\
| append [|inputlookup host_status.csv | fields host _time]\
| stats max(_time) as _time by host\
| eval checkin_age=now()-_time\
| eval status=if(checkin_age>900,"inactive","active")\
| search checkin_age<864000\
| table host _time checkin_age status\
| outputlookup host_status.csv

# Hourly alert on CPU metrics (15-minute averages, evaluated on the _Total
# instance). alert_type is built up as a multivalue field from six checks and
# then expanded to one result row per alert type. Results are appended to
# distinct_alerts.csv.
# - processor_count is the number of distinct instances per host; one is
#   subtracted because _Total is counted as an instance.
# - The queue-length subsearch is joined on host, instance and _time so each
#   host only receives its own queue length.
# - queue_to_processor is queue length per processor, compared against
#   high_processor_queue.
# - Thresholds come from host_cpu_thresholds.csv; the fallback defaults match
#   those written by the "Host CPU Threshold Lookup Create" search. Field names
#   are case-sensitive, so each isnull() check reads the lower-case "high_..."
#   column.
[CPU Alerts]
action.lookup = 1
action.lookup.append = 1
action.lookup.filename = distinct_alerts.csv
alert.digest_mode = 0
alert.suppress = 0
alert.suppress.fields = host
alert.track = 1
counttype = number of events
cron_schedule = 0 * * * *
dispatch.earliest_time = -1h
dispatch.latest_time = now
display.general.type = statistics
display.page.search.mode = fast
display.page.search.tab = statistics
enableSched = 1
quantity = 0
relation = greater than
request.ui_dispatch_app = distinct_windows_performance
request.ui_dispatch_view = search
search = | mstats avg("% DPC Time") as dpc_time avg("% Interrupt Time") as Interrupt_time avg("% User Time") as User_time avg("% Processor Time") as Processor_time avg("% Privileged Time") as Privileged_time where index=winmetrics BY host instance span=15m \
| eventstats dc(instance) as processor_count by host\
| join type=outer host instance _time [|mstats avg("Processor Queue Length") as Processor_Queue_Length where index=winmetrics BY host instance span=15m \
| eval instance="_Total"]\
| search instance=_Total \
| eval queue_to_processor=Processor_Queue_Length/(processor_count-1)\
| fields - instance\
| lookup host_cpu_thresholds.csv host\
| eval high_system_usage_Privileged_time=if(isnull(high_system_usage_Privileged_time),30,high_system_usage_Privileged_time)\
| eval high_system_usage_Processor_time=if(isnull(high_system_usage_Processor_time),80,high_system_usage_Processor_time)\
| eval high_interrupt_time=if(isnull(high_interrupt_time),20,high_interrupt_time)\
| eval high_DPC_time_with_processor_bottleneck_dpc_time=if(isnull(high_DPC_time_with_processor_bottleneck_dpc_time),15,high_DPC_time_with_processor_bottleneck_dpc_time)\
| eval high_DPC_time_with_processor_bottleneck_Processor_time=if(isnull(high_DPC_time_with_processor_bottleneck_Processor_time),80,high_DPC_time_with_processor_bottleneck_Processor_time)\
| eval high_DPC_Time=if(isnull(high_DPC_Time),20,high_DPC_Time)\
| eval high_processor_usage=if(isnull(high_processor_usage),80,high_processor_usage)\
| eval high_processor_queue=if(isnull(high_processor_queue),5,high_processor_queue)\
| eval alert_type=case(Privileged_time > high_system_usage_Privileged_time AND Processor_time > high_system_usage_Processor_time,"High System Usage")\
| eval alert_type=if(isnull(alert_type),case(Interrupt_time > high_interrupt_time,"High Interrupt time"),mvappend(case(Interrupt_time > high_interrupt_time,"High Interrupt time"),alert_type))\
| eval alert_type=if(isnull(alert_type),case(dpc_time > high_DPC_time_with_processor_bottleneck_dpc_time AND Processor_time > high_DPC_time_with_processor_bottleneck_Processor_time,"High DPC Time with Processor bottleneck"),mvappend(case(dpc_time > high_DPC_time_with_processor_bottleneck_dpc_time AND Processor_time > high_DPC_time_with_processor_bottleneck_Processor_time,"High DPC Time with Processor bottleneck"),alert_type))\
| eval alert_type=if(isnull(alert_type),case(dpc_time > high_DPC_Time,"High DPC Time"),mvappend(case(dpc_time > high_DPC_Time,"High DPC Time"),alert_type))\
| eval alert_type=if(isnull(alert_type),case(Processor_time > high_processor_usage,"High Processor Usage"),mvappend(case(Processor_time > high_processor_usage,"High Processor Usage"),alert_type))\
| eval alert_type=if(isnull(alert_type),case(queue_to_processor> high_processor_queue,"High Processor Queue"),mvappend(case(queue_to_processor> high_processor_queue,"High Processor Queue"),alert_type))\
| eval active=if(isnull(active),"true",active)\
| search alert_type=* active=true\
| mvexpand alert_type

# Hourly alert on per-disk metrics (15-minute averages, instances ending in
# ":" only). alert_type is built up as a multivalue field from three checks -
# low disk space, high disk queue, high latency - and then expanded to one
# result row per alert type. Results are appended to distinct_alerts.csv.
# Thresholds come from host_disk_thresholds.csv, matched on host and instance;
# missing values fall back to the defaults in the eval steps below.
# NOTE(review): Latency is the average of "Avg. Disk sec/Transfer" and is
# compared against high_latency (default 50); if the counter is in seconds the
# default would mean 50 s - confirm the intended unit.
[Disk Alert]
action.lookup = 1
action.lookup.append = 1
action.lookup.filename = distinct_alerts.csv
alert.digest_mode = 0
alert.suppress = 0
alert.track = 1
counttype = number of events
cron_schedule = 0 * * * *
dispatch.earliest_time = -1h
dispatch.latest_time = now
display.general.type = statistics
display.page.search.mode = fast
display.page.search.tab = statistics
enableSched = 1
quantity = 0
relation = greater than
request.ui_dispatch_app = distinct_windows_performance
request.ui_dispatch_view = search
search = | mstats avg("% Free Space") as perc_free_space avg("Free Megabytes") as Free_MB avg("Avg. Disk Queue Length") as Disk_queue avg("Avg. Disk sec/Transfer") as Latency where index=winmetrics BY host instance span=15m \
| search instance=*: \
| lookup host_disk_thresholds.csv host instance\
| eval low_disk_space_perc_free_space=if(isnull(low_disk_space_perc_free_space),10,low_disk_space_perc_free_space)\
| eval low_disk_space_free_MB=if(isnull(low_disk_space_free_MB),5000,low_disk_space_free_MB)\
| eval high_disk_queue=if(isnull(high_disk_queue),20,high_disk_queue)\
| eval high_latency=if(isnull(high_latency),50,high_latency)\
| eval alert_type=case(perc_free_space < low_disk_space_perc_free_space AND Free_MB < low_disk_space_free_MB,"Low Disk Space")\
| eval alert_type=if(isnull(alert_type),case(Disk_queue > high_disk_queue,"High Disk Queue"),mvappend(case(Disk_queue > high_disk_queue,"High Disk Queue"),alert_type))\
| eval alert_type=if(isnull(alert_type),case(Latency > high_latency,"High Latency"),mvappend(case(Latency > high_latency,"High Latency"),alert_type))\
| eval active=if(isnull(active),"true",active)\
| search alert_type=* active=true\
| mvexpand alert_type

# Hourly alert on memory metrics (15-minute averages). alert_type is built up
# as a multivalue field from two checks - high memory utilisation and high hard
# faults (Page_Reads) - and then expanded to one result row per alert type.
# Results are appended to distinct_alerts.csv.
# Thresholds come from host_memory_thresholds.csv; missing values fall back to
# the defaults in the eval steps below.
[Memory Alerts]
action.lookup = 1
action.lookup.append = 1
action.lookup.filename = distinct_alerts.csv
alert.digest_mode = 0
alert.suppress = 0
alert.track = 1
counttype = number of events
cron_schedule = 0 * * * *
dispatch.earliest_time = -1h
dispatch.latest_time = now
display.general.type = statistics
display.page.search.mode = fast
display.page.search.tab = statistics
enableSched = 1
quantity = 0
relation = greater than
request.ui_dispatch_app = distinct_windows_performance
request.ui_dispatch_view = search
search = | mstats avg("% Committed Bytes In Use") as perc_mem_used avg("Available MBytes") as Free_MBytes avg("Page Faults/sec") as Page_Faults avg("Page Reads/sec") as Page_Reads where `winmetrics_index` BY host instance span=15m\
| fields - instance\
| lookup host_memory_thresholds.csv host\
| eval high_memory_utilisation_free_MBytes=if(isnull(high_memory_utilisation_free_MBytes),1000,high_memory_utilisation_free_MBytes)\
| eval high_memory_utilisation_perc_mem_used=if(isnull(high_memory_utilisation_perc_mem_used),95,high_memory_utilisation_perc_mem_used)\
| eval high_hard_faults=if(isnull(high_hard_faults),1000,high_hard_faults)\
| eval alert_type=case(perc_mem_used > high_memory_utilisation_perc_mem_used AND Free_MBytes < high_memory_utilisation_free_MBytes,"High Memory Utilisation")\
| eval alert_type=if(isnull(alert_type),case(Page_Reads > high_hard_faults,"High Hard Faults"),mvappend(case(Page_Reads > high_hard_faults,"High Hard Faults"),alert_type))\
| eval active=if(isnull(active),"true",active)\
| search alert_type=* active=true\
| mvexpand alert_type

# Builds service_monitor.csv: for every service_name enabled in
# service_check.csv (check_enabled="TRUE"), records the WinHostMon service rows
# seen per host over the last 24 hours and marks each with enabled="TRUE".
# No cron_schedule/enableSched is set in this stanza.
[Create Service Monitors]
action.email.useNSSubject = 1
alert.track = 0
description = Create services monitoring based on the check lookup
dispatch.earliest_time = -24h@h
dispatch.latest_time = now
display.general.timeRangePicker.show = 0
display.general.type = statistics
display.page.search.mode = fast
display.page.search.tab = statistics
display.visualizations.charting.chart = line
display.visualizations.show = 0
request.ui_dispatch_app = distinct_windows_performance
request.ui_dispatch_view = search
search = `winservice_index` source="service" sourcetype="WinHostMon" [|inputlookup service_check.csv | search check_enabled="TRUE" | return 9999 service_name] | stats count by host Type Name DisplayName State Status Started service_name | eval enabled="TRUE" | fields - count | outputlookup service_monitor.csv

# Hourly alert: for every host/service pair enabled in service_monitor.csv,
# reports WinHostMon service rows from the last hour whose State is "Stopped",
# tagged with alert_type="Service Alerts". Results are appended to
# distinct_alerts.csv.
[Service Alerts]
action.lookup = 1
action.lookup.append = 1
action.lookup.filename = distinct_alerts.csv
alert.suppress = 0
alert.track = 1
counttype = number of events
cron_schedule = 0 * * * *
description = Alerts for when a Service is in a stopped state
dispatch.earliest_time = -1h
dispatch.latest_time = now
display.general.type = statistics
display.page.search.mode = fast
display.page.search.tab = statistics
display.visualizations.charting.chart = line
enableSched = 1
quantity = 0
relation = greater than
request.ui_dispatch_app = distinct_windows_performance
request.ui_dispatch_view = search
search = source="service" `winservice_index` sourcetype="WinHostMon" [|inputlookup service_monitor.csv | search enabled="TRUE" | return 9999 service_name host] | stats count by host Type Name DisplayName State Status Started service_name | fields - count | search State="Stopped" | eval alert_type="Service Alerts"