# Saved searches for the distinct_windows_performance app (Splunk savedsearches.conf).
#
# Conventions used in this file:
#   * Multi-line "search" / "description" values are continued with a trailing
#     backslash; comment lines must never be placed inside such a value.
#   * "... Alert(s)" searches have action.lookup = 1 and append their results
#     to distinct_alerts.csv.
#   * "... Threshold Lookup Create" searches merge the hosts currently reporting
#     metrics with the rows already in the matching *_thresholds.csv lookup and
#     fill in a default for every threshold that is still empty.
#   * Field names in SPL are case-sensitive.

# Hourly alert: lists hosts whose newest metric data point in the last hour is
# more than 900 seconds old.
[Host Status Alert (mstats)]
action.lookup = 1
action.lookup.append = 1
action.lookup.filename = distinct_alerts.csv
alert.suppress = 0
alert.track = 1
counttype = number of events
cron_schedule = 0 * * * *
description = Alerts when Host Status is inactive
dispatch.earliest_time = -1h
dispatch.latest_time = now
display.general.timeRangePicker.show = 0
display.general.type = statistics
display.page.search.mode = fast
display.page.search.tab = statistics
display.visualizations.show = 0
enableSched = 1
quantity = 0
relation = greater than
request.ui_dispatch_app = distinct_windows_performance
request.ui_dispatch_view = search
search = | mstats count(*) where index=winmetrics BY host span=1m\
| stats latest(_time) as _time by host\
| eval checkin_age=now()-_time\
| eval status=if(checkin_age>900,"inactive","active")\
| search status="inactive"\
| table host _time checkin_age status

# Hourly alert: same check as above, but reads the last check-in time per host
# from host_status.csv (maintained by [Update Host_Status]) instead of running
# mstats. checkin_age and status are recomputed at run time so a stale status
# stored in the lookup is never reported.
[Host Status Alert (Lookup)]
action.lookup = 1
action.lookup.append = 1
action.lookup.filename = distinct_alerts.csv
alert.suppress = 0
alert.track = 1
counttype = number of events
cron_schedule = 0 * * * *
description = Alerts when Host Status is inactive\
This alert will utilize the host_status.csv for faster runtime
dispatch.earliest_time = -1h
dispatch.latest_time = now
display.general.timeRangePicker.show = 0
display.general.type = statistics
display.page.search.mode = fast
display.page.search.tab = statistics
display.visualizations.show = 0
enableSched = 1
quantity = 0
relation = greater than
request.ui_dispatch_app = distinct_windows_performance
request.ui_dispatch_view = search
search = | inputlookup host_status.csv\
| stats max(_time) as _time by host\
| eval checkin_age=now()-_time\
| eval status=if(checkin_age>900,"inactive","active")\
| search status="inactive"\
| table host _time checkin_age status

# Scheduled at minute 30 of every hour: unions the hosts seen in the last
# 60 minutes with the existing host_list.csv and writes the de-duplicated list back.
[Create Host_List]
alert.track = 0
cron_schedule = 30 * * * *
description = creates a list of the hosts in the environment
dispatch.earliest_time = -60m@m
dispatch.latest_time = now
display.general.timeRangePicker.show = 0
display.general.type = statistics
display.page.search.mode = fast
display.page.search.tab = statistics
display.visualizations.show = 0
enableSched = 1
request.ui_dispatch_app = distinct_windows_performance
request.ui_dispatch_view = search
search = | mstats count(*) where index=winmetrics BY host \
| stats count by host\
| append \
[ | inputlookup host_list.csv]\
| stats count by host\
| fields - count\
| outputlookup host_list.csv

# Builds/refreshes host_memory_thresholds.csv (one row per host).
# Not scheduled in this stanza (no cron_schedule / enableSched).
[Host Memory Threshold Lookup Create]
action.email.useNSSubject = 1
alert.track = 0
dispatch.earliest_time = -24h@h
dispatch.latest_time = now
display.general.timeRangePicker.show = 0
display.general.type = statistics
display.page.search.tab = statistics
display.visualizations.charting.chart = line
display.visualizations.show = 0
request.ui_dispatch_app = distinct_windows_performance
request.ui_dispatch_view = search
search = | mstats avg("% Committed Bytes In Use") as perc_mem_used where `winmetrics_index` BY host instance \
| fields - instance\
| append [|inputlookup host_memory_thresholds.csv | stats count by host]\
| stats count by host\
| fields - count\
| lookup host_memory_thresholds.csv host\
| eval high_memory_utilisation_free_MBytes=if(isnull(high_memory_utilisation_free_MBytes),1000,high_memory_utilisation_free_MBytes)\
| eval high_memory_utilisation_perc_mem_used=if(isnull(high_memory_utilisation_perc_mem_used),95,high_memory_utilisation_perc_mem_used)\
| eval high_hard_faults=if(isnull(high_hard_faults),1000,high_hard_faults)\
| eval active=if(isnull(active),"true",active)\
| fields - perc_mem_used\
| outputlookup host_memory_thresholds.csv

# Builds/refreshes host_cpu_thresholds.csv (one row per host).
# The two "DPC time with processor bottleneck" thresholds accept the legacy
# capitalised column name (High_...) as a fallback so that values already
# stored under either spelling are kept instead of being reset to the default.
[Host CPU Threshold Lookup Create]
action.email.useNSSubject = 1
action.lookup.append = 1
action.lookup.filename = distinct_alerts.csv
alert.track = 0
cron_schedule = 0 * * * *
dispatch.earliest_time = -1h
dispatch.latest_time = now
display.general.timeRangePicker.show = 0
display.general.type = statistics
display.page.search.mode = fast
display.page.search.tab = statistics
display.visualizations.show = 0
quantity = 0
relation = greater than
request.ui_dispatch_app = distinct_windows_performance
request.ui_dispatch_view = search
search = | mstats avg("% Processor Time") as processor_time where index=winmetrics BY host \
| append [|inputlookup host_cpu_thresholds.csv | stats count by host]\
| stats count by host \
| fields - count\
| lookup host_cpu_thresholds.csv host\
| eval high_system_usage_Privileged_time=if(isnull(high_system_usage_Privileged_time),30,high_system_usage_Privileged_time)\
| eval high_system_usage_Processor_time=if(isnull(high_system_usage_Processor_time),80,high_system_usage_Processor_time)\
| eval high_interrupt_time=if(isnull(high_interrupt_time),20,high_interrupt_time)\
| eval high_DPC_time_with_processor_bottleneck_dpc_time=coalesce(high_DPC_time_with_processor_bottleneck_dpc_time,High_DPC_time_with_processor_bottleneck_dpc_time,15)\
| eval high_DPC_time_with_processor_bottleneck_Processor_time=coalesce(high_DPC_time_with_processor_bottleneck_Processor_time,High_DPC_time_with_processor_bottleneck_Processor_time,80)\
| eval high_DPC_Time=if(isnull(high_DPC_Time),20,high_DPC_Time)\
| eval high_processor_usage=if(isnull(high_processor_usage),80,high_processor_usage)\
| eval high_processor_queue=if(isnull(high_processor_queue),5,high_processor_queue)\
| eval active=if(isnull(active),"true",active)\
| fields - processor_time\
| outputlookup host_cpu_thresholds.csv

# Builds/refreshes host_pagefile_thresholds.csv (one row per host).
[Host PageFile Threshold Lookup Create]
action.email.useNSSubject = 1
alert.track = 0
dispatch.earliest_time = -24h@h
dispatch.latest_time = now
display.general.timeRangePicker.show = 0
display.general.type = statistics
display.page.search.tab = statistics
display.visualizations.charting.chart = line
display.visualizations.show = 0
request.ui_dispatch_app = distinct_windows_performance
request.ui_dispatch_view = search
search = | mstats avg("% Usage") as Pagefile_Usage where index=winmetrics BY host instance span=15m \
| append [|inputlookup host_pagefile_thresholds.csv | stats count by host]\
| stats count by host \
| fields - count\
| lookup host_pagefile_thresholds.csv host\
| eval pagefile_high=if(isnull(pagefile_high),90,pagefile_high)\
| eval active=if(isnull(active),"true",active)\
| outputlookup host_pagefile_thresholds.csv

# Builds/refreshes host_network_thresholds.csv (one row per host + instance).
# The existing lookup rows are appended with their instance so that rows for
# interfaces not reporting in the search window survive the "stats ... by host
# instance" that follows.
[Host Network Threshold Lookup Create]
action.email.useNSSubject = 1
alert.track = 0
dispatch.earliest_time = -24h@h
dispatch.latest_time = now
display.general.timeRangePicker.show = 0
display.general.type = statistics
display.page.search.tab = statistics
display.visualizations.charting.chart = line
display.visualizations.show = 0
request.ui_dispatch_app = distinct_windows_performance
request.ui_dispatch_view = search
search = | mstats avg("Bytes Sent/sec") as Bytes_Sent where index=winmetrics BY host instance \
| append [|inputlookup host_network_thresholds.csv | stats count by host instance]\
| stats count by host instance\
| fields - count\
| lookup host_network_thresholds.csv host instance\
| eval send_utilisation_high=if(isnull(send_utilisation_high),90,send_utilisation_high)\
| eval receive_utilisation_high=if(isnull(receive_utilisation_high),90,receive_utilisation_high)\
| eval packet_issues_packets_outbound_discarded=if(isnull(packet_issues_packets_outbound_discarded),0,packet_issues_packets_outbound_discarded)\
| eval packet_issues_packets_outbound_errors=if(isnull(packet_issues_packets_outbound_errors),0,packet_issues_packets_outbound_errors)\
| eval packet_issues_packets_received_discarded=if(isnull(packet_issues_packets_received_discarded),0,packet_issues_packets_received_discarded)\
| eval packet_issues_packets_received_errors=if(isnull(packet_issues_packets_received_errors),0,packet_issues_packets_received_errors)\
| eval active=if(isnull(active),"true",active)\
| outputlookup host_network_thresholds.csv

# Builds/refreshes host_disk_thresholds.csv (one row per host + instance,
# restricted to instances matching "*:").
# The append subsearch reads host_disk_thresholds.csv, the same lookup this
# search writes, so existing rows are carried forward.
[Host Disk Threshold Lookup Create]
action.email.useNSSubject = 1
action.lookup.append = 1
action.lookup.filename = distinct_alerts.csv
alert.track = 0
cron_schedule = 0 * * * *
dispatch.earliest_time = -1h
dispatch.latest_time = now
display.general.timeRangePicker.show = 0
display.general.type = statistics
display.page.search.mode = fast
display.page.search.tab = statistics
display.visualizations.show = 0
quantity = 0
relation = greater than
request.ui_dispatch_app = distinct_windows_performance
request.ui_dispatch_view = search
search = | mstats avg("% Free Space") as perc_free_space where index=winmetrics BY host instance\
| append [|inputlookup host_disk_thresholds.csv | stats count by host instance]\
| stats count by host instance\
| fields - count\
| lookup host_disk_thresholds.csv host instance\
| search instance=*: \
| eval low_disk_space_perc_free_space=if(isnull(low_disk_space_perc_free_space),10,low_disk_space_perc_free_space)\
| eval low_disk_space_free_MB=if(isnull(low_disk_space_free_MB),5000,low_disk_space_free_MB)\
| eval high_disk_queue=if(isnull(high_disk_queue),20,high_disk_queue)\
| eval high_latency=if(isnull(high_latency),50,high_latency)\
| eval active=if(isnull(active),"true",active)\
| fields - perc_free_space\
| outputlookup host_disk_thresholds.csv

# Hourly alert: "Pagefile High" when the _Total page file usage of a 15-minute
# bucket exceeds the host's pagefile_high threshold (default 90).
[PageFile Alert]
action.lookup = 1
action.lookup.append = 1
action.lookup.filename = distinct_alerts.csv
alert.digest_mode = 0
alert.suppress = 0
alert.suppress.fields = host
alert.track = 1
counttype = number of events
cron_schedule = 0 * * * *
dispatch.earliest_time = -1h
dispatch.latest_time = now
display.general.type = statistics
display.page.search.mode = fast
display.page.search.tab = statistics
enableSched = 1
quantity = 0
relation = greater than
request.ui_dispatch_app = distinct_windows_performance
request.ui_dispatch_view = search
search = | mstats avg("% Usage") as Pagefile_Usage where index=winmetrics BY host instance span=15m \
| search instance=_Total \
| fields - instance\
| lookup host_pagefile_thresholds.csv host\
| eval pagefile_high=if(isnull(pagefile_high),90,pagefile_high)\
| eval alert_type=case(Pagefile_Usage > pagefile_high,"Pagefile High")\
| eval active=if(isnull(active),"true",active)\
| search alert_type=* active=true\
| mvexpand alert_type

# Hourly alert on network interfaces, evaluated per host + instance in
# 15-minute buckets. Possible alert_type values: "Send Utilisation High",
# "Receive Utilisation High", "Packet Discarded", "Packet Errors".
# Thresholds are looked up by host AND instance, matching the key that
# [Host Network Threshold Lookup Create] writes.
# NOTE(review): utilisation divides a bytes/sec counter by "Current Bandwidth";
# if that counter is reported in bits/sec the ratio is 8x too low - confirm.
[Network Alerts]
action.lookup = 1
action.lookup.append = 1
action.lookup.filename = distinct_alerts.csv
alert.digest_mode = 0
alert.suppress = 0
alert.suppress.fields = host
alert.track = 1
counttype = number of events
cron_schedule = 0 * * * *
dispatch.earliest_time = -1h
dispatch.latest_time = now
display.general.type = statistics
display.page.search.mode = fast
display.page.search.tab = statistics
enableSched = 1
quantity = 0
relation = greater than
request.ui_dispatch_app = distinct_windows_performance
request.ui_dispatch_view = search
search = | mstats avg("Bytes Sent/sec") as Bytes_Sent avg("Bytes Received/sec") as Bytes_Received avg("Current Bandwidth") as Current_Bandwidth avg("Packets Outbound Discarded") as Packets_Outbound_Discarded avg("Packets Outbound Errors") as Packets_Outbound_Errors avg("Packets Received Discarded") as Packets_Received_Discarded avg("Packets Received Errors") as Packets_Received_Errors where index=winmetrics BY host instance span=15m\
| eval send_utilisation=round((Bytes_Sent/Current_Bandwidth)*100,5)\
| eval receive_utilisation=round((Bytes_Received/Current_Bandwidth)*100,5)\
| eval Packets_Outbound_Discarded=if(isnull(Packets_Outbound_Discarded),0,Packets_Outbound_Discarded)\
| eval Packets_Outbound_Errors=if(isnull(Packets_Outbound_Errors),0,Packets_Outbound_Errors)\
| eval Packets_Received_Discarded=if(isnull(Packets_Received_Discarded),0,Packets_Received_Discarded)\
| eval Packets_Received_Errors=if(isnull(Packets_Received_Errors),0,Packets_Received_Errors)\
| eval Bytes_Sent=if(isnull(Bytes_Sent),0,Bytes_Sent)\
| eval Bytes_Received=if(isnull(Bytes_Received),0,Bytes_Received)\
| lookup host_network_thresholds.csv host instance\
| eval send_utilisation_high=if(isnull(send_utilisation_high),90,send_utilisation_high)\
| eval receive_utilisation_high=if(isnull(receive_utilisation_high),90,receive_utilisation_high)\
| eval packet_issues_packets_outbound_discarded=if(isnull(packet_issues_packets_outbound_discarded),0,packet_issues_packets_outbound_discarded)\
| eval packet_issues_packets_outbound_errors=if(isnull(packet_issues_packets_outbound_errors),0,packet_issues_packets_outbound_errors)\
| eval packet_issues_packets_received_discarded=if(isnull(packet_issues_packets_received_discarded),0,packet_issues_packets_received_discarded)\
| eval packet_issues_packets_received_errors=if(isnull(packet_issues_packets_received_errors),0,packet_issues_packets_received_errors)\
| eval alert_type=case(send_utilisation > send_utilisation_high,"Send Utilisation High")\
| eval alert_type=if(isnull(alert_type),case(receive_utilisation > receive_utilisation_high,"Receive Utilisation High"),mvappend(case(receive_utilisation > receive_utilisation_high,"Receive Utilisation High"),alert_type))\
| eval alert_type=if(isnull(alert_type),case(Packets_Outbound_Discarded> packet_issues_packets_outbound_discarded OR Packets_Received_Discarded > packet_issues_packets_received_discarded,"Packet Discarded"),mvappend(case(Packets_Outbound_Discarded> packet_issues_packets_outbound_discarded OR Packets_Received_Discarded > packet_issues_packets_received_discarded,"Packet Discarded"),alert_type))\
| eval alert_type=if(isnull(alert_type),case(Packets_Outbound_Errors > packet_issues_packets_outbound_errors OR Packets_Received_Errors> packet_issues_packets_received_errors,"Packet Errors"),mvappend(case(Packets_Outbound_Errors > packet_issues_packets_outbound_errors OR Packets_Received_Errors> packet_issues_packets_received_errors,"Packet Errors"),alert_type))\
| eval active=if(isnull(active),"true",active)\
| search alert_type=* active=true\
| mvexpand alert_type

# Scheduled at minute 30 of every hour: refreshes host_status.csv with the
# newest metric timestamp per host. Fresh results and the rows already in the
# lookup are collapsed to ONE row per host (newest _time wins) before
# checkin_age/status are derived, so the file does not accumulate duplicates.
# Hosts whose newest data point is 864000 seconds (10 days) old or more are dropped.
[Update Host_Status]
alert.track = 0
cron_schedule = 30 * * * *
description = updates the host_status csv, monitors the last time the host updated metric data in Splunk
dispatch.earliest_time = -60m@m
dispatch.latest_time = now
display.general.timeRangePicker.show = 0
display.general.type = statistics
display.page.search.mode = fast
display.page.search.tab = statistics
display.visualizations.show = 0
enableSched = 1
request.ui_dispatch_app = distinct_windows_performance
request.ui_dispatch_view = search
search = | mstats count(*) where index=winmetrics BY host span=1m\
| stats latest(_time) as _time by host\
| append [|inputlookup host_status.csv | fields host _time]\
| stats max(_time) as _time by host\
| eval checkin_age=now()-_time\
| eval status=if(checkin_age>900,"inactive","active")\
| eval removal=now()-_time\
| search removal<864000\
| table host _time checkin_age status\
| outputlookup host_status.csv

# Hourly alert on CPU counters, evaluated on the _Total instance per host in
# 15-minute buckets.
#   * processor_count is dc(instance) per host; it includes _Total, hence the
#     "-1" when computing queue_to_processor.
#   * queue_to_processor = Processor Queue Length / number of processors.
#   * The queue-length subsearch is joined on host, instance and _time.
#   * Fallback thresholds (used when a host has no lookup row) match the
#     defaults written by [Host CPU Threshold Lookup Create].
[CPU Alerts]
action.lookup = 1
action.lookup.append = 1
action.lookup.filename = distinct_alerts.csv
alert.digest_mode = 0
alert.suppress = 0
alert.suppress.fields = host
alert.track = 1
counttype = number of events
cron_schedule = 0 * * * *
dispatch.earliest_time = -1h
dispatch.latest_time = now
display.general.type = statistics
display.page.search.mode = fast
display.page.search.tab = statistics
enableSched = 1
quantity = 0
relation = greater than
request.ui_dispatch_app = distinct_windows_performance
request.ui_dispatch_view = search
search = | mstats avg("% DPC Time") as dpc_time avg("% Interrupt Time") as Interrupt_time avg("% User Time") as User_time avg("% Processor Time") as Processor_time avg("% Privileged Time") as Privileged_time where index=winmetrics BY host instance span=15m \
| eventstats dc(instance) as processor_count by host\
| join type=outer host instance _time [|mstats avg("Processor Queue Length") as Processor_Queue_Length where index=winmetrics BY host instance span=15m \
| eval instance="_Total"]\
| search instance=_Total \
| eval queue_to_processor=Processor_Queue_Length/(processor_count-1)\
| fields - instance\
| lookup host_cpu_thresholds.csv host\
| eval high_system_usage_Privileged_time=if(isnull(high_system_usage_Privileged_time),30,high_system_usage_Privileged_time)\
| eval high_system_usage_Processor_time=if(isnull(high_system_usage_Processor_time),80,high_system_usage_Processor_time)\
| eval high_interrupt_time=if(isnull(high_interrupt_time),20,high_interrupt_time)\
| eval high_DPC_time_with_processor_bottleneck_dpc_time=coalesce(high_DPC_time_with_processor_bottleneck_dpc_time,High_DPC_time_with_processor_bottleneck_dpc_time,15)\
| eval high_DPC_time_with_processor_bottleneck_Processor_time=coalesce(high_DPC_time_with_processor_bottleneck_Processor_time,High_DPC_time_with_processor_bottleneck_Processor_time,80)\
| eval high_DPC_Time=if(isnull(high_DPC_Time),20,high_DPC_Time)\
| eval high_processor_usage=if(isnull(high_processor_usage),80,high_processor_usage)\
| eval high_processor_queue=if(isnull(high_processor_queue),5,high_processor_queue)\
| eval alert_type=case(Privileged_time > high_system_usage_Privileged_time AND Processor_time > high_system_usage_Processor_time,"High System Usage")\
| eval alert_type=if(isnull(alert_type),case(Interrupt_time > high_interrupt_time,"High Interrupt time"),mvappend(case(Interrupt_time > high_interrupt_time,"High Interrupt time"),alert_type))\
| eval alert_type=if(isnull(alert_type),case(dpc_time > high_DPC_time_with_processor_bottleneck_dpc_time AND Processor_time > high_DPC_time_with_processor_bottleneck_Processor_time,"High DPC Time with Processor bottleneck"),mvappend(case(dpc_time > high_DPC_time_with_processor_bottleneck_dpc_time AND Processor_time > high_DPC_time_with_processor_bottleneck_Processor_time,"High DPC Time with Processor bottleneck"),alert_type))\
| eval alert_type=if(isnull(alert_type),case(dpc_time > high_DPC_Time,"High DPC Time"),mvappend(case(dpc_time > high_DPC_Time,"High DPC Time"),alert_type))\
| eval alert_type=if(isnull(alert_type),case(Processor_time > high_processor_usage,"High Processor Usage"),mvappend(case(Processor_time > high_processor_usage,"High Processor Usage"),alert_type))\
| eval alert_type=if(isnull(alert_type),case(queue_to_processor> high_processor_queue,"High Processor Queue"),mvappend(case(queue_to_processor> high_processor_queue,"High Processor Queue"),alert_type))\
| eval active=if(isnull(active),"true",active)\
| search alert_type=* active=true\
| mvexpand alert_type

# Hourly alert on disk counters per host + instance (instances matching "*:")
# in 15-minute buckets. Possible alert_type values: "Low Disk Space",
# "High Disk Queue", "High Latency".
# NOTE(review): Latency comes from "Avg. Disk sec/Transfer" and is compared
# with a default high_latency of 50 - confirm the intended unit of that threshold.
[Disk Alert]
action.lookup = 1
action.lookup.append = 1
action.lookup.filename = distinct_alerts.csv
alert.digest_mode = 0
alert.suppress = 0
alert.track = 1
counttype = number of events
cron_schedule = 0 * * * *
dispatch.earliest_time = -1h
dispatch.latest_time = now
display.general.type = statistics
display.page.search.mode = fast
display.page.search.tab = statistics
enableSched = 1
quantity = 0
relation = greater than
request.ui_dispatch_app = distinct_windows_performance
request.ui_dispatch_view = search
search = | mstats avg("% Free Space") as perc_free_space avg("Free Megabytes") as Free_MB avg("Avg. Disk Queue Length") as Disk_queue avg("Avg. Disk sec/Transfer") as Latency where index=winmetrics BY host instance span=15m \
| search instance=*: \
| lookup host_disk_thresholds.csv host instance\
| eval low_disk_space_perc_free_space=if(isnull(low_disk_space_perc_free_space),10,low_disk_space_perc_free_space)\
| eval low_disk_space_free_MB=if(isnull(low_disk_space_free_MB),5000,low_disk_space_free_MB)\
| eval high_disk_queue=if(isnull(high_disk_queue),20,high_disk_queue)\
| eval high_latency=if(isnull(high_latency),50,high_latency)\
| eval alert_type=case(perc_free_space < low_disk_space_perc_free_space AND Free_MB < low_disk_space_free_MB,"Low Disk Space")\
| eval alert_type=if(isnull(alert_type),case(Disk_queue > high_disk_queue,"High Disk Queue"),mvappend(case(Disk_queue > high_disk_queue,"High Disk Queue"),alert_type))\
| eval alert_type=if(isnull(alert_type),case(Latency > high_latency,"High Latency"),mvappend(case(Latency > high_latency,"High Latency"),alert_type))\
| eval active=if(isnull(active),"true",active)\
| search alert_type=* active=true\
| mvexpand alert_type

# Hourly alert on memory counters per host in 15-minute buckets. Possible
# alert_type values: "High Memory Utilisation", "High Hard Faults".
[Memory Alerts]
action.lookup = 1
action.lookup.append = 1
action.lookup.filename = distinct_alerts.csv
alert.digest_mode = 0
alert.suppress = 0
alert.track = 1
counttype = number of events
cron_schedule = 0 * * * *
dispatch.earliest_time = -1h
dispatch.latest_time = now
display.general.type = statistics
display.page.search.mode = fast
display.page.search.tab = statistics
enableSched = 1
quantity = 0
relation = greater than
request.ui_dispatch_app = distinct_windows_performance
request.ui_dispatch_view = search
search = | mstats avg("% Committed Bytes In Use") as perc_mem_used avg("Available MBytes") as Free_MBytes avg("Page Faults/sec") as Page_Faults avg("Page Reads/sec") as Page_Reads where `winmetrics_index` BY host instance span=15m\
| fields - instance\
| lookup host_memory_thresholds.csv host\
| eval high_memory_utilisation_free_MBytes=if(isnull(high_memory_utilisation_free_MBytes),1000,high_memory_utilisation_free_MBytes)\
| eval high_memory_utilisation_perc_mem_used=if(isnull(high_memory_utilisation_perc_mem_used),95,high_memory_utilisation_perc_mem_used)\
| eval high_hard_faults=if(isnull(high_hard_faults),1000,high_hard_faults)\
| eval alert_type=case(perc_mem_used > high_memory_utilisation_perc_mem_used AND Free_MBytes < high_memory_utilisation_free_MBytes,"High Memory Utilisation")\
| eval alert_type=if(isnull(alert_type),case(Page_Reads > high_hard_faults,"High Hard Faults"),mvappend(case(Page_Reads > high_hard_faults,"High Hard Faults"),alert_type))\
| eval active=if(isnull(active),"true",active)\
| search alert_type=* active=true\
| mvexpand alert_type

# Builds service_monitor.csv from WinHostMon service events, restricted to the
# service names that service_check.csv marks with check_enabled="TRUE".
# Not scheduled in this stanza (no cron_schedule / enableSched).
[Create Service Monitors]
action.email.useNSSubject = 1
alert.track = 0
description = Create services monitoring based on the check lookup
dispatch.earliest_time = -24h@h
dispatch.latest_time = now
display.general.timeRangePicker.show = 0
display.general.type = statistics
display.page.search.mode = fast
display.page.search.tab = statistics
display.visualizations.charting.chart = line
display.visualizations.show = 0
request.ui_dispatch_app = distinct_windows_performance
request.ui_dispatch_view = search
search = `winservice_index` source="service" sourcetype="WinHostMon" [|inputlookup service_check.csv | search check_enabled="TRUE" | return 9999 service_name] | stats count by host Type Name DisplayName State Status Started service_name | eval enabled="TRUE" | fields - count | outputlookup service_monitor.csv

# Hourly alert: reports monitored services (service_monitor.csv rows with
# enabled="TRUE") that were seen with State="Stopped" in the last hour.
[Service Alerts]
action.lookup = 1
action.lookup.append = 1
action.lookup.filename = distinct_alerts.csv
alert.suppress = 0
alert.track = 1
counttype = number of events
cron_schedule = 0 * * * *
description = Alerts for when a Service is in a stopped state
dispatch.earliest_time = -1h
dispatch.latest_time = now
display.general.type = statistics
display.page.search.mode = fast
display.page.search.tab = statistics
display.visualizations.charting.chart = line
enableSched = 1
quantity = 0
relation = greater than
request.ui_dispatch_app = distinct_windows_performance
request.ui_dispatch_view = search
search = source="service" `winservice_index` sourcetype="WinHostMon" [|inputlookup service_monitor.csv | search enabled="TRUE" | return 9999 service_name host] | stats count by host Type Name DisplayName State Status Started service_name | fields - count | search State="Stopped" | eval alert_type="Service Alerts"