#################################################
## CPU Macros
#################################################

## Base Macros
# Index and sourcetype aliases. Every other macro in this file refers to
# data through these, so an index or sourcetype can be changed in one place.

[os_index]
definition = index=os

[cp-unix-dashboards-internal-index]
definition = index=_internal

[cp-unix-dashboards-audit-index]
definition = index=_audit

[cp-unix-dashboards-unixsummary-index]
definition = index=unix_summary

[cp-unix-dashboards-firedalerts-index]
definition = index=firedalerts

[metadata_index]
definition = `os_index`

[cpu_sourcetype]
definition = sourcetype=cpu

[df_sourcetype]
definition = sourcetype=df

[hardware_sourcetype]
definition = sourcetype=hardware

[interfaces_sourcetype]
definition = sourcetype=interfaces

[iostat_sourcetype]
definition = sourcetype=iostat

[lastlog_sourcetype]
definition = sourcetype=lastlog

[lsof_sourcetype]
definition = sourcetype=lsof

# Memory metrics are read from the vmstat sourcetype.
[memory_sourcetype]
definition = sourcetype=vmstat

[netstat_sourcetype]
definition = sourcetype=netstat

[open_ports_sourcetype]
definition = sourcetype=openPorts

[package_sourcetype]
definition = sourcetype=package

[protocol_sourcetype]
definition = sourcetype=protocol

[ps_sourcetype]
definition = sourcetype=ps

[rlog_sourcetype]
definition = sourcetype=rlog

[syslog_sourcetype]
definition = sourcetype=syslog

[time_sourcetype]
definition = sourcetype=time

[top_sourcetype]
definition = sourcetype=top

[users_with_login_privs_sourcetype]
definition = sourcetype=usersWithLoginPrivs

[who_sourcetype]
definition = sourcetype=who

#####################
# utils/aliases
#####################

# Sets group to "default" when unix_group is the wildcard "*", otherwise to unix_group.
[eval_host_group]
definition = eval group=if(unix_group=="*", "default", unix_group)

[group_add]
definition = eventtype=groupadd OR eventtype="groupadd-suse"

[group_del]
definition = eventtype=groupdel

[password_change]
definition = eventtype=linux-password-change

[password_change_failed]
definition = eventtype=linux-password-change-failed

[su_failed]
definition = eventtype="Failed_SU"

[syslog_errors]
definition = error OR failed OR severe NOT assignment

# Searches for a nonsense term and keeps at most one event.
# NOTE(review): presumably a deliberate "return nothing" placeholder - confirm with callers.
[unix_noop]
definition = cowsshouldnteatcheesethatwouldmakethemsortofcannibalsinawaydontyouthink | head 1

[unix_errors]
definition = eventtype=nix_errors

[user_add]
definition = eventtype=useradd OR eventtype="useradd-suse"

[user_del]
definition = eventtype=userdel

# Eval expression that parses a disk size string into a number.
# The unit letter (T, G or M) may be the last character, or the second-to-last
# character followed by one more character. T values are multiplied by
# 1024*1024, G values by 1024 and M values are returned as-is, so all results
# share the M scale. Any other input matches no case() branch.
[parse_disk_size(1)]
args = disk
definition = case(substr($disk$, -2, 1) == "T", tonumber(substr($disk$, 1, len($disk$) - 2)) * 1024 * 1024, substr($disk$, -2, 1) == "G", tonumber(substr($disk$, 1, len($disk$) - 2)) * 1024, substr($disk$, -2, 1) == "M", tonumber(substr($disk$, 1, len($disk$) - 2)), substr($disk$, -1) == "T", tonumber(substr($disk$, 1, len($disk$) - 1)) * 1024 * 1024, substr($disk$, -1) == "G", tonumber(substr($disk$, 1, len($disk$) - 1)) * 1024, substr($disk$, -1) == "M", tonumber(substr($disk$, 1, len($disk$) - 1)))

#####################
# nodes
#####################

# One row per host seen in cpu/vmstat/df/iostat data, with fixed service, status and symbol values.
[unix_host_status]
definition = `os_index` ((`cpu_sourcetype` CPU="all") OR `memory_sourcetype` OR `df_sourcetype` OR `iostat_sourcetype`) | dedup host | `eval_host_group` | eval service="Node" | eval status=1 | eval symbol="nix" | table host service symbol group status

# Same as unix_host_status, restricted to hosts that dropdownsLookup returns
# for the given category/group search terms.
[unix_hosts_status(2)]
args = unix_category, unix_group
definition = `os_index` ((`cpu_sourcetype` CPU="all") OR `memory_sourcetype` OR `df_sourcetype` OR `iostat_sourcetype`) [ | inputlookup dropdownsLookup | search $unix_category$ $unix_group$ | fields host] | mvexpand unix_group | mvexpand unix_category | search $unix_category$ $unix_group$ | dedup host | `eval_host_group` | eval service="Node" | eval status=1 | eval symbol="nix" | table host service symbol group status

# Per-host cpu / mem / io / disk summary for hosts in the given category/group.
# disk is the used share of (used + available), summed over filesystems.
[unix_hosts_details(2)]
args = unix_category, unix_group
definition = `os_index` ((`cpu_sourcetype` CPU="all") OR `memory_sourcetype` OR `df_sourcetype` OR `iostat_sourcetype`) [ | inputlookup dropdownsLookup | search $unix_category$ $unix_group$ | fields host] | mvexpand unix_group | mvexpand unix_category | search $unix_category$ $unix_group$ | `eval_host_group` | multikv fields Avail Used rReq_PS wReq_PS | eval diskAvail=`parse_disk_size('Avail')` | eval diskUsed=`parse_disk_size('Used')` | fillnull value=NULL Filesystem | stats first(group) as group avg(pctIdle) as pctIdle avg(memUsedPct) as mem first(rReq_PS) as ioReads first(wReq_PS) as ioWrites first(diskAvail) as diskAvail first(diskUsed) as diskUsed by host Filesystem | stats first(group) as group first(pctIdle) as pctIdle first(mem) as mem first(ioReads) as ioReads first(ioWrites) as ioWrites sum(diskAvail) as diskAvail sum(diskUsed) as diskUsed by host | eval cpu=(100-pctIdle) | eval io=ioReads+ioWrites | eval disk=round(diskUsed/(diskUsed+diskAvail)*100,2) | table host group cpu mem io disk

# Per-host cpu / mem / io / disk summary for all hosts.
# disk uses the same used/(used+available) formula as unix_hosts_details(2).
[unix_host_details]
definition = `os_index` ((`cpu_sourcetype` CPU="all") OR `memory_sourcetype` OR `df_sourcetype` OR `iostat_sourcetype`) | `eval_host_group` | multikv fields Avail Used rReq_PS wReq_PS | eval diskAvail=`parse_disk_size('Avail')` | eval diskUsed=`parse_disk_size('Used')` | fillnull value=NULL Filesystem | stats first(group) as group avg(pctIdle) as pctIdle avg(memUsedPct) as mem first(rReq_PS) as ioReads first(wReq_PS) as ioWrites first(diskAvail) as diskAvail first(diskUsed) as diskUsed by host Filesystem | stats first(group) as group first(pctIdle) as pctIdle first(mem) as mem first(ioReads) as ioReads first(ioWrites) as ioWrites sum(diskAvail) as diskAvail sum(diskUsed) as diskUsed by host | eval cpu=(100-pctIdle) | eval io=ioReads+ioWrites | eval disk=round(diskUsed/(diskUsed+diskAvail)*100,2) | table host group cpu mem io disk

# Heatmap macros: each returns one "heatmap" value per host.
[unix_nodes_heatmap_cpu]
definition = `os_index` `cpu_sourcetype` CPU="all" | eval heatmap = (100-pctIdle) | stats first(heatmap) as heatmap by host

[unix_nodes_heatmap_mem]
definition = `os_index` `memory_sourcetype` | stats avg(memUsedPct) as heatmap by host | eval heatmap = round(heatmap, 2)

[unix_nodes_heatmap_disk]
definition = `os_index` `df_sourcetype` | multikv fields Avail Used | `convert_memk(Avail, avail)` | `convert_memk(Used, used)` | stats first(avail) as avail first(used) as used by host Filesystem | stats sum(avail) as avail sum(used) as used by host | eval heatmap = round(((used / (used + avail)) * 100),2) | fields host heatmap

[unix_nodes_heatmap_io]
definition = `os_index` `iostat_sourcetype` | multikv fields rReq_PS wReq_PS | stats first(rReq_PS) as reads first(wReq_PS) as writes by host | eval heatmap = reads + writes | fields host heatmap

# Core count for one host: highest numeric CPU value + 1.
# The aggregate CPU="all" rows are excluded so max(CPU) stays numeric; the
# appended placeholder row guarantees a result row when no events match.
[unix_nodes_detail_specs_cpu_by_host(1)]
args = host
definition = `os_index` `cpu_sourcetype` host=$host$ NOT CPU="all" | append [stats count | eval _raw="no results"] | head 100 | eval name="CPU:" | stats first(name) as name max(CPU) as cpus | eval cpus=if(isnum(cpus), tostring(cpus + 1) + " cores", "unknown - is cpu.sh enabled?")

# Total RAM for one host, or an "unknown" hint when TotalMBytes is missing or 0.
[unix_nodes_detail_specs_mem_by_host(1)]
args = host
definition = `os_index` `memory_sourcetype` host=$host$ | append [stats count | eval _raw="no results"] | head 10 | fillnull TotalMBytes value=0 | stats first(TotalMBytes) as mem | eval name = "RAM:" | fillnull mem value=0 | eval mem=if(mem=0, "unknown - is vmstat.sh enabled?", mem) | eval mem = if(isnum(mem), mem . " MB", mem) | fields name mem

# Number of distinct Filesystem values for one host.
[unix_nodes_detail_specs_disk_drives_by_host(1)]
args = host
definition = `os_index` `df_sourcetype` host=$host$ | append [stats count | eval _raw="no results"] | stats dc(Filesystem) as drives | eval name="Disk:" | fillnull drives value=0 | eval drives=if(drives=0, "unknown - is df.sh enabled?", tostring(drives) . " drives")  | fields name drives

# Sum of the converted Avail value over filesystems for one host, shown in GB.
[unix_nodes_detail_specs_disk_cap_by_host(1)]
args = host
definition = `os_index` `df_sourcetype` host=$host$ | append [stats count | eval _raw="no results"] | `convert_memk(Avail, cap)` | fillnull Filesystem value=NULL | stats first(cap) as cap by Filesystem | fillnull cap value=0 | stats sum(cap) as cap | eval name="Capacity:" | eval cap=if(cap=0, "unknown - is df.sh enabled?", round(cap/(1024*1024),0) . " GB") | fields name cap

[unix_nodes_detail_status_cpu_by_host(1)]
args = host
definition = `os_index` `cpu_sourcetype` host=$host$ | eval Percent_CPU_Load = 100 - pctIdle | stats avg(Percent_CPU_Load) as cpu by host

[unix_nodes_detail_status_mem_by_host(1)]
args = host
definition = `os_index` `memory_sourcetype` host=$host$ | stats avg(memUsedPct) as mem by host

[unix_nodes_detail_status_disk_by_host(1)]
args = host
definition = `os_index` `df_sourcetype` host=$host$ | multikv fields Avail Used | `convert_memk(Avail, avail)` | `convert_memk(Used, used)` | stats first(avail) as avail first(used) as used by host Filesystem | stats sum(avail) as avail sum(used) as used by host | eval disk = round(used/(used+avail)*100, 2) | fields host disk

# Sparkline macros: a name, an overall "used" percentage string and a sparkline (sl).
# Each appends a placeholder row so a result row exists even with no data.
[unix_nodes_detail_cpu_sparkline_by_host_1h(1)]
args = host
definition = `os_index` `cpu_sourcetype` host=$host$ CPU="all" | append [stats count | eval _raw="no results" ] | eval used = 100 - pctIdle | eval name = "CPU:" | stats first(name) as name avg(used) as used sparkline(avg(used), 2m) as sl | eval used = round(used, 0) . "%" | fillnull used value="unknown - is cpu.sh enabled?"
iseval = 0

[unix_nodes_detail_mem_sparkline_by_host_1h(1)]
args = host
definition = `os_index` `memory_sourcetype` host=$host$ | append [stats count | eval _raw="no results"] | eval name = "Memory:" | stats first(name) as name avg(memUsedPct) as used sparkline(avg(memUsedPct), 2m) as sl | eval used = round(used, 0) . "%" | fillnull used value="unknown - is vmstat.sh enabled?"
iseval = 0

[unix_nodes_detail_disk_sparkline_by_host_1h(1)]
args = host
definition = `os_index` `df_sourcetype` host=$host$ | append [stats count | eval _raw="no results" | eval _time=now()] | `convert_memk(Avail, avail)` | `convert_memk(Used, used)` | fillnull used value=0 | fillnull avail value=0 | fillnull Filesystem value="no results" | bucket span=5m _time | stats avg(used) as used avg(avail) as avail by Filesystem _time | stats sum(used) as used sum(avail) as avail by _time | eval used_perc_no_unit = round(used/(used+avail)*100, 0) | eval used_perc = used_perc_no_unit . "%" | eval name = "Disk:" | stats first(name) as name first(used_perc) as used sparkline(max(used_perc_no_unit), 5m) as sl | fillnull used value="--%"
iseval = 0

# Peak CPU / memory share and cpuTIME per command and user on one host.
[unix_nodes_detail_top_processes_by_host(1)]
args = host
definition = `os_index` `top_sourcetype` host=$host$ | stats max(pctCPU) as pctCPU max(pctMEM) as pctMEM last(cpuTIME) as cpuTIME by COMMAND, USER | eval CMD=COMMAND | fields CMD, USER, pctCPU, pctMEM, cpuTIME

### Multiple Host Macros

[Percent_CPU_by_Host(1)]
args = host
definition = `os_index` `cpu_sourcetype` host=$host$ | eval Percent_CPU_Load = 100 - pctIdle | timechart avg(Percent_CPU_Load) by host

[Percent_Load_by_Host(1)]
args = host
definition = `os_index` `memory_sourcetype` host=$host$ | timechart avg(loadAvg1mi) by host

[Top_5_CPU_Processes_by_Host(1)]
args = host
definition = `os_index` `top_sourcetype` host=$host$ | stats max(pctCPU) as maxCPU by host, COMMAND, _time | sort -maxCPU | dedup 5 host

[Number_Threads_by_Host(1)]
args = host
definition = `os_index` `memory_sourcetype` host=$host$ | timechart avg(threads) by host

[Number_Processes_by_Host(1)]
args = host
definition = `os_index` `memory_sourcetype` host=$host$ | timechart avg(processes) by host

### Single Host Macros

[CPU_Usage_by_Command_for_Host(1)]
args = host
definition = `os_index` `ps_sourcetype` host=$host$ | stats sum(pctCPU) as pctCPU by _time,COMMAND | timechart avg(pctCPU) by COMMAND

# CPU state breakdown over time; a missing pctNice is treated as 0.
[CPU_Usage_by_State_for_Host(1)]
args = host
definition = `os_index` `cpu_sourcetype` host=$host$ | eval pctNice=if(isnull(pctNice), 0, pctNice) | timechart avg(pctUser), avg(pctNice), avg(pctSystem), avg(pctIowait), avg(pctIdle)

[Stats_for_CPU_State_by_Host(1)]
args = host
definition = `os_index` `cpu_sourcetype` host=$host$ | stats avg(pctUser), avg(pctSystem), avg(pctIdle) by host

[Top_CPU_Processes_for_Host(1)]
args = host
definition = `os_index` `top_sourcetype` host=$host$ | stats max(pctCPU) as maxCPU by host, COMMAND, _time | sort -maxCPU

[CPU_Usage_by_User_for_Host(1)]
args = host
definition = `os_index` `ps_sourcetype` host=$host$ | timechart avg(pctCPU) by USER

[Top_CPU_Users_for_Host(1)]
args = host
definition = `os_index` `top_sourcetype` host=$host$ | stats sum(pctCPU) as Total_CPU_Time by host, USER | sort -Total_CPU_Time

[CPU_Sum_by_Command_for_Host(1)]
args = host
definition = `os_index` `ps_sourcetype` host=$host$ | bin _time | stats avg(pctCPU) as pctCPU by _time, PID, COMMAND, host | timechart sum(pctCPU) by COMMAND

####################################################
## Memory Macros
####################################################

### Single Host Macros

[Mem_Usage_for_Host(1)]
args = host
definition = `os_index` `memory_sourcetype` host=$host$ | timechart median(memFreePct) as Percent_Mem_Free, median(memUsedPct) as Percent_Mem_Used, median(swapUsedPct) as Percent_Swap

[Mem_Usage_by_Command_for_Host(1)]
args = host
definition = `os_index` `ps_sourcetype` host=$host$ | timechart eval(median(RSZ_KB)/1024) as ResidentMB by COMMAND

[Top_Mem_Command_for_Host(1)]
args = host
definition = `os_index` `ps_sourcetype` host=$host$ | eval RSZ_MB=RSZ_KB/1024 | eval VSZ_MB=VSZ_KB/1024 | stats max(RSZ_MB) as Resident_MB, max(VSZ_MB) as Virtual_MB, max(pctMEM) as Percent_Memory by host, COMMAND, _time | dedup COMMAND | sort -Resident_MB

# Resident memory per user, divided by the number of distinct event times in each bucket.
[Top_Users_of_VM_for_Host(1)]
args = host
definition = `os_index` `ps_sourcetype` host=$host$ | eval RSZ_MB=RSZ_KB/1024 | eval time=_time | timechart eval(sum(RSZ_MB)/dc(time)) as Avg_Mem_Usage by USER useother=F limit=10

### Multiple Host Macros

[Percent_MEM_by_Host(1)]
args = host
definition = `os_index` `memory_sourcetype` host=$host$ | timechart median(memUsedPct) by host

[Top_Mem_Processes_by_Host(1)]
args = host
definition = `os_index` `ps_sourcetype` host=$host$ | eval RSZ_MB=RSZ_KB/1024 | eval VSZ_MB=VSZ_KB/1024 | stats median(RSZ_MB) by VSZ_MB, host, COMMAND, _time | dedup 1 host, COMMAND sortby -median(RSZ_MB)

# Real and swap memory per host; values are divided by 1024 when unit is "kB".
[Memory_Hardware_by_Host(1)]
args = host
definition = `os_index` `hardware_sourcetype` earliest=-1d host=$host$ | dedup host | eval k=if(unit=="kB",1024,1) | eval RealMemoryMB = RealMemoryMB/k | eval SwapMemoryMB = SwapMemoryMB/k | fields + RealMemoryMB, SwapMemoryMB, host | chart max(RealMemoryMB) as Real_Memory_MB, max(SwapMemoryMB) as Swap_Memory_MB by host

[Top_Memory_Users_by_Command_by_Host(1)]
args = host
definition = `os_index` `ps_sourcetype` host=$host$ | eval RSZ_MB=RSZ_KB/1024 | stats max(RSZ_MB) by USER, COMMAND| sort -max(RSZ_MB)

#####################################################
## Network Macros
#####################################################

# Transmit throughput in KB per interface@host, from the difference between
# consecutive TXbytes readings of the same interface Name.
[Thruput_by_Interface_by_Host(1)]
args = host
definition = `os_index` `interfaces_sourcetype` host=$host$ | streamstats current=f last(TXbytes) as lastTX, last(RXbytes) as lastRX by Name | eval time=_time | strcat Name "-" inetAddr "@" host Interface_Host | eval RX_Thruput_KB = (lastRX-RXbytes)/1024 | eval TX_Thruput_KB = (lastTX-TXbytes)/1024 | timechart eval(sum(TX_Thruput_KB)/dc(time)) by Interface_Host

[Top_Inet_Addresses_by_Host(1)]
args = host
definition = `os_index` `interfaces_sourcetype` host=$host$ | strcat Name "-" inetAddr "@" host Interface_Host | top Interface_Host limit=20

# Counts rows of the most recent openPorts event, labelling well-known ports
# by name and everything else as "<Port> / <Proto>".
[Open_Ports_by_Host(1)]
args = host
definition = `os_index` `open_ports_sourcetype` host=$host$ | head 1 | multikv fields Proto, Port | eval PortOverProto = case(Port == "8089", "Splunk Management Port", Port == "8000", "Splunk HTTP Port", Port == "21", "ftp", Port == "22", "ssh", Port == "23", "telnet", Port == "25", "smtp", Port == "69", "tftp", Port == "79", "finger", Port == "80", "http", Port == "88", "kerberos", Port == "143", "imap", Port == "161", "snmp", Port == "162", "snmptrap", Port == "179", "bgp", Port == "1521", "SQL*Net", 1==1, Port." / ".Proto) | chart count by PortOverProto | sort count desc | rename count as "# of Connections Accepted"

# Counts connections per foreign address in the most recent netstat event.
# hostOnly captures everything before the last ":" or "." of ForeignAddress,
# which drops the trailing port component.
[Addresses_by_Host(1)]
args = host
definition = `os_index` `netstat_sourcetype` host=$host$ | head 1 | multikv fields ForeignAddress | rex field=ForeignAddress "(?<hostOnly>^.*)[:\.].+$" | fields + hostOnly | where hostOnly != "*" | rename hostOnly as Address | chart count by Address | sort count desc | rename count as "# of Connections to This Address"

[Sockets_by_State_by_Host(1)]
args = host
definition = `os_index` `netstat_sourcetype` host=$host$ | multikv fields State | search NOT (State="") | timechart count by State

# Port frequency for the requested host(s); filters on the host argument like
# the other *_by_Host macros.
[Frequently_Open_Ports_by_Host(1)]
args = host
definition = `os_index` `open_ports_sourcetype` host=$host$ | multikv | stats count by Port | sort count

#####################################################
## Disk Macros
#####################################################

[Disk_Used_Pct_by_Host(1)]
args = host
definition = `os_index` `df_sourcetype` host=$host$ | strcat host "@" Filesystem Host_FileSystem | timechart avg(UsePct) by Host_FileSystem | rename avg(UsePct) as %Used

# Most recent df event for each requested host.
[Latest_Disk_Used_by_Host(1)]
args = host
definition = `os_index` `df_sourcetype` host=$host$ | dedup host

# Single row with the highest UsePct (descending sort, first result kept).
[Max_Disk_Used_by_Host(1)]
args = host
definition = `os_index` `df_sourcetype` host=$host$ | fields UsePct, Filesystem | sort 1 -UsePct, Filesystem, host

# Open-file counts divided by the number of distinct event times in each bucket.
[Open_Files_by_Command_and_Host(1)]
args = host
definition = `os_index` `lsof_sourcetype` host=$host$ | eval time=_time| timechart eval(count/dc(time)) by COMMAND

[Open_Files_by_Type_and_Host(1)]
args = host
definition = `os_index` `lsof_sourcetype` host=$host$ | eval time =_time | timechart eval(count/dc(time)) by TYPE

[Open_Files_by_User_and_Host(1)]
args = host
definition = `os_index` `lsof_sourcetype` host=$host$ | eval time=_time | timechart eval(count/dc(time)) by USER

#####################################################
## User Macros
#####################################################

[User_Sessions_by_Host(1)]
args = host
definition = `os_index` `who_sourcetype` host=$host$ | fields USERNAME, LINE, TIME, _time | dedup USERNAME, LINE, TIME sortby +_time | sort -_time

[Failed_Logins_by_Host(1)]
args = host
definition = `os_index` eventtype=failed_login host=$host$

[Users_with_Login_Privs_by_Host(1)]
args = host
definition = `os_index` `users_with_login_privs_sourcetype` host=$host$ | dedup host

#####################################################
## Alerts
#####################################################
# Each alert macro takes a threshold, keeps the rows that cross it, sets
# title to its own name and then applies `unix_alert_decoration`.

[CPU_Exceeds_Percent_by_Host(1)]
args = threshold
definition = `os_index` `cpu_sourcetype` host=* CPU="all" | stats avg(pctIdle) as pctIdle by host | eval Percent_CPU_Load = 100 - pctIdle | where Percent_CPU_Load > $threshold$ | eval title="CPU_Exceeds_Percent_by_Host" | `unix_alert_decoration` | fields Percent_CPU_Load, host, hosts, host_count, severity, sid, time_fired, _time

[CPU_Under_Percent_by_Host(1)]
args = threshold
definition = `os_index` `cpu_sourcetype` host=* CPU="all" | stats avg(pctIdle) as pctIdle by host | eval Percent_CPU_Load = 100 - pctIdle | where Percent_CPU_Load < $threshold$ | eval title="CPU_Under_Percent_by_Host" | `unix_alert_decoration` | fields Percent_CPU_Load, host, hosts, host_count, severity, sid, time_fired, _time

[Load_Exceeds_by_Host(1)]
args = threshold
definition = `os_index` `memory_sourcetype` host=* | stats avg(loadAvg1mi) as Load_Avg_One_Minute by host | where Load_Avg_One_Minute > $threshold$ | eval title="Load_Exceeds_by_Host" | `unix_alert_decoration` | fields Load_Avg_One_Minute, host, hosts, host_count, severity, sid, time_fired, _time

[Threads_Exceeds_by_Host(1)]
args = threshold
definition = `os_index` `memory_sourcetype` host=* | stats avg(threads) as Total_Threads by host | where Total_Threads > $threshold$ | eval title="Threads_Exceeds_by_Host" | `unix_alert_decoration` | fields Total_Threads, host, hosts, host_count, severity, sid, time_fired, _time

[Processes_Exceeds_by_Host(1)]
args = threshold
definition = `os_index` `memory_sourcetype` host=* | stats avg(processes) as Total_Processes by host | where Total_Processes > $threshold$ | eval title="Processes_Exceeds_by_Host" | `unix_alert_decoration` | fields Total_Processes, host, hosts, host_count, severity, sid, time_fired, _time

[Memory_Exceeds_MB_by_Process(1)]
args = threshold
definition = `os_index` `ps_sourcetype` host=* | stats first(RSZ_KB) as ResidentMB by COMMAND, host | eval ResidentMB = (ResidentMB/1024) | where ResidentMB > $threshold$ | eval title="Memory_Exceeds_MB_by_Process" | `unix_alert_decoration` | fields ResidentMB, COMMAND, host, hosts, host_count, severity, sid, time_fired, _time

[Memory_Exceeds_Percent_by_Host(1)]
args = threshold
definition = `os_index` `memory_sourcetype` host=* | stats first(memUsedPct) as UsedMemoryPercent by host | where UsedMemoryPercent > $threshold$ | eval title="Memory_Exceeds_Percent_by_Host" | `unix_alert_decoration` | fields UsedMemoryPercent, host, hosts, host_count, severity, sid, time_fired, _time

[Memory_Exceeds_MB_by_Host(1)]
args = threshold
definition = `os_index` `memory_sourcetype` host=* | stats first(memUsedMB) as UsedMemoryMB by host | where UsedMemoryMB > $threshold$ | eval title="Memory_Exceeds_MB_by_Host" | `unix_alert_decoration` | fields UsedMemoryMB, host, hosts, host_count, severity, sid, time_fired, _time

[Disk_Used_Exceeds_Percent_by_Host(1)]
args = threshold
definition = `os_index` `df_sourcetype` host=* | stats first(PercentUsedSpace) as PercentUsed, values(Type) as Type, values(Size) as Size, values(Used) as Used, values(Avail) as Avail, values(MountedOn) as MountedOn by Filesystem, host | where PercentUsed > $threshold$ | eval title="Disk_Used_Exceeds_Percent_by_Host" | `unix_alert_decoration` | fields Filesystem, Type, Size, Used, Avail, PercentUsed, MountedOn, host, hosts, host_count, severity, sid, time_fired, _time

[Open_Files_Exceeds_by_Process(1)]
args = threshold
definition = `os_index` `lsof_sourcetype` host=* | stats count by _time, PID, COMMAND, host | stats first(count) as OpenFiles by PID, COMMAND, host | where OpenFiles > $threshold$ | eval title="Open_Files_Exceeds_by_Process" | `unix_alert_decoration` | fields OpenFiles, PID, COMMAND, host, hosts, host_count, severity, sid, time_fired, _time

[IO_Wait_Exceeds_Threshold(1)]
args = threshold
definition = `os_index` `iostat_sourcetype` | stats avg(avgWaitMillis) as wait_time_millis by host | where wait_time_millis > $threshold$ | eval title="IO_Wait_Exceeds_Threshold" | `unix_alert_decoration` | fields wait_time_millis, host, hosts, host_count, severity, sid, time_fired, _time

[IO_Utilization_Exceeds_Threshold(1)]
args = threshold
definition = `os_index` `iostat_sourcetype` | stats avg(bandwUtilPct) as bandwidth_util by host | where bandwidth_util > $threshold$ | eval title="IO_Utilization_Exceeds_Threshold" | `unix_alert_decoration` | fields bandwidth_util, host, hosts, host_count, severity, sid, time_fired, _time

#####################################################
## Alert Helpers
#####################################################

# Joins each row on title with the saved search's alert.severity, adds the
# list and distinct count of hosts, stamps time_fired/_time with now(), and
# exposes the search id from addinfo as sid.
[unix_alert_decoration]
definition = join title [rest /services/saved/searches | rename alert.severity as severity | fields title severity] | eventstats values(host) as hosts | eventstats dc(host) as host_count | eval time_fired = now() | addinfo | eval _time = time_fired | rename info_sid as sid

#####################################################
## Alert Thresholds
#####################################################

[_unix_alert_threshold_CPU_Exceeds_Percent_by_Host]
definition = 99

[_unix_alert_threshold_CPU_Under_Percent_by_Host]
definition = 30

[_unix_alert_threshold_Load_Exceeds_by_Host]
definition = 5

[_unix_alert_threshold_Threads_Exceeds_by_Host]
definition = 250

[_unix_alert_threshold_Processes_Exceeds_by_Host]
definition = 100

[_unix_alert_threshold_Memory_Exceeds_Percent_by_Host]
definition = 90

[_unix_alert_threshold_Memory_Exceeds_MB_by_Process]
definition = 1024

[_unix_alert_threshold_Memory_Exceeds_MB_by_Host]
definition = 2048

[_unix_alert_threshold_Disk_Used_Exceeds_Percent_by_Host]
definition = 90

[_unix_alert_threshold_Open_Files_Exceeds_by_Process]
definition = 200

[_unix_alert_threshold_IO_Wait_Exceeds_Threshold]
definition = 10

[_unix_alert_threshold_IO_Utilization_Exceeds_Threshold]
definition = 95

#####################################################
## View Macros
#####################################################
# Base searches for the alert detail views: each tabulates the fields of one
# sourcetype for the given host and adds an epoch "time" column.

[alert_detail_cpu_basesearch(1)]
args = HOST
definition = `os_index` `cpu_sourcetype` host=$HOST$ | search CPU="all" | convert mktime(_time) as time | table _time, time, host, sourcetype, pctUser, pctSystem

[alert_detail_memory_basesearch(1)]
args = HOST
definition = `os_index` `memory_sourcetype` host=$HOST$ | convert mktime(_time) as time | eval memUsed=memUsedMB*1000000 | eval memFree=memFreeMB*1000000 | table _time, time, sourcetype, host, memFreePct, memUsedPct, memFree, memUsed, processes, threads

[alert_detail_ps_basesearch(1)]
args = HOST
definition = `os_index` `ps_sourcetype` host=$HOST$ | eval RSZ=RSZ_KB * 1024 | eval VSZ=VSZ_KB * 1024 | convert mktime(_time) as time | table _time, time, USER, PID, PSR, pctCPU, CPUTIME, pctMEM, RSZ, VSZ, S, ELAPSED, COMMAND, ARGS

[alert_detail_netstat_basesearch(1)]
args = HOST
definition = `os_index` `netstat_sourcetype` host=$HOST$ | convert mktime(_time) as time | table _time, time, Proto, Recv_Q, Send_Q, LocalAddress, ForeignAddress, State

[alert_detail_lsof_basesearch(1)]
args = HOST
definition = `os_index` `lsof_sourcetype` host=$HOST$ | convert mktime(_time) as time | table _time, time, COMMAND, PID, USER, FD, TYPE, DEVICE, NAME, SIZE

# Fired alerts from the summary index, joined on search name with the
# alert_overlay REST endpoint, one row per host. Numeric severity is mapped
# to Low (1), Medium (2-4) or High (5).
[alert_events]
definition = `cp-unix-dashboards-unixsummary-index` | stats first(hosts) as hosts first(host_count) as host_count by severity sid time_fired search_name | rename time_fired as trigger_time | rename search_name as ss_name | join ss_name [| rest /services/unix/alert_overlay splunk_server=local| fillnull value="" business_impact description escalation remediation | fields - eai:* | rename title as ss_name ] | eval severity=case(severity==1, "Low", severity==2, "Medium", severity==3, "Medium", severity==4, "Medium", severity==5, "High") | makemv hosts | mvexpand hosts | sort +trigger_time

#####################################################
## Home View Macros
#####################################################
# Each macro restricts events to the hosts dropdownsLookup returns for the
# given category/group search terms, then reports one "metric" value per
# unix_group and unix_category.

[home_cpu_idle(2)]
args = unix_category, unix_group
definition = `os_index` `cpu_sourcetype` [ | inputlookup dropdownsLookup | search $unix_category$ $unix_group$ | fields host] |mvexpand unix_group | mvexpand unix_category | search $unix_category$ $unix_group$ | eval pctUsed = 100-pctIdle | eval pctFree = pctIdle | stats median(pctFree) as metric by unix_group unix_category

[home_cpu_used(2)]
args = unix_category, unix_group
definition = `os_index` `cpu_sourcetype` [ | inputlookup dropdownsLookup | search $unix_category$ $unix_group$ | fields host] |mvexpand unix_group | mvexpand unix_category | search $unix_category$ $unix_group$ | eval pctUsed = 100-pctIdle | eval pctFree = pctIdle | stats median(pctUsed) as metric by unix_group unix_category

[home_memory(2)]
args = unix_category, unix_group
definition = `os_index` `memory_sourcetype` [ | inputlookup dropdownsLookup | search $unix_category$ $unix_group$ | fields host] | mvexpand unix_group | mvexpand unix_category | search $unix_category$ $unix_group$ | eval pctUsed = memUsedPct | eval pctFree = memFreePct | stats median(pctUsed) as metric by unix_group unix_category

[home_memory_free(2)]
args = unix_category, unix_group
definition = `os_index` `memory_sourcetype` [ | inputlookup dropdownsLookup | search $unix_category$ $unix_group$ | fields host] | mvexpand unix_group | mvexpand unix_category | search $unix_category$ $unix_group$ | eval pctUsed = memUsedPct | eval pctFree = memFreePct | stats median(pctFree) as metric by unix_group unix_category

[home_disk_used(2)]
args = unix_category, unix_group
definition = `os_index` `df_sourcetype` [ | inputlookup dropdownsLookup | search $unix_category$ $unix_group$ | fields host] | mvexpand unix_group | mvexpand unix_category | search $unix_category$ $unix_group$ | strcat host "@" Filesystem Host_FileSystem | stats avg(PercentUsedSpace) as metric by unix_group unix_category

[home_disk_free(2)]
args = unix_category, unix_group
definition = `os_index` `df_sourcetype` [ | inputlookup dropdownsLookup | search $unix_category$ $unix_group$ | fields host] | mvexpand unix_group | mvexpand unix_category | search $unix_category$ $unix_group$ | strcat host "@" Filesystem Host_FileSystem | stats avg(PercentFreeSpace) as metric by unix_group unix_category

# Disabled macro, kept commented out:
# [home_network_thruput(2)]
# args = category, group
# definition = `os_index` `interfaces_sourcetype` $category$ $group$ | streamstats current=f last(TXbytes) as lastTX, last(RXbytes) as lastRX by Name | eval time=_time | strcat Name "-" inetAddr "@" host Interface_Host | eval RX_Thruput_KB = (lastRX-RXbytes)/1024 | eval TX_Thruput_KB = (lastTX-TXbytes)/1024 | stats eval(sum(TX_Thruput_KB)/dc(time)) by group category
#

#####################################################
## Utility Macros
#####################################################

# `convert_memk(sizeField, bytesField)` == `convert memk(sizeField) as bytesField`
# but it also supports terabytes, which the regular command does not. (SPL-95079)
# A value ending in t/T is rewritten as (value * 1024) with a "G" suffix before
# being handed to convert memk; other values are passed through unchanged.
[convert_memk(2)]
args = sizeField, bytesField
definition = rex field=$sizeField$ "(?<_terabytes>.*)[tT]" | eval _size=if(_terabytes >= 0, (_terabytes*1024)."G", $sizeField$) | convert memk(_size) as $bytesField$