# Splunk search macro definitions (macros.conf stanza format: [name(argc)] with
# args / definition / iseval keys) for Unix/Linux host dashboards and alerts.
#################################################
|
|
## CPU Macros
|
|
#################################################
|
|
|
|
## Base Macros
|
|
|
|
# Index macros: each expands to a single index=<name> search term.
# `os_index` is the one referenced by the host, view and alert macros in this file.
[os_index]
definition = index=os

[cp-unix-dashboards-internal-index]
definition = index=_internal

[cp-unix-dashboards-audit-index]
definition = index=_audit

[cp-unix-dashboards-unixsummary-index]
definition = index=unix_summary

[cp-unix-dashboards-firedalerts-index]
definition = index=firedalerts

# Delegates to `os_index`, so it follows whatever index that macro names.
[metadata_index]
definition = `os_index`
|
|
|
|
# Sourcetype macros: each expands to a single sourcetype=<name> search term.
[cpu_sourcetype]
definition = sourcetype=cpu

[df_sourcetype]
definition = sourcetype=df

[hardware_sourcetype]
definition = sourcetype=hardware

[interfaces_sourcetype]
definition = sourcetype=interfaces

[iostat_sourcetype]
definition = sourcetype=iostat

[lastlog_sourcetype]
definition = sourcetype=lastlog

[lsof_sourcetype]
definition = sourcetype=lsof

# Note: the "memory" macro resolves to the vmstat sourcetype.
[memory_sourcetype]
definition = sourcetype=vmstat

[netstat_sourcetype]
definition = sourcetype=netstat

[open_ports_sourcetype]
definition = sourcetype=openPorts

[package_sourcetype]
definition = sourcetype=package

[protocol_sourcetype]
definition = sourcetype=protocol

[ps_sourcetype]
definition = sourcetype=ps

[rlog_sourcetype]
definition = sourcetype=rlog

[syslog_sourcetype]
definition = sourcetype=syslog

[time_sourcetype]
definition = sourcetype=time

[top_sourcetype]
definition = sourcetype=top

[users_with_login_privs_sourcetype]
definition = sourcetype=usersWithLoginPrivs

[who_sourcetype]
definition = sourcetype=who
|
|
|
|
#####################
|
|
# utils/aliases
|
|
#####################
|
|
|
|
# Sets `group` to "default" when unix_group is the literal string "*";
# otherwise copies unix_group unchanged.
[eval_host_group]
definition = eval group=if(unix_group=="*", "default", unix_group)

# Eventtype aliases for account and group changes.
[group_add]
definition = eventtype=groupadd OR eventtype="groupadd-suse"

[group_del]
definition = eventtype=groupdel

[password_change]
definition = eventtype=linux-password-change

[password_change_failed]
definition = eventtype=linux-password-change-failed

[su_failed]
definition = eventtype="Failed_SU"

# Keyword search: any of error / failed / severe, excluding events containing "assignment".
[syslog_errors]
definition = error OR failed OR severe NOT assignment

# Searches for a nonsense term and keeps at most one event.
# NOTE(review): presumably meant to return no results (a no-op search) - confirm.
[unix_noop]
definition = cowsshouldnteatcheesethatwouldmakethemsortofcannibalsinawaydontyouthink | head 1

[unix_errors]
definition = eventtype=nix_errors

[user_add]
definition = eventtype=useradd OR eventtype="useradd-suse"

[user_del]
definition = eventtype=userdel
|
|
|
|
# Eval expression that turns a disk size string into a number of megabytes.
# Accepted forms: <n>T, <n>G, <n>M, optionally followed by one more character
# (e.g. "TB", "Gi"). T is scaled by 1024*1024, G by 1024, M is returned as-is.
# The two-character-suffix branches are tested first, then the one-character ones.
# The case() has no default branch, so any other suffix (including K or a bare
# number) matches nothing.
# $disk$ is substituted textually: pass a quoted field reference such as 'Avail'.
[parse_disk_size(1)]
args = disk
definition = case(substr($disk$, -2, 1) == "T", tonumber(substr($disk$, 1, len($disk$) - 2)) * 1024 * 1024, substr($disk$, -2, 1) == "G", tonumber(substr($disk$, 1, len($disk$) - 2)) * 1024, substr($disk$, -2, 1) == "M", tonumber(substr($disk$, 1, len($disk$) - 2)), substr($disk$, -1) == "T", tonumber(substr($disk$, 1, len($disk$) - 1)) * 1024 * 1024, substr($disk$, -1) == "G", tonumber(substr($disk$, 1, len($disk$) - 1)) * 1024, substr($disk$, -1) == "M", tonumber(substr($disk$, 1, len($disk$) - 1)))
|
|
|
|
#####################
|
|
# nodes
|
|
#####################
|
|
# One row per host seen in cpu (CPU="all"), memory, df or iostat data, tagged
# with service="Node", status=1, symbol="nix" and the group from `eval_host_group`.
[unix_host_status]
definition = `os_index` ((`cpu_sourcetype` CPU="all") OR `memory_sourcetype` OR `df_sourcetype` OR `iostat_sourcetype`) | dedup host | `eval_host_group` | eval service="Node" | eval status=1 | eval symbol="nix" | table host service symbol group status

# Same output as unix_host_status, limited to hosts that dropdownsLookup returns
# for the given search terms. $unix_category$ and $unix_group$ are inserted as raw
# search terms, both in the subsearch and again after mvexpand.
[unix_hosts_status(2)]
args = unix_category, unix_group
definition = `os_index` ((`cpu_sourcetype` CPU="all") OR `memory_sourcetype` OR `df_sourcetype` OR `iostat_sourcetype`) [ | inputlookup dropdownsLookup | search $unix_category$ $unix_group$ | fields host] | mvexpand unix_group | mvexpand unix_category | search $unix_category$ $unix_group$ | dedup host | `eval_host_group` | eval service="Node" | eval status=1 | eval symbol="nix" | table host service symbol group status

# Per-host summary for the selected category/group:
#   cpu  = 100 - avg(pctIdle)
#   mem  = avg(memUsedPct)
#   io   = first rReq_PS + first wReq_PS
#   disk = used / (used + avail) * 100, summed over filesystems
# Disk sizes are parsed with `parse_disk_size`. The first stats is per host and
# Filesystem; the second collapses to one row per host.
[unix_hosts_details(2)]
args = unix_category, unix_group
definition = `os_index` ((`cpu_sourcetype` CPU="all") OR `memory_sourcetype` OR `df_sourcetype` OR `iostat_sourcetype`) [ | inputlookup dropdownsLookup | search $unix_category$ $unix_group$ | fields host] | mvexpand unix_group | mvexpand unix_category | search $unix_category$ $unix_group$ | `eval_host_group` | multikv fields Avail Used rReq_PS wReq_PS | eval diskAvail=`parse_disk_size('Avail')` | eval diskUsed=`parse_disk_size('Used')` | fillnull value=NULL Filesystem | stats first(group) as group avg(pctIdle) as pctIdle avg(memUsedPct) as mem first(rReq_PS) as ioReads first(wReq_PS) as ioWrites first(diskAvail) as diskAvail first(diskUsed) as diskUsed by host Filesystem | stats first(group) as group first(pctIdle) as pctIdle first(mem) as mem first(ioReads) as ioReads first(ioWrites) as ioWrites sum(diskAvail) as diskAvail sum(diskUsed) as diskUsed by host | eval cpu=(100-pctIdle) | eval io=ioReads+ioWrites | eval disk=round(diskUsed/(diskUsed+diskAvail)*100,2) | table host group cpu mem io disk
|
|
|
|
# Per-host summary across all hosts (no category/group filter):
#   cpu  = 100 - avg(pctIdle)
#   mem  = avg(memUsedPct)
#   io   = first rReq_PS + first wReq_PS
#   disk = used / (used + avail) * 100, summed over filesystems
# The disk percentage divides by used + avail so it is a true "percent used",
# matching unix_hosts_details(2) and the other disk macros in this file.
[unix_host_details]
definition = `os_index` ((`cpu_sourcetype` CPU="all") OR `memory_sourcetype` OR `df_sourcetype` OR `iostat_sourcetype`) | `eval_host_group` | multikv fields Avail Used rReq_PS wReq_PS | eval diskAvail=`parse_disk_size('Avail')` | eval diskUsed=`parse_disk_size('Used')` | fillnull value=NULL Filesystem | stats first(group) as group avg(pctIdle) as pctIdle avg(memUsedPct) as mem first(rReq_PS) as ioReads first(wReq_PS) as ioWrites first(diskAvail) as diskAvail first(diskUsed) as diskUsed by host Filesystem | stats first(group) as group first(pctIdle) as pctIdle first(mem) as mem first(ioReads) as ioReads first(ioWrites) as ioWrites sum(diskAvail) as diskAvail sum(diskUsed) as diskUsed by host | eval cpu=(100-pctIdle) | eval io=ioReads+ioWrites | eval disk=round(diskUsed/(diskUsed+diskAvail)*100,2) | table host group cpu mem io disk
|
|
|
|
# Heatmap macros: each returns one `heatmap` value per host.

# CPU: 100 - pctIdle from the first CPU="all" event stats encounters per host.
[unix_nodes_heatmap_cpu]
definition = `os_index` `cpu_sourcetype` CPU="all" | eval heatmap = (100-pctIdle) | stats first(heatmap) as heatmap by host

# Memory: average memUsedPct, rounded to 2 decimals.
[unix_nodes_heatmap_mem]
definition = `os_index` `memory_sourcetype` | stats avg(memUsedPct) as heatmap by host | eval heatmap = round(heatmap, 2)

# Disk: used / (used + avail) * 100, with sizes converted by `convert_memk`
# and summed across filesystems.
[unix_nodes_heatmap_disk]
definition = `os_index` `df_sourcetype` | multikv fields Avail Used | `convert_memk(Avail, avail)` | `convert_memk(Used, used)` | stats first(avail) as avail first(used) as used by host Filesystem | stats sum(avail) as avail sum(used) as used by host | eval heatmap = round(((used / (used + avail)) * 100),2) | fields host heatmap

# I/O: first rReq_PS + first wReq_PS per host.
[unix_nodes_heatmap_io]
definition = `os_index` `iostat_sourcetype` | multikv fields rReq_PS wReq_PS | stats first(rReq_PS) as reads first(wReq_PS) as writes by host | eval heatmap = reads + writes | fields host heatmap
|
|
|
|
# Reports the core count for one host as "<n> cores".
# The count is max(CPU) + 1 over the per-core rows, so the aggregate CPU="all"
# row must be excluded. The field name is written `CPU` (upper case) to match
# every other macro in this file; field names are case-sensitive, so a
# lower-case `cpu` filter would not exclude that row.
# The appended "no results" row guarantees one output row; when no numeric CPU
# value is found the text "unknown - is cpu.sh enabled?" is returned instead.
[unix_nodes_detail_specs_cpu_by_host(1)]
args = host
definition = `os_index` `cpu_sourcetype` host=$host$ NOT CPU="all" | append [stats count | eval _raw="no results"] | head 100 | eval name="CPU:" | stats first(name) as name max(CPU) as cpus | eval cpus=if(isnum(cpus), tostring(cpus + 1) + " cores", "unknown - is cpu.sh enabled?")
|
|
|
|
# Host spec rows. Each macro appends a synthetic "no results" row so that one
# output row is always produced, and falls back to an "unknown - is <script>
# enabled?" message when the value comes out as 0.

# RAM: first TotalMBytes value, shown as "<n> MB".
[unix_nodes_detail_specs_mem_by_host(1)]
args = host
definition = `os_index` `memory_sourcetype` host=$host$ | append [stats count | eval _raw="no results"] | head 10 | fillnull TotalMBytes value=0 | stats first(TotalMBytes) as mem | eval name = "RAM:" | fillnull mem value=0 | eval mem=if(mem=0, "unknown - is vmstat.sh enabled?", mem) | eval mem = if(isnum(mem), mem . " MB", mem) | fields name mem

# Disk: distinct count of Filesystem values, shown as "<n> drives".
[unix_nodes_detail_specs_disk_drives_by_host(1)]
args = host
definition = `os_index` `df_sourcetype` host=$host$ | append [stats count | eval _raw="no results"] | stats dc(Filesystem) as drives | eval name="Disk:" | fillnull drives value=0 | eval drives=if(drives=0, "unknown - is df.sh enabled?", tostring(drives) . " drives") | fields name drives

# Capacity: per-filesystem value from `convert_memk(Avail, cap)`, summed and
# divided by 1024*1024 to display "<n> GB".
# NOTE(review): the figure is built from Avail (not Size) - confirm that
# "Capacity" is meant to be available space.
[unix_nodes_detail_specs_disk_cap_by_host(1)]
args = host
definition = `os_index` `df_sourcetype` host=$host$ | append [stats count | eval _raw="no results"] | `convert_memk(Avail, cap)` | fillnull Filesystem value=NULL | stats first(cap) as cap by Filesystem | fillnull cap value=0 | stats sum(cap) as cap | eval name="Capacity:" | eval cap=if(cap=0, "unknown - is df.sh enabled?", round(cap/(1024*1024),0) . " GB") | fields name cap
|
|
|
|
# Current-status values for one host.

# cpu = average of (100 - pctIdle) over all cpu events for the host.
# There is no CPU="all" filter here, so every CPU row contributes.
[unix_nodes_detail_status_cpu_by_host(1)]
args = host
definition = `os_index` `cpu_sourcetype` host=$host$ | eval Percent_CPU_Load = 100 - pctIdle | stats avg(Percent_CPU_Load) as cpu by host

# mem = average memUsedPct.
[unix_nodes_detail_status_mem_by_host(1)]
args = host
definition = `os_index` `memory_sourcetype` host=$host$ | stats avg(memUsedPct) as mem by host

# disk = used / (used + avail) * 100 summed over filesystems, rounded to 2 decimals.
[unix_nodes_detail_status_disk_by_host(1)]
args = host
definition = `os_index` `df_sourcetype` host=$host$ | multikv fields Avail Used | `convert_memk(Avail, avail)` | `convert_memk(Used, used)` | stats first(avail) as avail first(used) as used by host Filesystem | stats sum(avail) as avail sum(used) as used by host | eval disk = round(used/(used+avail)*100, 2) | fields host disk
|
|
|
|
# Sparkline rows for one host: name, a formatted `used` percentage and a
# sparkline `sl`. Each appends a "no results" row so a row is always returned,
# and fills `used` with a fallback string when no value was computed.
# iseval = 0: the definition is plain search text, not an eval expression.

# CPU: 100 - pctIdle over CPU="all" events, sparkline in 2-minute buckets.
[unix_nodes_detail_cpu_sparkline_by_host_1h(1)]
args = host
definition = `os_index` `cpu_sourcetype` host=$host$ CPU="all" | append [stats count | eval _raw="no results" ] | eval used = 100 - pctIdle | eval name = "CPU:" | stats first(name) as name avg(used) as used sparkline(avg(used), 2m) as sl | eval used = round(used, 0) . "%" | fillnull used value="unknown - is cpu.sh enabled?"
iseval = 0

# Memory: memUsedPct, sparkline in 2-minute buckets.
[unix_nodes_detail_mem_sparkline_by_host_1h(1)]
args = host
definition = `os_index` `memory_sourcetype` host=$host$ | append [stats count | eval _raw="no results"] | eval name = "Memory:" | stats first(name) as name avg(memUsedPct) as used sparkline(avg(memUsedPct), 2m) as sl | eval used = round(used, 0) . "%" | fillnull used value="unknown - is vmstat.sh enabled?"
iseval = 0

# Disk: used / (used + avail) per 5-minute bucket, summed over filesystems.
# The appended row is given _time=now() so it falls into a bucket.
[unix_nodes_detail_disk_sparkline_by_host_1h(1)]
args = host
definition = `os_index` `df_sourcetype` host=$host$ | append [stats count | eval _raw="no results" | eval _time=now()] | `convert_memk(Avail, avail)` | `convert_memk(Used, used)` | fillnull used value=0 | fillnull avail value=0 | fillnull Filesystem value="no results" | bucket span=5m _time | stats avg(used) as used avg(avail) as avail by Filesystem _time | stats sum(used) as used sum(avail) as avail by _time | eval used_perc_no_unit = round(used/(used+avail)*100, 0) | eval used_perc = used_perc_no_unit . "%" | eval name = "Disk:" | stats first(name) as name first(used_perc) as used sparkline(max(used_perc_no_unit), 5m) as sl | fillnull used value="--%"
iseval = 0
|
|
|
|
# Process table for one host from `top` data: max pctCPU, max pctMEM and the
# last cpuTIME per COMMAND and USER, with COMMAND exposed as CMD.
# Uses the `top_sourcetype` macro, like the other top-based macros in this
# file, so overriding that macro also takes effect here.
[unix_nodes_detail_top_processes_by_host(1)]
args = host
definition = `os_index` `top_sourcetype` host=$host$ | stats max(pctCPU) as pctCPU max(pctMEM) as pctMEM last(cpuTIME) as cpuTIME by COMMAND, USER | eval CMD=COMMAND | fields CMD, USER, pctCPU, pctMEM, cpuTIME
|
|
|
|
|
|
### Multiple Host Macros
|
|
# $host$ is inserted as the value of a host= search term in every macro below.

# Timechart of average (100 - pctIdle), one series per host.
[Percent_CPU_by_Host(1)]
args = host
definition = `os_index` `cpu_sourcetype` host=$host$ | eval Percent_CPU_Load = 100 - pctIdle | timechart avg(Percent_CPU_Load) by host

# Timechart of average loadAvg1mi (read from the memory sourcetype), per host.
[Percent_Load_by_Host(1)]
args = host
definition = `os_index` `memory_sourcetype` host=$host$ | timechart avg(loadAvg1mi) by host

# Highest pctCPU rows from `top` data: sorted descending, at most 5 rows kept per host.
[Top_5_CPU_Processes_by_Host(1)]
args = host
definition = `os_index` `top_sourcetype` host=$host$ | stats max(pctCPU) as maxCPU by host, COMMAND, _time | sort -maxCPU | dedup 5 host

# Timechart of average `threads`, per host.
[Number_Threads_by_Host(1)]
args = host
definition = `os_index` `memory_sourcetype` host=$host$ | timechart avg(threads) by host

# Timechart of average `processes`, per host.
[Number_Processes_by_Host(1)]
args = host
definition = `os_index` `memory_sourcetype` host=$host$ | timechart avg(processes) by host
|
|
|
|
|
|
### Single Host Macros
|
|
# pctCPU is summed per _time and COMMAND, then averaged over time per COMMAND.
[CPU_Usage_by_Command_for_Host(1)]
args = host
definition = `os_index` `ps_sourcetype` host=$host$ | stats sum(pctCPU) as pctCPU by _time,COMMAND | timechart avg(pctCPU) by COMMAND

# Timechart of average user/nice/system/iowait/idle percentages.
# A missing pctNice is treated as 0.
[CPU_Usage_by_State_for_Host(1)]
args = host
definition = `os_index` `cpu_sourcetype` host=$host$ | eval pctNice=if(isnull(pctNice), 0, pctNice) | timechart avg(pctUser), avg(pctNice), avg(pctSystem), avg(pctIowait), avg(pctIdle)

# Average pctUser, pctSystem and pctIdle per host.
[Stats_for_CPU_State_by_Host(1)]
args = host
definition = `os_index` `cpu_sourcetype` host=$host$ | stats avg(pctUser), avg(pctSystem), avg(pctIdle) by host

# Max pctCPU per host, COMMAND and _time from `top` data, highest first.
[Top_CPU_Processes_for_Host(1)]
args = host
definition = `os_index` `top_sourcetype` host=$host$ | stats max(pctCPU) as maxCPU by host, COMMAND, _time | sort -maxCPU

# Timechart of average pctCPU from `ps` data, one series per USER.
[CPU_Usage_by_User_for_Host(1)]
args = host
definition = `os_index` `ps_sourcetype` host=$host$ | timechart avg(pctCPU) by USER
|
|
|
|
# Sum of pctCPU from `top` data per host and USER, highest total first.
# The by-clause ends at USER; a dangling comma before the pipe was removed.
[Top_CPU_Users_for_Host(1)]
args = host
definition = `os_index` `top_sourcetype` host=$host$ | stats sum(pctCPU) as Total_CPU_Time by host, USER | sort -Total_CPU_Time
|
|
|
|
# pctCPU is averaged per time bin, PID, COMMAND and host, then summed per
# COMMAND in the timechart.
[CPU_Sum_by_Command_for_Host(1)]
args = host
definition = `os_index` `ps_sourcetype` host=$host$ | bin _time | stats avg(pctCPU) as pctCPU by _time, PID, COMMAND, host | timechart sum(pctCPU) by COMMAND
|
|
|
|
|
|
####################################################
|
|
## Memory Macros
|
|
####################################################
|
|
### Single Host Macros
|
|
# Timechart of median free, used and swap-used percentages.
[Mem_Usage_for_Host(1)]
args = host
definition = `os_index` `memory_sourcetype` host=$host$ | timechart median(memFreePct) as Percent_Mem_Free, median(memUsedPct) as Percent_Mem_Used, median(swapUsedPct) as Percent_Swap

# Timechart of median RSZ_KB / 1024 (resident MB), per COMMAND.
[Mem_Usage_by_Command_for_Host(1)]
args = host
definition = `os_index` `ps_sourcetype` host=$host$ | timechart eval(median(RSZ_KB)/1024) as ResidentMB by COMMAND

# Max resident MB, virtual MB and pctMEM per host, COMMAND and _time,
# reduced to one row per COMMAND and sorted by resident MB descending.
[Top_Mem_Command_for_Host(1)]
args = host
definition = `os_index` `ps_sourcetype` host=$host$ | eval RSZ_MB=RSZ_KB/1024 | eval VSZ_MB=VSZ_KB/1024 | stats max(RSZ_MB) as Resident_MB, max(VSZ_MB) as Virtual_MB, max(pctMEM) as Percent_Memory by host, COMMAND, _time | dedup COMMAND | sort -Resident_MB

# Per-USER resident MB: sum(RSZ_MB) divided by the number of distinct sample
# times in each bucket. Limited to 10 series, with no OTHER series.
[Top_Users_of_VM_for_Host(1)]
args = host
definition = `os_index` `ps_sourcetype` host=$host$ | eval RSZ_MB=RSZ_KB/1024 | eval time=_time | timechart eval(sum(RSZ_MB)/dc(time)) as Avg_Mem_Usage by USER useother=F limit=10
|
|
|
|
### Multiple Host Macros
|
|
# Timechart of median memUsedPct, one series per host.
[Percent_MEM_by_Host(1)]
args = host
definition = `os_index` `memory_sourcetype` host=$host$ | timechart median(memUsedPct) by host

# Median resident MB grouped by VSZ_MB, host, COMMAND and _time, then one row
# kept per host and COMMAND, ordered by median(RSZ_MB) descending.
[Top_Mem_Processes_by_Host(1)]
args = host
definition = `os_index` `ps_sourcetype` host=$host$ | eval RSZ_MB=RSZ_KB/1024 | eval VSZ_MB=VSZ_KB/1024 | stats median(RSZ_MB) by VSZ_MB, host, COMMAND, _time | dedup 1 host, COMMAND sortby -median(RSZ_MB)

# Real and swap memory per host from hardware data, fixed to the last day
# (earliest=-1d). When unit is "kB" the values are divided by 1024.
[Memory_Hardware_by_Host(1)]
args = host
definition = `os_index` `hardware_sourcetype` earliest=-1d host=$host$ | dedup host | eval k=if(unit=="kB",1024,1) | eval RealMemoryMB = RealMemoryMB/k | eval SwapMemoryMB = SwapMemoryMB/k | fields + RealMemoryMB, SwapMemoryMB, host | chart max(RealMemoryMB) as Real_Memory_MB, max(SwapMemoryMB) as Swap_Memory_MB by host

# Max resident MB per USER and COMMAND, highest first.
[Top_Memory_Users_by_Command_by_Host(1)]
args = host
definition = `os_index` `ps_sourcetype` host=$host$ | eval RSZ_MB=RSZ_KB/1024 | stats max(RSZ_MB) by USER, COMMAND| sort -max(RSZ_MB)
|
|
|
|
#####################################################
|
|
## Network Macros
|
|
#####################################################
|
|
|
|
# Transmit throughput in KB per interface: the difference between the TXbytes
# value carried by streamstats and the current event's value, divided by 1024,
# then sum / distinct sample times per bucket. Series are labelled
# "<Name>-<inetAddr>@<host>". RX_Thruput_KB is computed but not charted.
[Thruput_by_Interface_by_Host(1)]
args = host
definition = `os_index` `interfaces_sourcetype` host=$host$ | streamstats current=f last(TXbytes) as lastTX, last(RXbytes) as lastRX by Name | eval time=_time | strcat Name "-" inetAddr "@" host Interface_Host | eval RX_Thruput_KB = (lastRX-RXbytes)/1024 | eval TX_Thruput_KB = (lastTX-TXbytes)/1024 | timechart eval(sum(TX_Thruput_KB)/dc(time)) by Interface_Host

# 20 most common "<Name>-<inetAddr>@<host>" labels.
[Top_Inet_Addresses_by_Host(1)]
args = host
definition = `os_index` `interfaces_sourcetype` host=$host$ | strcat Name "-" inetAddr "@" host Interface_Host | top Interface_Host limit=20

# Port counts from the first open-ports event returned. The listed port numbers
# are mapped to a name; any other port is shown as "<Port> / <Proto>".
[Open_Ports_by_Host(1)]
args = host
definition = `os_index` `open_ports_sourcetype` host=$host$ | head 1 | multikv fields Proto, Port | eval PortOverProto = case(Port == "8089", "Splunk Management Port", Port == "8000", "Splunk HTTP Port", Port == "21", "ftp", Port == "22", "ssh", Port == "23", "telnet", Port == "25", "smtp", Port == "69", "tftp", Port == "79", "finger", Port == "80", "http", Port == "88", "kerberos", Port == "143", "imap", Port == "161", "snmp", Port == "162", "snmptrap", Port == "179", "bgp", Port == "1521", "SQL*Net", 1==1, Port." / ".Proto) | chart count by PortOverProto | sort count desc | rename count as "# of Connections Accepted"

# Connection counts per foreign address from the first netstat event returned.
# The port suffix (after the last ":" or ".") is stripped and "*" is dropped.
[Addresses_by_Host(1)]
args = host
definition = `os_index` `netstat_sourcetype` host=$host$ | head 1 | multikv fields ForeignAddress | rex field=ForeignAddress "(?<hostOnly>^.*)[:\.].+$" | fields + hostOnly | where hostOnly != "*" | rename hostOnly as Address | chart count by Address | sort count desc | rename count as "# of Connections to This Address"

# Timechart of socket counts per State, excluding State="<n/a>".
[Sockets_by_State_by_Host(1)]
args = host
definition = `os_index` `netstat_sourcetype` host=$host$ | multikv fields State | search NOT (State="<n/a>") | timechart count by State
|
|
|
|
# Count of open-port rows per Port for the requested host(s).
# The declared `host` argument is applied as a host= filter, like every other
# *_by_Host macro in this file. Without it the argument was ignored and all
# hosts were counted.
# NOTE(review): `sort count` is ascending, so the least frequent ports come
# first - confirm whether descending order was intended.
[Frequently_Open_Ports_by_Host(1)]
args = host
definition = `os_index` `open_ports_sourcetype` host=$host$ | multikv | stats count by Port | sort count
|
|
|
|
#####################################################
|
|
## Disk Macros
|
|
#####################################################
|
|
# Timechart of average UsePct, one series per "<host>@<Filesystem>".
# The "@" separator is double-quoted: strcat only treats double-quoted tokens
# as string literals, so a single-quoted '@' is read as a (missing) field name
# and contributes nothing to the label.
# NOTE(review): the trailing rename targets avg(UsePct), but after a split-by
# timechart the columns are named after Host_FileSystem values - confirm that
# the rename has any effect.
[Disk_Used_Pct_by_Host(1)]
args = host
definition = `os_index` `df_sourcetype` host=$host$ | strcat host "@" Filesystem Host_FileSystem | timechart avg(UsePct) by Host_FileSystem | rename avg(UsePct) as %Used
|
|
|
|
# One df event per host (dedup keeps the first event returned for each host).
# The declared `host` argument is applied as a host= filter, like the other
# *_by_Host macros in this file. Without it the argument was ignored.
[Latest_Disk_Used_by_Host(1)]
args = host
definition = `os_index` `df_sourcetype` host=$host$ | dedup host
|
|
|
|
# Returns the single row with the highest UsePct for the requested host(s).
# UsePct is sorted descending so `sort 1` keeps the maximum. The earlier
# ascending sort kept the minimum.
# NOTE(review): `fields UsePct, Filesystem` drops host before the sort, so the
# trailing `host` sort key has nothing to act on - confirm whether host should
# be kept in the output.
[Max_Disk_Used_by_Host(1)]
args = host
definition = `os_index` `df_sourcetype` host=$host$ | fields UsePct, Filesystem |sort 1 -UsePct, Filesystem, host
|
|
|
|
# Open-file timecharts from lsof data. Each divides the event count in a bucket
# by the number of distinct sample times (`time` is a copy of _time), giving an
# average count per sample.

# Split by COMMAND.
[Open_Files_by_Command_and_Host(1)]
args = host
definition = `os_index` `lsof_sourcetype` host=$host$ | eval time=_time| timechart eval(count/dc(time)) by COMMAND

# Split by TYPE.
[Open_Files_by_Type_and_Host(1)]
args = host
definition = `os_index` `lsof_sourcetype` host=$host$ | eval time =_time | timechart eval(count/dc(time)) by TYPE

# Split by USER.
[Open_Files_by_User_and_Host(1)]
args = host
definition = `os_index` `lsof_sourcetype` host=$host$ | eval time=_time | timechart eval(count/dc(time)) by USER
|
|
|
|
|
|
#####################################################
|
|
## User Macros
|
|
#####################################################
|
|
# One row per distinct USERNAME / LINE / TIME from `who` data (dedup ordered by
# ascending _time), shown with the most recent _time first.
[User_Sessions_by_Host(1)]
args = host
definition = `os_index` `who_sourcetype` host=$host$ | fields USERNAME, LINE, TIME, _time | dedup USERNAME, LINE, TIME sortby +_time | sort -_time

# Events with eventtype=failed_login for the host.
[Failed_Logins_by_Host(1)]
args = host
definition = `os_index` eventtype=failed_login host=$host$

# One usersWithLoginPrivs event per host.
[Users_with_Login_Privs_by_Host(1)]
args = host
definition = `os_index` `users_with_login_privs_sourcetype` host=$host$ | dedup host
|
|
|
|
#####################################################
|
|
## Alerts
|
|
#####################################################
|
|
# Alert searches. Each one aggregates a metric, keeps the rows that cross
# $threshold$, sets `title` to its own macro name and then applies
# `unix_alert_decoration`, which joins on that title.

# Average CPU load (100 - avg pctIdle over CPU="all") above the threshold.
[CPU_Exceeds_Percent_by_Host(1)]
args = threshold
definition = `os_index` `cpu_sourcetype` host=* CPU="all" | stats avg(pctIdle) as pctIdle by host | eval Percent_CPU_Load = 100 - pctIdle | where Percent_CPU_Load > $threshold$ | eval title="CPU_Exceeds_Percent_by_Host" | `unix_alert_decoration` | fields Percent_CPU_Load, host, hosts, host_count, severity, sid, time_fired, _time

# Average CPU load below the threshold.
[CPU_Under_Percent_by_Host(1)]
args = threshold
definition = `os_index` `cpu_sourcetype` host=* CPU="all" | stats avg(pctIdle) as pctIdle by host | eval Percent_CPU_Load = 100 - pctIdle | where Percent_CPU_Load < $threshold$ | eval title="CPU_Under_Percent_by_Host" | `unix_alert_decoration` | fields Percent_CPU_Load, host, hosts, host_count, severity, sid, time_fired, _time

# Average loadAvg1mi above the threshold.
[Load_Exceeds_by_Host(1)]
args = threshold
definition = `os_index` `memory_sourcetype` host=* | stats avg(loadAvg1mi) as Load_Avg_One_Minute by host | where Load_Avg_One_Minute > $threshold$ | eval title="Load_Exceeds_by_Host" | `unix_alert_decoration` | fields Load_Avg_One_Minute, host, hosts, host_count, severity, sid, time_fired, _time

# Average thread count above the threshold.
[Threads_Exceeds_by_Host(1)]
args = threshold
definition = `os_index` `memory_sourcetype` host=* | stats avg(threads) as Total_Threads by host | where Total_Threads > $threshold$ | eval title="Threads_Exceeds_by_Host" | `unix_alert_decoration` | fields Total_Threads, host, hosts, host_count, severity, sid, time_fired, _time

# Average process count above the threshold.
[Processes_Exceeds_by_Host(1)]
args = threshold
definition = `os_index` `memory_sourcetype` host=* | stats avg(processes) as Total_Processes by host | where Total_Processes > $threshold$ | eval title="Processes_Exceeds_by_Host" | `unix_alert_decoration` | fields Total_Processes, host, hosts, host_count, severity, sid, time_fired, _time

# Resident memory per COMMAND and host above the threshold (MB). RSZ_KB is first
# captured under the name ResidentMB and then divided by 1024.
[Memory_Exceeds_MB_by_Process(1)]
args = threshold
definition = `os_index` `ps_sourcetype` host=* | stats first(RSZ_KB) as ResidentMB by COMMAND, host | eval ResidentMB = (ResidentMB/1024) | where ResidentMB > $threshold$ | eval title="Memory_Exceeds_MB_by_Process" | `unix_alert_decoration` | fields ResidentMB, COMMAND, host, hosts, host_count, severity, sid, time_fired, _time

# First memUsedPct per host above the threshold.
[Memory_Exceeds_Percent_by_Host(1)]
args = threshold
definition = `os_index` `memory_sourcetype` host=* | stats first(memUsedPct) as UsedMemoryPercent by host | where UsedMemoryPercent > $threshold$ | eval title="Memory_Exceeds_Percent_by_Host" | `unix_alert_decoration` | fields UsedMemoryPercent, host, hosts, host_count, severity, sid, time_fired, _time

# First memUsedMB per host above the threshold.
[Memory_Exceeds_MB_by_Host(1)]
args = threshold
definition = `os_index` `memory_sourcetype` host=* | stats first(memUsedMB) as UsedMemoryMB by host | where UsedMemoryMB > $threshold$ | eval title="Memory_Exceeds_MB_by_Host" | `unix_alert_decoration` | fields UsedMemoryMB, host, hosts, host_count, severity, sid, time_fired, _time

# First PercentUsedSpace per Filesystem and host above the threshold; the other
# df columns are carried along with values().
[Disk_Used_Exceeds_Percent_by_Host(1)]
args = threshold
definition = `os_index` `df_sourcetype` host=* | stats first(PercentUsedSpace) as PercentUsed, values(Type) as Type, values(Size) as Size, values(Used) as Used, values(Avail) as Avail, values(MountedOn) as MountedOn by Filesystem, host | where PercentUsed > $threshold$ | eval title="Disk_Used_Exceeds_Percent_by_Host" | `unix_alert_decoration` | fields Filesystem, Type, Size, Used, Avail, PercentUsed, MountedOn, host, hosts, host_count, severity, sid, time_fired, _time

# lsof rows counted per _time, PID, COMMAND and host; the first count per
# process above the threshold.
[Open_Files_Exceeds_by_Process(1)]
args = threshold
definition = `os_index` `lsof_sourcetype` host=* | stats count by _time, PID, COMMAND, host | stats first(count) as OpenFiles by PID, COMMAND, host | where OpenFiles > $threshold$ | eval title="Open_Files_Exceeds_by_Process" | `unix_alert_decoration` | fields OpenFiles, PID, COMMAND, host, hosts, host_count, severity, sid, time_fired, _time

# Average avgWaitMillis per host above the threshold.
[IO_Wait_Exceeds_Threshold(1)]
args = threshold
definition = `os_index` `iostat_sourcetype` | stats avg(avgWaitMillis) as wait_time_millis by host | where wait_time_millis > $threshold$ | eval title="IO_Wait_Exceeds_Threshold" | `unix_alert_decoration` | fields wait_time_millis, host, hosts, host_count, severity, sid, time_fired, _time

# Average bandwUtilPct per host above the threshold.
[IO_Utilization_Exceeds_Threshold(1)]
args = threshold
definition = `os_index` `iostat_sourcetype` | stats avg(bandwUtilPct) as bandwidth_util by host | where bandwidth_util > $threshold$ | eval title="IO_Utilization_Exceeds_Threshold" | `unix_alert_decoration` | fields bandwidth_util, host, hosts, host_count, severity, sid, time_fired, _time
|
|
|
|
#####################################################
|
|
## Alert Helpers
|
|
#####################################################
|
|
# Enriches alert rows; the incoming rows must already carry `title` and `host`.
#   severity   - alert.severity of the saved search whose title matches (via join)
#   hosts      - all host values in the result set
#   host_count - distinct host count
#   time_fired - now(), also copied into _time
#   sid        - info_sid as added by addinfo
[unix_alert_decoration]
definition = join title [rest /services/saved/searches | rename alert.severity as severity | fields title severity] | eventstats values(host) as hosts | eventstats dc(host) as host_count | eval time_fired = now() | addinfo | eval _time = time_fired | rename info_sid as sid
|
|
|
|
#####################################################
|
|
## Alert Thresholds
|
|
#####################################################
|
|
# Numeric constants, one per alert macro, named _unix_alert_threshold_<alert>.
# NOTE(review): presumably supplied as the $threshold$ argument of the
# matching alert macro by the saved searches - confirm against the callers.
[_unix_alert_threshold_CPU_Exceeds_Percent_by_Host]
definition = 99

[_unix_alert_threshold_CPU_Under_Percent_by_Host]
definition = 30

[_unix_alert_threshold_Load_Exceeds_by_Host]
definition = 5

[_unix_alert_threshold_Threads_Exceeds_by_Host]
definition = 250

[_unix_alert_threshold_Processes_Exceeds_by_Host]
definition = 100

[_unix_alert_threshold_Memory_Exceeds_Percent_by_Host]
definition = 90

[_unix_alert_threshold_Memory_Exceeds_MB_by_Process]
definition = 1024

[_unix_alert_threshold_Memory_Exceeds_MB_by_Host]
definition = 2048

[_unix_alert_threshold_Disk_Used_Exceeds_Percent_by_Host]
definition = 90

[_unix_alert_threshold_Open_Files_Exceeds_by_Process]
definition = 200

[_unix_alert_threshold_IO_Wait_Exceeds_Threshold]
definition = 10

[_unix_alert_threshold_IO_Utilization_Exceeds_Threshold]
definition = 95
|
|
|
|
#####################################################
|
|
## View Macros
|
|
#####################################################
|
|
# Base searches for the alert detail views. Each filters on host=$HOST$, adds a
# `time` field via `convert mktime(_time)` and tabulates the listed columns.

# CPU="all" rows with pctUser and pctSystem.
[alert_detail_cpu_basesearch(1)]
args = HOST
definition = `os_index` `cpu_sourcetype` host=$HOST$ | search CPU="all" | convert mktime(_time) as time | table _time, time, host, sourcetype, pctUser, pctSystem

# Memory rows; memUsed and memFree are the MB values multiplied by 1000000.
[alert_detail_memory_basesearch(1)]
args = HOST
definition = `os_index` `memory_sourcetype` host=$HOST$ | convert mktime(_time) as time | eval memUsed=memUsedMB*1000000 | eval memFree=memFreeMB*1000000 | table _time, time, sourcetype, host, memFreePct, memUsedPct, memFree, memUsed, processes, threads

# Process rows; RSZ and VSZ are the KB values multiplied by 1024.
[alert_detail_ps_basesearch(1)]
args = HOST
definition = `os_index` `ps_sourcetype` host=$HOST$ | eval RSZ=RSZ_KB * 1024 | eval VSZ=VSZ_KB * 1024 | convert mktime(_time) as time | table _time, time, USER, PID, PSR, pctCPU, CPUTIME, pctMEM, RSZ, VSZ, S, ELAPSED, COMMAND, ARGS

# Netstat rows.
[alert_detail_netstat_basesearch(1)]
args = HOST
definition = `os_index` `netstat_sourcetype` host=$HOST$ | convert mktime(_time) as time | table _time, time, Proto, Recv_Q, Send_Q, LocalAddress, ForeignAddress, State

# lsof rows.
[alert_detail_lsof_basesearch(1)]
args = HOST
definition = `os_index` `lsof_sourcetype` host=$HOST$ | convert mktime(_time) as time | table _time, time, COMMAND, PID, USER, FD, TYPE, DEVICE, NAME, SIZE

# Fired alerts from the unix summary index, one row per affected host.
# Rows are joined to the /services/unix/alert_overlay REST endpoint on the saved
# search name. Severity numbers map to labels: 1 = Low, 2-4 = Medium, 5 = High;
# any other value matches no branch of the case().
[alert_events]
definition = `cp-unix-dashboards-unixsummary-index` | stats first(hosts) as hosts first(host_count) as host_count by severity sid time_fired search_name | rename time_fired as trigger_time | rename search_name as ss_name | join ss_name [| rest /services/unix/alert_overlay splunk_server=local| fillnull value="" business_impact description escalation remediation | fields - eai:* | rename title as ss_name ] | eval severity=case(severity==1, "Low", severity==2, "Medium", severity==3, "Medium", severity==4, "Medium", severity==5, "High") | makemv hosts | mvexpand hosts | sort +trigger_time
|
|
|
|
|
|
#####################################################
|
|
## Home View Macros
|
|
#####################################################
|
|
|
|
# Home view metrics. Each macro limits events to hosts that dropdownsLookup
# returns for the $unix_category$ / $unix_group$ search terms, expands the
# multivalue unix_group / unix_category fields, re-applies the same terms and
# emits one `metric` per unix_group and unix_category.

# metric = median pctIdle.
[home_cpu_idle(2)]
args = unix_category, unix_group
definition = `os_index` `cpu_sourcetype` [ | inputlookup dropdownsLookup | search $unix_category$ $unix_group$ | fields host] |mvexpand unix_group | mvexpand unix_category | search $unix_category$ $unix_group$ | eval pctUsed = 100-pctIdle | eval pctFree = pctIdle | stats median(pctFree) as metric by unix_group unix_category

# metric = median (100 - pctIdle).
[home_cpu_used(2)]
args = unix_category, unix_group
definition = `os_index` `cpu_sourcetype` [ | inputlookup dropdownsLookup | search $unix_category$ $unix_group$ | fields host] |mvexpand unix_group | mvexpand unix_category | search $unix_category$ $unix_group$ | eval pctUsed = 100-pctIdle | eval pctFree = pctIdle | stats median(pctUsed) as metric by unix_group unix_category

# metric = median memUsedPct.
[home_memory(2)]
args = unix_category, unix_group
definition = `os_index` `memory_sourcetype` [ | inputlookup dropdownsLookup | search $unix_category$ $unix_group$ | fields host] | mvexpand unix_group | mvexpand unix_category | search $unix_category$ $unix_group$ | eval pctUsed = memUsedPct | eval pctFree = memFreePct | stats median(pctUsed) as metric by unix_group unix_category

# metric = median memFreePct.
[home_memory_free(2)]
args = unix_category, unix_group
definition = `os_index` `memory_sourcetype` [ | inputlookup dropdownsLookup | search $unix_category$ $unix_group$ | fields host] | mvexpand unix_group | mvexpand unix_category | search $unix_category$ $unix_group$ | eval pctUsed = memUsedPct | eval pctFree = memFreePct | stats median(pctFree) as metric by unix_group unix_category

# metric = average PercentUsedSpace.
# The strcat result Host_FileSystem is not referenced by the stats that follows.
[home_disk_used(2)]
args = unix_category, unix_group
definition = `os_index` `df_sourcetype` [ | inputlookup dropdownsLookup | search $unix_category$ $unix_group$ | fields host] | mvexpand unix_group | mvexpand unix_category | search $unix_category$ $unix_group$ | strcat host '@' Filesystem Host_FileSystem | stats avg(PercentUsedSpace) as metric by unix_group unix_category

# metric = average PercentFreeSpace.
# The strcat result Host_FileSystem is not referenced by the stats that follows.
[home_disk_free(2)]
args = unix_category, unix_group
definition = `os_index` `df_sourcetype` [ | inputlookup dropdownsLookup | search $unix_category$ $unix_group$ | fields host] | mvexpand unix_group | mvexpand unix_category | search $unix_category$ $unix_group$ | strcat host '@' Filesystem Host_FileSystem | stats avg(PercentFreeSpace) as metric by unix_group unix_category
|
|
|
|
|
|
# [home_network_thruput(2)]
|
|
# args = category, group
|
|
# definition = `os_index` `interfaces_sourcetype` $category$ $group$ | streamstats current=f last(TXbytes) as lastTX, last(RXbytes) as lastRX by Name | eval time=_time | strcat Name "-" inetAddr "@" host Interface_Host | eval RX_Thruput_KB = (lastRX-RXbytes)/1024 | eval TX_Thruput_KB = (lastTX-TXbytes)/1024 | stats eval(sum(TX_Thruput_KB)/dc(time)) by group category
|
|
#
|
|
|
|
|
|
#####################################################
|
|
## Utility Macros
|
|
#####################################################
|
|
|
|
# `convert_memk(sizeField, bytesField)` behaves like
# `convert memk(sizeField) as bytesField`, with added support for a terabyte
# suffix, which the built-in conversion lacks (SPL-95079).
# How it works:
#   1. rex captures everything before the last "t" or "T" in sizeField into _terabytes.
#   2. If that capture compares >= 0, _size becomes "<_terabytes * 1024>G";
#      otherwise _size is the original sizeField value.
#   3. `convert memk(_size)` writes the result into bytesField.
# NOTE(review): despite the argument name, `convert memk` is documented as
# producing kilobytes - confirm the unit that callers expect.
[convert_memk(2)]
args = sizeField, bytesField
definition = rex field=$sizeField$ "(?<_terabytes>.*)[tT]" | eval _size=if(_terabytes >= 0, (_terabytes*1024)."G", $sizeField$) | convert memk(_size) as $bytesField$
|