You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1095 lines
58 KiB

[dmc_audit_searchlocal(1)]
# searches the audit index for action=search events that carry an id or a search_id,
# falls back to id when search_id is missing, runs the replace command over search_id,
# and finally applies the caller-supplied filter expression
args = filter
definition = search `dmc_set_index_audit` action=search (id=* OR search_id=*) | eval search_id = if(isnull(search_id), id, search_id) | replace '*' with * in search_id | search $filter$
[dmc_get_instance_info(1)]
# this is used in the 'Instance to Query:' dropdown list
# note that the role name is something like: indexer, search_head, cluster_master, license_master, deployment_server
# note: rows are collapsed with "mvcombine search_group" before the group filter is applied; this macro does not
# dedup serverName itself, so callers that need one row per instance (see dmc_get_instances_on_a_machine) dedup on their own
args = group
definition = inputlookup dmc_assets | mvcombine search_group | search search_group="$group$"
iseval = 0
[dmc_get_machine_info]
# choose the first instance on each machine: one row per machine carrying the first host and serverName seen
# this is used in the 'Server to Query:' dropdown list
definition = `dmc_get_instance_info(*)` \
| stats first(host) as host first(serverName) as serverName by machine
iseval = 0
[dmc_get_instances_on_a_machine(1)]
# list the instances whose machine field equals the given machine, one row per serverName
args = machine
definition = `dmc_get_instance_info(*)` \
| where machine = "$machine$" \
| dedup serverName
iseval = 0
[dmc_get_groups]
# list every distinct search_group in dmc_assets that matches dmc_*,
# then label and order the groups with `dmc_set_group_label_and_sort`
definition = inputlookup dmc_assets \
| fields search_group \
| dedup search_group \
| search search_group="dmc_*" \
| `dmc_set_group_label_and_sort(search_group)`
iseval = 0
[dmc_get_groups_containing_role(1)]
# this will get groups that contain a given role, for example, get all groups that contain at least one indexer.
# the role argument is matched against search_group, so it is passed as a full group name;
# search_group is combined per row first so the filter keeps rows holding the role, then expanded again
# to list every group those rows belong to
args = role
definition = inputlookup dmc_assets \
| mvcombine search_group \
| search search_group="$role$" \
| mvexpand search_group \
| fields search_group \
| dedup search_group \
| `dmc_set_group_label_and_sort(search_group)`
[dmc_get_indexer_cluster_groups]
# list the distinct search groups matching dmc_indexerclustergroup_*;
# label is the group name with the "dmc_indexerclustergroup_" text removed
definition = inputlookup dmc_assets \
| fields search_group \
| dedup search_group \
| search search_group="dmc_indexerclustergroup_*" \
| eval label = replace(search_group, "dmc_indexerclustergroup_", "")
[dmc_get_search_head_cluster_groups]
# list the distinct search groups matching dmc_searchheadclustergroup_*;
# label is the group name with the "dmc_searchheadclustergroup_" text removed
definition = inputlookup dmc_assets \
| fields search_group \
| dedup search_group \
| search search_group="dmc_searchheadclustergroup_*" \
| eval label = replace(search_group, "dmc_searchheadclustergroup_", "")
iseval = 0
[dmc_get_instance_roles]
# get roles of each instance
# the only difference between role and group is that, group includes custom group, which has prefix: dmc_customgroup_
# role group: dmc_group_(indexer, search_head, cluster_master, deployment_server, license_master)
# custom group: dmc_customgroup_somename
# how it works: role starts as a copy of search_group, is expanded to one row per value, groups that do not start
# with dmc_group_ become an empty string (they are not dropped), the dmc_group_ prefix is stripped,
# and the rows are recombined into a multivalue role field
definition = inputlookup dmc_assets \
| mvcombine search_group \
| eval role = search_group \
| mvexpand role \
| eval role = if(like(role, "dmc_group_%"), role, "") \
| eval role = replace(role, "^dmc_group_", "") \
| mvcombine role
iseval = 0
[dmc_get_primary_role]
# get primary role of each instance
# the branches are evaluated top to bottom, so their order is the precedence:
# cluster_master, search_head, indexer, kv_store, license_master.
# there is no fallback branch: an instance with none of these roles gets no primary_role value
definition = eval primary_role = case( \
role = "cluster_master", "cluster_master", \
role = "search_head", "search_head", \
role = "indexer", "indexer", \
role = "kv_store", "kv_store", \
role = "license_master", "license_master")
iseval = 0
[dmc_pretty_print_role(1)]
# pretty print the role
# the role argument is the NAME of the field to rewrite in place; each of its values is passed through mvmap
# and translated to a display label. There is no fallback branch for values that are not listed below
args = role
definition = eval $role$ = mvmap($role$, case( \
$role$ = "cluster_master", "Cluster Master", \
$role$ = "search_head", "Search Head", \
$role$ = "indexer", "Indexer", \
$role$ = "kv_store", "KV Store", \
$role$ = "license_master", "License Master"))
iseval = 0
[dmc_set_group_label_and_sort(1)]
# produce well-formatted group name and sort groups: built-in group first, custom group second
# the search_group argument is the NAME of the field holding the group name
# label: the group name without its dmc_<kind>group_ prefix; built-in groups get a fixed display name,
#        other kinds get a suffix naming the kind, anything else is marked "(Unrecognized)"
# order: temporary numeric sort key (100 built-in, 200 custom, 300 indexer cluster, 400 search head cluster,
#        500 everything else); it is kept numeric in every branch and removed again after sorting
args = search_group
definition = eval label = replace($search_group$, "^dmc_\w*group_", "") \
| eval label = case( \
$search_group$ == "dmc_group_indexer", "Indexer", \
$search_group$ == "dmc_group_search_head", "Search Head", \
$search_group$ == "dmc_group_cluster_master", "Cluster Master", \
$search_group$ == "dmc_group_license_master", "License Master", \
$search_group$ == "dmc_group_deployment_server", "Deployment Server", \
$search_group$ == "dmc_group_kv_store", "KV Store", \
$search_group$ == "dmc_group_shc_deployer", "SHC Deployer", \
like($search_group$, "dmc_customgroup_%"), label." (custom)", \
like($search_group$, "dmc_indexerclustergroup_%"), label." (Indexer Cluster)", \
like($search_group$, "dmc_searchheadclustergroup_%"), label." (Search Head Cluster)", \
1 == 1, label." (Unrecognized)") \
| eval order = case( \
like($search_group$, "dmc_group_%"), 100, \
like($search_group$, "dmc_customgroup_%"), 200, \
like($search_group$, "dmc_indexerclustergroup_%"), 300, \
like($search_group$, "dmc_searchheadclustergroup_%"), 400, \
1 == 1, 500) \
| sort order, $search_group$ \
| fields - order
iseval = 0
[dmc_collection_interval]
# this is for calculating runtime of searches, because the data.elapsed in _introspection index is logged every 10 seconds.
# expands to the bare assignment "collection_interval = 10"
# NOTE(review): presumably meant to be placed right after an eval supplied by the caller - confirm against callers
definition = collection_interval = 10
iseval = 0
[dmc_set_index_internal]
# single place that names the _internal index, in case some product uses a different name for it
definition = index=_internal
iseval = 0
[dmc_set_index_introspection]
# single place that names the _introspection index, in case some product uses a different name for it
definition = index=_introspection
iseval = 0
[dmc_set_index_audit]
# single place that names the _audit index, in case some product uses a different name for it
definition = index=_audit
iseval = 0
[dmc_match_all_scheduled_search_types]
# this macro is used to match all types of scheduled searches based on the data.search_props.type field in PerProcess
# introspection events OR on the search_props.type field in events from the splunk-processes endpoint
# matched types: "datamodel acceleration", "report acceleration", "scheduled", "summary indexing"
definition = ((data.search_props.type="datamodel acceleration" OR search_props.type="datamodel acceleration") OR (data.search_props.type="report acceleration" OR search_props.type="report acceleration") OR (data.search_props.type="scheduled" OR search_props.type="scheduled") OR (data.search_props.type="summary indexing" OR search_props.type="summary indexing"))
[dmc_classify_processes]
# categorize processes into different classes.
# reads the process, args and sid fields and writes process_class in three passes:
# 1. classify non-splunkd helpers (splunk-optimize, shells/python/powershell, mongod)
# 2. classify splunkd by its arguments and recognise Splunk Web; when none of these branches match,
#    the value from pass 1 (if any) is kept
# 3. anything still unclassified becomes "other"
definition = eval process_class = case( \
process=="splunk-optimize","index service", \
process=="sh" OR process=="ksh" OR process=="bash" OR like(process,"python%") OR process=="powershell","scripted input", \
process=="mongod", "KVStore") \
| eval process_class = case( \
process=="splunkd" AND (like(args,"-p %start%") OR like(args,"service") OR like(args,"%_internal_launch_under_systemd%")),"splunkd server", \
process=="splunkd" AND isnotnull(sid),"search", \
process=="splunkd" AND (like(args,"fsck%") OR like(args,"recover-metadata%") OR like(args,"cluster_thing")),"index service", \
process=="splunkd" AND args=="instrument-resource-usage", "scripted input",\
(like(process,"python%") AND like(args,"%/appserver/mrsparkle/root.py%")) OR like(process,"splunkweb"),"Splunk Web", \
isnotnull(process_class), process_class) \
| eval process_class = if(isnull(process_class),"other",process_class)
iseval = 0
[dmc_rename_introspection_fields]
# splunk search language doesn't like dot notation of json attributes, for now.
# copies the data.* / data.search_props.* attributes into short field names.
# label is assigned twice; the later assignment wins and defaults it to "" when the attribute is missing.
# provenance defaults to "unknown" and workload_pool to "UNDEFINED" when missing.
# search_head: the reported search head for role "peer", "_self" for role "head" without one,
# "_unknown" for role "peer" without one
definition = eval process = 'data.process' \
| eval args = 'data.args' \
| eval pid = 'data.pid' \
| eval ppid = 'data.ppid' \
| eval elapsed = 'data.elapsed' \
| eval mem_used = 'data.mem_used' \
| eval mem = 'data.mem' \
| eval pct_memory = 'data.pct_memory' \
| eval normalized_pct_cpu = 'data.normalized_pct_cpu' \
| eval pct_cpu = 'data.pct_cpu' \
| eval sid = 'data.search_props.sid' \
| eval app = 'data.search_props.app' \
| eval label = 'data.search_props.label' \
| eval type = 'data.search_props.type' \
| eval mode = 'data.search_props.mode' \
| eval user = 'data.search_props.user' \
| eval role = 'data.search_props.role' \
| eval label = if(isnotnull('data.search_props.label'), 'data.search_props.label', "") \
| eval provenance = if(isnotnull('data.search_props.provenance'), 'data.search_props.provenance', "unknown") \
| eval search_head = case(isnotnull('data.search_props.search_head') AND 'data.search_props.role' == "peer", 'data.search_props.search_head', isnull('data.search_props.search_head') AND 'data.search_props.role' == "head", "_self", isnull('data.search_props.search_head') AND 'data.search_props.role' == "peer", "_unknown") \
| eval workload_pool = if(isnotnull('data.workload_pool'), 'data.workload_pool', "UNDEFINED")
iseval = 0
[dmc_resource_usage_by_processes_timechart(2)]
# this snippet is used for CPU usage chart and physical memory usage chart.
# resource_type: name of the per-process field to chart (a field produced by `dmc_rename_introspection_fields`)
# function: stats function applied by the final timechart
# steps: bin to the collection interval, rename the introspection fields, classify each process
# (process_class is needed by the aggregations below and is derived from the renamed process/args/sid fields),
# keep the latest sample per pid and bin, sum per process class, then chart by class
args = resource_type, function
definition = `dmc_set_bin` \
| `dmc_rename_introspection_fields` \
| `dmc_classify_processes` \
| stats latest($resource_type$) AS resource_usage_dedup latest(process_class) AS process_class by pid, _time \
| stats sum(resource_usage_dedup) AS resource_usage by _time, process_class \
| timechart minspan=10s $function$(resource_usage) AS "Resource Usage" by process_class
iseval = 0
[dmc_set_bin]
# force span equal to collection interval of resource_usage.log (10 seconds), to allow us to deduplicate events in each time interval
definition = bin _time span=10s
iseval = 0
[dmc_set_bin_for_metrics_log]
# bin _time for metrics.log based searches with a minimum span of 30 seconds, because we use timechart a lot.
definition = bin _time minspan=30s
iseval = 0
[dmc_set_bin_for_disk_usage]
# force span equal to collection interval of disk_usage.log (10 minutes), to allow us to deduplicate events in each time interval
definition = bin _time span=10min
iseval = 0
[dmc_set_bin_for_iostats]
# bin _time for iostats searches with a minimum span of one minute
definition = bin _time minspan=1min
[dmc_set_bin_for_timechart]
# this is paired with dmc_timechart: both use a minimum span of 10 seconds
definition = bin _time minspan=10s
iseval = 0
[dmc_set_bin_for_timechart_for_disk_usage]
# this is paired with dmc_timechart_for_disk_usage: both use a minimum span of 10 minutes
definition = bin _time minspan=10min
iseval = 0
[dmc_timechart]
# this is a workaround. In some cases timechart has too few columns, so we manually set many columns here.
# expands to the timechart command with a 10 second minimum span; the caller appends the aggregation and split-by clause
definition = timechart minspan=10s
iseval = 0
[dmc_timechart_for_disk_usage]
# this is a workaround. In some cases timechart has too few columns, so we manually set many columns here. This is specifically for disk usage charts.
# expands to the timechart command with a 10 minute minimum span; the caller appends the aggregation and split-by clause
definition = timechart minspan=10min
iseval = 0
[dmc_timechart_for_metrics_log]
# this is a workaround. In some cases timechart has too few columns, so we manually set many columns here. This is specifically for metrics.log.
# expands to the timechart command with a 30 second minimum span
definition = timechart minspan=30s
iseval = 0
[dmc_timechart_for_iostats]
# by default the iostats log event is recorded every 60 seconds.
# expands to the timechart command with a 60 second minimum span and partial=f
definition = timechart minspan=60s partial=f
[dmc_timechart_for_metrics_reaper_and_heartbeat]
# according to SPL-100474, the reaper and heartbeat info is written every 300s,
# so the timechart minimum span is set to 300 seconds
definition = timechart minspan=300s
[time_modifier(1)]
# for scheduler assistant chart
# eval-based macro (iseval = 1): applies the given relative time modifier to the current time
# and expands to the result converted to a string
args = time_modifier
definition = tostring(relative_time(time(), "$time_modifier$"))
iseval = 1
[dmc_get_distsearch_group_servers(1)]
# reads the local distributed search groups over REST, keeps the group titled dmc_group_<group_name>,
# strips the dmc_group_ prefix from title and returns its members as a comma-separated servers field
args = group_name
definition = rest splunk_server=local /services/search/distributed/groups \
| fields title member \
| search title="dmc_group_$group_name$" \
| eval title=replace(title, "^dmc_group_", "") \
| eval servers=mvjoin(member,",") \
| fields title servers
iseval = 0
[dmc_pct_cpu_rangemap]
# buckets resource_usage into labelled core-count ranges (written to the range field).
# values that match no bucket, including the small gaps between adjacent buckets (for example between 1 and 1.01),
# get the default label "0 cores"; values below zero are labelled "negative"
definition = rangemap field=resource_usage \
"0 cores"=0-0.009 \
"0.01-1 cores"=0.01-1 \
"1-2 cores"=1.01-2 \
"2-3 cores"=2.01-3 \
"3-4 cores"=3.01-4 \
"4-5 cores"=4.01-5 \
"5-10 cores"=5.01-10 \
"10-15 cores"=10.01-15 \
"15-20 cores"=15.01-20 \
"20-25 cores"=20.01-25 \
"25-30 cores"=25.01-30 \
"30+ cores"=30.01-999 \
"negative"=-99999--0.001 \
default="0 cores"
iseval = 0
[dmc_pct_cpu_rangemap_and_timechart]
# charts the distinct count of hosts per CPU range over time; the final fields command fixes the column order
# from the highest range to the lowest and leaves out the "negative" range
definition = `dmc_pct_cpu_rangemap` \
| `dmc_timechart` partial=f dc(host) AS host_count by range \
| fields _time, "30+ cores" "25-30 cores" "20-25 cores" "15-20 cores" "10-15 cores" "5-10 cores" "4-5 cores" "3-4 cores" "2-3 cores" "1-2 cores" "0.01-1 cores" "0 cores"
iseval = 0
[dmc_mem_used_rangemap]
# buckets resource_usage into labelled memory ranges (written to the range field); unmatched values get "0GB".
# the bucket boundaries are multiples of 1024 while the labels read as GB
# (presumably resource_usage arrives in MB - TODO confirm);
# only after bucketing is resource_usage divided by 1024 and rounded to 3 decimals
definition = rangemap field=resource_usage \
"0GB"=0-0 \
"<1GB"=0.01-1024 \
"1-2GB"=1024.001-2048 \
"2-3GB"=2048.001-3072 \
"3-4GB"=3072.001-4096 \
"4-5GB"=4096.001-5120 \
"5-10GB"=5120.001-10240 \
"10-15GB"=10240.001-15360 \
"15-20GB"=15360.001-20480 \
"20-30GB"=20480.001-30720 \
"30GB+"=30720.001-999999 \
default="0GB" \
| eval resource_usage = round(resource_usage / 1024, 3)
iseval = 0
[dmc_mem_used_rangemap_and_timechart]
# charts the distinct count of hosts per memory range over time; the final fields command fixes the column order
# from the highest range to the lowest and must list every label produced by `dmc_mem_used_rangemap`,
# otherwise hosts in the unlisted range disappear from the chart
definition = `dmc_mem_used_rangemap` \
| `dmc_timechart` partial=f dc(host) AS host_count by range \
| fields _time, "30GB+" "20-30GB" "15-20GB" "10-15GB" "5-10GB" "4-5GB" "3-4GB" "2-3GB" "1-2GB" "<1GB" "0GB"
iseval = 0
[dmc_search_count_rangemap]
# buckets search_count into labelled ranges from "0" up to "101+" (written to the range field); no default label is set here
definition = rangemap field=search_count "0"=0-0 "1-5"=1-5 "6-10"=6-10 "11-15"=11-15 "16-20"=16-20 "21-30"=21-30 "31-40"=31-40 "41-50"=41-50 "51-75"=51-75 "76-100"=76-100 "101+"=101-99999
iseval = 0
[dmc_search_count_rangemap_and_timechart]
# charts the distinct count of hosts per search-count range over time, columns ordered from the highest range to the lowest
definition = `dmc_search_count_rangemap` \
| `dmc_timechart` dc(host) AS count_host by range \
| fields _time "101+" "76-100" "51-75" "41-50" "31-40" "21-30" "16-20" "11-15" "6-10" "1-5" "0"
iseval = 0
[dmc_indexing_rate_rangemap]
# buckets kbps into labelled indexing-rate ranges (written to the range field); unmatched values get the label "NULL"
definition = rangemap field=kbps "0-50 KB/s"=0-50 "50-100 KB/s"=50.001-100 "100-500 KB/s"=100.001-500 "500 KB/s - 1 MB/s"=500.001-1024 "1-2.5 MB/s"=1024.001-2560 "2.5-5 MB/s"=2560.001-5120 "5-10 MB/s"=5120.001-10240 "10+ MB/s"=10240.001-999999 default="NULL"
iseval = 0
[dmc_indexing_rate_rangemap_and_timechart]
# charts the distinct count of hosts per indexing-rate range over time (30 second minimum span);
# columns are ordered from the highest range to the lowest and the "NULL" default range is left out
definition = `dmc_indexing_rate_rangemap` \
| timechart minspan=30s partial=f dc(host) AS instance_count by range \
| fields _time "10+ MB/s" "5-10 MB/s" "2.5-5 MB/s" "1-2.5 MB/s" "500 KB/s - 1 MB/s" "100-500 KB/s" "50-100 KB/s" "0-50 KB/s"
iseval = 0
[dmc_queue_fill_ratio_rangemap]
# buckets fill_percentage (0-100) into three labelled ranges (written to the range field); no default label is set here
definition = rangemap field=fill_percentage "0-60%"=0-60 "60-80%"=60.01-80 "80-100%"=80.01-100
iseval = 0
[dmc_queue_fill_ratio_rangemap_and_timechart]
# charts the distinct count of hosts per queue fill range over time, columns ordered from the highest range to the lowest
definition = `dmc_queue_fill_ratio_rangemap` \
| timechart partial=f dc(host) by range \
| fields _time "80-100%" "60-80%" "0-60%"
iseval = 0
[dmc_load_average_rangemap]
# buckets load_average into three labelled ranges (written to the range field); unmatched values are labelled "abnormal"
definition = rangemap field=load_average "0-0.75"=0-0.75 "0.75-1.5"=0.75001-1.5 "1.5+"=1.50001-999999 default=abnormal
iseval = 0
[dmc_load_average_rangemap_and_timechart]
# charts the distinct count of servers per load-average range over time;
# columns are ordered from the highest range to the lowest and the "abnormal" range is left out
definition = `dmc_load_average_rangemap` \
| `dmc_timechart` partial=f dc(server) as server_count by range \
| fields _time, "1.5+", "0.75-1.5", "0-0.75"
iseval = 0
[dmc_cpu_usage_rangemap]
# buckets cpu_usage (a percentage) into labelled ranges, including a "100%+" bucket (written to the range field);
# unmatched values are labelled "abnormal"
definition = rangemap field=cpu_usage "0-60%"=0-60 "60-80%"=60.001-80 "80-100%"=80.001-100 "100%+"=100.001-999999 default=abnormal
iseval = 0
[dmc_cpu_usage_rangemap_and_timechart]
# charts the distinct count of servers per CPU usage range over time;
# columns are ordered from the highest range to the lowest and the "abnormal" range is left out
definition = `dmc_cpu_usage_rangemap` \
| `dmc_timechart` partial=f dc(server) as server_count by range \
| fields _time, "100%+", "80-100%", "60-80%", "0-60%"
iseval = 0
[dmc_get_core_info]
# builds core_info as "<numberOfCores> / <numberOfVirtualCores>", substituting "N/A" for whichever value is missing
definition = eval core_info = if(isnull(numberOfCores), "N/A", numberOfCores)." / ".if(isnull(numberOfVirtualCores), "N/A", numberOfVirtualCores)
[dmc_memory_usage_rangemap]
# buckets pct_mem_used, expressed as a fraction between 0 and 1, into three labelled ranges (written to the range field);
# unmatched values are labelled "abnormal"
definition = rangemap field=pct_mem_used "0-60%"=0-0.6 "60-80%"=0.6001-0.8 "80-100%"=0.8001-1 default=abnormal
iseval = 0
[dmc_memory_usage_rangemap_and_timechart]
# charts the distinct count of servers per memory usage range over time;
# columns are ordered from the highest range to the lowest and the "abnormal" range is left out
definition = `dmc_memory_usage_rangemap` \
| `dmc_timechart` partial=f dc(server) as server_count by range \
| fields _time, "80-100%", "60-80%", "0-60%"
iseval = 0
[dmc_disk_usage_rangemap]
# buckets pct_disk_usage, expressed as a fraction between 0 and 1, into three labelled ranges (written to the range field);
# unmatched values are labelled "abnormal"
definition = rangemap field=pct_disk_usage "0-60%"=0-0.6 "60-80%"=0.6001-0.8 "80-100%"=0.8001-1 default=abnormal
iseval = 0
[dmc_disk_usage_rangemap_and_timechart]
# charts the distinct count of server_mount_point values per disk usage range over time (10 minute minimum span);
# columns are ordered from the highest range to the lowest and the "abnormal" range is left out
definition = `dmc_disk_usage_rangemap` \
| `dmc_timechart_for_disk_usage` partial=f dc(server_mount_point) as server_mount_point_count by range \
| fields _time, "80-100%", "60-80%", "0-60%"
iseval = 0
[dmc_iostats_rangemap(1)]
# buckets the field named by the fieldname argument (a percentage, 0-100) into three labelled ranges
# (written to the range field); unmatched values are labelled "abnormal".
# rangemap attributes are separated by spaces only, like in the other rangemap macros of this file
args = fieldname
definition = rangemap field=$fieldname$ "0-60%"=0-60 "60-80%"=60.0001-80 "80-100%"=80.0001-100 default=abnormal
[dmc_replication_lag_rangemap]
# buckets lag into three labelled ranges named in seconds (written to the range field); unmatched values are labelled "abnormal"
definition = rangemap field=lag "0-10s"=0-10 "10-30s"=10.001-30 ">30s"=30.001-10000000 default=abnormal
iseval = 0
[dmc_background_flush_rangemap]
# buckets percent (0-100) into three labelled ranges (written to the range field); unmatched values are labelled "abnormal"
definition = rangemap field=percent "0-10%"=0-10 "10-50%"=10.001-50 "50-100%"=50.001-100 default=abnormal
iseval = 0
[dmc_audit_get_searches(1)]
# audit trail search events of one host, excluding search ids starting with rsa_
# user "n/a" is turned into a null value, search_type is added by `dmc_audit_get_search_type`
# note here we also polyfill the "savedsearch_name" into "search" field
args = host
definition = `dmc_set_index_audit` host=$host$ action=search sourcetype=audittrail search_id!="rsa_*" \
| eval user = if(user="n/a", null(), user) \
| `dmc_audit_get_search_type` \
| eval search=if(isnull(savedsearch_name) OR savedsearch_name=="", search, savedsearch_name)
iseval = 0
[dmc_audit_get_searches_for_groups(1)]
# audit trail search events of the search heads (dmc_group_search_head) that also belong to the given search_group,
# excluding search ids starting with rsa_
# user "n/a" is turned into a null value, search_type is added by `dmc_audit_get_search_type`
# note here we also polyfill the "savedsearch_name" into "search" field
args = search_group
definition = `dmc_set_index_audit` search_group=dmc_group_search_head search_group="$search_group$" action=search sourcetype=audittrail search_id!="rsa_*" \
| eval user = if(user="n/a", null(), user) \
| `dmc_audit_get_search_type` \
| eval search=if(isnull(savedsearch_name) OR savedsearch_name=="", search, savedsearch_name)
[dmc_audit_get_search_type]
# derives search_type from the shape of search_id; the first matching branch wins:
#   starts with SummaryDirector_                                  -> summarization
#   starts with scheduler__, rt_scheduler__ or alertsmanager_     -> scheduled
#   ends with <10 digits>.<digits>, optionally followed by _<GUID> -> ad hoc
# note the "other" type means there are some search types that are not covered in this macro, we should handle that if that happened
definition = eval search_type = case( \
match(search_id, "^SummaryDirector_"), "summarization", \
match(search_id, "^((rt_)?scheduler__|alertsmanager_)"), "scheduled", \
match(search_id, "\d{10}\.\d+(_[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12})?$"), "ad hoc", \
true(), "other")
[dmc_drilldown_join_peers_by_peerURI]
# this macro is used to get the following fields: serverName, machine, CPU, RAM, version
# NOTE: host and host_fqdn are not available in splunk 6.1 or earlier
# steps: look up peerURI, serverName and machine in dmc_assets by host (keeping the first value of each),
# then outer-join on peerURI with the local distributed-peers REST listing plus the local server info
# (which is given peerURI "localhost") to pick up version, numberOfCores and ram (physicalMemoryMB converted to GB);
# finally Action is set to serverName and _time is dropped
definition = lookup dmc_assets host OUTPUT peerURI, serverName, machine \
| eval peerURI = mvindex(peerURI, 0) \
| eval machine = mvindex(machine, 0) \
| eval serverName = mvindex(serverName, 0) \
| join type=outer peerURI \
[| rest splunk_server=local /services/search/distributed/peers \
| rename title as peerURI \
| append \
[| rest splunk_server=local /services/server/info \
| eval peerURI = "localhost" \
] \
| eval ram = round(physicalMemoryMB / 1024, 2)." GB" \
| fields peerURI, version, numberOfCores, ram \
] \
| eval Action = serverName \
| fields - _time
iseval = 0
[dmc_drilldown_search_activity_deployment_search_concurrency(4)]
# drilldown table of the instances whose search concurrency falls into one range
# role, group: search_group values the events must belong to (both are required)
# searchFunction: stats function applied per host to the per-bin distinct search count
# metric: label of the `dmc_search_count_rangemap` range to keep
# Action ends up as "<serverName> <role>" (serverName is set by `dmc_drilldown_join_peers_by_peerURI`)
args = role, group, searchFunction, metric
definition = `dmc_set_index_introspection` search_group=$role$ search_group=$group$ sourcetype=splunk_resource_usage ((component=PerProcess data.search_props.sid::*) OR component=Hostwide) \
| `dmc_rename_introspection_fields` \
| `dmc_set_bin` \
| stats dc(sid) AS distinct_search_count values(Action) as Action by host, _time \
| stats $searchFunction$(distinct_search_count) as search_count values(Action) as Action by host \
| `dmc_search_count_rangemap` \
| where range="$metric$" \
| `dmc_drilldown_join_peers_by_peerURI` \
| eval Action = Action." $role$" \
| fields serverName, machine, search_count, range, numberOfCores, ram, version, Action \
| rename serverName as Instance, machine as Machine, search_count as "Count of Searches", range as "Count of Searches Range", version as Version, numberOfCores as Cores, ram as RAM
iseval = 0
[dmc_drilldown_search_activity_deployment_resource_usage(5)]
# drilldown table of the instances whose search resource usage falls into one range
# role, group: search_group values the events must belong to (both are required)
# resourceType: field to measure; it also selects the rangemap macro `dmc_<resourceType>_rangemap`
#               (this file defines dmc_pct_cpu_rangemap and dmc_mem_used_rangemap)
# resourceFunction: stats function applied per host to the per-bin summed usage
# metric: label of the range to keep
# Hostwide events are kept so that every host yields a row, but they are neutralised first:
# sid and pid become "n/a" and pct_cpu / mem_used become 0, so they add nothing to the sums
args = role, group, resourceType, resourceFunction, metric
definition = `dmc_set_index_introspection` search_group=$role$ search_group=$group$ sourcetype=splunk_resource_usage ((component=PerProcess data.search_props.sid::*) OR component=Hostwide) \
| `dmc_rename_introspection_fields` \
| eval sid = if(component=="Hostwide", "n/a", sid) \
| eval pid = if(component=="Hostwide", "n/a", pid) \
| eval pct_cpu = if(component=="Hostwide", 0, pct_cpu) \
| eval mem_used = if(component=="Hostwide", 0, mem_used) \
| `dmc_set_bin` \
| stats latest($resourceType$) AS resource_usage_dedup values(Action) as Action by _time, sid, pid, host \
| stats sum(resource_usage_dedup) AS sum_resource_usage values(Action) as Action by _time, host \
| stats $resourceFunction$(sum_resource_usage) as resource_usage values(Action) as Action by host \
| `dmc_$resourceType$_rangemap` \
| where range == "$metric$" \
| `dmc_drilldown_join_peers_by_peerURI` \
| eval Action = Action." $role$" \
| fields serverName, machine, resource_usage, range, numberOfCores, ram, version, Action \
| rename serverName as Instance, machine as Machine, resource_usage as "$resourceType$ Usage", range as "$resourceType$ Usage Range", version as Version, numberOfCores as Cores, ram as RAM
iseval = 0
[dmc_drilldown_indexing_performance_deployment_indexing_rate(2)]
# drilldown table of the indexers whose average indexing rate falls into one range
# group: additional search_group the indexers must belong to
# metric: label of the `dmc_indexing_rate_rangemap` range to keep
# kbps is the total kb indexed per host divided by the length of the search time window (taken from addinfo)
args = group, metric
definition = `dmc_set_index_internal` source=*metrics.log* sourcetype=splunkd search_group=dmc_group_indexer search_group=$group$ group=thruput name=index_thruput \
| stats sum(kb) AS total_kb by host \
| addinfo \
| eval time_window = info_max_time - info_min_time \
| eval kbps = round(total_kb / time_window, 0) \
| `dmc_indexing_rate_rangemap` \
| where range="$metric$" \
| `dmc_drilldown_join_peers_by_peerURI` \
| eval Action = Action." dmc_group_indexer" \
| fields serverName, machine, kbps, range, numberOfCores, ram, version, Action\
| eval kbps = kbps." KB/s" \
| rename serverName as Instance, machine as Machine, kbps as "Indexing Rate", range as "Indexing Rate Range", version as Version, numberOfCores as Cores, ram as RAM
iseval = 0
[dmc_drilldown_indexing_performance_deployment_queue_fill_ratio(4)]
# drilldown table of the indexers whose queue fill ratio falls into one range
# group: additional search_group the indexers must belong to
# queueType: value of the name field that selects the queue
# funcQueue: stats function applied per host to the fill percentage
# metric: label of the `dmc_queue_fill_ratio_rangemap` range to keep
# the fill percentage prefers the *_kb size fields and falls back to max_size / current_size when they are missing
args = group, queueType, funcQueue, metric
definition = `dmc_set_index_internal` source=*metrics.log sourcetype=splunkd search_group=dmc_group_indexer search_group=$group$ group=queue name=$queueType$ \
| eval max=if(isnotnull(max_size_kb),max_size_kb,max_size) \
| eval curr=if(isnotnull(current_size_kb),current_size_kb,current_size) \
| eval fill_perc=round((curr/max)*100,2) \
| stats $funcQueue$(fill_perc) AS fill_percentage by host \
| `dmc_queue_fill_ratio_rangemap` \
| where range="$metric$" \
| `dmc_drilldown_join_peers_by_peerURI` \
| eval Action = Action." dmc_group_indexer" \
| fields serverName, machine, fill_percentage, range, numberOfCores, ram, version, Action \
| rename serverName as Instance, machine as Machine, fill_percentage as "Fill Ratio (%)", range as "Fill Ratio Range", version as Version, numberOfCores as Cores, ram as RAM
iseval = 0
[dmc_drilldown_resource_usage_deployment_load_average(4)]
# drilldown table of the instances whose load average falls into one range
# role, group: search_group values the events must belong to (both are required)
# countLoadAvgFunc: stats function applied per host to data.normalized_load_avg_1min
# metric: label of the `dmc_load_average_rangemap` range to keep
args = role, group, countLoadAvgFunc, metric
definition = `dmc_set_index_introspection` search_group=$role$ search_group=$group$ sourcetype=splunk_resource_usage component=Hostwide \
| stats $countLoadAvgFunc$(data.normalized_load_avg_1min) as load_average by host \
| `dmc_load_average_rangemap` \
| where range="$metric$" \
| `dmc_drilldown_join_peers_by_peerURI` \
| eval Action = Action." $role$" \
| fields serverName, machine, load_average, range, numberOfCores, ram, version, Action \
| rename serverName as Instance, machine as Machine, load_average as "Load Average", range as "Load Average Range", version as Version, numberOfCores as Cores, ram as RAM
iseval = 0
[dmc_drilldown_resource_usage_deployment_cpu_usage(4)]
# drilldown table of the instances whose host-wide CPU usage falls into one range
# role, group: search_group values the events must belong to (both are required)
# countCPUFunc: stats function applied per host to the total CPU usage (system percentage plus user percentage)
# metric: label of the `dmc_cpu_usage_rangemap` range to keep
args = role, group, countCPUFunc, metric
definition = `dmc_set_index_introspection` search_group=$role$ search_group=$group$ sourcetype=splunk_resource_usage component=Hostwide \
| eval total_cpu_usage = 'data.cpu_system_pct' + 'data.cpu_user_pct' \
| stats $countCPUFunc$(total_cpu_usage) as cpu_usage by host \
| `dmc_cpu_usage_rangemap` \
| where range="$metric$" \
| `dmc_drilldown_join_peers_by_peerURI` \
| eval Action = Action." $role$" \
| fields serverName, machine, cpu_usage, range, numberOfCores, ram, version, Action \
| eval cpu_usage = cpu_usage." %" \
| rename serverName as Instance, machine as Machine, cpu_usage as "CPU Usage", range as "CPU Usage Range", version as Version, numberOfCores as Cores, ram as RAM
iseval = 0
[dmc_drilldown_resource_usage_cpu_deployment_usage(5)]
# same drilldown as dmc_drilldown_resource_usage_deployment_cpu_usage(4), with one extra argument:
# role_post_filter is inserted verbatim into the base search as an additional filter term
# role, group: search_group values the events must belong to (both are required)
# countCPUFunc: stats function applied per host to the total CPU usage (system percentage plus user percentage)
# metric: label of the `dmc_cpu_usage_rangemap` range to keep
args = role, group, role_post_filter, countCPUFunc, metric
definition = `dmc_set_index_introspection` search_group=$role$ search_group=$group$ $role_post_filter$ sourcetype=splunk_resource_usage component=Hostwide \
| eval total_cpu_usage = 'data.cpu_system_pct' + 'data.cpu_user_pct' \
| stats $countCPUFunc$(total_cpu_usage) as cpu_usage by host \
| `dmc_cpu_usage_rangemap` \
| where range="$metric$" \
| `dmc_drilldown_join_peers_by_peerURI` \
| eval Action = Action." $role$" \
| fields serverName, machine, cpu_usage, range, numberOfCores, ram, version, Action \
| eval cpu_usage = cpu_usage." %" \
| rename serverName as Instance, machine as Machine, cpu_usage as "CPU Usage", range as "CPU Usage Range", version as Version, numberOfCores as Cores, ram as RAM
iseval = 0
[dmc_drilldown_resource_usage_deployment_memory_usage(4)]
# drilldown table of the instances whose host-wide memory usage falls into one range
# role, group: search_group values the events must belong to (both are required)
# countMemFunc: stats function applied per host to the used fraction (data.mem_used / data.mem)
# metric: label of the `dmc_memory_usage_rangemap` range to keep
# the value stays a 0-1 fraction for the range lookup and is converted to a percentage string only for display
args = role, group, countMemFunc, metric
definition = `dmc_set_index_introspection` search_group=$role$ search_group=$group$ sourcetype=splunk_resource_usage component=Hostwide \
| eval pct_mem_used = 'data.mem_used' / 'data.mem' \
| stats $countMemFunc$(pct_mem_used) as pct_mem_used by host \
| `dmc_memory_usage_rangemap` \
| where range="$metric$" \
| `dmc_drilldown_join_peers_by_peerURI` \
| eval Action = Action." $role$" \
| fields serverName, machine, pct_mem_used, range, numberOfCores, ram, version, Action \
| eval pct_mem_used = round(pct_mem_used * 100, 2) \
| eval pct_mem_used = pct_mem_used." %" \
| rename serverName as Instance, machine as Machine, pct_mem_used as "Memory Usage", range as "Memory Usage Range", version as Version, numberOfCores as Cores, ram as RAM
iseval = 0
[dmc_drilldown_resource_usage_deployment_disk_usage(4)]
# drilldown table of the mount points whose disk usage falls into one range
# role, group: search_group values the events must belong to (both are required)
# countDiskFunc: stats function applied per host/mount point to the used fraction
# metric: label of the `dmc_disk_usage_rangemap` range to keep
# free space prefers data.available and falls back to data.free
# host and mount point are packed into server_mount_point ("host:mount_point") for the stats and split again afterwards
# NOTE(review): the mount_point split removes everything up to the LAST colon, so a mount point that itself
# contains a colon would be truncated - confirm whether such mount points can occur
args = role, group, countDiskFunc, metric
definition = `dmc_set_index_introspection` search_group=$role$ search_group=$group$ sourcetype=splunk_disk_objects component=Partitions \
| eval mount_point = 'data.mount_point' \
| eval free = if(isnotnull('data.available'), 'data.available', 'data.free') \
| eval pct_disk_usage = round(1 - free / 'data.capacity', 2) \
| eval server_mount_point = host.":".mount_point \
| stats $countDiskFunc$(pct_disk_usage) as pct_disk_usage by server_mount_point \
| `dmc_disk_usage_rangemap` \
| where range="$metric$" \
| eval host = replace(server_mount_point, ":.*", "") \
| eval mount_point = replace(server_mount_point, ".*:", "") \
| eval pct_disk_usage = pct_disk_usage * 100 \
| `dmc_drilldown_join_peers_by_peerURI` \
| eval Action = Action." $role$" \
| fields serverName, machine, mount_point, pct_disk_usage, range, numberOfCores, ram, version, Action \
| rename serverName as Instance, machine as Machine, mount_point as "Mount Point", pct_disk_usage as "Disk Usage (%)", range as "Disk Usage Range", version as Version, numberOfCores as Cores, ram as RAM
iseval = 0
[dmc_drilldown_kv_store_deployment_page_faults(3)]
# drilldown table of the KV store instances whose page faults per operation fall into one range
# group: additional search_group the KV store instances must belong to
# countPageFaultsFunc: stats function applied per host to the per-bin ratio
# metric: label of the range to keep ("0-0.7", "0.7-1.3", "1.3+" or "abnormal")
# every data.opcounters.* counter is read under a short alias and treated as 0 when missing,
# so totalops is always a number; the ratio is forced to 0 when a bin has no operations (avoids dividing by zero)
args = group, countPageFaultsFunc, metric
definition = `dmc_set_index_introspection` search_group=dmc_group_kv_store search_group=$group$ component=kvstoreserverstats \
| rename data.opcounters.command as c, data.opcounters.update as u, data.opcounters.queries as q, data.opcounters.deletes as d, data.opcounters.getmore as g, data.opcounters.inserts as i \
| eval commands=if(isNotNull('c'), 'c',0) \
| eval updates=if(isNotNull('u'), 'u', 0) \
| eval queries=if(isNotNull('q'), 'q', 0) \
| eval deletes=if(isNotNull('d'), 'd', 0) \
| eval getmores=if(isNotNull('g'), 'g', 0) \
| eval inserts=if(isNotNull('i'), 'i', 0) \
| eval totalops=commands+updates+queries+deletes+getmores+inserts \
| bin _time minspan=30s \
| stats latest(totalops) AS ops latest(data.extra_info.page_faults) AS pf by host _time \
| eval percent=if(ops==0, 0, round(abs(pf/ops), 2)) \
| stats $countPageFaultsFunc$(percent) as percent by host \
| rangemap field=percent "0-0.7"=0-0.7 "0.7-1.3"=0.7001-1.3 "1.3+"=1.3001-999999 default=abnormal \
| where range="$metric$" \
| `dmc_drilldown_join_peers_by_peerURI` \
| eval Action = Action." dmc_group_kv_store" \
| fields serverName, machine, percent, range, numberOfCores, ram, version, Action \
| rename serverName as Instance, machine as Machine, percent as "Page Faults per Operation", range as "Page Fault Range", version as Version, numberOfCores as Cores, ram as RAM
iseval = 0
[dmc_drilldown_kv_store_deployment_lock_percentage(3)]
# drilldown table of the KV store instances whose global lock percentage falls into one range
# group: additional search_group the KV store instances must belong to
# countLockFunc: stats function applied per host to the lock percentage
# metric: label of the range to keep ("0-30%", "30-50%", "50-100%" or "abnormal")
# the percentage is the growth (range) of data.globalLock.lockTime relative to the growth of data.globalLock.totalTime
# over the searched time window
args = group, countLockFunc, metric
definition = `dmc_set_index_introspection` search_group=dmc_group_kv_store search_group=$group$ component=kvstoreserverstats \
| stats range(data.globalLock.lockTime) AS globalLock range(data.globalLock.totalTime) AS total by host \
| eval percent=abs(globalLock*100/total) \
| stats $countLockFunc$(percent) as percent_locked by host \
| rangemap field=percent_locked "0-30%"=0-30 "30-50%"=30.001-50 "50-100%"=50.001-1000 default=abnormal \
| where range="$metric$" \
| `dmc_drilldown_join_peers_by_peerURI` \
| eval Action = Action." dmc_group_kv_store" \
| fields serverName, machine, percent_locked, range, numberOfCores, ram, version, Action \
| rename serverName as Instance, machine as Machine, percent_locked as "Lock (%)", range as "Lock Percentage Range", version as Version, numberOfCores as Cores, ram as RAM
iseval = 0
[dmc_drilldown_kv_store_deployment_network(1)]
# drilldown table of network traffic per KV store instance over the searched time window
# group: additional search_group the KV store instances must belong to
# "MB In" / "MB Out" are the growth (range) of the bytesIn / bytesOut counters divided by 1000000;
# Requests is the growth of the numRequests counter
args = group
definition =`dmc_set_index_introspection` search_group=dmc_group_kv_store search_group=$group$ component=kvstoreserverstats \
| stats range(data.network.bytesIn) as indiff range(data.network.bytesOut) as outdiff range(data.network.numRequests) as Requests by host \
| eval "MB In"=indiff/1000000 \
| eval "MB Out"=outdiff/1000000 \
| `dmc_drilldown_join_peers_by_peerURI` \
| eval Action = Action." dmc_group_kv_store" \
| table serverName, machine, "MB In", "MB Out", Requests, numberOfCores, ram, version, Action \
| rename serverName as Instance, machine as Machine, version as Version, numberOfCores as Cores, ram as RAM
iseval = 0
[dmc_drilldown_kv_store_deployment_memory_ratio(2)]
# drilldown table of the KV store instances whose virtual-to-mapped memory ratio falls into one range
# group: additional search_group the KV store instances must belong to
# metric: label of the range to keep ("0-2x", "2-3x", ">3x" or "abnormal")
# the ratio is data.mem.virtual / data.mem.mappedWithJournal, averaged per host and rounded to 2 decimals
args = group, metric
definition = `dmc_set_index_introspection` search_group=$group$ search_group=dmc_group_kv_store component=kvstoreserverstats \
| eval ratio='data.mem.virtual'/'data.mem.mappedWithJournal' \
| stats avg(ratio) AS myratio by host \
| eval myratio = round(myratio, 2) \
| rangemap field=myratio "0-2x"=0-2 "2-3x"=2.001-3 ">3x"=3.001-10000 default=abnormal \
| where range="$metric$" \
| `dmc_drilldown_join_peers_by_peerURI` \
| eval Action = Action." dmc_group_kv_store" \
| fields serverName, machine, myratio, range, numberOfCores, ram, version, Action \
| rename serverName as Instance, machine as Machine, myratio as "Virtual to Mapped Ratio", range as "Ratio Range", version as Version, numberOfCores as Cores, ram as RAM
iseval = 0
[dmc_drilldown_kv_store_deployment_replication_lag(2)]
# drilldown table of the KV store replica set members whose replication lag falls into one range
# group: additional search_group for the outer search
# metric: label of the `dmc_replication_lag_rangemap` range to keep
# both searches read events with data.replSetStats.myState=1 and unpack the members{} array:
# name, optimeDate and stateStr are zipped with "---", expanded to one row per member and split again.
# the outer search keeps SECONDARY members; the subsearch yields the highest PRIMARY optimeDate per _time.
# lag = (primary optimeDate - member optimeDate) / 1000, averaged per member and rounded to 2 decimals
# NOTE(review): the subsearch is not restricted to $group$, only to dmc_group_kv_store - confirm this is intended
args = group, metric
definition = `dmc_set_index_introspection` search_group=$group$ search_group=dmc_group_kv_store component=kvstorereplicasetstats data.replSetStats.myState=1 \
| spath data.replSetStats.members{}.name output=name \
| spath data.replSetStats.members{}.optimeDate output=optimeDate \
| spath data.replSetStats.members{}.stateStr output=stateStr \
| spath data.replSetStats.date output=date \
| eval prop_key_val=mvzip(mvzip(name, optimeDate, "---"), stateStr, "---") \
| fields _time, date, prop_key_val \
| mvexpand prop_key_val \
| eval name=mvindex(split(prop_key_val, "---"), 0) \
| eval optimeDate=mvindex(split(prop_key_val, "---"), 1) \
| eval stateStr=mvindex(split(prop_key_val, "---"), 2) \
| where stateStr="SECONDARY" \
| join _time \
[ search `dmc_set_index_introspection` search_group=dmc_group_kv_store component=kvstorereplicasetstats data.replSetStats.myState=1 \
| spath data.replSetStats.members{}.name output=name \
| spath data.replSetStats.members{}.optimeDate output=optimeDate \
| spath data.replSetStats.members{}.stateStr output=stateStr \
| spath data.replSetStats.date output=date \
| eval prop_key_val=mvzip(mvzip(name, optimeDate, "---"), stateStr, "---") \
| fields _time, date, prop_key_val \
| mvexpand prop_key_val \
| eval name=mvindex(split(prop_key_val, "---"), 0) \
| eval optimeDate=mvindex(split(prop_key_val, "---"), 1) \
| eval stateStr=mvindex(split(prop_key_val, "---"), 2) \
| where stateStr="PRIMARY" \
| stats max(optimeDate) as primary by _time] \
| eval name=mvindex(split(name, ":"), 0) \
| eval lag=(primary-optimeDate)/1000 \
| eventstats max(date) as max_date by name \
| stats avg(lag) as lag by name stateStr max_date \
| eval lag = round(lag, 2) \
| `dmc_replication_lag_rangemap` \
| where range="$metric$" \
| eval date = max_date/1000 \
| eval date = strftime(date, "%m/%d/%Y %H:%M:%S %z") \
| rename name as Instance, lag as Lag, range as "Lag Range", stateStr as State, date as "Oplog Date" \
| table Instance Lag "Oplog Date" "Lag Range" State
iseval = 0
[dmc_drilldown_kv_store_primary_oplog_window(1)]
# Drilldown table: oplog window per instance, from kvstorereplicasetstats events with data.replSetStats.myState=1.
# args:
#   group - search_group value used to filter the introspection events (ANDed with dmc_group_kv_store)
# start & end (data.oplogInfo.start / data.oplogInfo.end) are in milliseconds: each is divided by 1000 to get
# seconds, and the difference is then divided by 3600 to get hours (3600000 ms per hour overall).
# Output columns: Instance (the event's host), "Start Date", "End Date", "Window (Hours)".
args = group
definition= `dmc_set_index_introspection` search_group=$group$ search_group=dmc_group_kv_store component=kvstorereplicasetstats data.replSetStats.myState=1 \
| eval Instance=host \
| rename data.oplogInfo.start as start, data.oplogInfo.end as end \
| eval start = start/1000 \
| eval end = end/1000 \
| eval diff= (end-start)/3600 \
| `dmc_time_format(start)` \
| `dmc_time_format(end)` \
| stats first(start) as "Start Date" first(end) as "End Date" first(diff) as "Window (Hours)" by Instance
iseval=0
[dmc_drilldown_kv_store_deployment_background_flush(2)]
# Drilldown table: instances whose background flush percentage falls in a given range.
# args:
#   group  - search_group value used to filter the introspection events (ANDed with dmc_group_kv_store)
#   metric - value compared with the "range" field after `dmc_background_flush_rangemap` runs
# How it works:
#   - diff is the range (max - min) of data.backgroundFlushing.total_ms per host over the searched events
#   - addinfo supplies info_min_time / info_max_time; their difference is the search window in seconds
#   - ms_per_min = diff / (window / 60); percent = ms_per_min / 60000 * 100, rounded to 2 decimals
#   - " dmc_group_kv_store" is appended to the Action field produced by `dmc_drilldown_join_peers_by_peerURI`
args = group, metric
definition = `dmc_set_index_introspection` search_group=$group$ search_group=dmc_group_kv_store component=kvstoreserverstats \
| stats range(data.backgroundFlushing.total_ms) AS diff by host \
| addinfo \
| eval time_window = info_max_time - info_min_time \
| eval ms_per_min = diff / (time_window / 60) \
| eval percent = round(ms_per_min / (60 * 1000) * 100, 2) \
| `dmc_background_flush_rangemap` \
| where range="$metric$" \
| `dmc_drilldown_join_peers_by_peerURI` \
| eval Action = Action." dmc_group_kv_store" \
| fields serverName, machine, percent, range, numberOfCores, ram, version, Action \
| rename serverName as Instance, machine as Machine, percent as "Background Flush (%)", range as "Background Flush Range", version as Version, numberOfCores as Cores, ram as RAM
iseval = 0
[dmc_instances_view_default_search(1)]
# Builds the instance table for a given group from the dmc_assets lookup.
# args:
#   group - search_group value an instance must belong to in order to be listed
# How it works:
#   - search_group is combined into a multivalue field; the indexer / search head cluster labels are extracted
#     from the "dmc_indexerclustergroup_*" and "dmc_searchheadclustergroup_*" group names
#   - rows are expanded, filtered to search_group = $group$, then re-combined
#   - outer join on peerURI with the local distributed-peers REST endpoint (title renamed to peerURI)
#   - outer join on peerURI with the local server/info REST endpoint, tagged peerURI="localhost", status="Up"
#   - inner join on peerURI with `dmc_get_instance_roles`
#   - any status other than "Up" is reported as "Unreachable"
#   - Action is "<serverName> <role>"; RAM is physicalMemoryMB / 1024 rounded to 2 decimals with a " GB" suffix
args = group
definition = inputlookup dmc_assets \
| mvcombine search_group \
| rex max_match=0 field=search_group "dmc_indexerclustergroup_(?<indexer_cluster>.*)" \
| rex max_match=0 field=search_group "dmc_searchheadclustergroup_(?<search_head_cluster>.*)" \
| mvexpand search_group \
| search search_group = "$group$" \
| mvcombine search_group \
| join type=outer peerURI \
[| rest splunk_server=local /services/search/distributed/peers \
| rename title as peerURI ] \
| join type=outer peerURI \
[| rest splunk_server=local /services/server/info \
| eval peerURI = "localhost" \
| eval status = "Up"] \
| join peerURI \
[| `dmc_get_instance_roles` ] \
| eval status = if(status == "Up", status, "Unreachable") \
| eval Action = serverName." ".role \
| eval OS = os_name \
| eval ram = round(physicalMemoryMB / 1024, 2)." GB" \
| `dmc_get_core_info` \
| fields serverName, machine, role, indexer_cluster, search_head_cluster, OS, core_info, ram, version, status, Action \
| sort status, serverName \
| rename serverName as Instance, machine as Machine, role as Role, indexer_cluster as "Indexer Cluster", search_head_cluster as "Search Head Cluster", core_info as "CPU Cores (Physical / Virtual)", ram as RAM, version as Version, status as Status
iseval = 0
[dmc_get_recently_triggered_alerts]
# Lists saved searches of the splunk_monitoring_console app that have triggered_alert_count > 0.
# How it works:
#   - saved searches are read from the local REST endpoint; title is renamed to savedsearch_name
#   - outer join on savedsearch_name with the fired_alerts endpoint, clustered on savedsearch_name
#   - outer join on sid with the search/jobs endpoint; its "published" field becomes "Last time triggered"
# Output fields: "Alert Name", "Trigger Count", "Last time triggered", "Description", sid.
# The definition starts with the generating command "rest" and has no leading pipe of its own.
definition = rest splunk_server=local /servicesNS/-/splunk_monitoring_console/saved/searches \
|rename title as savedsearch_name \
| fields savedsearch_name, triggered_alert_count, description \
| where triggered_alert_count > 0 \
| join type=outer savedsearch_name[|rest splunk_server=local /servicesNS/-/splunk_monitoring_console/alerts/fired_alerts/- \
| cluster field=savedsearch_name] \
| join type=outer sid [rest splunk_server=local /servicesNS/nobody/splunk_monitoring_console/search/jobs/] \
| rename savedsearch_name AS "Alert Name", triggered_alert_count AS "Trigger Count", published AS "Last time triggered", description as "Description" | fields "Alert Name" , "Trigger Count", "Last time triggered", "Description", sid
iseval = 0
[dmc_get_all_triggered_alerts(1)]
# Lists alerts of the splunk_monitoring_console app fired within the last $minute_offset$ minutes.
# args:
#   minute_offset - number of minutes to look back; substituted into relative_time(now(), "-<n>m")
# How it works:
#   - fired alerts are read from the local REST endpoint; savedsearch_name is renamed to title
#   - outer join on title with the saved/searches endpoint, then outer join on sid with the search/jobs endpoint
#   - the field "now" holds the cutoff time (not the current time); rows with trigger_time <= cutoff are dropped
#   - trigger_time is formatted as "Time Triggered"; sid is exposed under the column name "Instance"
# The definition starts with the generating command "rest" and has no leading pipe of its own.
args = minute_offset
definition = rest splunk_server=local /servicesNS/-/splunk_monitoring_console/alerts/fired_alerts/- \
| rename savedsearch_name as title \
| join type=outer title [|rest splunk_server=local /servicesNS/-/splunk_monitoring_console/saved/searches ] \
| rename title as "Alert Name" \
| join type=outer sid [rest splunk_server=local /servicesNS/nobody/splunk_monitoring_console/search/jobs/] \
| eval now=relative_time(now(),"-$minute_offset$m") \
| where trigger_time > now \
| rename trigger_time AS "_time", description as "Description", sid AS Instance \
| convert timeformat="%b. %d, %Y %l:%M %p" ctime(_time) AS "Time Triggered" \
| fields - _time \
| fields "Alert Name", Instance, "Time Triggered", "Description"
iseval = 0
[dmc_dmc_group_license_master]
# Search-term fragment that targets the dmc_group_license_master server group.
# Used in this file as the server selector of "rest" calls (see dmc_daily_license_usage_quota).
definition = splunk_server_group=dmc_group_license_master
iseval = 0
[dmc_daily_license_usage_quota]
# Per license master (splunk_server): used volume, stack quota and percentage used.
# How it works:
#   - license pools are read from /services/licenser/pools on the dmc_group_license_master group
#   - outer join (stack_id, splunk_server) with the active license groups; only is_active=1 rows are kept
#   - outer join (stack_id, splunk_server) with /services/licenser/stacks to pick up each stack's quota
#   - per splunk_server: used_bytes is summed and the max stack_quota is taken
#   - usedGB / totalGB are bytes / 1024^3 rounded to 3 decimals;
#     percentage = round(usedGB / totalGB, 3) * 100
# Output fields: splunk_server, percentage, usedGB, totalGB.
# The definition starts with the generating command "rest" and has no leading pipe of its own.
definition = rest `dmc_dmc_group_license_master` /services/licenser/pools \
| join type=outer stack_id splunk_server [rest `dmc_dmc_group_license_master` /services/licenser/groups | search is_active=1 | eval stack_id=stack_ids | fields splunk_server stack_id is_active] \
| search is_active=1 \
| fields splunk_server, stack_id, used_bytes \
| join type=outer stack_id splunk_server [rest `dmc_dmc_group_license_master` /services/licenser/stacks | eval stack_id=title | eval stack_quota=quota | fields splunk_server stack_id stack_quota] \
| stats sum(used_bytes) as used_bytes max(stack_quota) as stack_quota by splunk_server \
| eval usedGB=round(used_bytes/1024/1024/1024,3) \
| eval totalGB=round(stack_quota/1024/1024/1024,3) \
| eval percentage=round(usedGB / totalGB, 3)*100 \
| fields splunk_server, percentage, usedGB, totalGB
iseval = 0
[dmc_get_local_instance_asset]
# Returns the local instance's asset record, one row per search group.
# How it works:
#   - starts from `dmc_get_local_instance_asset_computed_groups` (peerURI="localhost" plus computed_search_groups)
#   - outer join on peerURI with the distributed search groups whose member list matches "localhost:localhost";
#     the group titles are combined into a space-delimited search_groups value and split back into a multivalue
#   - because the join is outer, search_groups can be null; it then falls back to computed_search_groups
#   - the result is expanded to one row per search_groups value
definition = `dmc_get_local_instance_asset_computed_groups` \
| join type=outer peerURI [ \
| rest splunk_server=local /services/search/distributed/groups \
| fields title member \
| where isnotnull(mvfind(member, "localhost:localhost")) \
| eval peerURI="localhost" \
| rename title AS search_groups \
| fields peerURI search_groups \
| mvcombine delim=" " search_groups] \
| makemv delim=" " search_groups \
| eval search_groups = if(isnotnull(search_groups),search_groups,computed_search_groups) \
| fields - computed_search_groups \
| mvexpand search_groups
iseval = 0
[dmc_get_local_instance_asset_in_distributed_mode]
# Note: this search returns no result if localhost is not in any distributed search group. This is expected:
# localhost's status is "disabled" when it is in no distributed search group, so it must not be added to dmc_assets.
# Note the inner join (type=inner). Apart from that, this macro is almost the same as dmc_get_local_instance_asset;
# the fallback to computed_search_groups is kept as written in that macro.
definition = `dmc_get_local_instance_asset_computed_groups` \
| join type=inner peerURI [ \
| rest splunk_server=local /services/search/distributed/groups \
| fields title member \
| where isnotnull(mvfind(member, "localhost:localhost")) \
| eval peerURI="localhost" \
| rename title AS search_groups \
| fields peerURI search_groups \
| mvcombine delim=" " search_groups] \
| makemv delim=" " search_groups \
| eval search_groups = if(isnotnull(search_groups),search_groups,computed_search_groups) \
| fields - computed_search_groups \
| mvexpand search_groups
[dmc_get_local_instance_asset_computed_groups]
# Builds a single row for the local instance with a fixed set of computed search groups.
#   - server info is read from the local /services/server/info REST endpoint and tagged peerURI="localhost"
#   - server_roles is hard-coded to indexer, search_head, license_master, kv_store
#   - each role becomes "dmc_group_<role>" and the values are collected into computed_search_groups
# Output fields: serverName, peerURI, host, machine (from host_fqdn), computed_search_groups (multivalue).
# The definition starts with the generating command "rest" and has no leading pipe of its own.
definition = rest splunk_server=local /services/server/info \
| eval peerURI="localhost" \
| eval server_roles="indexer,search_head,license_master,kv_store" \
| makemv delim="," server_roles \
| mvexpand server_roles \
| eval computed_search_groups="dmc_group_".server_roles \
| stats first(peerURI) AS peerURI first(host) AS host first(host_fqdn) AS machine values(computed_search_groups) AS computed_search_groups by serverName
[dmc_get_warnings_and_errors(2)]
# Base search for splunkd events logged at WARN or ERROR level.
# args:
#   search_group - search_group value to filter on
#   component    - splunkd component value to filter on
args = search_group, component
definition = `dmc_set_index_internal` search_group=$search_group$ sourcetype=splunkd component=$component$ (log_level=WARN OR log_level=ERROR)
[dmc_time_format(1)]
# convert epoch time to standard date time format
# the field is overwritten in place with a string formatted as "%m/%d/%Y %H:%M:%S %z"
# args:
#   time - name of the field holding the epoch time (in seconds)
args = time
definition = eval $time$ = strftime($time$, "%m/%d/%Y %H:%M:%S %z")
[dmc_get_forwarder_tcpin]
# return events from metrics.log / group=tcpin_connections that exclusively record incoming s2s data connections from forwarders
# only cooked / cookedSSL connections that carry both a fwdType and a guid field are matched
definition = `dmc_set_index_internal` sourcetype=splunkd group=tcpin_connections (connectionType=cooked OR connectionType=cookedSSL) fwdType=* guid=*
[dmc_rename_forwarder_type(1)]
# Rewrites a forwarder type field in place with a display label:
#   "full" -> "Heavy Forwarder", "uf" -> "Universal Forwarder", "lwf" -> "Light Forwarder"
# any other value is left unchanged (the final 1==1 branch of case()).
# args:
#   type - name of the field holding the forwarder type
args = type
definition = eval $type$ = case($type$ == "full", "Heavy Forwarder", $type$ == "uf", "Universal Forwarder", $type$ == "lwf", "Light Forwarder", 1==1, $type$)
[dmc_tcp_throughput_split_by(2)]
# Time chart of incoming forwarder TCP throughput, split by a field.
# args:
#   field - field to split the chart by
#   group - search_group value to filter on (ANDed with dmc_group_indexer)
# per_second(kb) is charted through `dmc_timechart_for_metrics_log` and the series is renamed to "KB/s".
args = field, group
definition = `dmc_get_forwarder_tcpin` search_group=dmc_group_indexer search_group=$group$ \
| `dmc_timechart_for_metrics_log` per_second(kb) as avg_tcp_KBps by $field$ \
| rename avg_tcp_KBps as "KB/s"
[dmc_tcp_forwarder_count_split_by(2)]
# Time chart of the distinct number of forwarders (dc(guid)) sending data, split by a field.
# args:
#   field - field to split the chart by
#   group - search_group value to filter on (ANDed with dmc_group_indexer)
# the series is renamed to "Forwarder Count".
args = field, group
definition = `dmc_get_forwarder_tcpin` search_group=dmc_group_indexer search_group=$group$ \
| `dmc_timechart_for_metrics_log` dc(guid) as forwarder_count by $field$ \
| rename forwarder_count as "Forwarder Count"
[dmc_get_port_from_splunktcp_stanza(1)]
# takes as input a field that contains a splunktcp stanza name (example: stanza_name="splunktcp://9997")
# returns a field named "port", describing the port used for the splunktcp data input specified in the input field, and whether this port is SSL-secure
# " (SSL)" is appended to the port when the stanza name contains "splunktcp-ssl".
# "port" stays null when the stanza name does not match the pattern. null() is used rather than the bare
# word null, which eval reads as a reference to a field named "null"; a stray field of that name could
# otherwise leak its value into "port".
# note: the definition itself begins with a pipe.
args = stanza_name
definition = | rex field=$stanza_name$ "splunktcp(-ssl)?:(\/{1,2})?(([0-9]{1,3}\.){3}[0-9]{1,3})?(:)?(?<port>\d+)" \
| eval port = if(match($stanza_name$, "splunktcp-ssl"), port." (SSL)", port) \
| eval port = if(isnotnull(port), port, null())
[dmc_get_forwarder_info]
# Reads the dmc_forwarder_assets lookup (the lookup written by the dmc_re_build_forwarder_assets* macros).
# The definition starts with the generating command "inputlookup" and has no leading pipe of its own.
definition = inputlookup dmc_forwarder_assets
[dmc_build_forwarder_assets(1)]
# Aggregates forwarder connection events (`dmc_get_forwarder_tcpin`) into one row per guid, hostname.
# args:
#   sparkline_span - span passed to sparkline() for the avg(tcp_KBps) sparkline
# Output fields: forwarder_type, version (latest), arch, os, last_connected (max _time), new_sum_kb,
# new_avg_tcp_kbps_sparkline, new_avg_tcp_kbps, new_avg_tcp_eps.
args = sparkline_span
definition = `dmc_get_forwarder_tcpin` \
| stats values(fwdType) as forwarder_type, latest(version) as version, values(arch) as arch, values(os) as os, max(_time) as last_connected, sum(kb) as new_sum_kb, sparkline(avg(tcp_KBps), $sparkline_span$) as new_avg_tcp_kbps_sparkline, avg(tcp_KBps) as new_avg_tcp_kbps, avg(tcp_eps) as new_avg_tcp_eps by guid, hostname
[dmc_rangemap_indexing_rate]
# Maps average_KBps to a hex color through rangemap; values outside every listed range get the default color.
# Every upper bound ends in .999 so that fractional values lying between two buckets (for example 4999.5)
# stay in the lower bucket instead of falling through to the default color.
definition = rangemap field=average_KBps #CEE4EE=0-99.999 #6BAECA=100-999.999 #0877A6=1000-4999.999 #005E86=5000-9999.999 default=#004664
[dmc_rangemap_search_concurrency]
# Maps search_concurrency to a hex color through rangemap: 0-5.999, 6-10.999, 11-20.999, otherwise the default color.
definition = rangemap field=search_concurrency #CEE4EE=0-5.999 #6BAECA=6-10.999 #0877A6=11-20.999 default=#005E86
[dmc_rangemap_cpu_system_pct]
# Maps cpu_system_pct to a hex color through rangemap: 0-59.999, 60-79.999, otherwise the default color.
definition = rangemap field=cpu_system_pct #53A051=0-59.999 #F1813F=60-79.999 default=#DC4E41
[dmc_rangemap_mem_used]
# Maps mem_used to a hex color through rangemap: 0-59.999, 60-79.999, otherwise the default color.
definition = rangemap field=mem_used #53A051=0-59.999 #F1813F=60-79.999 default=#DC4E41
[dmc_rangemap_up_down_status]
# Maps up_down_status to a hex color through rangemap: 0-0.999 gets #DC4E41, anything else the default color.
definition = rangemap field=up_down_status #DC4E41=0-0.999 default=#53A051
[dmc_rangemap_default_indexing_rate]
# Maps average_KBps to a hex color through rangemap; values outside every listed range get the default color.
# Every upper bound ends in .999 so that fractional values lying between two buckets (for example 4999.5)
# stay in the lower bucket instead of falling through to the default color.
definition = rangemap field=average_KBps #CEE4EE=0-99.999 #6BAECA=100-999.999 #0877A6=1000-4999.999 #005E86=5000-9999.999 default=#004664
[dmc_rangemap_default_search_concurrency]
# Same ranges and colors as dmc_rangemap_search_concurrency in this file.
definition = rangemap field=search_concurrency #CEE4EE=0-5.999 #6BAECA=6-10.999 #0877A6=11-20.999 default=#005E86
[dmc_rangemap_default_cpu_system_pct]
# Same ranges and colors as dmc_rangemap_cpu_system_pct in this file.
definition = rangemap field=cpu_system_pct #53A051=0-59.999 #F1813F=60-79.999 default=#DC4E41
[dmc_rangemap_default_mem_used]
# Same ranges and colors as dmc_rangemap_mem_used in this file.
definition = rangemap field=mem_used #53A051=0-59.999 #F1813F=60-79.999 default=#DC4E41
[dmc_rangemap_default_up_down_status]
# Same ranges and colors as dmc_rangemap_up_down_status in this file.
definition = rangemap field=up_down_status #DC4E41=0-0.999 default=#53A051
[dmc_re_build_forwarder_assets_light(1)]
# Rebuilds the dmc_forwarder_assets lookup from `dmc_build_forwarder_assets` and overwrites it with outputlookup.
# Note the 'eval avg_tcp_kbps_sparkline = "N/A"' term: for an unknown reason this field uses a comma as its
# multivalue delimiter here, whereas the delimiter is normally whitespace. The field is therefore not compatible
# with the search string the dashboard uses to re-construct the sparkline, so it is replaced with "N/A".
# A forwarder is "missing" when sum_kb is null or <= 0, or when last_connected is more than 15 minutes
# before now(); otherwise it is "active".
# args:
#   sparkline_span - passed through to `dmc_build_forwarder_assets`
args = sparkline_span
definition = `dmc_build_forwarder_assets($sparkline_span$)` \
| rename new_sum_kb as sum_kb, new_avg_tcp_kbps_sparkline as avg_tcp_kbps_sparkline, new_avg_tcp_kbps as avg_tcp_kbps, new_avg_tcp_eps as avg_tcp_eps \
| eval avg_tcp_kbps_sparkline = "N/A" \
| addinfo \
| eval status = if(isnull(sum_kb) or (sum_kb <= 0) or (last_connected < (relative_time(now(),"-15m"))), "missing", "active") \
| eval sum_kb = round(sum_kb, 2) \
| eval avg_tcp_kbps = round(avg_tcp_kbps, 2) \
| eval avg_tcp_eps = round(avg_tcp_eps, 2) \
| fields guid, hostname, forwarder_type, version, arch, os, status, last_connected, sum_kb, avg_tcp_kbps_sparkline, avg_tcp_kbps, avg_tcp_eps \
| outputlookup dmc_forwarder_assets
[dmc_re_build_forwarder_assets(1)]
# Rebuilds the dmc_forwarder_assets lookup from `dmc_build_forwarder_assets` and overwrites it with outputlookup.
# Note the 'eval avg_tcp_kbps_sparkline = "N/A"' term: for an unknown reason this field uses a comma as its
# multivalue delimiter here, whereas the delimiter is normally whitespace. The field is therefore not compatible
# with the search string the dashboard uses to re-construct the sparkline, so it is replaced with "N/A".
# A forwarder is "missing" when sum_kb is null or <= 0, or when last_connected is more than 900 seconds
# before info_max_time (the field added by addinfo); otherwise it is "active".
# args:
#   sparkline_span - passed through to `dmc_build_forwarder_assets`
args = sparkline_span
definition = `dmc_build_forwarder_assets($sparkline_span$)` \
| rename new_sum_kb as sum_kb, new_avg_tcp_kbps_sparkline as avg_tcp_kbps_sparkline, new_avg_tcp_kbps as avg_tcp_kbps, new_avg_tcp_eps as avg_tcp_eps \
| eval avg_tcp_kbps_sparkline = "N/A" \
| addinfo \
| eval status = if(isnull(sum_kb) or (sum_kb <= 0) or (last_connected < (info_max_time - 900)), "missing", "active") \
| eval sum_kb = round(sum_kb, 2) \
| eval avg_tcp_kbps = round(avg_tcp_kbps, 2) \
| eval avg_tcp_eps = round(avg_tcp_eps, 2) \
| fields guid, hostname, forwarder_type, version, arch, os, status, last_connected, sum_kb, avg_tcp_kbps_sparkline, avg_tcp_kbps, avg_tcp_eps \
| outputlookup dmc_forwarder_assets
[dmc_search_activity_instance_drilldown(3)]
# Drilldown table: per-search (by sid) resource usage on one host, from PerProcess introspection events
# that carry a data.search_props.sid value.
# args:
#   host               - host to filter the introspection events on
#   concurrencySplitBy - name of the field the clicked series was split by
#   type               - value that field must equal (compared as a string)
# Per sid: latest process/search properties, average CPU (2 decimals), max memory, first/last time seen
# (formatted with `dmc_time_format`) and max elapsed time.
args = host, concurrencySplitBy, type
definition = `dmc_set_index_introspection` host=$host$ sourcetype=splunk_resource_usage component=PerProcess data.search_props.sid::* \
| `dmc_rename_introspection_fields` \
| where $concurrencySplitBy$=="$type$" \
| stats latest(pid) as "PID", latest(ppid) as "PPID", latest(label) as Label, latest(provenance) AS Provenance, latest(search_head) as "Search Head", latest(type) as Type, latest(user) as User, latest(app) as App, latest(mode) as Mode, latest(role) as Role, avg(pct_cpu) as avg_pct_cpu, max(mem_used) as "Physical Memory Usage (MB)", min(_time) as min_time, max(_time) as max_time, max(elapsed) as "Elapsed Time" by sid \
| eval avg_pct_cpu = round(avg_pct_cpu, 2) \
| `dmc_time_format(min_time)` \
| `dmc_time_format(max_time)` \
| rename avg_pct_cpu as "Average CPU Usage (%)", min_time as "First Time Seen", max_time as "Last Time Seen"
[dmc_get_last_hour_resource_usage_from_sid(2)]
# Time chart of CPU and memory usage for one search (sid) on one host; the search is limited with earliest=-1h.
# args:
#   host - host to filter the introspection events on
#   sid  - search id matched against data.search_props.sid
# How it works:
#   - events are binned with `dmc_set_bin`
#   - the latest pct_cpu / mem_used is taken per (_time, pid), so each process counts once per bin
#   - the per-process values are summed per _time, then charted with span=10s
args = host, sid
definition = `dmc_set_index_introspection` host=$host$ sourcetype=splunk_resource_usage component=PerProcess data.search_props.sid="$sid$" earliest=-1h \
| `dmc_set_bin` \
| `dmc_rename_introspection_fields` \
| stats latest(pct_cpu) AS pct_cpu_dedup latest(mem_used) AS mem_used_dedup by _time, pid \
| stats sum(pct_cpu_dedup) AS pct_cpu_sum sum(mem_used_dedup) AS mem_used_sum by _time \
| timechart span=10s latest(pct_cpu_sum) AS pct_cpu latest(mem_used_sum) AS mem_used
[dmc_resource_usage_instance_drilldown(1)]
# Drilldown table: per-process resource usage on one host, from PerProcess introspection events.
# args:
#   host - host to filter the introspection events on
# How it works:
#   - processes are classified with `dmc_classify_processes`
#   - for search processes (sid is not null) the args are replaced with "search --id=<sid> (...)"
#   - rows are grouped by cmd, which is "<process> <args>"
#   - per cmd: latest process/search properties, average CPU (2 decimals), max memory, first/last time seen
#     (formatted with `dmc_time_format`) and max elapsed time
args = host
definition = `dmc_set_index_introspection` sourcetype=splunk_resource_usage component=PerProcess host=$host$ \
| `dmc_rename_introspection_fields` \
| `dmc_classify_processes` \
| eval args = if(isnotnull(sid),"search --id=".sid." (...)", args) \
| eval cmd = process." ".args \
| stats latest(pid) as "PID", latest(ppid) as "PPID", latest(label) as Label, latest(provenance) as Provenance, latest(search_head) as "Search Head", latest(type) as Type, latest(user) as User, latest(app) as App, latest(mode) as Mode, latest(role) as Role, avg(pct_cpu) as avg_pct_cpu, max(mem_used) as "Physical Memory Usage (MB)", min(_time) as min_time, max(_time) as max_time, max(elapsed) as "Elapsed Time" by cmd \
| eval avg_pct_cpu = round(avg_pct_cpu, 2) \
| `dmc_time_format(min_time)` \
| `dmc_time_format(max_time)` \
| rename avg_pct_cpu as "Average CPU Usage (%)", min_time as "First Time Seen", max_time as "Last Time Seen"
[dmc_convert_disk_usage_unit(1)]
# eval expression that turns a size into a string with a unit suffix
# input unit is MB
#   > 1000000 MB -> value / 1024 / 1024, rounded to 2 decimals, suffixed " TB"
#   > 1000 MB    -> value / 1024, rounded to 2 decimals, suffixed " GB"
#   otherwise    -> value unchanged, suffixed " MB"
# each condition/value pair of case() must be followed by a comma (as in dmc_convert_count_unit);
# without the commas the expression is not a valid case() call.
# args:
#   x - field (or expression) holding the size in MB
args = x
definition = case( \
$x$ > 1000000, round($x$ / 1024 / 1024, 2)." TB", \
$x$ > 1000, round($x$ / 1024, 2)." GB", \
1 == 1, $x$." MB" \
)
[dmc_convert_count_unit(1)]
# eval expression that abbreviates a count using decimal (powers of 1000) units:
#   >= 10^12 -> "T", >= 10^9 -> "B", >= 10^6 -> "M", >= 10^3 -> "K" (each rounded to 2 decimals)
#   smaller values are returned unchanged
# args:
#   x - field (or expression) holding the count
args = x
definition = case( \
$x$ >= 1000000000000, round($x$ / 1000000000000, 2)."T", \
$x$ >= 1000000000, round($x$ / 1000000000, 2)."B", \
$x$ >= 1000000, round($x$ / 1000000, 2)."M", \
$x$ >= 1000, round($x$ / 1000, 2)."K", \
true(), $x$ \
)
[dmc_exclude_indexes]
# exclude the indexes history, _thefishbucket, and _blocksignature because they are special
# the index name is taken from "title" when it is present, otherwise from 'data.name';
# the temporary field _dmc_title is removed again at the end
definition = eval _dmc_title = if(isnotnull(title), title, 'data.name')\
| where NOT (_dmc_title == "history" OR _dmc_title == "_thefishbucket" OR _dmc_title == "_blocksignature")\
| fields - _dmc_title
[dmc_exclude_volumes]
# exclude _splunk_summaries volume, because we have no info for this volume
# the volume name is taken from "title" when it is present, otherwise from 'data.name';
# the temporary field _dmc_volume is removed again at the end
definition = eval _dmc_volume = if(isnotnull(title), title, 'data.name') \
| where NOT _dmc_volume == "_splunk_summaries" \
| fields - _dmc_volume
[dmc_event_local_search_dispatch]
# An event that is recorded in the _audit index when a search is dispatched locally
# matches audittrail events with action=search that carry a "search" field
definition = `dmc_set_index_audit` sourcetype=audittrail action=search search=*
[dmc_event_remote_search_dispatch]
# An event that is recorded in remote_searches.log when a search is started locally after being dispatched by a remote instance
# matched through sourcetype=splunkd_remote_searches and the literal phrase "Streamed search search starting"
# (the repeated word "search" is part of the matched phrase, not a typo to fix here)
definition = `dmc_set_index_internal` sourcetype=splunkd_remote_searches "Streamed search search starting"
[dmc_convert_runtime(1)]
# input parameter is runtime in seconds
# output runtime is converted to appropriate format depending on the amount of time:
#   "<d>d <h>h <m>min <s>s", "<h>h <m>min <s>s", "<m>min <s>s" or "<s>s" (seconds rounded to 2 decimals),
#   starting at the largest unit that is greater than 0
# make sure to use fieldformat for the result of this function, that way the column is still sortable
# whole days / hours / minutes are obtained with floor(); rounding (x - 0.5) instead would evaluate
# round(-0.5) whenever the runtime is an exact multiple of a unit (e.g. 7200 seconds) and could yield a
# negative component.
# args:
#   runtime - field (or expression) holding the runtime in seconds
args = runtime
definition = case( \
floor($runtime$ / (3600*24)) > 0, floor($runtime$ / (3600*24))."d ".floor(($runtime$ % (3600*24)) / 3600)."h ".floor(($runtime$ % 3600) / 60)."min ".round($runtime$ % 60, 2)."s", \
floor(($runtime$ % (3600*24)) / 3600) > 0, floor(($runtime$ % (3600*24)) / 3600)."h ".floor(($runtime$ % 3600) / 60)."min ".round($runtime$ % 60, 2)."s", \
floor(($runtime$ % 3600) / 60) > 0, floor(($runtime$ % 3600) / 60)."min ".round($runtime$ % 60, 2)."s", \
1 = 1, round($runtime$ % 60, 2)."s" \
)
[dmc_licensing_base_summary(2)]
# Base search: RolloverSummary events from license_usage.log on $host$, limited with earliest=-30d@d.
# args:
#   host        - host to filter on
#   pool_clause - extra search terms appended verbatim at the end of the search
args = host, pool_clause
definition = `dmc_set_index_internal` host=$host$ source=*license_usage.log* type="RolloverSummary" earliest=-30d@d $pool_clause$
[dmc_licensing_base_usage(2)]
# Base search: Usage events from license_usage.log, summed per day.
# args:
#   host        - host to filter on (applied after the stats)
#   pool_clause - extra search terms appended verbatim after the host filter
# empty or null h / s values are replaced with "(SQUASHED)" and empty or null idx with "(UNKNOWN)";
# b is then summed per day by _time, host, pool, s, st, h, idx.
args = host, pool_clause
definition = `dmc_set_index_internal` source=*license_usage.log* type="Usage" | eval h=if(len(h)=0 OR isnull(h),"(SQUASHED)",h) | eval s=if(len(s)=0 OR isnull(s),"(SQUASHED)",s) | eval idx=if(len(idx)=0 OR isnull(idx),"(UNKNOWN)",idx) | bin _time span=1d | stats sum(b) as b by _time, host, pool, s, st, h, idx | search host=$host$ $pool_clause$
[dmc_licensing_summery_no_split(5)]
# Daily license volume (GB), not split. "summery" is the spelling used in the macro name.
# args: splunk_server, size_search, host, pool_clause, split_by_field_name
#   size_search - name of a two-argument macro invoked as `<size_search>(<host>, "<pool_clause>")`
#   splunk_server and split_by_field_name are not referenced by this definition
# _time is shifted back by 43200 seconds (12 hours) before daily binning
# (presumably so rollover events are attributed to the previous day - confirm);
# every field is finally converted from bytes to GB (/ 1024^3, 3 decimals).
args = splunk_server, size_search, host, pool_clause, split_by_field_name
definition = eval _time=_time - 43200 | bin _time span=1d | stats latest(b) AS b by slave, pool, _time | timechart span=1d sum(b) AS "volume" fixedrange=false | `$size_search$($host$, "$pool_clause$")` | foreach * [eval <<FIELD>>=round('<<FIELD>>'/1024/1024/1024, 3)]
[dmc_licensing_summery_pool(5)]
# Daily license volume (GB), split by pool. Same pipeline as dmc_licensing_summery_no_split with "by pool".
# args: splunk_server, size_search, host, pool_clause, split_by_field_name
#   splunk_server and split_by_field_name are not referenced by this definition
args = splunk_server, size_search, host, pool_clause, split_by_field_name
definition = eval _time=_time - 43200 | bin _time span=1d | stats latest(b) AS b by slave, pool, _time | timechart span=1d sum(b) AS "volume" by pool fixedrange=false | `$size_search$($host$, "$pool_clause$")` | foreach * [eval <<FIELD>>=round('<<FIELD>>'/1024/1024/1024, 3)]
[dmc_licensing_summery_indxr(5)]
# Daily license volume (GB), split by indexer (license slave).
# args: splunk_server, size_search, host, pool_clause, split_by_field_name
#   splunk_server - server queried through rest /services/licenser/slaves to resolve slave GUIDs to labels
#   split_by_field_name is not referenced by this definition
# slaves without a label are shown as "GUID: <guid>".
args = splunk_server, size_search, host, pool_clause, split_by_field_name
definition = eval _time=_time - 43200 | bin _time span=1d | stats latest(b) AS b by slave, pool, _time | eval slave_guid=slave | stats max(b) AS volume by slave_guid, _time | join type=outer slave_guid [rest splunk_server=$splunk_server$ /services/licenser/slaves | rename label AS slave_name title AS slave_guid | table slave_guid slave_name] | eval slave_name = if(isnotnull(slave_name),slave_name,"GUID: ".slave_guid) | timechart span=1d max(volume) AS "volume" by slave_name fixedrange=false | `$size_search$($host$, "$pool_clause$")` | foreach * [eval <<FIELD>>=round('<<FIELD>>'/1024/1024/1024, 3)]
[dmc_licensing_usage_all(5)]
# Daily license volume (GB) from Usage events, split by $split_by_field_name$.
# args: splunk_server, size_search, host, pool_clause, split_by_field_name
#   splunk_server is not referenced by this definition
args = splunk_server, size_search, host, pool_clause, split_by_field_name
definition = timechart span=1d sum(b) AS volumeB by $split_by_field_name$ fixedrange=false | `$size_search$($host$, "$pool_clause$")` | foreach * [eval <<FIELD>>=round('<<FIELD>>'/1024/1024/1024, 3)]
[dmc_licensing_summery_pct_no_split(4)]
# Daily percentage of license size used, not split.
# sz_clause = poolsz|stacksz
# args: splunk_server, sz_clause, host, split_by_field_name
#   sz_clause - name of the size field used as the denominator
#   splunk_server, host and split_by_field_name are not referenced by this definition
# pctused = volumeB / <sz_clause> * 100, rounded to 2 decimals; _time is shifted back 12 hours before binning.
args = splunk_server, sz_clause, host, split_by_field_name
definition = eval _time=_time - 43200 | bin _time span=1d | stats latest(b) AS b latest($sz_clause$) AS $sz_clause$ by slave, pool, _time | stats sum(b) AS volumeB max($sz_clause$) AS $sz_clause$ by _time | eval pctused=round(volumeB/$sz_clause$*100,2) | timechart span=1d max(pctused) AS "used" fixedrange=false
[dmc_licensing_summery_pct_pool(4)]
# Daily percentage of license size used, split by pool.
# args: splunk_server, sz_clause, host, split_by_field_name (sz_clause = poolsz|stacksz)
#   splunk_server, host and split_by_field_name are not referenced by this definition
args = splunk_server, sz_clause, host, split_by_field_name
definition = eval _time=_time - 43200 | bin _time span=1d | stats latest(b) AS b latest($sz_clause$) AS $sz_clause$ by slave, pool, _time | stats sum(b) AS volumeB max($sz_clause$) AS $sz_clause$ by pool, _time | eval pctused=round(volumeB/$sz_clause$*100,2) | timechart span=1d max(pctused) AS "used" by pool fixedrange=false
[dmc_licensing_summery_pct_indxr(4)]
# Daily percentage of license size used, split by indexer (license slave).
# args: splunk_server, sz_clause, host, split_by_field_name (sz_clause = poolsz|stacksz)
#   splunk_server - server queried through rest /services/licenser/slaves to resolve slave GUIDs to labels
#   host and split_by_field_name are not referenced by this definition
# slaves without a label are shown as "GUID: <guid>".
args = splunk_server, sz_clause, host, split_by_field_name
definition = eval _time=_time - 43200 | bin _time span=1d | stats latest(b) AS b latest($sz_clause$) AS $sz_clause$ by slave, pool, _time | stats sum(b) AS volumeB max($sz_clause$) AS $sz_clause$ by slave, _time | eval pctused=round(volumeB/$sz_clause$*100,2) | eval slave_guid=slave | join type=outer slave_guid [rest splunk_server=$splunk_server$ /services/licenser/slaves | rename label AS slave_name title AS slave_guid | table slave_guid slave_name] | eval slave_name = if(isnotnull(slave_name),slave_name,"GUID: ".slave_guid) | timechart span=1d max(pctused) AS "used" by slave_name fixedrange=false
[dmc_licensing_usage_pct_all(4)]
# Daily percentage of license size used from Usage events, split by $split_by_field_name$.
# args: splunk_server, sz_clause, host, split_by_field_name (sz_clause = poolsz|stacksz)
#   splunk_server is not referenced by this definition
# the size is taken from the last 30 days of RolloverSummary events on $host$ (outer join on _time, pool).
args = splunk_server, sz_clause, host, split_by_field_name
definition = join _time pool type=outer [search `dmc_set_index_internal` host=$host$ source=*license_usage.log* type="RolloverSummary" earliest=-30d@d | eval _time=_time - 43200 | bin _time span=1d | stats latest($sz_clause$) AS $sz_clause$ by slave, pool, _time | stats max($sz_clause$) AS $sz_clause$ by pool, _time] | stats sum(b) AS bytes_used max($sz_clause$) AS $sz_clause$ by $split_by_field_name$, _time | timechart span=1d max(eval(round(bytes_used/$sz_clause$*100,2))) by $split_by_field_name$ fixedrange=false
[dmc_licensing_summary_maxavg_no_split(3)]
# Average and peak daily license volume (GB) across all pools.
# args: splunk_server, split_by_field_name, split_by_label - none of them is referenced by this definition
# Output fields: "All Pools" (empty string), "average", "peak".
args = splunk_server, split_by_field_name, split_by_label
definition = eval _time=_time - 43200 | bin _time span=1d | stats latest(b) AS b by slave, pool, _time | stats sum(b) AS volume by _time | stats avg(volume) AS avgVolume max(volume) AS maxVolume | eval maxVolumeGB=round(maxVolume/1024/1024/1024,3) | eval avgVolumeGB=round(avgVolume/1024/1024/1024,3) | rename avgVolumeGB AS "average" maxVolumeGB AS "peak" | eval "All Pools" = "" | fields "All Pools", "average", "peak"
[dmc_licensing_summary_maxavg_pool(3)]
# Average and peak daily license volume (GB) per pool; top 5 by "average", descending.
# args: splunk_server, split_by_field_name, split_by_label - none of them is referenced by this definition
# Output fields: "Pool", "average", "peak".
args = splunk_server, split_by_field_name, split_by_label
definition = eval _time=_time - 43200 | bin _time span=1d | stats latest(b) AS b by slave, pool, _time | stats sum(b) AS volume by pool, _time | stats avg(volume) AS avgVolume max(volume) AS maxVolume by pool | eval maxVolumeGB=round(maxVolume/1024/1024/1024,3) | eval avgVolumeGB=round(avgVolume/1024/1024/1024,3) | rename avgVolumeGB AS "average" maxVolumeGB AS "peak" pool as "Pool" | fields "Pool", "average", "peak" | sort 5 - "average"
[dmc_licensing_summary_maxavg_indxr(3)]
# Average and peak daily license volume (GB) per indexer (license slave); top 5 by "average", descending.
# args: splunk_server, split_by_field_name, split_by_label
#   splunk_server - server queried through rest /services/licenser/slaves to resolve slave GUIDs to labels
#   split_by_field_name and split_by_label are not referenced by this definition
# slaves without a label are shown as "GUID: <guid>". Output fields: "Indexer", "average", "peak".
args = splunk_server, split_by_field_name, split_by_label
definition = eval _time=_time - 43200 | bin _time span=1d | stats latest(b) AS b by slave, pool, _time | eval slave_guid=slave | stats sum(b) AS volume by slave_guid, _time | stats avg(volume) AS avgVolume max(volume) AS maxVolume by slave_guid | join type=outer slave_guid [rest splunk_server=$splunk_server$ /services/licenser/slaves | rename label AS slave_name title AS slave_guid | table slave_guid slave_name] | eval slave_name = if(isnotnull(slave_name),slave_name,"GUID: ".slave_guid) | foreach *Volume [eval <<FIELD>>=round('<<FIELD>>'/1024/1024/1024, 3)] | rename slave_name as "Indexer" avgVolume AS "average" maxVolume AS "peak" | sort 5 - "average" | fields "Indexer", "average", "peak" | fields - _timediff, slave_guid
[dmc_licensing_usage_maxavg_all(3)]
# Average and peak daily license volume (GB) from Usage events per $split_by_field_name$; top 5 by "average".
# args: splunk_server, split_by_field_name, split_by_label
#   split_by_label - column title the split field is renamed to
#   splunk_server is not referenced by this definition
args = splunk_server, split_by_field_name, split_by_label
definition = stats sum(b) AS volume by $split_by_field_name$, _time | stats avg(volume) AS avgVolume max(volume) AS maxVolume by $split_by_field_name$ | eval avgVolumeGB=round(avgVolume/1024/1024/1024,3) | eval maxVolumeGB=round(maxVolume/1024/1024/1024,3) | fields $split_by_field_name$, avgVolumeGB, maxVolumeGB | rename avgVolumeGB AS "average" maxVolumeGB AS "peak" $split_by_field_name$ AS "$split_by_label$" | sort 5 - "average"
[dmc_licensing_pool_size_srch(2)]
# Outer-joins (on _time) a daily "pool size" series onto the current results.
# args:
#   host        - host to filter the RolloverSummary events on
#   pool_clause - extra search terms appended verbatim inside the subsearch
# "pool size" is latest(poolsz) per day, over the last 30 days, with _time shifted back 12 hours before binning.
args = host, pool_clause
definition = join type=outer _time [search `dmc_set_index_internal` host=$host$ source=*license_usage.log* type="RolloverSummary" earliest=-30d@d $pool_clause$ | eval _time=_time - 43200 | bin _time span=1d | stats latest(poolsz) AS "pool size" by _time] | fields - _timediff
[dmc_licensing_stack_size_srch(2)]
# Outer-joins (on _time) a daily "stack size" series onto the current results.
# args:
#   host        - host to filter the RolloverSummary events on
#   pool_clause - accepted so the signature matches dmc_licensing_pool_size_srch; not referenced by this definition
# "stack size" is the sum of stacksz over one event per (_time, stack), over the last 30 days.
args = host, pool_clause
definition = join type=outer _time [search `dmc_set_index_internal` host=$host$ source=*license_usage.log* type="RolloverSummary" earliest=-30d@d | eval _time=_time - 43200 | bin _time span=1d | dedup _time stack | stats sum(stacksz) AS "stack size" by _time] | fields - _timediff
[dmc_scheduler_instance_search_concurrency_drilldown(3)]
# Drilldown table: per-search (by sid) resource usage on one host, restricted by
# `dmc_match_all_scheduled_search_types`. Apart from that extra filter the pipeline matches
# dmc_search_activity_instance_drilldown in this file.
# args:
#   host               - host to filter the introspection events on
#   concurrencySplitBy - name of the field the clicked series was split by
#   type               - value that field must equal (compared as a string)
args = host, concurrencySplitBy, type
definition = `dmc_set_index_introspection` host=$host$ sourcetype=splunk_resource_usage component=PerProcess data.search_props.sid::* `dmc_match_all_scheduled_search_types` \
| `dmc_rename_introspection_fields` \
| where $concurrencySplitBy$=="$type$" \
| stats latest(pid) as "PID", latest(ppid) as "PPID", latest(label) as Label, latest(provenance) as Provenance, latest(search_head) as "Search Head", latest(type) as Type, latest(user) as User, latest(app) as App, latest(mode) as Mode, latest(role) as Role, avg(pct_cpu) as avg_pct_cpu, max(mem_used) as "Physical Memory Usage (MB)", min(_time) as min_time, max(_time) as max_time, max(elapsed) as "Elapsed Time" by sid \
| eval avg_pct_cpu = round(avg_pct_cpu, 2) \
| `dmc_time_format(min_time)` \
| `dmc_time_format(max_time)` \
| rename avg_pct_cpu as "Average CPU Usage (%)", min_time as "First Time Seen", max_time as "Last Time Seen"