# Saved searches for the Splunk Monitoring Console (DMC): alert definitions,
# asset-table builders, the license usage data cube, and default heatmap
# display settings.
# Alert: license usage close to the daily quota.
# Triggers when the search returns more than zero rows, i.e. when the summed
# pool usage on a license master exceeds 90% of the largest stack quota
# reported for that instance. Ships disabled; scheduled at minutes 3 and 33 of
# every hour, with repeat alerts suppressed for 4 hours.
[DMC Alert - Total License Usage Near Daily Quota]
alert.digest_mode = 1
alert.expires = 7d
counttype = number of events
alert.suppress = 1
alert.suppress.period = 4h
alert.track = 1
action.email.sendresults = 1
action.email.inline = 1
cron_schedule = 3,33 * * * *
description = You have used 90% of your total daily license quota.
dispatch.ttl = 14400
disabled = 1
enableSched = 1
quantity = 0
relation = greater than
# Pools are joined to the active license group and to the stack quotas, then
# aggregated per instance before the percentage is computed and filtered.
search = | rest splunk_server_group=dmc_group_license_master /services/licenser/pools \
| join type=outer stack_id splunk_server [rest splunk_server_group=dmc_group_license_master /services/licenser/groups | search is_active=1 | eval stack_id=stack_ids | fields splunk_server stack_id is_active] \
| search is_active=1 \
| fields splunk_server, stack_id, used_bytes \
| join type=outer stack_id splunk_server [rest splunk_server_group=dmc_group_license_master /services/licenser/stacks | eval stack_id=title | eval stack_quota=quota | fields splunk_server stack_id stack_quota] \
| stats sum(used_bytes) as used_bytes max(stack_quota) as stack_quota by splunk_server \
| eval usedGB=round(used_bytes/1024/1024/1024,3) \
| eval totalGB=round(stack_quota/1024/1024/1024,3) \
| eval percentage=round(usedGB / totalGB, 3)*100 \
| fields splunk_server, percentage, usedGB, totalGB \
| where percentage > 90 \
| rename splunk_server AS Instance, percentage AS "License quota used (%)", usedGB AS "License quota used (GB)", totalGB as "Total license quota (GB)"

# Alert: missing forwarders.
# Triggers when the dmc_forwarder_assets lookup contains at least one row with
# status="missing". Ships disabled; scheduled every 15 minutes with alert
# suppression turned off.
[DMC Alert - Missing forwarders]
disabled = 1
alert.suppress = 0
alert.track = 1
auto_summarize.dispatch.earliest_time = -1d@h
counttype = number of events
cron_schedule = */15 * * * *
description = One or more forwarders are missing.
enableSched = 1
quantity = 0
relation = greater than
search = | inputlookup dmc_forwarder_assets \
| search status="missing" \
| rename hostname as Instance

# Alert: expired or soon-to-expire licenses.
# Lists licenses in the active license group for instances that have no
# license with 14 or more days left. Licenses with quota 1048576 and the
# "Reset Warnings" licenses are excluded before that check. Ships disabled;
# scheduled daily at 00:03 with alert suppression turned off.
[DMC Alert - Expired and Soon To Expire Licenses]
alert.suppress = 0
alert.digest_mode = 1
alert.expires = 7d
counttype = number of events
alert.track = 1
action.email.sendresults = 1
action.email.inline = 1
cron_schedule = 3 0 * * *
description = You have instances with licenses that expired or will expire within 2 weeks. No other valid licenses are installed.
dispatch.ttl = 14400
disabled = 1
enableSched = 1
quantity = 0
relation = greater than
# NOTE(review): has_valid_license == 0 already implies days_left < 14 for every
# remaining row of that instance, so the "days_left < 15" test and the
# "days_left >= 14" case branch look redundant - confirm before changing.
search = | rest splunk_server_group=dmc_group_license_master /services/licenser/licenses \
| join type=outer group_id splunk_server [ \
rest splunk_server_group=dmc_group_license_master /services/licenser/groups \
| where is_active = 1 \
| rename title AS group_id \
| fields is_active group_id splunk_server] \
| where is_active = 1 \
| eval days_left = floor((expiration_time - now()) / 86400) \
| where NOT (quota = 1048576 OR label == "Splunk Enterprise Reset Warnings" OR label == "Splunk Lite Reset Warnings") \
| eventstats max(eval(if(days_left >= 14, 1, 0))) as has_valid_license by splunk_server \
| where has_valid_license == 0 AND (status == "EXPIRED" OR days_left < 15) \
| eval expiration_status = case(days_left >= 14, days_left." days left", days_left < 14 AND days_left >= 0, "Expires soon: ".days_left." days left", days_left < 0, "Expired") \
| eval total_gb=round(quota/1024/1024/1024,3) \
| fields splunk_server label license_hash type group_id total_gb expiration_time expiration_status \
| convert ctime(expiration_time) \
| rename splunk_server AS Instance label AS "Label" license_hash AS "License Hash" type AS Type group_id AS Group total_gb AS Size expiration_time AS "Expires On" expiration_status AS Status

# Alert: saturated event-processing queues.
# Triggers when any of the tcpin, parsing, aggregation, typing or index queues
# on an indexer reports a fill percentage above 90, computed from
# value_cntr3_size_bytes_lookback / max_size_bytes. Ships disabled; scheduled
# every 10 minutes starting at minute 3, with repeat alerts suppressed for
# 1 hour.
# NOTE(review): the description says "90% or more" while the filter is
# strictly "> 90" - confirm which is intended.
[DMC Alert - Saturated Event-Processing Queues]
alert.digest_mode = 1
alert.expires = 7d
counttype = number of events
alert.suppress = 1
alert.suppress.period = 1h
alert.track = 1
action.email.sendresults = 1
action.email.inline = 1
cron_schedule = 3,13,23,33,43,53 * * * *
description = One or more of your indexer queues is reporting a fill percentage, averaged over the last 15 minutes, of 90% or more.
disabled = 1
dispatch.ttl = 14400
enableSched = 1
quantity = 0
relation = greater than
search = | rest splunk_server_group=dmc_group_indexer /services/server/introspection/queues \
| search title=tcpin_queue OR title=parsingQueue OR title=aggQueue OR title=typingQueue OR title=indexQueue \
| eval fifteen_min_fill_perc = round(value_cntr3_size_bytes_lookback / max_size_bytes * 100,2) \
| fields title fifteen_min_fill_perc splunk_server \
| where fifteen_min_fill_perc > 90 \
| rename splunk_server as Instance, title AS "Queue name", fifteen_min_fill_perc AS "Average queue fill percentage (last 15min)"

# Alert: abnormal indexer processor state.
# Triggers when any indexer reports a status other than "normal"; the reported
# reason is appended to the status column. Ships disabled; scheduled every
# 5 minutes starting at minute 3, with repeat alerts suppressed for 30 minutes.
[DMC Alert - Abnormal State of Indexer Processor]
alert.digest_mode = 1
alert.expires = 7d
alert.suppress = 1
alert.suppress.period = 30m
alert.track = 1
action.email.sendresults = 1
action.email.inline = 1
counttype = number of events
cron_schedule = 3,8,13,18,23,28,33,38,43,48,53,58 * * * *
description = One or more of your indexers is reporting an abnormal state.
disabled = 1
dispatch.ttl = 14400
enableSched = 1
quantity = 0
relation = greater than
search = | rest splunk_server_group=dmc_group_indexer /services/server/introspection/indexer \
| fields splunk_server, average_KBps, status, reason \
| where status != "normal" \
| eval average_KBps = round(average_KBps, 0) \
| eval status= if(status=="normal", status, status." - ".reason) \
| fields - reason \
| rename splunk_server as Instance, average_KBps as "Average KB/s (last 30s)", status as Status

# Alert: search peer not responding.
# Triggers when the local instance lists an enabled (disabled=0) distributed
# search peer whose status is not "Up". Ships disabled; scheduled every
# 5 minutes starting at minute 3, with repeat alerts suppressed for 30 minutes.
[DMC Alert - Search Peer Not Responding]
alert.digest_mode = 1
alert.expires = 7d
alert.suppress = 1
alert.suppress.period = 30m
alert.track = 1
action.email.sendresults = 1
action.email.inline = 1
counttype = number of events
cron_schedule = 3,8,13,18,23,28,33,38,43,48,53,58 * * * *
description = One or more of your search peers is currently down.
disabled = 1
dispatch.ttl = 14400
enableSched = 1
quantity = 0
relation = greater than
search = | rest splunk_server=local /services/search/distributed/peers/ \
| where status!="Up" AND disabled=0 \
| fields peerName, status \
| rename peerName as Instance, status as Status

# Alert: critical physical memory usage.
# Triggers when mem_used / mem on any instance in a dmc_group_* server group
# exceeds 90%. Ships disabled; scheduled every 5 minutes starting at minute 3,
# with repeat alerts suppressed for 30 minutes.
[DMC Alert - Critical System Physical Memory Usage]
alert.digest_mode = 1
alert.expires = 7d
alert.suppress = 1
alert.suppress.period = 30m
alert.track = 1
action.email.sendresults = 1
action.email.inline = 1
counttype = number of events
cron_schedule = 3,8,13,18,23,28,33,38,43,48,53,58 * * * *
description = One or more instances has exceeded 90% memory usage.
disabled = 1
dispatch.ttl = 14400
enableSched = 1
quantity = 0
relation = greater than
search = | rest splunk_server_group=dmc_group_* /services/server/status/resource-usage/hostwide \
| eval percentage=round(mem_used/mem,3)*100 \
| where percentage > 90 \
| fields splunk_server, percentage, mem_used, mem \
| rename splunk_server AS Instance, mem AS "Physical memory installed (MB)", percentage AS "Memory used (%)", mem_used AS "Memory used (MB)"

# Alert: near-critical disk usage.
# Triggers when any partition on an instance in a dmc_group_* server group has
# a floored usage percentage above 80. "available" is used as the free-space
# figure when present, otherwise "free". Ships disabled; scheduled at minutes
# 3 and 33 of every hour, with repeat alerts suppressed for 4 hours.
[DMC Alert - Near Critical Disk Usage]
alert.digest_mode = 1
alert.expires = 7d
counttype = number of events
alert.suppress = 1
alert.suppress.period = 4h
alert.track = 1
action.email.sendresults = 1
action.email.inline = 1
cron_schedule = 3,33 * * * *
description = You have used 80% of your disk capacity.
disabled = 1
enableSched = 1
quantity = 0
relation = greater than
search = | rest splunk_server_group=dmc_group_* /services/server/status/partitions-space \
| eval free = if(isnotnull(available), available, free) \
| eval usage = capacity - free \
| eval pct_usage = floor(usage / capacity * 100) \
| where pct_usage > 80 \
| stats first(fs_type) as fs_type first(capacity) AS capacity first(usage) AS usage first(pct_usage) AS pct_usage by splunk_server, mount_point \
| eval usage = round(usage / 1024, 2) \
| eval capacity = round(capacity / 1024, 2) \
| rename splunk_server AS Instance mount_point as "Mount Point", fs_type as "File System Type", usage as "Usage (GB)", capacity as "Capacity (GB)", pct_usage as "Usage (%)"

# Asset builder (standalone mode): refreshes the dmc_assets lookup.
# Appends the existing dmc_assets rows to the local instance's asset row and
# keeps one row per peerURI / search_group. Enabled by default; runs on startup
# and on a one-minute cron, limited by run_n_times = 1.
[DMC Asset - Build Standalone Asset Table]
action.populate_lookup = 1
action.populate_lookup.dest = dmc_assets
run_on_startup = 1
cron_schedule = */1 * * * *
run_n_times = 1
disabled = 0
description = This search establishes an updated cache of metadata necessary for the localhost to be included in DMC dashboards. This search will be disabled when the user goes through the setup process, and will be re-enabled when the user resets to factory mode.
enableSched = 1
dispatchAs = user
search = | `dmc_get_local_instance_asset` \
| rename search_groups AS search_group \
| inputlookup append=true dmc_assets \
| stats last(*) AS * by peerURI, search_group \
| fields peerURI serverName host machine search_group

# Asset builder (standalone mode, computed groups only): writes only the local
# instance's asset row to the dmc_assets lookup, without appending existing
# lookup rows. No schedule is configured in this stanza.
[DMC Asset - Build Standalone Computed Groups Only]
action.populate_lookup = 1
action.populate_lookup.dest = dmc_assets
disabled = 0
dispatchAs = user
search = | `dmc_get_local_instance_asset` \
| rename search_groups AS search_group \
| fields peerURI serverName host machine search_group

# Asset builder (distributed mode): writes the dmc_assets lookup from the
# enabled, "Up" distributed search peers that belong to a dmc_group_* search
# group. The peers are joined with the splunk_monitoring_console_assets
# configuration, and the local instance's row is appended. No schedule is
# configured in this stanza.
[DMC Asset - Build Full]
action.populate_lookup = 1
action.populate_lookup.dest = dmc_assets
disabled = 0
dispatchAs = user
search = | rest splunk_server=local /services/search/distributed/peers \
| search status=Up disabled=0 \
| eval os = os_name \
| fields guid title peerName host host_fqdn server_roles search_groups cpu_arch os numberOfCores physicalMemoryMB version \
| rename title AS peerURI peerName AS serverName host_fqdn AS machine numberOfCores AS cpu_count physicalMemoryMB AS mem version AS splunk_version server_roles AS inherited_server_roles \
| where isnotnull(mvfind(search_groups,"dmc_group_")) \
| join type=outer peerURI [ \
| rest splunk_server=local /servicesNS/nobody/splunk_monitoring_console/configs/conf-splunk_monitoring_console_assets \
| fields title host host_fqdn \
| rename title AS peerURI host_fqdn AS machine] \
| mvexpand search_groups \
| append [ | `dmc_get_local_instance_asset_in_distributed_mode` ] \
| fields peerURI serverName host machine search_groups \
| rename search_groups AS search_group

# Forwarder asset builder: maintains the dmc_forwarder_assets lookup.
# Merges the output of the dmc_build_forwarder_assets(1m) macro over the
# -16m@m .. -1m@m window with the existing lookup rows, then rewrites the
# lookup. A forwarder is marked "missing" when it has no positive sum_kb or its
# last_connected is more than 900 seconds before the search's latest time;
# otherwise it is "active". Ships disabled; scheduled every 15 minutes
# starting at minute 3.
[DMC Forwarder - Build Asset Table]
disabled = 1
enableSched = 1
cron_schedule = 3,18,33,48 * * * *
run_on_startup = 0
dispatch.earliest_time = -16m@m
dispatch.latest_time = -1m@m
dispatchAs = user
search = `dmc_build_forwarder_assets(1m)` \
| inputlookup append=true dmc_forwarder_assets \
| stats values(forwarder_type) as forwarder_type, max(version) as version, values(arch) as arch, values(os) as os, max(last_connected) as last_connected, values(new_sum_kb) as sum_kb, values(new_avg_tcp_kbps_sparkline) as avg_tcp_kbps_sparkline, values(new_avg_tcp_kbps) as avg_tcp_kbps, values(new_avg_tcp_eps) as avg_tcp_eps by guid, hostname \
| addinfo \
| eval status = if(isnull(sum_kb) or (sum_kb <= 0) or (last_connected < (info_max_time - 900)), "missing", "active") \
| eval sum_kb = round(sum_kb, 2) \
| eval avg_tcp_kbps = round(avg_tcp_kbps, 2) \
| eval avg_tcp_eps = round(avg_tcp_eps, 2) \
| fields guid, hostname, forwarder_type, version, arch, os, status, last_connected, sum_kb, avg_tcp_kbps_sparkline, avg_tcp_kbps, avg_tcp_eps \
| outputlookup dmc_forwarder_assets

# For license usage report dashboard
# Daily sum of b from the "Usage" events in license_usage.log over the last
# 31 days, split by host, pool, s, st, h and idx. Empty or null h and s are
# reported as "(SQUASHED)", and empty or null idx as "(UNKNOWN)".
# Summarization is off by default (auto_summarize = 0).
[DMC License Usage Data Cube]
dispatch.earliest_time = -31d
dispatch.latest_time = -0d
auto_summarize = 0
auto_summarize.dispatch.earliest_time = -1mon@d
auto_summarize.cron_schedule = 3,13,23,33,43,53 * * * *
search = index=_internal source=*license_usage.log* type="Usage" | eval h=if(len(h)=0 OR isnull(h),"(SQUASHED)",h) | eval s=if(len(s)=0 OR isnull(s),"(SQUASHED)",s) | eval idx=if(len(idx)=0 OR isnull(idx),"(UNKNOWN)",idx) | bin _time span=1d | stats sum(b) as b by _time, host, pool, s, st, h, idx

# For DMC Heatmap
# Default display settings for the splunk_monitoring_console heatmap custom
# visualization: tooltip, legend and both axes shown, legend titled
# "Instance count", x-axis labelled "Time", y-axis label left empty.
[default]
display.visualizations.custom.splunk_monitoring_console.heatmap.showTooltip = true
display.visualizations.custom.splunk_monitoring_console.heatmap.baseColor = #284774
display.visualizations.custom.splunk_monitoring_console.heatmap.showLegend = true
display.visualizations.custom.splunk_monitoring_console.heatmap.showXAxis = true
display.visualizations.custom.splunk_monitoring_console.heatmap.showYAxis = true
display.visualizations.custom.splunk_monitoring_console.heatmap.legendTitle = Instance count
display.visualizations.custom.splunk_monitoring_console.heatmap.xAxis = Time
display.visualizations.custom.splunk_monitoring_console.heatmap.yAxis =