You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
204 lines
11 KiB
204 lines
11 KiB
[accel_volume_data_rates]
# Accelerated report: per-volume read/write data rates from VolumePerfHandler
# perf events. Rows are keyed by _time and fullName, where fullName is
# vserver_name + ":" + instance_name (just instance_name when vserver_name
# is null).
# Stanza is disabled.
disabled = 1
# Schedule: minute 0 of every second hour, searching the trailing two hours.
enableSched = 1
cron_schedule = 0 */2 * * *
dispatch.earliest_time = -2h@h
dispatch.latest_time = now
# Report acceleration is on; summarization reaches back to -7d@d.
auto_summarize = 1
auto_summarize.dispatch.earliest_time = -7d@d
# App and view used when the search is dispatched from the UI.
request.ui_dispatch_app = DA-ITSI-CP-netapp-dashboards
request.ui_dispatch_view = search
search = `ontap-index` sourcetype=ontap:perf source=VolumePerfHandler | eval vserver_name=if(isnull(vserver_name), "", vserver_name+":") | eval fullName=vserver_name+instance_name | stats first(read_data_rate) as read_data_rate first(write_data_rate) as write_data_rate values(instance_name) as instance_name values(host) as host by _time fullName
|
|
|
|
[accel_volume_latency_rate]
# Accelerated report: per-volume avg/other/write/read latency averages from
# VolumePerfHandler perf events, each divided by 1000 inside the stats call.
# Rows are keyed by _time and fullName, where fullName is
# vserver_name + ":" + instance_name (just instance_name when vserver_name
# is null).
# Stanza is disabled.
disabled = 1
# Schedule: minute 0 of every second hour, searching the trailing two hours.
enableSched = 1
cron_schedule = 0 */2 * * *
dispatch.earliest_time = -2h@h
dispatch.latest_time = now
# Report acceleration is on; summarization reaches back to -7d@d.
auto_summarize = 1
auto_summarize.dispatch.earliest_time = -7d@d
# App and view used when the search is dispatched from the UI.
request.ui_dispatch_app = DA-ITSI-CP-netapp-dashboards
request.ui_dispatch_view = search
search = `ontap-index` sourcetype=ontap:perf source=VolumePerfHandler | eval vserver_name=if(isnull(vserver_name), "", vserver_name+":") | eval fullName=vserver_name+instance_name | stats first(eval(avg_latency_average/1000)) as avg_latency_average first(eval(other_latency_average/1000)) as other_latency_average first(eval(write_latency_average/1000)) as write_latency_average first(eval(read_latency_average/1000)) as read_latency_average values(host) as host values(instance_name) as instance_name by _time fullName
|
|
|
|
[accel_volume_iops]
# Accelerated report: per-volume total/write/read/other ops rates from
# VolumePerfHandler perf events. Rows are keyed by _time and fullName, where
# fullName is vserver_name + ":" + instance_name (just instance_name when
# vserver_name is null).
# Stanza is disabled.
disabled = 1
# Schedule: minute 0 of every second hour, searching the trailing two hours.
enableSched = 1
cron_schedule = 0 */2 * * *
dispatch.earliest_time = -2h@h
dispatch.latest_time = now
# Report acceleration is on; summarization reaches back to -7d@d.
auto_summarize = 1
auto_summarize.dispatch.earliest_time = -7d@d
# App and view used when the search is dispatched from the UI.
request.ui_dispatch_app = DA-ITSI-CP-netapp-dashboards
request.ui_dispatch_view = search
search = `ontap-index` sourcetype=ontap:perf source=VolumePerfHandler | eval vserver_name=if(isnull(vserver_name), "", vserver_name+":") | eval fullName=vserver_name+instance_name | stats first(total_ops_rate) as total_ops_rate first(write_ops_rate) as write_ops_rate first(read_ops_rate) as read_ops_rate first(other_ops_rate) as other_ops_rate values(host) as host values(instance_name) as instance_name by _time fullName
|
|
|
|
[accel_volume_block_ops]
# Accelerated report: per-volume read/write block rates from
# VolumePerfHandler perf events. Rows are keyed by _time and fullName, where
# fullName is vserver_name + ":" + instance_name (just instance_name when
# vserver_name is null).
# Stanza is disabled.
disabled = 1
# Schedule: minute 0 of every second hour, searching the trailing two hours.
enableSched = 1
cron_schedule = 0 */2 * * *
dispatch.earliest_time = -2h@h
dispatch.latest_time = now
# Report acceleration is on; summarization reaches back to -7d@d.
auto_summarize = 1
auto_summarize.dispatch.earliest_time = -7d@d
# App and view used when the search is dispatched from the UI.
request.ui_dispatch_app = DA-ITSI-CP-netapp-dashboards
request.ui_dispatch_view = search
search = `ontap-index` sourcetype=ontap:perf source=VolumePerfHandler | eval vserver_name=if(isnull(vserver_name), "", vserver_name+":") | eval fullName=vserver_name+instance_name | stats first(read_blocks_rate) as read_blocks_rate first(write_blocks_rate) as write_blocks_rate values(host) as host values(instance_name) as instance_name by _time fullName
|
|
|
|
|
|
|
|
[Unhealthy cluster nodes in the past hour]
# Tabulates _time and node for _internal events from hydra or
# splunk_ta_ontap_api sources that contain "Node is not healthy" and carry a
# node field, over the last hour. Stanza is disabled.
disabled = 1
dispatch.earliest_time = -1h
search = index=_internal (source="*hydra*" OR source="*splunk_ta_ontap_api*") "Node is not healthy" node=* | table _time,node
|
|
|
|
[Connection problems in the past hour]
# Finds _internal events from hydra or splunk_ta_ontap_api sources that
# mention "[Errno 8]", "timed out" or "Could not login", over the last hour.
# Stanza is disabled.
disabled = 1
dispatch.earliest_time = -1h
search = index=_internal source=*hydra* OR source=*splunk_ta_ontap_api* ("*[Errno 8]*" OR "timed out" OR "Could not login")
|
|
|
|
[Missing controller capability collection errors in the past hour]
# Finds _internal events from hydra sources containing
# "does not have capability" and ERROR, over the last hour.
# Stanza is disabled.
disabled = 1
dispatch.earliest_time = -1h
search = index=_internal source=*hydra* "does not have capability" ERROR
|
|
|
|
|
|
[Total events in the past hour]
# Single count of ontap:syslog and ontap:ems events over the last hour.
# Stanza is disabled.
disabled = 1
dispatch.earliest_time = -1h
search = `ontap-index` sourcetype="ontap:syslog" OR sourcetype="ontap:ems"| stats count
|
|
|
|
[Total error events in the past hour]
# Single count of ontap:syslog and ontap:ems events containing "error",
# over the last hour. Stanza is disabled.
disabled = 1
dispatch.earliest_time = -1h
search = `ontap-index` sourcetype="ontap:syslog" OR sourcetype="ontap:ems" error | stats count
|
|
|
|
[Total events by controller in the past hour]
# Per-host count of ontap:syslog and ontap:ems events over the last hour.
# Stanza is disabled.
disabled = 1
dispatch.earliest_time = -1h
search = `ontap-index` sourcetype="ontap:syslog" OR sourcetype="ontap:ems" | stats count by host
|
|
|
|
[Total alert and critical events in the past hour]
# Per-host count of ontap:syslog and ontap:ems events containing "alert" or
# "critical", over the last hour. Stanza is disabled.
disabled = 1
dispatch.earliest_time = -1h
search = `ontap-index` sourcetype="ontap:syslog" OR sourcetype="ontap:ems" (alert OR critical) | stats count by host
|
|
|
|
[Count of total disk and controller events by controller in the past hour]
# Per-host count of ontap:syslog and ontap:ems events containing
# "controller" or "disk", over the last hour. Stanza is disabled.
disabled = 1
dispatch.earliest_time = -1h
search = `ontap-index` sourcetype="ontap:syslog" OR sourcetype="ontap:ems" (controller OR disk) | stats count by host
|
|
|
|
[Count of disk events over time by controller]
# Timechart, split by host, of ontap:syslog and ontap:ems events containing
# "disk", over the last hour. Stanza is disabled.
disabled = 1
dispatch.earliest_time = -1h
search = `ontap-index` sourcetype="ontap:syslog" OR sourcetype="ontap:ems" disk | timechart count by host
|
|
|
|
[Count of error events over time by controller]
# Timechart, split by host, of ontap:syslog and ontap:ems events containing
# "error", over the last hour. Stanza is disabled.
disabled = 1
dispatch.earliest_time = -1h
search = `ontap-index` sourcetype="ontap:syslog" OR sourcetype="ontap:ems" error | timechart count by host
|
|
|
|
[Count of disk error events over time by controller]
# Timechart, split by host, of ontap:syslog and ontap:ems events containing
# both "error" and "disk", over the last hour. Stanza is disabled.
disabled = 1
dispatch.earliest_time = -1h
search = `ontap-index` sourcetype="ontap:syslog" OR sourcetype="ontap:ems" error disk | timechart count by host
|
|
|
|
[Count of alert and critical events over time by controller]
# Timechart, split by host, of ontap:syslog and ontap:ems events containing
# "alert" or "critical", over the last hour. Stanza is disabled.
disabled = 1
dispatch.earliest_time = -1h
search = `ontap-index` sourcetype="ontap:syslog" OR sourcetype="ontap:ems" (alert OR critical) | timechart count by host
|
|
|
|
[Count of read error events on disks by controller]
# Timechart, split by host, of ontap:syslog and ontap:ems events containing
# "disk", "read" and "error", over the last hour. Stanza is disabled.
disabled = 1
dispatch.earliest_time = -1h
search = `ontap-index` sourcetype="ontap:syslog" OR sourcetype="ontap:ems" disk read error | timechart count by host
|
|
|
|
[Count of aggregate events over time by controller]
# Timechart, split by host, of ontap:syslog and ontap:ems events matching
# the wildcard term aggregat*, over the last hour. Stanza is disabled.
disabled = 1
dispatch.earliest_time = -1h
search = `ontap-index` sourcetype="ontap:syslog" OR sourcetype="ontap:ems" aggregat* | timechart count by host
|
|
|
|
[Count of volume events over time by controller]
# Timechart, split by host, of ontap:syslog and ontap:ems events matching
# the wildcard term volume*, over the last hour. Stanza is disabled.
disabled = 1
dispatch.earliest_time = -1h
search = `ontap-index` sourcetype="ontap:syslog" OR sourcetype="ontap:ems" volume* | timechart count by host
|
|
|
|
[Count of snapshot events on aggregates over time by controller]
# Timechart, split by host, of ontap:syslog and ontap:ems events matching
# both snapshot* and aggregat*, over the last hour. Stanza is disabled.
disabled = 1
dispatch.earliest_time = -1h
search = `ontap-index` sourcetype="ontap:syslog" OR sourcetype="ontap:ems" snapshot* aggregat* | timechart count by host
|
|
|
|
[Count of error snapshot events over time by controller]
# Timechart, split by host, of ontap:syslog and ontap:ems events containing
# both "snapshot" and "error", over the last hour. Stanza is disabled.
disabled = 1
dispatch.earliest_time = -1h
search = `ontap-index` sourcetype="ontap:syslog" OR sourcetype="ontap:ems" snapshot error | timechart count by host
|
|
|
|
[Count of SnapMirror error events over time by controller]
# Timechart, split by host, of ontap:syslog and ontap:ems events containing
# both "snapmirror" and "error", over the last hour. Stanza is disabled.
disabled = 1
dispatch.earliest_time = -1h
search = `ontap-index` sourcetype="ontap:syslog" OR sourcetype="ontap:ems" snapmirror error | timechart count by host
|
|
|
|
[Count of Monitoring and Host Configuration events over time by controller]
# Timechart, split by host, of ontap:syslog and ontap:ems events matching
# monitor* or config*, over the last hour. Stanza is disabled.
disabled = 1
dispatch.earliest_time = -1h
search = `ontap-index` sourcetype="ontap:syslog" OR sourcetype="ontap:ems" (monitor* OR config*) | timechart count by host
|
|
|
|
[Count of Backup and Restore events over time by controller]
# Timechart, split by host, of ontap:syslog and ontap:ems events matching
# backup or restor*, over the last hour. Stanza is disabled.
disabled = 1
dispatch.earliest_time = -1h
search = `ontap-index` sourcetype="ontap:syslog" OR sourcetype="ontap:ems" (backup OR restor*) | timechart count by host
|
|
|
|
[Count of Optimization and Migration events over time by controller]
# Timechart, split by host, of ontap:syslog and ontap:ems events matching
# optimiz* or migrat*, over the last hour. Stanza is disabled.
disabled = 1
dispatch.earliest_time = -1h
search = `ontap-index` sourcetype="ontap:syslog" OR sourcetype="ontap:ems" (optimiz* OR migrat*) | timechart count by host
|
|
|
|
[Count of Provisioning and Cloning events over time by controller]
# Timechart, split by host, of ontap:syslog and ontap:ems events matching
# provision* or clon*, over the last hour. Stanza is disabled.
disabled = 1
dispatch.earliest_time = -1h
search = `ontap-index` sourcetype="ontap:syslog" OR sourcetype="ontap:ems" (provision* OR clon*) | timechart count by host
|
|
|
|
[Volume Capacity Delta Table]
# For each host/name pair, compares storage_used and storage_used_percent
# between the first and last event returned over the last 24 hours and
# reports the differences as capacity_change and percent_change.
# Stanza is disabled.
description = Shows the delta of storage of all volumes between two different points in time
disabled = 1
dispatch.earliest_time = -24h
# The fallback volume name lives in a field whose name contains '-' and '.',
# so eval must reference it in single quotes; $...$ is saved-search token
# syntax, not a field reference.
search = `ontap-index` sourcetype=ontap:volume storage_used=* | eval name=if(isnull(name),'volume-id-attributes.name',name) | table _time,host, storage_used,storage_used_percent,name | stats first(storage_used) as posterior_storage_used last(storage_used) as prior_storage_used first(storage_used_percent) as posterior_storage_used_percent last(storage_used_percent) as prior_storage_used_percent last(_time) as prior_time first(_time) as _time by host,name | eval percent_change=posterior_storage_used_percent-prior_storage_used_percent | eval capacity_change=posterior_storage_used-prior_storage_used | convert ctime(prior_time) | table prior_time,_time,host,name,prior_storage_used,posterior_storage_used,prior_storage_used_percent,posterior_storage_used_percent,percent_change,capacity_change
|
|
|
|
|
|
[Aggregates with over 90% capacity used]
# Lists aggregates whose size-percentage-used is above 90, one row per
# name/host pair, with total and free size converted through the
# BytesToGigaBytes macro. Covers the last 24 hours. Stanza is disabled.
description = Shows all the Aggregates that have over 90% capacity used, in the last 24 hours
disabled = 1
dispatch.earliest_time = -24h
search = `ontap-index` sourcetype="ontap:aggr" (source="aggr-list-info" OR source="aggr-get-iter") | `CoalesceAggrFields` | search size-percentage-used > 90 | dedup name, host | eval "gb-total"=`BytesToGigaBytes(sz_total)` | eval "gb-free"=`BytesToGigaBytes(sz_free)` | table name, host, volume-count, size-percentage-used, "gb-total", "gb-free"
|
|
|
|
|
|
[Volumes with over 75% capacity used]
# Lists volumes whose percentage-used is 75 or higher, sorted by
# percentage-used descending, with total and used size converted through the
# BytesToGigaBytes macro. Covers the last 24 hours. Stanza is disabled.
# The description states the threshold the search actually applies (>= 75).
description = Shows all Volumes that have 75% or more capacity used, in the last 24 hours
disabled = 1
dispatch.earliest_time = -24h
# dedup is keyed on host as well as name so that same-named volumes on
# different hosts each keep a row (host is one of the output columns).
search = `ontap-index` sourcetype="ontap:volume" (source=volume-get-iter) OR (source=volume-list-info-iter-start) | `CoalesceVolumeFields` | search percentage-used >= 75 | dedup host, name | eval "gb-total"=`BytesToGigaBytes(sz_total)` | eval "gb-used"=`BytesToGigaBytes(sz_used)` | table host, name, containing-aggregate, percentage-used, "gb-total", "gb-used", snapshot-percent-reserved | sort - percentage-used
|
|
|
|
[Volumes with latency higher than 25ms over 5% of the time]
# Per host/objname: flags events with latency > 25, computes the rounded
# percentage of flagged events plus max and average latency, and keeps rows
# where that percentage is above 5. objname is renamed to volume.
# Covers the last 24 hours. Stanza is disabled.
description = Shows all Volumes that have latency exceeding 25 msec more than 5% of the time
disabled = 1
dispatch.earliest_time = -24h
search = `ontap-index` sourcetype=ontap:perf source=VolumePerfHandler objname="*" | eval ismatch=if(latency>25, 1, 0) | stats count, sum(ismatch) AS matchCount, max(latency) AS max_latency, avg(latency) AS avg_latency by host, objname | eval percentage=round(100*matchCount/count,0) | search percentage > 5 | fields - count, matchCount | rename objname AS volume
|
|
|
|
[Disk block transfer rates by Controller and RPM]
# Averages total_transfers_rate, user_read_blocks_rate and
# user_write_blocks_rate per host, display_name and disk_speed, then renames
# disk_speed to rpm and display_name to disk_name.
# Covers the last 24 hours. Stanza is disabled.
description = Shows block transfer rates and RPM for all disks associated with a controller
disabled = 1
dispatch.earliest_time = -24h
search = `ontap-index` source="diskperfhandler" objname=* | stats avg(total_transfers_rate), avg(user_read_blocks_rate), avg(user_write_blocks_rate) by host, display_name, disk_speed | rename disk_speed AS rpm, display_name AS disk_name
|
|
|
|
[Top 10 Busiest Controllers - 7 mode and Cluster mode]
# Takes the first total/read/write ops rates and cpu_busy_percent seen per
# host in SystemPerfHandler perf events, sorts by total_ops_rate descending
# and keeps the top 10 hosts. Covers the last 24 hours. Stanza is disabled.
description = Shows the top 10 controllers with highest total_ops_rate
disabled = 1
dispatch.earliest_time = -24h
# No comma between the last aggregation and the by clause.
search = `ontap-index` sourcetype=ontap:perf source="SystemPerfHandler" | stats first(total_ops_rate) AS total_ops_rate, first(read_ops_rate) AS read_ops_rate, first(write_ops_rate) AS write_ops_rate, first(cpu_busy_percent) AS cpu_busy_percent by host | sort - total_ops_rate | head 10
|
|
|
|
[Failed Disks]
# Lists ontap:disk events whose raid-state is "broken", with physical-space
# converted through the BytesToGigaBytes macro into phys-space-gb.
# Covers the last 24 hours. Stanza is disabled.
# The description names raid-state, the field the search filters on.
description = Shows the disks that have raid-state as "broken"
disabled = 1
dispatch.earliest_time = -24h
search = `ontap-index` sourcetype="ontap:disk" raid-state="broken" | rename physical-space as pspace | eval phys-space-gb=`BytesToGigaBytes(pspace)` | table host, serial-number, name, raid-state, raid-type, disk-type, firmware-revision, rpm, phys-space-gb, aggregate, shelf, bay, pool