You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
Splunk_Deploiement/dashboards/trackme_trackMe_audit_adapt...

446 lines
14 KiB

<dashboard version="2" theme="dark">
<label>TrackMe - Adaptive delay threshold audit (global audit)</label>
<description>This dashboard audits the activity and behaviour of the adaptive delay thresholding for TrackMe feeds components</description>
<definition><![CDATA[
{
"dataSources": {
"ds_search_1": {
"type": "ds.search",
"options": {
"query": "index=_internal sourcetype=trackme:custom_commands:trackmesplkadaptivedelay tenant_id=$tk_tenant$ component=$tk_component$\n| rex field=sourcetype \"trackme:custom_commands:(?<command>.*)\"\n| table _time, log_level, command, _raw\n| sort - _time",
"queryParameters": {
"earliest": "$global_time.earliest$",
"latest": "$global_time.latest$"
}
},
"name": "logging_events"
},
"ds_UpugjNjy": {
"type": "ds.search",
"options": {
"query": "index=_internal sourcetype=trackme:custom_commands:trackmesplkadaptivedelay tenant_id=$tk_tenant$ component=$tk_component$\n| rex field=sourcetype \"trackme:custom_commands:(?<command>.*)\"\n| timechart minspan=5m limit=0 count by log_level",
"queryParameters": {
"earliest": "$global_time.earliest$",
"latest": "$global_time.latest$"
}
},
"name": "events_by_log_level"
},
"ds_yHwHGBpa": {
"type": "ds.search",
"options": {
"query": "| inputlookup trackme_virtual_tenants | eval keyid=_key\n| where tenant_status=\"enabled\" AND (tenant_dsm_enabled=1 OR tenant_dhm_enabled=1) AND tenant_replica=0\n| stats count by tenant_id\n| sort 0 tenant_id",
"queryParameters": {
"earliest": "-5m",
"latest": "now"
}
},
"name": "populate_tenants"
},
"ds_diTMqSWx": {
"type": "ds.search",
"options": {
"query": "`trackme_audit_idx` tenant_id=$tk_tenant$ object_category=$tk_component$ \"automated adaptive delay update\"\n| table _time, tenant_id, object_category, object, action, comment\n| sort 0 - _time | trackmeprettyjson fields=comment",
"queryParameters": {
"earliest": "$global_time.earliest$",
"latest": "$global_time.latest$"
}
},
"name": "audit_adaptive_table"
},
"ds_o8rZrPBE_ds_UpugjNjy": {
"type": "ds.search",
"options": {
"query": "`trackme_audit_idx` tenant_id=$tk_tenant$ object_category=$tk_component$ \"automated adaptive delay update\"\n| table _time, tenant_id, object_category, object, action, change_type, comment\n| sort 0 - _time | trackmeprettyjson fields=comment\n| spath input=comment\n| rename results.adaptive_delay as adaptive_delay\n| timechart minspan=1m useother=f limit=40 first(adaptive_delay) as adaptive_delay by object",
"queryParameters": {
"earliest": "$global_time.earliest$",
"latest": "$global_time.latest$"
}
},
"name": "overtime_adaptive_actions"
}
},
"visualizations": {
"viz_table_1": {
"type": "splunk.table",
"options": {
"columnFormat": {
"log_level": {
"data": "> table | seriesByName(\"log_level\") | formatByType(log_levelColumnFormatEditorConfig)",
"rowColors": "> table | seriesByName(\"log_level\") | matchValue(log_levelRowColorsEditorConfig)"
}
},
"count": 100
},
"context": {
"log_levelColumnFormatEditorConfig": {
"string": {
"unitPosition": "after"
}
},
"log_levelRowColorsEditorConfig": [
{
"match": "WARNING",
"value": "#DD9900"
},
{
"match": "INFO",
"value": "#00CDAF"
},
{
"match": "ERROR",
"value": "#FF677B"
},
{
"match": "DEBUG",
"value": "#009CEB"
}
]
},
"dataSources": {
"primary": "ds_search_1"
},
"title": "Logging:",
"description": "This shows logged events from the adaptive delay threshold backend"
},
"viz_dtUfQMrD": {
"type": "splunk.column",
"options": {
"stackMode": "stacked",
"seriesColorsByField": {
"ERROR": "#FF677B",
"WARNING": "#DD9900",
"INFO": "#00CDAF",
"DEBUG": "#009CEB"
}
},
"dataSources": {
"primary": "ds_UpugjNjy"
},
"title": "Logging: events by logging level over time",
"description": "This shows events over time shown by their logging level"
},
"viz_NmxZjn2m": {
"type": "splunk.image",
"options": {
"preserveAspectRatio": true,
"src": "../../static/app/trackme/icons/trackme.png"
}
},
"viz_QmsYbcgT": {
"type": "splunk.table",
"options": {
"columnFormat": {
"action": {
"data": "> table | seriesByName(\"action\") | formatByType(actionColumnFormatEditorConfig)",
"rowColors": "> table | seriesByName(\"action\") | matchValue(actionRowColorsEditorConfig)"
},
"comment": {
"data": "> table | seriesByName(\"comment\") | formatByType(commentColumnFormatEditorConfig)"
}
},
"count": 100
},
"context": {
"actionColumnFormatEditorConfig": {
"string": {
"unitPosition": "after"
}
},
"actionRowColorsEditorConfig": [
{
"match": "WARNING",
"value": "#DD9900"
},
{
"match": "success",
"value": "#00CDAF"
},
{
"match": "failure",
"value": "#FF677B"
},
{
"match": "DEBUG",
"value": "#009CEB"
}
],
"commentColumnFormatEditorConfig": {
"string": {
"unitPosition": "after"
}
}
},
"dataSources": {
"primary": "ds_diTMqSWx"
},
"title": "Adaptive delay threshold audit update traces"
},
"viz_WWQmnNzo": {
"type": "splunk.column",
"dataSources": {
"primary": "ds_o8rZrPBE_ds_UpugjNjy"
},
"title": "Adaptive response actions by object",
"description": "This shows actions performed by the adaptive delay backend and the threshold value defined",
"options": {
"dataValuesDisplay": "all",
"xAxisTitleVisibility": "hide",
"yAxisTitleText": "Threshold seconds"
}
},
"viz_2O9rRdJE": {
"type": "splunk.markdown",
"options": {
"markdown": "# Adaptive Delay Thresholding in TrackMe\n## Behavior:\n- The adaptive threshold tracker monitors the status of feed entities currently in alert due to delay threshold breach (anomaly_reason=delay_threshold_breached). \n- This tracker invokes the command *trackmesplkadaptivedelay* for entities matching specific conditions, which then investigates historical metrics collected by TrackMe.\n\n## Dynamic Threshold Logic Attribution:\n\nAs a basis, TrackMe automatically runs the following mstats search (over 90 days of metrics):\n\n```\n| mstats latest(trackme.splk.feeds.lag_event_sec) as lag_event_sec where `trackme_metrics_idx(mytenant)` tenant_id=\"mytenant\" object_category=\"splk-dsm\" object=\"myobject\" OR object=\"myobject2\" by tenant_id, object_category, object span=5m\n| stats perc95(lag_event_sec) as perc95_lag_event_sec, max(lag_event_sec) as max_lag_event_sec by object\n| foreach *lag_event_sec [ eval <<FIELD>> = round('<<FIELD>>', 0) ]\n| eval perc95_days_unit = perc95_lag_event_sec/86400, perc95_duration = tostring(perc95_lag_event_sec, \"duration\")\n| lookup trackme_dsm_tenant_01-feeds object OUTPUT data_last_lag_seen as current_lag_event_sec\n| where current_lag_event_sec>7200\n| eval diff_perc95 = max_lag_event_sec-perc95_lag_event_sec, diff_duration=tostring(diff_perc95, \"duration\"), diff_proportion=round(diff_perc95/perc95_lag_event_sec*100, 2)\n| where diff_proportion<25\n| eval adaptive_delay = round(max_lag_event_sec/3600, 0) * 3600, adaptive_delay_duration = tostring(adaptive_delay, \"duration\")\n```\n\nTrackMe later on reviews previously updated entities using more sophisticated variations of this logic.\n\n### Further logic and previously processed review:\n\n- TrackMe reviews previously updated entities automatically.\n- Entities updated since less than 4 hours are temporarily ignored.\n- Entities updated since more than 4 hours and within the 24 hours and where the threshold was increased are reviewed for further update.\n- Beyond these conditions, entities updated since the past 7 days are reviewed and updated depending on the conditions.\n\n### Key Tracker Level Arguments: \n\n#### min_delay_sec\n\n- This defines the minimum delay value in seconds for entities to be considered (2 hours by default).\n\n#### max_auto_delay_sec\n\n- This defines the maximal delay value that the adaptive backend can set, if the automated delay calculation goes beyond it, this value will be used instead, expressed in seconds.\n\n#### max_changes_past_7days\n\n- This defines the maximal number of changes that can be performed in a 7 days time frame, once reached we will not update this entity again until the counter is reset.\n\n#### min_historical_metrics_days\n\n- The minimal number of accumulated days of metrics before we start updating the delay threshold, expressed in days.\n\n#### review_period_no_days\n\n- The relative time period for review. When entities were updated, TrackMe will review over time the behaviour and eventually adapt the threshold to take into account new patterns, expressed in number of days, valid options: 7, 15, 30.\n\n### Updating Delay Thresholds Automatically:\n\n- After performing these investigations, the command updates the delay threshold value for selected entities, and generates an audit record with corresponding results (context: automated adaptive delay update).\n- Audit messages can be found with the following search:\n\n*Example:*\n\n```\n`trackme_audit_idx` tenant_id=* \"automated adaptive delay update\"\n| table _time, tenant_id, object_category, object, action, comment\n| sort 0 - _time | trackmeprettyjson fields=comment\n```\n\nActivity log traces can be found in:\n\n```\nindex=_internal sourcetype=trackme:custom_commands:trackmesplkadaptivedelay\n```\n\n### Preventing an Entity from Being Automatically Managed\n\n- Via the UI, you can set the value of ``allow_adaptive_delay`` to False, which prevents TrackMe from automatically updating the delay threshold for a given entity."
}
},
"viz_NUqP7Fjk": {
"type": "abslayout.line",
"options": {
"strokeDasharray": 4
}
},
"viz_XMHDnORn": {
"type": "abslayout.line",
"options": {
"strokeDasharray": 4
}
},
"viz_IuV33TS1": {
"type": "splunk.markdown",
"options": {
"markdown": "# Adaptive threshold audit traces"
}
},
"viz_IiBC8GdB": {
"type": "splunk.markdown",
"options": {
"markdown": "# Adaptive threshold logging traces"
}
},
"viz_eCsTg4eC": {
"type": "abslayout.line",
"options": {
"strokeDasharray": 4
}
}
},
"inputs": {
"input_global_trp": {
"type": "input.timerange",
"options": {
"token": "global_time",
"defaultValue": "-24h@h,now"
},
"title": "Global Time Range:"
},
"input_kquudf7q": {
"options": {
"items": ">frame(label, value) | prepend(formattedStatics) | objects()",
"defaultValue": "*",
"token": "tk_tenant"
},
"title": "Tenant:",
"type": "input.dropdown",
"dataSources": {
"primary": "ds_yHwHGBpa"
},
"context": {
"formattedConfig": {
"number": {
"prefix": ""
}
},
"formattedStatics": ">statics | formatByType(formattedConfig)",
"statics": [
[
"All"
],
[
"*"
]
],
"label": ">primary | seriesByName(\"tenant_id\") | renameSeries(\"label\") | formatByType(formattedConfig)",
"value": ">primary | seriesByName(\"tenant_id\") | renameSeries(\"value\") | formatByType(formattedConfig)"
}
},
"input_xdlNmvhR": {
"options": {
"items": [
{
"label": "All",
"value": "*"
},
{
"label": "splk-dsm",
"value": "splk-dsm"
},
{
"label": "splk-dhm",
"value": "splk-dhm"
}
],
"defaultValue": "*",
"token": "tk_component"
},
"title": "Component:",
"type": "input.dropdown"
}
},
"layout": {
"type": "absolute",
"options": {
"display": "auto-scale",
"width": 1330,
"height": 3400
},
"structure": [
{
"item": "viz_table_1",
"type": "block",
"position": {
"x": 10,
"y": 2760,
"w": 1310,
"h": 600
}
},
{
"item": "viz_dtUfQMrD",
"type": "block",
"position": {
"x": 10,
"y": 2320,
"w": 1310,
"h": 430
}
},
{
"item": "viz_NmxZjn2m",
"type": "block",
"position": {
"x": 1190,
"y": -90,
"w": 120,
"h": 300
}
},
{
"item": "viz_QmsYbcgT",
"type": "block",
"position": {
"x": 10,
"y": 1490,
"w": 1310,
"h": 720
}
},
{
"item": "viz_WWQmnNzo",
"type": "block",
"position": {
"x": 10,
"y": 1040,
"w": 1310,
"h": 430
}
},
{
"item": "viz_2O9rRdJE",
"type": "block",
"position": {
"x": 10,
"y": 30,
"w": 1310,
"h": 920
}
},
{
"item": "viz_NUqP7Fjk",
"type": "line",
"position": {
"from": {
"x": 7,
"y": 16
},
"to": {
"x": 1323,
"y": 15
}
}
},
{
"item": "viz_XMHDnORn",
"type": "line",
"position": {
"from": {
"x": 9,
"y": 968
},
"to": {
"x": 1322,
"y": 968
}
}
},
{
"item": "viz_IuV33TS1",
"type": "block",
"position": {
"x": 10,
"y": 980,
"w": 510,
"h": 40
}
},
{
"item": "viz_IiBC8GdB",
"type": "block",
"position": {
"x": 10,
"y": 2260,
"w": 510,
"h": 40
}
},
{
"item": "viz_eCsTg4eC",
"type": "line",
"position": {
"from": {
"x": 10,
"y": 2246
},
"to": {
"x": 1325,
"y": 2245
}
}
}
],
"globalInputs": [
"input_global_trp",
"input_kquudf7q",
"input_xdlNmvhR"
]
},
"title": "TrackMe - Adaptive delay threshold audit (global audit)",
"defaults": {
"dataSources": {
"ds.search": {
"options": {
"queryParameters": {
"latest": "$global_time.latest$",
"earliest": "$global_time.earliest$"
}
}
}
}
},
"description": "This dashboard audits the activity and behaviour of the adaptive delay thresholding for TrackMe feeds components"
}
]]> </definition>
<meta type="hiddenElements"><![CDATA[
{
"hideEdit": false,
"hideOpenInSearch": false,
"hideExport": false
}
]]> </meta>
</dashboard>