Providing quick access to R.E.D. metrics from your Splunk APM | sim flow query="def weighted_duration(base, p, filter_, groupby): error_durations = data(base + '.duration.ns.' + p, filter=filter_ and filter('sf_error', 'true'), rollup='max').mean(by=groupby, allow_missing=['sf_httpMethod']) non_error_durations = data(base + '.duration.ns.' + p, filter=filter_ and filter('sf_error', 'false'), rollup='max').mean(by=groupby, allow_missing=['sf_httpMethod']) error_counts = data(base + '.count', filter=filter_ and filter('sf_error', 'true'), rollup='sum').sum(by=groupby, allow_missing=['sf_httpMethod']) non_error_counts = data(base + '.count', filter=filter_ and filter('sf_error', 'false'), rollup='sum').sum(by=groupby, allow_missing=['sf_httpMethod']) error_weight = (error_durations * error_counts).sum(over='1m') non_error_weight = (non_error_durations * non_error_counts).sum(over='1m') total_weight = combine((error_weight if error_weight is not None else 0) + (non_error_weight if non_error_weight is not None else 0)) total = combine((error_counts if error_counts is not None else 0) + (non_error_counts if non_error_counts is not None else 0)).sum(over='1m') return (total_weight / total) filter_ = filter('sf_environment', '*') and filter('sf_service', '*') and filter('sf_error','*') and not filter('sf_dimensionalized', '*') groupby = ['sf_service', 'sf_environment', 'sf_error'] weighted_duration('service.request', 'median', filter_, groupby).publish(label='medianLatency')" | search sf_service = "$o11y_sf_service$" | stats avg(_value) as medianLatency by sf_environment $earliest$ $latest$ 1 | mstats avg(*) span=5m WHERE `itsi-cp-observability-indexes` AND sf_streamLabel="thruput_avg_rate" GROUPBY sf_service sf_environment | rename avg(service.request.count) as "thruput_avg_rate" | search sf_service = "$o11y_sf_service$" | eval thruput_avg_rate = thruput_avg_rate | timechart avg(thruput_avg_rate) by sf_service $earliest$ $latest$ 1 | sim flow query="filter_ = filter('sf_environment', '*') and filter('sf_service', '*') and (not filter('sf_dimensionalized', '*')) Errors = data('service.request.count', filter=filter_ and filter('sf_error', 'true'), rollup='delta').sum(by=['sf_environment', 'sf_service']).publish(label='Errors', enable=False) Requests = data('service.request.count', filter=filter_, rollup='delta').sum(by=['sf_environment', 'sf_service']).publish(label='Requests', enable=False) ErrorRate = combine(100*((Errors if Errors is not None else 0) / Requests)).publish(label='ErrorRate', enable=True)" | search sf_service = "$o11y_sf_service$" | stats avg(_value) as ErrorRate by _time $earliest$ $latest$ 1
-24h@h now sf_service sf_service | mstats avg(*) span=5m WHERE `itsi-cp-observability-indexes` AND sf_environment="*" GROUPBY sf_service |dedup sf_service |table sf_service |sort sf_service asc -15m now All *
RATE: Splunk APM (SINGLE VALUE) Request rate | mstats avg(*) span=5m WHERE `itsi-cp-observability-indexes` AND sf_streamLabel="thruput_avg_rate" GROUPBY sf_service sf_environment | rename avg(service.request.count) as "thruput_avg_rate" | search sf_service = "$o11y_sf_service$" | eval thruput_avg_rate = thruput_avg_rate | stats avg(thruput_avg_rate) as DurationRate -24h@h now RATE: Splunk APM Rate as throughput over time ERROR: Splunk APM (SINGLE VALUE) | sim flow query="filter_ = filter('sf_environment', '*') and filter('sf_service', '*') and (not filter('sf_dimensionalized', '*')) Errors = data('service.request.count', filter=filter_ and filter('sf_error', 'true'), rollup='delta').sum(by=['sf_environment', 'sf_service']).publish(label='Errors', enable=False) Requests = data('service.request.count', filter=filter_, rollup='delta').sum(by=['sf_environment', 'sf_service']).publish(label='Requests', enable=False) ErrorRate = combine(100*((Errors if Errors is not None else 0) / Requests)).publish(label='ErrorRate', enable=True)" | search sf_service = "$o11y_sf_service$" | stats avg(_value) as ErrorRate ERROR: Splunk APM Error percentage over time DURATION: Splunk APM (SINGLE VALUE) | sim flow query="def weighted_duration(base, p, filter_, groupby): error_durations = data(base + '.duration.ns.' + p, filter=filter_ and filter('sf_error', 'true'), rollup='max').mean(by=groupby, allow_missing=['sf_httpMethod']) non_error_durations = data(base + '.duration.ns.' + p, filter=filter_ and filter('sf_error', 'false'), rollup='max').mean(by=groupby, allow_missing=['sf_httpMethod']) error_counts = data(base + '.count', filter=filter_ and filter('sf_error', 'true'), rollup='sum').sum(by=groupby, allow_missing=['sf_httpMethod']) non_error_counts = data(base + '.count', filter=filter_ and filter('sf_error', 'false'), rollup='sum').sum(by=groupby, allow_missing=['sf_httpMethod']) error_weight = (error_durations * error_counts).sum(over='1m') non_error_weight = (non_error_durations * non_error_counts).sum(over='1m') total_weight = combine((error_weight if error_weight is not None else 0) + (non_error_weight if non_error_weight is not None else 0)) total = combine((error_counts if error_counts is not None else 0) + (non_error_counts if non_error_counts is not None else 0)).sum(over='1m') return (total_weight / total) filter_ = filter('sf_environment', '*') and filter('sf_service', '*') and filter('sf_error','*') and not filter('sf_dimensionalized', '*') groupby = ['sf_service', 'sf_environment', 'sf_error'] weighted_duration('service.request', 'median', filter_, groupby).publish(label='medianLatency')" | search sf_service = "$o11y_sf_service$" | stats avg(_value) as medianLatency | eval medianLatency=medianLatency/100000000/300 -24h@h now DURATION: Splunk APM Duration as median latency by environment | eval medianLatency=medianLatency/10000000