Deploiement_Server/deployment-apps/Splunk_ML_Toolkit/default/searchbnf.conf

# fit: search-assistant help for the "fit" command (train a model on search results).
# The description is one value spread over many physical lines: every line that
# ends in a backslash continues on the next one, so do not insert comments or
# blank lines inside it.
# NOTE(review): "\p\" and "\i\" look like search-assistant formatting markers
# (paragraph break / indented new line) - confirm against searchbnf.conf.spec.
[fit-command]
syntax = fit <algorithm> (<option_name>=<option_value>)* (<field>)+ (from (<field>)+)? (into <model_name>)? (as <output_field>)?
shortdesc = Fit and apply a machine learning model to search results.
description = The first argument, which is required, is the algorithm to use. \
             The following algorithms are available: \i\\
             <LinearRegression>, <LogisticRegression>, <SVM>, <PCA>, <KMeans>, <DBSCAN>, \
             <Birch>, <ElasticNet>, <Lasso>, <Ridge>, <FieldSelector>, <BernoulliNB>, \
             <GaussianNB>, <KernelPCA>, <OneClassSVM>, <RandomForestClassifier>, \
             <RandomForestRegressor>, <SpectralClustering>, <StandardScaler>, <TFIDF> \p\\
             All algorithms require a list of fields to use when learning a model. \
             For classification and regression algorithms, the first \
             field is the field to predict (i.e. response field), \
             followed by the keyword "from". \
             Subsequent fields are the fields to use when \
             making predictions (i.e. explanatory fields). \
             For unsupervised learning (e.g. <KMeans>) and transforming algorithms \
             (e.g. <TFIDF>), the fields given \
             are the fields over which to learn the model (e.g. for <KMeans>, \
             which fields to cluster over). \p\\
             Use the "as" keyword to rename the field added to \
             search results by the model. \p\\
             Use the "into" keyword to store the learned model \
             in an artifact that can be applied later to new search \
             results with the "apply" command. Not all algorithms support saved models. \p\\
             Some algorithms support options that can be given as <name=value> \
             arguments. For instance, <KMeans> and <PCA> both support a \
             "k" option that specifies how many clusters or how many \
             principal components to learn.
# Worked examples: each commentN is the caption for the exampleN that follows it.
comment1 = Fit a LinearRegression model to predict errors using _time:
example1 = ... | fit LinearRegression errors from _time
comment2 = Fit a LinearRegression model and save the results as a model named "errors_over_time":
example2 = ... | fit LinearRegression errors from _time into errors_over_time
comment3 = Fit a LogisticRegression model to predict a categorical response from numerical measurements:
example3 = ... | fit LogisticRegression species from petal_length petal_width sepal_length sepal_width
usage = public
# The other model-related commands documented in this file.
related = apply summary listmodels deletemodel

# apply: search-assistant help for the "apply" command, which runs a model
# previously stored by "fit ... into <model_name>" against the current results.
# The description is a single backslash-continued value; keep comments out of it.
[apply-command]
syntax = apply <model_name> (as <output_field>)?
shortdesc = Applies a machine learning model that was learned using the "fit" command.
description = This command can be used on different search \
            results than those used when fitting the model, but the results \
            should have an identical list of fields.\p\\
            Use the "as" keyword to rename the field added to the search results by the model.
# Examples reuse the "errors_over_time" model name saved in the fit examples.
comment1 = Apply a previously-learned model:
example1 = ... | apply errors_over_time
comment2 = Rename the output of the model as "predicted_errors":
example2 = ... | apply errors_over_time as predicted_errors
usage = public
related = fit summary listmodels deletemodel

# score: search-assistant help for the "score" command.
# The description is a single backslash-continued value; keep comments out of it.
[score-command]
syntax = score <scoring method> (<option_name>=<option_value>)* (<field>)+ (against (<field>)+)?
shortdesc = Returns a score of fields according to the specified scoring method.
description = The first argument, which is required, is the scoring method \
            to use (e.g. ttest_ind, accuracy_score, describe). \p\\
            The scoring method is followed by one or more fields to score. \
            Use the "against" keyword to give a second list of fields to \
            score the first list against (e.g. a ground truth field \
            against a predicted field). Not all scoring methods take an \
            "against" clause. \p\\
            Some scoring methods support options that can be given as \
            name=value arguments.
# Worked examples: each commentN is the caption for the exampleN that follows it.
comment1 = Perform an independent t-test between two groups of fields:
example1 = ... | score ttest_ind petal_length petal_width against sepal_length sepal_width
comment2 = Calculate the accuracy score using the ground truth field and the predicted field:
example2 = ... | score accuracy_score species against predicted(species)
comment3 = Get basic statistical quantities for a set of fields:
example3 = ... | score describe petal_length petal_width sepal_length sepal_width
usage = public
related = fit stats

# summary: search-assistant help for the "summary" command, which inspects a
# model saved by "fit". The example starts with "|" rather than "... |", i.e.
# it is written as the first command of a search.
[summary-command]
syntax = summary <model_name>
shortdesc = Returns a summary of a machine learning model that was learned using the "fit" command.
description = The summary is algorithm specific. For <LinearRegression>, \
            the summary is a list of coefficients. For \
            <LogisticRegression>, the summary is a list of \
            coefficients for each class.
comment1 = Inspect a previously-learned LinearRegression model, "errors_over_time":
example1 = | summary errors_over_time
usage = public
related = fit apply listmodels deletemodel

# listmodels: search-assistant help for the "listmodels" command. It takes no
# arguments and, like the summary example, is written as the first command of
# a search ("| listmodels").
[listmodels-command]
syntax = listmodels
shortdesc = Returns a list of machine learning models that were learned using the "fit" command.
description = For each model, this command displays the algorithm and arguments given when \
            "fit" was invoked. Use the "deletemodel" command to delete a model.
comment1 = List all models:
example1 = | listmodels
usage = public
related = fit apply summary deletemodel

# deletemodel: search-assistant help for the "deletemodel" command, which
# removes a model saved by "fit ... into <model_name>".
[deletemodel-command]
syntax = deletemodel <model_name>
shortdesc = Deletes a machine learning model that was learned using the "fit" command.
description = Deletes the named model. Use the "listmodels" command to list all models.
comment1 = Delete the "errors_over_time" model:
example1 = | deletemodel errors_over_time
usage = public
related = fit apply summary listmodels

##################
# sample
#
# Search-assistant help for the "sample" command, followed by one
# [sample-*-option] stanza per option.
# The syntax line spells every option out inline; the option stanzas further
# down repeat the same forms with a per-option description, so keep the two
# in sync when editing either.
# NOTE(review): "\p\" inside the description looks like a search-assistant
# paragraph marker - confirm against searchbnf.conf.spec.
##################
[sample-command]
syntax = sample (ratio=<float between 0 and 1>)? (count=<positive integer>)? (proportional=<name of numeric field> (inverse)?)? (partitions=<natural number greater than 1> (fieldname=<string>)?)? (seed=<number>)? (by <split_by_field>)?
shortdesc = Randomly samples or partitions events.
description = The "sample" command samples in different modes: ratio (returns an event with the given probability), count (returns exactly that number of events), and proportional (samples each event with a probability specified by a field value).\p\The "sample" command uses partition mode to randomly divide events into a given number of partitions.
# Worked examples: one per mode (ratio, count with a split-by field, proportional, partitions).
comment1 = Retrieve approximately 1% of events at random:
example1 = ... | sample ratio=0.01
comment2 = Retrieve exactly 20 events at random from each host:
example2 = ... | sample count=20 by host
comment3 = Return each event with a probability determined by the value of "some_field":
example3 = ... | sample proportional="some_field"
comment4 = Partition events into 7 groups, with the chosen group returned in a field called partition_number:
example4 = ... | sample partitions=7 fieldname="partition_number"
usage = public
tags = sample random sampling partition

# Help for the "seed" option of the sample command; the same seed=<number>
# form appears in the [sample-command] syntax.
[sample-seed-option]
syntax = seed=<number>
description = The random seed. If unspecified, a pseudorandom value is used.

# Help for the "ratio" option, which selects the sample command's ratio mode
# (see example1 of [sample-command]: ratio=0.01).
[sample-ratio-option]
syntax = ratio=<float between 0 and 1>
description = The probability with which to sample.

# Help for the "count" option, which selects the sample command's count mode
# (see example2 of [sample-command]: count=20 by host).
[sample-count-option]
syntax = count=<positive integer>
description = The number of randomly-chosen events to return. If the sample count exceeds the total number of events in the search, all events are returned.

# Help for the split-by clause of the sample command. In the description,
# k is the value given to "count" and j is the number of events available
# for a particular value of the split-by field.
[sample-by-option]
syntax = by <split_by_field>
description = When in count mode with parameter k, adding a split-by clause returns exactly k events for each value of the specified field. If there are j < k events for some values, all j events are included in the results.

# Help for the "proportional" option, which selects the sample command's
# proportional mode (see example3 of [sample-command]).
[sample-proportional-option]
syntax = proportional=<name of numeric field>
description = The field to use for determining the sampling probability of each event.

# Help for the "inverse" flag. In the [sample-command] syntax it is nested
# inside the proportional group, i.e. it can only follow proportional=<field>.
[sample-inverse-option]
syntax = inverse
description = In proportional sampling mode, returns samples with one minus the probability specified in the proportional field.

# Help for the "partitions" option, which selects the sample command's
# partition mode (see example4 of [sample-command]: partitions=7).
[sample-partitions-option]
syntax = partitions=<natural number greater than 1>
description = The number of partitions to create.

# Help for the "fieldname" option. In the [sample-command] syntax it is nested
# inside the partitions group, i.e. it can only follow partitions=<n>.
[sample-fieldname-option]
syntax = fieldname=<string>
description = The name of the field in which to store the partition number. Defaults to 'partition_number'.