You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
339 lines
11 KiB
339 lines
11 KiB
#!/usr/bin/env python
|
|
import json
|
|
import re
|
|
from ast import literal_eval
|
|
|
|
from .base_util import is_valid_identifier
|
|
|
|
# Global parameter-parsing regex (scoring and fitting)
|
|
# Matches a `name=value` token: group 1 is the identifier-like parameter name,
# group 2 is the raw text after the `=` (surrounding whitespace is skipped).
params_re = re.compile(r"([_a-zA-Z][_a-zA-Z0-9]*)\s*=\s*(.*)")
|
|
|
|
|
|
def is_truthy(s):
    """Report whether *s* spells an affirmative token.

    The value is converted with ``str()`` and compared case-insensitively
    against: 1, t, true, y, yes, enable, enabled.
    """
    affirmative_tokens = ('1', 't', 'true', 'y', 'yes', 'enable', 'enabled')
    normalized = str(s).lower()
    return normalized in affirmative_tokens
|
|
|
|
|
|
def is_falsy(s):
    """Report whether *s* spells a negative token.

    The value is converted with ``str()`` and compared case-insensitively
    against: 0, f, false, n, no, disable, disabled.
    """
    negative_tokens = ('0', 'f', 'false', 'n', 'no', 'disable', 'disabled')
    normalized = str(s).lower()
    return normalized in negative_tokens
|
|
|
|
|
|
def booly(s):
    """Convert a truthy/falsy token into a real boolean.

    Returns:
        (bool): True when ``is_truthy(s)`` holds, False when ``is_falsy(s)`` holds.

    Raises:
        RuntimeError: if *s* is recognised as neither truthy nor falsy.
    """
    truthy = is_truthy(s)
    # Only consult is_falsy when the value was not already accepted as truthy.
    if not truthy and not is_falsy(s):
        raise RuntimeError('Failed to convert "%s" to a boolean value' % str(s))
    return truthy
|
|
|
|
|
|
def unquote_arg(arg):
    """Strip one pair of matching surrounding quotes from *arg*.

    Args:
        arg (str): token that may be wrapped in single or double quotes.

    Returns:
        (str): *arg* without its first and last character when both are the
        same quote character; otherwise *arg* unchanged.
    """
    # Require at least two characters: a lone quote character is its own first
    # and last character, but it is not a quoted (empty) string.
    if len(arg) >= 2 and arg[0] in ('"', "'") and arg[0] == arg[-1]:
        return arg[1:-1]
    return arg
|
|
|
|
|
|
def _comma_separated_to_list(arg):
|
|
return arg.strip(',').split(',')
|
|
|
|
|
|
def convert_params(
    params,
    floats=None,
    ints=None,
    strs=None,
    bools=None,
    ranges=None,
    multiple_floats=None,
    aliases=None,
    ignore_extra=False,
):
    """Convert key-value pairs into their declared types, raising on bad values.

    Each key of *params* is looked up, in this order, in *floats*, *ints*,
    *strs*, *bools*, *ranges* and *multiple_floats*; the first collection that
    contains it decides how the value is converted.

    Args:
        params (dict): raw parameter names mapped to raw values.
        floats (list): names converted with ``float()``.
        ints (list): names converted with ``int()``.
        strs (list): names unquoted and converted with ``str()``; the result
            must be non-empty.
        bools (list): names converted with ``booly()``.
        ranges (list): names whose value looks like ``"1-5"``; converted to a
            2-tuple of ints.
        multiple_floats (list): names holding one float or a (possibly quoted)
            comma-separated list of floats; converted to a tuple of floats.
        aliases (dict): maps an input name to the key used in the output.
        ignore_extra (bool): when True, names found in none of the collections
            are skipped instead of raising.

    Returns:
        (dict): converted values keyed by the (aliased) parameter name.

    Raises:
        RuntimeError: if a value cannot be converted, or if a name is not
            declared anywhere and *ignore_extra* is False.
    """
    floats = [] if floats is None else floats
    ints = [] if ints is None else ints
    strs = [] if strs is None else strs
    bools = [] if bools is None else bools
    ranges = [] if ranges is None else ranges
    multiple_floats = [] if multiple_floats is None else multiple_floats
    aliases = {} if aliases is None else aliases

    out_params = {}
    for p in params:
        op = aliases.get(p, p)
        if p in floats:
            try:
                out_params[op] = float(params[p])
            except (TypeError, ValueError, OverflowError) as err:
                raise RuntimeError("Invalid value for %s: must be a float" % p) from err
        elif p in ints:
            try:
                out_params[op] = int(params[p])
            except (TypeError, ValueError, OverflowError) as err:
                raise RuntimeError("Invalid value for %s: must be an int" % p) from err
        elif p in strs:
            out_params[op] = str(unquote_arg(params[p]))
            if len(out_params[op]) == 0:
                raise RuntimeError("Invalid value for %s: must be a non-empty string" % p)
        elif p in bools:
            try:
                out_params[op] = booly(params[p])
            except RuntimeError as err:
                raise RuntimeError("Invalid value for %s: must be a boolean" % p) from err
        elif p in ranges:
            range_error = "Invalid value for %s: must be a range e.g. %s=1-5" % (p, p)
            try:
                bounds = tuple(int(i) for i in params[p].split('-'))
            except (AttributeError, TypeError, ValueError) as err:
                raise RuntimeError(range_error) from err
            # Exactly two bounds are required; note that a negative number
            # such as "-1-5" splits into three pieces and is rejected here.
            if len(bounds) != 2:
                raise RuntimeError(range_error)
            out_params[op] = bounds
        elif p in multiple_floats:
            param = params[p]
            try:
                # Fast path: the value is a single float.
                out_params[op] = (float(param),)
            except (TypeError, ValueError, OverflowError):
                try:
                    out_params[op] = tuple(
                        float(i) for i in (str(unquote_arg(param))).split(',')
                    )
                except (TypeError, ValueError) as err:
                    raise RuntimeError(
                        'Invalid value for {}: must have one or multiple float values e.g. {}="0.01,0.02,0.03".'.format(
                            p, p
                        )
                    ) from err
        elif not ignore_extra:
            raise RuntimeError("Unexpected parameter: %s" % p)

    return out_params
|
|
|
|
|
|
def parse_namespace_model_name(model_name):
    """Split a model reference into its namespace and model name.

    Accepted forms (as implemented below):
        ``name``                 -> namespace "user"
        ``namespace:name``       -> given namespace
        ``onnx:name``            -> namespace "user", ".onnx" suffix appended
        ``namespace:onnx:name``  -> given namespace, ".onnx" suffix appended

    Args:
        model_name (str): the raw model reference.

    Returns:
        (tuple): ``(namespace, real_model_name)``; the namespace is lower-cased
        and the model name carries a ``.onnx`` suffix for onnx references.

    Raises:
        RuntimeError: if there are too many ":" separators, if the middle part
            of a three-part reference is not ``onnx``, if the model name is
            rejected by ``is_valid_identifier``, or if the namespace is not
            "user" or "app".
    """
    namespace = 'user'
    is_onnx = False
    if ':' in model_name:
        values = model_name.split(':')
        if len(values) == 3:
            # Raised directly so this specific message reaches the caller
            # instead of being replaced by the generic one below.
            if values[1].lower() != 'onnx':
                raise RuntimeError('You may only specify `onnx` keyword after namespace:')
            namespace, real_model_name, is_onnx = values[0], values[2], True
        elif len(values) == 2:
            namespace, real_model_name = values[0].lower(), values[1]
            # Handle scenario where namespace is read incorrectly as onnx
            if namespace == 'onnx':
                is_onnx = True
                namespace = 'user'
        else:
            raise RuntimeError(
                'Invalid model name: you may have at most one ":" separating your namespace and model name, e.g. '
                '"app:example_model_name"'
            )
    else:
        real_model_name = model_name
    namespace = namespace.lower()

    if not is_valid_identifier(real_model_name):
        raise RuntimeError('Invalid model name "%s"' % real_model_name)

    if namespace not in ['user', 'app']:
        raise RuntimeError('You may only specify namespace "app", "user" .')

    if is_onnx:
        real_model_name = f"{real_model_name}.onnx"

    return namespace, real_model_name
|
|
|
|
|
|
def parse_args(argv):
    """Parse a tokenized argument list into an options dict.

    Recognised keywords (case-insensitive), each allowed only once:
        ``into <model>``  -> 'namespace' and 'model_name'
        ``by <a,b,...>``  -> 'split_by' (list of strings)
        ``as <name>``     -> 'output_name'
        ``from`` or ``~`` -> tokens collected so far become 'target_variable';
                             later plain tokens go to 'feature_variables'
    Tokens of the form ``name=value`` are stored under 'params'. Every other
    non-empty token is unquoted and appended to both 'args' and
    'feature_variables'.

    Args:
        argv (list): string tokens to parse. The list is consumed (emptied)
            as it is parsed.

    Returns:
        (dict): the parsed options; only keys that were encountered are set.

    Raises:
        RuntimeError: on a repeated keyword or a keyword without its argument.
    """
    options = {}
    from_seen = False

    def _keyword_value(keyword):
        # Consume and unquote the token that must follow *keyword*.
        if not argv:
            raise RuntimeError('Syntax error: "%s" keyword requires argument' % keyword)
        return unquote_arg(argv.pop(0))

    while argv:
        arg = argv.pop(0)
        keyword = arg.lower()
        if keyword == 'into':
            if 'model_name' in options:
                raise RuntimeError('Syntax error: you may specify "into" only once')

            raw_model_name = _keyword_value('into')
            options['namespace'], options['model_name'] = parse_namespace_model_name(
                raw_model_name
            )
            if not options['model_name'] or not options['namespace']:
                raise RuntimeError('Syntax error: "into" keyword requires argument')
        elif keyword == 'by':
            if 'split_by' in options:
                raise RuntimeError('Syntax error: you may specify "by" only once')

            split_by = _keyword_value('by')
            # Explicit check rather than `assert`, which is stripped under -O.
            if not split_by:
                raise RuntimeError('Syntax error: "by" keyword requires argument')
            options['split_by'] = _comma_separated_to_list(split_by)
        elif keyword == 'as':
            if 'output_name' in options:
                raise RuntimeError('Syntax error: you may specify "as" only once')

            output_name = _keyword_value('as')
            if not output_name:
                raise RuntimeError('Syntax error: "as" keyword requires argument')
            options['output_name'] = output_name
        elif keyword == 'from' or arg == "~":
            if from_seen:
                raise RuntimeError('Syntax error: you may specify "from" only once')

            # Everything collected before "from" is the target; the feature
            # list starts over (the key is re-created on the next plain token).
            options.setdefault('feature_variables', [])
            if options['feature_variables']:
                options['target_variable'] = options.pop('feature_variables')

            from_seen = True
        else:
            m = params_re.match(arg)
            if m:
                params = options.setdefault('params', {})
                params[m.group(1)] = m.group(2)
            else:
                arg = unquote_arg(arg)
                if not arg:
                    continue
                options.setdefault('args', []).append(arg)
                options.setdefault('feature_variables', []).append(arg)

    return options
|
|
|
|
|
|
def parse_score_args(argv):
    """Parse a tokenized score argument list into an options dict.

    Two-array syntax:   ``a1 a2 .. AGAINST b1 b2 ..`` (``~`` also separates).
    Single-array syntax: ``a1 a2 ..`` (everything ends up in 'a_variables').
    Tokens of the form ``name=value`` are stored under 'params'; every other
    non-empty token is unquoted and also recorded under 'args'.

    Args:
        argv (list): string tokens to parse; consumed (emptied) while parsing.

    Returns:
        (dict): always contains 'a_variables' and 'b_variables'; 'params' and
        'args' are present only when such tokens were seen.

    Raises:
        RuntimeError: if the separator appears more than once.
    """
    options = {}
    against_seen = False

    while argv:
        token = argv.pop(0)

        if token.lower() == 'against' or token == "~":
            if against_seen:
                raise RuntimeError('Syntax error: you may specify "against" only once.')

            # Fields gathered so far were provisionally stored as b_variables;
            # now that the separator is seen they turn out to be a_variables.
            collected = options.setdefault('b_variables', [])
            if len(collected) > 0:
                options['a_variables'] = options.pop('b_variables')

            against_seen = True
            continue

        matched = params_re.match(token)
        if matched:
            name, value = matched.group(1), matched.group(2)
            options.setdefault('params', {})[name] = value
            continue

        field = unquote_arg(token)
        if len(field) == 0:
            continue
        options.setdefault('args', []).append(field)

        # Plain fields always accumulate in 'b_variables' first.
        pending = options.setdefault('b_variables', [])
        assert isinstance(field, str)
        pending.append(field)

    if 'a_variables' not in options:
        # No separator moved anything into a_variables: all fields belong there.
        options['a_variables'] = options.pop('b_variables', [])

    # b_variables defaults to an empty list.
    options.setdefault('b_variables', [])
    return options
|
|
|
|
|
|
def missing_keys_in_dict(keys, dct):
    """
    Collect the keys that are absent from a dict.

    Args:
        keys (list or tuple): keys (strings) to look for
        dct (dict): the dict to search

    Returns:
        (list): the entries of `keys` not found in `dct`, in their original order
    """
    absent = []
    for candidate in keys:
        if candidate in dct:
            continue
        absent.append(candidate)
    return absent
|
|
|
|
|
|
def object_to_dict(obj):
    """
    Expose an object's attributes as a dict (attribute names become keys).

    Args:
        obj (object): Python object

    Returns:
        (dict): the object's own ``__dict__`` (the same dict object, not a copy)
    """
    attributes = obj.__dict__
    return attributes
|
|
|
|
|
|
def unicode_to_str_in_dict(dct):
    """
    Return a JSON round-tripped copy of a dict.

    The dict is serialised with ``json.dumps`` and parsed back with
    ``json.loads``, so the result contains only JSON-compatible types
    (non-string keys become strings, tuples become lists).

    Parsing with ``json.loads`` rather than ``ast.literal_eval`` means that
    booleans, ``None`` and non-BMP characters survive the round trip: JSON's
    ``true``/``false``/``null`` are not Python literals, and JSON surrogate-pair
    escapes are not equivalent to Python string escapes.

    Args:
        dct (dict): JSON-serialisable dict

    Returns:
        (dict): a new dict with the same content as `dct`
    """
    return json.loads(json.dumps(dct))
|
|
|
|
|
|
def get_param_choice(params, param_name, value_choices, default_value):
    """
    Get the value of a parameter from a dict of parameter names and values.
    If the parameter name is None, return the default value without any check.
    Otherwise, return its value from the dict (or the default value when the
    name is absent) after checking that it belongs to the list of choices.

    Args:
        params (dict): the keys are the parameter names, and the values are parameter values
        param_name (str): name of the parameter of interest
        value_choices (list): the list of acceptable parameter values
        default_value (str): value to use when param_name is None or not in params

    Returns:
        (str) value of parameter given by param_name

    Raises:
        RuntimeError: if the resolved value is not one of value_choices
    """
    if param_name is None:
        return default_value
    param_val = params.get(param_name, default_value)
    if param_val not in value_choices:
        # str() each choice so non-string choices cannot turn the intended
        # RuntimeError into a TypeError from str.join.
        choices = ", ".join(str(choice) for choice in value_choices)
        raise RuntimeError(f"Invalid value error: '{param_name}' must be one of {choices}")
    return param_val
|