You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

902 lines
32 KiB

# coding=utf-8
#
# Copyright © Splunk, Inc. All Rights Reserved.
""" app_configuration_spec module
The app_configuration_spec module defines this class hierarchy:

.. code-block::

    AppConfigurationSpec(NamedObject)
    |
    └-> stanzas: (name: string, AppConfigurationStanzaDeclaration(NamedObject))*
        |
        ├-> pattern: SRE_Pattern
        |
        ├-> documentation: AppConfigurationDocumentation*
        |
        ├-> placement: AppConfigurationPlacement
        |
        ├-> position: FilePosition
        |
        └-> settings: (name: string, AppConfigurationSettingDeclaration(NamedObject))*
            |
            ├-> pattern: SRE_Pattern
            |
            ├-> data_type: string
            |
            ├-> position: FilePosition
            |
            ├-> documentation: AppConfigurationDocumentation*
            |
            └-> placement: AppConfigurationPlacement
                |
                ├-> forwarder: Boolean
                |
                ├-> indexer: Boolean
                |
                ├-> search_head: Boolean
                |
                └-> workloads: tuple(string{1,3})

"""
from __future__ import absolute_import, division, print_function, unicode_literals
from builtins import object
from collections import OrderedDict
from os import path
import re
import sys
from keyword import iskeyword
from . _configuration_validation_plugin import AppConfigurationValidationPlugin
from . _internal import FileBuffer, NamedObject
from .. utils import SlimLogger, encode_string, escape_non_alphanumeric_chars
from .. utils.internal import string
class AppConfigurationDocumentation(object):
    """ A paragraph of documentation text together with the layout needed to render it

    :param text: paragraph text.
    :param bulleted: truthy if the paragraph is rendered with a leading ``'* '`` bullet.
    :param indentation: number of spaces written in front of the paragraph.
    :param line_spacing: number of newline characters written after the paragraph.
    :param position: value handed back by the `position` property; it is stored as-is.

    """
    def __init__(self, text, bulleted, indentation, line_spacing, position):  # pylint: disable=too-many-arguments
        self._text, self._bulleted = text, bulleted
        self._indentation, self._line_spacing = indentation, line_spacing
        self._position = position

    def __str__(self):
        # Layout: <indentation spaces>[<bullet>]<text><line_spacing newlines>
        bullet = '* ' if self._bulleted else ''
        return ' ' * self._indentation + bullet + self._text + '\n' * self._line_spacing

    @property
    def position(self):
        """ The position value supplied at construction time """
        return self._position

    @property
    def text(self):
        """ The paragraph text, without indentation, bullet, or trailing newlines """
        return self._text
class AppConfigurationPlacement(object):
    """ An interned, immutable set of workloads that a stanza or setting is placed on

    Workload names are normalized through `_synonymous_names` and kept as a tuple sorted in reverse alphabetical
    order. There is exactly one instance per recognized combination; instances are cached in `_workloads_set`, so
    two placements built from equivalent workloads are the same object.

    :param workloads: iterable of workload names. Any spelling listed in `_synonymous_names` is accepted.
    :raises ValueError: if a name is not recognized or if `workloads` is empty.

    """
    __slots__ = ('_workloads',)

    def __new__(cls, workloads):
        try:
            normalized = cls._normalize_workloads(workloads)
            # The lookup is inside the `try` so that a combination that is not in the table (the empty one) is
            # reported as a ValueError, just like an unrecognized workload name, instead of leaking a KeyError
            instance = cls._workloads_set[normalized]
        except KeyError:
            raise ValueError('Unrecognized placement: ' + ', '.join((string(workload) for workload in workloads)))
        if instance is not None:
            return instance
        if sys.version_info < (3, 0):
            return super(AppConfigurationPlacement, cls).__new__(cls, workloads)
        return super(AppConfigurationPlacement, cls).__new__(cls)

    def __init__(self, workloads):
        # Runs on cached instances too; it recomputes the same tuple and re-registers the same object
        self._workloads = AppConfigurationPlacement._normalize_workloads(workloads)
        AppConfigurationPlacement._workloads_set[self._workloads] = self

    # region Special methods

    def __repr__(self):
        return repr(self._workloads)

    def __str__(self):
        return '@placement ' + ', '.join(self._workloads)

    # endregion

    # region Properties

    # Placement covering every workload; assigned immediately after the class body
    all_workloads = None

    @property
    def forwarder(self):
        """ True if this placement includes the forwarder workload """
        return 'forwarder' in self._workloads

    @property
    def indexer(self):
        """ True if this placement includes the indexer workload """
        return 'indexer' in self._workloads

    @property
    def search_head(self):
        """ True if this placement includes the search-head workload """
        return 'search-head' in self._workloads

    @property
    def workloads(self):
        """ Normalized tuple of workload names """
        return self._workloads

    # endregion

    # region Methods

    def is_disjoint(self, other):
        """ Returns True if this placement shares no workload with `other` """
        return not self.is_overlapping(other)

    def is_overlapping(self, other):
        """ Returns True if this placement shares at least one workload with `other`

        :param other: an AppConfigurationPlacement or an iterable of workload names.

        """
        if not isinstance(other, AppConfigurationPlacement):
            other = AppConfigurationPlacement(other)
        workloads = self.workloads
        return any((workload in workloads for workload in other.workloads))

    def to_dict(self):
        """ Returns an OrderedDict of booleans keyed by the serialization name of each workload """
        names = self._serialization_names
        return OrderedDict(((names[v], getattr(self, v)) for v in ('forwarder', 'indexer', 'search_head')))

    def union(self, other):
        """ Returns the placement covering the workloads of both this placement and `other`

        :param other: an AppConfigurationPlacement or None. None is treated as the empty placement.

        """
        if other is None:
            return self
        # noinspection PyProtectedMember
        return AppConfigurationPlacement(other._workloads + self._workloads)  # pylint: disable=protected-access

    # endregion

    # region Protected

    @classmethod
    def _normalize_workloads(cls, item):
        """ Maps each distinct name in `item` to its canonical spelling; raises KeyError for an unknown name """
        return tuple(sorted((cls._synonymous_names[name] for name in frozenset(item)), reverse=True))

    _serialization_names = {
        'search_head': 'searchHead',
        'forwarder': 'forwarder',
        'indexer': 'indexer'}

    _synonymous_names = {
        'search-head': 'search-head', 'search_head': 'search-head', 'searchHead': 'search-head',
        'forwarder': 'forwarder',
        'indexer': 'indexer'}

    # Every recognized combination, keyed by its normalized tuple; values are filled in as instances are created
    _workloads_set = OrderedDict([
        (('search-head',), None),
        (('forwarder',), None),
        (('indexer',), None),
        (('indexer', 'forwarder'), None),
        (('search-head', 'indexer'), None),
        (('search-head', 'forwarder'), None),
        (('search-head', 'indexer', 'forwarder'), None)
    ])

    # endregion


AppConfigurationPlacement.all_workloads = AppConfigurationPlacement(('search-head', 'indexer', 'forwarder'))
class AppConfigurationSettingDeclaration(NamedObject):
    """ A setting declaration, aggregated over the sections that declare it

    Sections are registered with `add`. The data type, documentation, pattern, placement, and position reported by
    this object are those of the most recently added section.

    :param name: setting name.

    """
    def __init__(self, name):
        NamedObject.__init__(self, name)
        self._sections = OrderedDict()
        self._declaration = None  # most recently added section; None until `add` is called

    # region Special methods

    def __repr__(self):
        arguments = (
            'name=' + repr(self._name),
            'data_type=' + repr(self.data_type),
            'placement=' + repr(self.placement),
            'position=' + repr(self.position)
        )
        return 'AppConfigurationSetting(' + ', '.join(arguments) + ')'

    def __str__(self):
        return self._declaration.__str__()

    # endregion

    # region Properties

    @property
    def data_type(self):
        return self._declaration.data_type

    @property
    def documentation(self):
        return self._declaration.documentation

    @property
    def pattern(self):
        return self._declaration.pattern

    @property
    def placement(self):
        return self._declaration.placement

    @property
    def position(self):
        return self._declaration.position

    # endregion

    # region Methods

    def add(self, section):
        """ Registers `section` under its name and makes it the section this declaration reports from """
        self._sections[section.name] = self._declaration = section

    # endregion

    class Section(NamedObject):
        """ A single occurrence of a setting declaration

        :param name: setting name; ``<...>`` spans in it are compiled into named regular expression groups.
        :param data_type: text of the declared data type.
        :param placement: placement in effect for the setting.
        :param position: position of the declaration; also used when reporting a pattern compilation failure.

        """
        def __init__(self, name, data_type, placement, position):
            NamedObject.__init__(self, name)
            self._data_type = data_type
            self._placement = placement
            self._position = position
            self._documentation = []
            # Must come after `_position` is set because `_compile_pattern` reads `self.position` on failure
            self._pattern = self._compile_pattern(name)

        # region Special methods

        def __repr__(self):
            arguments = (
                'name=' + repr(self._name),
                'data_type=' + repr(self._data_type),
                'placement=' + repr(self._placement),
                'position=' + repr(self._position)
            )
            return 'AppConfigurationSetting.Section(' + ', '.join(arguments) + ')'

        def __str__(self):
            return self._name + ' = ' + self._data_type

        # endregion

        # region Properties

        @property
        def data_type(self):
            return self._data_type

        @property
        def documentation(self):
            return self._documentation

        @property
        def pattern(self):
            return self._pattern

        @property
        def placement(self):
            return self._placement

        @property
        def position(self):
            return self._position

        # endregion

        # region Protected

        # TODO: Refactor AppConfiguration{SettingDeclaration,StanzaDeclaration}._compile_pattern into shared/unique code
        # Difference: stanza name patterns are a bit more complex on the _sub_replacement_pattern side and therefore
        # have different replace(match) functions

        def _compile_pattern(self, name):
            """ Compiles the regular expression that matches setting names declared by `name`

            Each escaped ``<text>`` span in the escaped name becomes a non-greedy named group; everything else is
            matched literally. The expression is anchored at the end with ``\\Z``.

            :param name: setting name as written in the spec file.
            :return: compiled pattern. A compilation failure is reported through `SlimLogger.fatal`.

            """
            def replace(match):
                group_name = to_valid_identifier(match.expand(match.group(1)), match.start(1))
                return r'(?P<' + group_name + '>.*?)'

            def to_valid_identifier(group_name, start):
                if len(group_name) == 0:
                    # An empty `<>` gets a name derived from its offset in the escaped text
                    group_name = '__unnamed_group_' + string(start)
                else:
                    group_name = self._sub_invalid_identifier_characters('_', group_name)
                    if group_name[0].isdigit() or iskeyword(group_name):
                        group_name = '_' + group_name
                return group_name

            # guards against compilation of embedded regular expressions
            escaped_text = escape_non_alphanumeric_chars(name)

            try:
                pattern = re.compile(self._sub_replacement_pattern(replace, escaped_text) + r'\Z', re.M | re.U)
                return pattern
            except re.error as error:
                # This pattern is for a setting name, not a stanza header, and the message says so
                SlimLogger.fatal(
                    self.position, ': Could not compile regular expression for setting name [', name, ']: ', error)

        # TODO: Ensure optional match strings in stanza names are consistent with SpecFiles.cpp
        # Is there just the one use case for optional matches: inputs.conf.spec?

        _sub_invalid_identifier_characters = re.compile(r'\\[_\W](?<!\\\\)|(?=\w)[^a-zA-Z0-9]', re.M | re.U).sub
        _sub_replacement_pattern = re.compile(r'\\<(.*?)\\>', re.M | re.U).sub

        # endregion
class AppConfigurationSpec(NamedObject):
    """ The stanza declarations of a configuration spec, aggregated over the spec files loaded into it

    :param name: configuration name; also passed to `AppConfigurationValidationPlugin.get`.
    :param app_root: passed to `AppConfigurationValidationPlugin.get` together with `name`.

    """
    def __init__(self, name, app_root):
        NamedObject.__init__(self, name)
        self._app_root = app_root
        self._sections = OrderedDict()  # filename -> AppConfigurationSpec.Section
        self._declarations = OrderedDict()  # stanza name -> AppConfigurationStanzaDeclaration
        self._validation_plugin = AppConfigurationValidationPlugin.get(name, app_root)

    # region Special methods

    def __repr__(self):
        return 'AppConfigurationSpec(name=' + repr(self._name) + ', stanzas=' + repr(self._declarations) + ')'

    def __str__(self):
        return encode_string(self._name)

    # endregion

    # region Methods

    def load(self, filename):
        """ Loads the spec file `filename` and merges its stanza declarations into this spec

        A stanza seen for the first time gets a new AppConfigurationStanzaDeclaration; otherwise the section is
        added to the existing declaration of the same name.

        """
        section = AppConfigurationSpec.Section.load(filename, self._validation_plugin)
        section_declarations = section.stanza_declarations
        declarations = self._declarations

        for name in section_declarations:
            section_declaration = section_declarations[name]
            try:
                declaration = declarations[name]
            except KeyError:
                declaration = AppConfigurationStanzaDeclaration(name, section_declaration.position)
                declarations[name] = declaration
            declaration.add(section_declaration)

        self._sections[filename] = section

    def match(self, stanza):
        """ Returns the list of stanza declarations whose pattern matches `stanza`, or None if there are none """
        matches = [
            declaration for declaration in self._declarations.values()
            if declaration.pattern.match(stanza) is not None
        ]
        return matches if matches else None

    def stanza_declarations(self):
        """ Returns a generator over the stanza declarations, in insertion order """
        declarations = self._declarations
        return (declarations[name] for name in declarations)

    def to_dict(self):
        """ Returns a new OrderedDict mapping each loaded filename to its section """
        return OrderedDict(self._sections)  # we copy to protect our internals

    # endregion

    class Section(NamedObject):
        """ The content of a single spec file

        The section is named after the base name of the file, with a trailing ``.conf.spec`` removed.

        :param file_buffer: loaded buffer providing `filename`, `stanza_declarations`, and `save`.

        """
        def __init__(self, file_buffer):
            name = path.basename(file_buffer.filename)
            if name.endswith('.conf.spec'):
                end = len('.conf.spec')
                if end < len(name):
                    # Strip the suffix. Slicing with `[:end]` would keep the first ten characters instead and turn
                    # `inputs.conf.spec` into `inputs.con`
                    name = name[:-end]
            NamedObject.__init__(self, name)
            self._buffer = file_buffer

        # region Properties

        @property
        def filename(self):
            return self._buffer.filename

        def get(self, stanza):
            """ Returns the declaration of the stanza named `stanza`; raises KeyError if there is none """
            return self._buffer.stanza_declarations[stanza]

        @classmethod
        def load(cls, filename, validation_plugin):
            """ Loads `filename` into a new buffer and returns the section that wraps it """
            file_buffer = _AppConfigurationSpecBuffer(filename, validation_plugin)
            file_buffer.load()
            return cls(file_buffer)

        @property
        def stanza_declarations(self):
            return self._buffer.stanza_declarations

        # endregion

        # region Methods

        def save(self, filename=None):
            """ Delegates to the underlying buffer's `save` """
            self._buffer.save(filename)

        # endregion
class AppConfigurationStanzaDeclaration(NamedObject):
    """ A stanza declaration, aggregated over the sections that declare it

    :param name: stanza name as written in the stanza header; may hold `|`-separated alternatives, an optional
        ``scheme://`` prefix, and ``<...>`` replacement spans.
    :param position: position reported if the stanza name pattern cannot be compiled.

    """
    def __init__(self, name, position):
        NamedObject.__init__(self, name)
        self._placement = None
        self._sections = OrderedDict()
        self._declarations = OrderedDict()  # setting name -> AppConfigurationSettingDeclaration
        self._patterned_declarations = None  # lazily built by `match`
        self._pattern = self._compile_pattern(name, position)

    # region Special methods

    def __repr__(self):
        name, declarations = repr(self._name), repr(self._declarations)
        return 'AppConfigurationStanzaDeclaration(name=' + name + ', setting_declarations=' + declarations + ')'

    def __str__(self):
        return '[' + self._name.replace('\n', '\\n') + ']'

    # endregion

    # region Properties

    @property
    def pattern(self):
        return self._pattern

    @property
    def placement(self):
        return self._placement

    # endregion

    # region Methods

    def add(self, section):
        """ Merges the setting declarations and the placement of `section` into this declaration

        :param section: an AppConfigurationStanzaDeclaration.Section.

        """
        section_declarations = section.setting_declarations
        declarations = self._declarations

        for name in section_declarations:
            try:
                declaration = declarations[name]
            except KeyError:
                declaration = AppConfigurationSettingDeclaration(name)
                declarations[name] = declaration
            declaration.add(section_declarations[name])

        # The set of declarations may have grown, so the list cached by `match` must be rebuilt on next use
        self._patterned_declarations = None

        self._sections[section.position.file] = section
        self._placement = section.placement.union(self._placement)

    def match(self, setting):
        """ Returns the setting declaration that matches `setting`, or None

        An exact match on `setting.name` is tried first; failing that, the first declaration whose pattern matches
        the name is returned.

        """
        # match exact
        name = setting.name
        try:
            return self._declarations[name]
        except KeyError:
            pass

        # match pattern
        declarations = self._patterned_declarations
        if declarations is None:
            # NOTE(review): `d.name != d.pattern` compares a name with a compiled pattern object, so it looks like
            # this filter keeps every declaration. It is left untouched here; confirm the intent before narrowing it.
            declarations = [d for d in list(self._declarations.values()) if d.name != d.pattern]
            self._patterned_declarations = declarations

        for declaration in declarations:
            if declaration.pattern.match(name) is not None:
                return declaration

        return None

    def setting_declarations(self):
        """ Returns a generator over the setting declarations, in insertion order """
        declarations = self._declarations
        return (declarations[name] for name in declarations)

    # endregion

    # region Protected

    def _compile_pattern(self, name, position):
        """ Compiles the regular expression that matches stanza names declared by `name`

        `name` is split on `|` and each alternative is processed on its own: a leading ``scheme://`` is set aside,
        the remaining text is escaped, and each escaped ``<text>`` span becomes a named group. Group names are made
        unique across all alternatives.

        :param name: stanza name as written in the stanza header.
        :param position: position reported on failure.
        :return: compiled pattern. A compilation failure is reported through `SlimLogger.fatal`.

        """
        def replace(match):
            group_name = to_valid_identifier(match.expand(match.group(2)), match.start(2))
            prefix = match.group(1)
            suffix = match.group(3)
            if len(prefix) > 0 and len(suffix) > 0:
                # Match zero or one occurrence of arbitrary text with a symbolic `group` name
                # The text must be followed by a colon (':'), if the pattern `suffix` is ':'
                sub_pattern = r'(?P<' + group_name + '>.*?' + suffix[2:] + ')?'
            else:
                # Match one occurrence of arbitrary text with a symbolic `group` name
                sub_pattern = r'(?P<' + group_name + '>.*?)'
            return sub_pattern

        def to_valid_identifier(group_name, start):
            if len(group_name) == 0:
                group_name = '__unnamed_group_' + string(start)
            else:
                group_name = self._sub_invalid_identifier_characters('_', group_name)
                if group_name[0].isdigit() or iskeyword(group_name):
                    group_name = '_' + group_name
            if group_name in group_names:
                # A regular expression cannot define the same group name twice
                group_name += '_' + string(start)
            group_names.add(group_name)
            return group_name

        names = name.split('|')
        group_names = set()

        for index, text in enumerate(names):
            scheme = self._match_scheme_name(text)
            if scheme is not None:
                text = text[scheme.end():]
            # guards against compilation of embedded regular expressions
            escaped_text = escape_non_alphanumeric_chars(text)
            pattern = self._sub_replacement_pattern(replace, escaped_text)
            if scheme is None:
                names[index] = pattern
                continue
            # Accept the bare scheme name as well as the scheme followed by the pattern
            names[index] = scheme.group(1) + '|' + scheme.group(0) + pattern

        # NOTE(review): with a single alternative that has a scheme, the result is `scheme|scheme://...\Z`, so the
        # `\Z` anchor only binds to the second branch. Confirm whether the bare scheme is meant to match as a prefix.
        pattern = '(?:' + '|'.join(names) + ')\\Z' if len(names) > 1 else names[0] + '\\Z'

        try:
            pattern = re.compile(pattern, re.M | re.U)
            return pattern
        except re.error as error:
            SlimLogger.fatal(
                position, ': Could not compile regular expression for stanza header [', name, ']: ', error)

    _match_scheme_name = re.compile(r'([0-9a-zA-Z][0-9a-zA-Z_-]*)://', re.M | re.U).match
    _sub_invalid_identifier_characters = re.compile(r'\\[_\W](?<!\\\\)|(?=\w)[^a-zA-Z0-9]', re.M | re.U).sub
    _sub_replacement_pattern = re.compile(r'((?:\\\[)?)\\<(.*?)\\>((?:\\\]\\:)?)', re.M | re.U).sub

    # endregion

    class Section(NamedObject):
        """ A single occurrence of a stanza declaration

        :param name: stanza name.
        :param position: position of the stanza declaration.

        """
        def __init__(self, name, position):
            NamedObject.__init__(self, name)
            self._declarations = OrderedDict()  # setting name -> AppConfigurationSettingDeclaration.Section
            self._documentation = []
            self._placement = None
            self._position = position

        # region Special methods

        def __repr__(self):
            name, position = repr(self._name), repr(self._position)
            return 'AppConfigurationStanzaDeclaration.Section(name=' + name + ', position=' + position + ')'

        def __str__(self):
            return '[' + self._name.replace('\n', '\\n') + ']'

        # endregion

        # region Properties

        @property
        def documentation(self):
            return self._documentation

        @property
        def placement(self):
            return self._placement

        @property
        def position(self):
            return self._position

        @property
        def setting_declarations(self):
            return self._declarations

        # endregion
class _AppConfigurationSpecBuffer(FileBuffer):
    """ File buffer that parses a conf.spec file into stanza and setting declaration sections

    The helpers `_append`, `_Error`, `_match_assignment_statement`, `_search_right_square_bracket`, and
    `_skip_whitespace` are not defined in this class; presumably they are inherited from FileBuffer.

    :param filename: name of the spec file; passed to FileBuffer.
    :param validation_plugin: object whose `fix_up(item, placement, position)` is called as each stanza
        declaration is completed.

    """
    def __init__(self, filename, validation_plugin):
        FileBuffer.__init__(self, filename)
        self._stanza_declarations = None  # set to an OrderedDict by `_load`
        self._validation_plugin = validation_plugin

    # region Properties

    @property
    def stanza_declarations(self):
        return self._stanza_declarations

    # endregion

    # region Protected

    _any_stanza_name = '<__any_stanza_name>'
    _is_bulleted_paragraph = re.compile(r'\*\s', re.M | re.U).match
    _match_any_stanza_name = re.compile(r'<[^<>]+>', re.M | re.U).match
    _match_placement_directive = re.compile(r'@\s*placement\s+', re.M | re.U).match
    _search_last_whitespace = re.compile(r'\s*$', re.MULTILINE).search
    # Whitespace on either side of the comma is optional, so `a,b`, `a, b`, and `a , b` all split into two items
    _split_comma_delimited_text = re.compile(r'\s*,\s*', re.M | re.U).split

    # pylint: disable=protected-access
    def _load(self, reader, **kwargs):
        """ Reads the conf.spec file associated with the current Buffer

        Each line is classified as a blank line, a comment, a placement directive, a stanza declaration, a setting
        declaration, or documentation for the current stanza or setting. Every item is appended to the buffer.
        Errors of type `self._Error` are logged and parsing continues with the next line.

        :param reader: line reader for the file; it must provide `position`, `read_continuation`, and iteration.
        :param kwargs: not used here.

        """
        # TODO: SPL-123949: Refactor AppConfigurationSpecBuffer._load to improve understandability
        # pylint: disable=too-many-branches, too-many-locals, too-many-statements

        stanza = AppConfigurationStanzaDeclaration.Section('default', reader.position)
        setting_declaration_section = AppConfigurationSettingDeclaration.Section

        stanzas = self._stanza_declarations = OrderedDict()
        stanzas['default'] = current_item = stanza

        aggregate_placement = default_placement = placement = None
        match_assignment_statement = self._match_assignment_statement
        skip_whitespace = self._skip_whitespace

        for line in reader:  # pylint: disable=too-many-nested-blocks
            try:
                match = skip_whitespace(line)
                start = match.end()

                if start >= len(line):
                    # blank line
                    item = '\n'
                    start = 0
                elif line[start] in ';#':
                    # comment
                    item = line[start:]
                elif line[start] == '@':
                    # directive
                    item = self._parse_directive(line, start)
                    if default_placement is None:
                        # We're in the global settings section (i.e., outside of any stanza declaration) and this
                        # placement directive appears before the first setting declaration
                        assert stanza.name == 'default' and len(stanza.setting_declarations) == 0
                        default_placement = item
                    placement = item
                    aggregate_placement = placement.union(aggregate_placement)
                else:
                    line = reader.read_continuation(line)
                    if start == 0 and line[start] == '[':
                        # stanza declaration
                        item = self._parse_stanza_declaration(line, start, reader, stanzas)
                        if default_placement is None:
                            # We're exiting the global settings section (i.e., entering the first stanza) without
                            # having encountered a placement directive or a setting declaration
                            default_placement = AppConfigurationPlacement.all_workloads
                        self._end_stanza_declaration(stanza, aggregate_placement, default_placement, reader.position)
                        placement = aggregate_placement = item.placement
                        current_item = stanza = item
                    else:
                        match = match_assignment_statement(line, start)
                        if match is None:
                            # documentation for the current item which is either a stanza or a setting declaration
                            item = self._parse_documentation(line, start, reader)
                            current_item.documentation.append(item)
                        else:
                            # setting declaration
                            if default_placement is None:
                                # We're in the global settings section (i.e., outside of any stanza declaration) and
                                # this setting declaration appears before the first, if any placement directive
                                assert stanza.name == 'default' and len(stanza.setting_declarations) == 0
                                default_placement = AppConfigurationPlacement.all_workloads
                            if aggregate_placement is None:
                                # We're in the global settings section or some stanza declaration (the default or some
                                # other stanza) and we've hit this setting before hitting a placement directive
                                assert placement is None
                                placement = aggregate_placement = default_placement
                            name, data_type, position = match.group(1), match.group(2), reader.position
                            # The last character of the matched data type text is dropped
                            item = setting_declaration_section(name, data_type[:-1], placement, position)
                            stanza.setting_declarations[item.name] = current_item = item

                self._append(item, reader.position, indentation=start)

            except self._Error as error:
                SlimLogger.error(reader.position, ': ', error)

        if default_placement is None:
            # We're exiting the global settings section because we hit the end of the current spec file without having
            # encountered a placement directive or setting declaration. In short, we have a spec file without any
            # placement directives or setting declarations
            assert stanza.name == 'default' and len(stanza.setting_declarations) == 0 and aggregate_placement is None
            stanza._placement = aggregate_placement = default_placement = AppConfigurationPlacement.all_workloads

        self._end_stanza_declaration(stanza, aggregate_placement, default_placement, reader.position)

        if len(stanzas) == 1:
            # The default stanza is the only stanza, hence all settings are global and there are no restrictions or
            # special handling based on stanza name (we'll match any stanza name)
            stanza = AppConfigurationStanzaDeclaration.Section(self._any_stanza_name, reader.position)
            stanzas[stanza.name] = stanza
            self._end_stanza_declaration(stanza, default_placement, default_placement, reader.position)
            return

        self._fix_up(reader.position)

    # pylint: disable=protected-access
    def _end_stanza_declaration(self, item, aggregate_placement, default_placement, position):
        """ Sets the placement of the stanza section `item` and hands it to the validation plugin

        The placement is `aggregate_placement`, or `default_placement` if `aggregate_placement` is None. A default
        stanza without setting declarations is left untouched.

        """
        if item.name == 'default' and len(item.setting_declarations) == 0:
            # There are no global settings so we don't add a disabled setting because it doesn't influence the placement
            # of settings for any specific stanza
            return
        placement = item._placement = default_placement if aggregate_placement is None else aggregate_placement
        self._validation_plugin.fix_up(item, placement, position)

    def _fix_up(self, position):
        """ Reconciles the placement of the `disabled` setting between the default stanza and all other stanzas

        A `disabled` setting is added to the default stanza if it has none. Each other stanza's `disabled`
        placement is widened by the default stanza's, and the default stanza and its `disabled` setting receive
        the union of them all.

        :param position: position given to a `disabled` setting created for the default stanza.

        """
        # TODO: Incorporate this issue into module-level documentation
        # Issue:
        # * any setting can go into the default stanza, including the disabled setting
        # * the disabled setting will be found in a specific stanza, not the default stanza
        # * it is that stanza's disabled setting placement that will determine the placement of the disabled setting
        #   in the default stanza
        # * however, it is the union of all placements for all stanzas that should determine the placement of the
        #   disabled setting in the default stanza
        # Approach:
        #   keep this info in the default stanza and make sure we search from a specific stanza to the default
        #   stanza to match a specific stanza's setting. Search in the reverse order when matching a global setting.

        stanzas = self.stanza_declarations
        default_stanza = stanzas['default']

        try:
            default_disabled = default_stanza.setting_declarations['disabled']
        except KeyError:
            default_disabled = AppConfigurationSettingDeclaration.Section('disabled', '<bool>', None, position)
            default_stanza.setting_declarations['disabled'] = default_disabled

        default_disabled_placement = aggregate_placement = default_disabled.placement

        for name in stanzas:
            if name == 'default':
                continue
            stanza = stanzas[name]
            # Raises KeyError if the stanza has no `disabled` setting; presumably the validation plugin adds one
            disabled = stanza.setting_declarations['disabled']
            disabled._placement = disabled.placement.union(default_disabled_placement)
            aggregate_placement = disabled.placement.union(aggregate_placement)

        default_stanza._placement = default_disabled._placement = aggregate_placement

    def _parse_directive(self, line, start):
        """ Parses the placement directive that starts at `line[start]`

        :return: the AppConfigurationPlacement named by the directive.
        :raises self._Error: if the text is not a placement directive or it names an unrecognized workload.

        """
        match = self._match_placement_directive(line, start)

        if match is None:
            raise self._Error('expected placement directive, not ' + line[start:])

        start = match.end()
        end = self._search_last_whitespace(line, start).start()
        workloads = self._split_comma_delimited_text(line[start:end])

        try:
            return AppConfigurationPlacement(workloads)
        except ValueError:
            raise self._Error('Unrecognized workload in placement directive: ' + ', '.join(workloads))

    def _parse_documentation(self, line, start, reader):
        """ Reads a documentation paragraph that begins at `line[start]`

        Following lines are joined to the paragraph until a blank line, end of input, or a line whose first
        non-blank character is one of ``[@#;*`` is reached. A non-empty terminating line is put back on the reader.

        :return: an AppConfigurationDocumentation for the paragraph.

        """
        bulleted = self._is_bulleted_paragraph(line[start:start + 2]) is not None
        indentation = start
        line_spacing = 0
        position = reader.position

        if bulleted:
            start += 2  # skip the bullet and the whitespace character that follows it

        paragraph = line[start:].rstrip('\n')

        for linep in reader:
            linepp, line_spacing, start = self._read_blank(linep, reader)
            if line_spacing > 0 or (len(linepp) > 0 and linepp[start] in '[@#;*'):
                # We've hit a blank line or a new record (stanza, directive, comment, or documentation)
                if len(linepp) > 0:
                    reader.put_back(linepp)
                break
            if len(linepp) > 0:
                paragraph += '\n' + linepp.rstrip('\n')

        item = AppConfigurationDocumentation(paragraph, bulleted, indentation, line_spacing, position)
        return item

    def _parse_stanza_declaration(self, line, start, reader, stanzas):
        """ Parses the stanza header that begins at `line[start]` and returns its section

        The section is looked up in `stanzas` by name and created and registered there if it does not exist. A
        name that starts with ``<...>`` is replaced by `_any_stanza_name`.

        """
        # TODO: insist that stanzas start in column 1 because spec file authors have a habit of using stanza headers
        # in examples. See, for instance, distsearch.conf.spec, at or about line 336: "  [bundleEnforcerBlacklist]."

        start += 1  # skip the left square bracket
        match = self._search_right_square_bracket(line, start)

        if match is None:
            SlimLogger.warning(reader.position, ': missing terminating right square bracket at end of stanza header')
            end = -1  # take everything but the last character of the line
        else:
            end = match.start()

        name = line[start:end]

        if self._match_any_stanza_name(name):
            name = self._any_stanza_name

        try:
            declaration = stanzas[name]
        except KeyError:
            declaration = AppConfigurationStanzaDeclaration.Section(name, reader.position)
            stanzas[name] = declaration

        return declaration

    def _read_blank(self, line, reader):
        """ Reads to the first non-blank line, if the current line is blank

        :param line: current line
        :type line: string

        :param reader: used to read subsequent lines, if the current line is blank.
        :type reader: FileReader

        :return: (`line`, `count`, `start`) where:

            `line` is the first non-blank line read or the current line, if the current line is non-blank. An empty
            string value indicates that EOF was encountered.

            `count` is the number of blank lines read. The current line is included in the `count`.

            `start` is the index of the first non-blank character on `line`

        :rtype: tuple

        """
        skip_whitespace = self._skip_whitespace
        start = None
        count = 0

        while True:
            # NOTE(review): the converted value is only used for the whitespace match; `len(line)` below still sees
            # the unconverted `line`. Confirm whether a None line can actually reach this point.
            match = skip_whitespace(self.line_to_unicode(line))
            start = match.end()
            if start < len(line):  # non-blank line
                break
            count += 1
            try:
                line = next(reader)
            except StopIteration:
                start = 0
                line = ''
                break

        return line, count, start

    # endregion

    # convert NoneType to unicode, part of next() fix. SPL-168604
    def line_to_unicode(self, line):
        """ Returns `line`, or an empty unicode string if `line` is None """
        if line is None:
            line = u''
        return line