#!/usr/bin/env python
# coding=utf-8
#
# Copyright © Splunk, Inc. All Rights Reserved.

"""Ignore-pattern support: decides which source items are excluded, based on `.slimignore`-style pattern files."""

from __future__ import absolute_import, division, print_function, unicode_literals

from builtins import object
from collections import OrderedDict
from os import path

import errno
import fnmatch
import io
import re

try:
    from collections.abc import Iterable
except ImportError:  # Python 2: the ABCs still live directly in `collections`
    from collections import Iterable  # pylint: disable=no-name-in-module

from . _configuration import slim_configuration
from . _encoders import encode_filename
from . logger import *
from . internal import string


__all__ = ['SlimIgnore']


class SlimIgnore(object):
    """Filters items (path strings or TarInfo-like objects) against exclude/include patterns.

    Patterns are read from `<source_directory>/.slimignore` and from the `ignore` files located under
    `slim_configuration.user_config` and `slim_configuration.system_config`. Lines starting with `!` are include
    patterns; all other lines are exclude patterns. The `.slimignore` file itself is always excluded.
    """

    def __init__(self, app_name, source_directory):
        """Read and compile the ignore patterns and build the filter function.

        :param app_name: Path whose absolute, case-normalized form is treated as the source root; item names are
            made relative to it (or, for TarInfo-like items, to its basename) before matching.
        :param source_directory: Directory that is searched for a `.slimignore` file.
        """
        # NOTE(review): the root used to relativize item names is derived from app_name, not source_directory.
        # Presumably callers guarantee both resolve to the same location -- confirm.
        excludes, includes = SlimIgnore._parse_patterns(source_directory)
        excludes = SlimIgnore._compile_patterns(excludes)

        if not includes:
            def _filter(item):
                return None if SlimIgnore.Item(self, item).match(excludes) else item
        else:
            includes = SlimIgnore._compile_patterns(includes)

            def _filter(item):
                ignore_item = SlimIgnore.Item(self, item)
                # An item is kept when it is not excluded, or when an include pattern re-admits it
                if not ignore_item.match(excludes) or ignore_item.match(includes):
                    return item
                return None

        self._source_pathname = path.normcase(path.abspath(app_name))
        self._source_basename = path.basename(self._source_pathname)
        self._filter = _filter

    # region Methods

    def filter(self, item):
        """Return `item` if it is not ignored; otherwise return `None`."""
        return self._filter(item)

    def ifilter(self, iterable):
        """Lazily yield the items of `iterable` that are not ignored."""
        assert isinstance(iterable, Iterable)
        for item in iterable:
            if self._filter(item) is not None:
                yield item

    # endregion

    # region Protected

    _backslash_quoted_characters = re.compile(r'\\(.)')  # a backslash followed by the character it quotes
    _trailing_spaces = re.compile(r'\\ +\n?$')  # a backslash-quoted run of trailing spaces at the end of a line
    _wildcard = re.compile(r'\.\*')  # the literal regex text '.*' as produced by fnmatch.translate
    _path_sep = '\\' + path.sep  # the path separator, escaped for use in a regular expression
    _escaped_path_sep = _path_sep.replace('\\', '\\\\')  # _path_sep, escaped again for use in an re.sub template

    @staticmethod
    def _compile_patterns(ordered_dict):
        """Compile a mapping of regex text to pattern type into a single alternation.

        Each alternative is wrapped in a named group. The group name is the pattern type ('f' or 'd') followed by
        the pattern's position in `ordered_dict`; `Item.match` reads the type back from the group name.
        """
        pattern = '|'.join(
            '(?P<' + t + string(n) + '>' + p + ')' for n, (p, t) in enumerate(ordered_dict.items())
        )
        return re.compile(pattern, re.M | re.S | re.U)

    @staticmethod
    def _strip_regex(rgx):
        """Remove the decoration that `fnmatch.translate` puts around the translated pattern.

        Handles the three shapes checked below: a trailing `(?ms)` flag group, a trailing end-of-string anchor,
        and an enclosing `(?s:...)` group. What remains is the bare regex text for the glob.
        """
        if rgx.endswith('(?ms)'):
            rgx = rgx[:-5]
        # Accept the lower-case `\z` spelling of the end-of-string anchor as well as `\Z`; newer Python releases
        # may emit it from fnmatch.translate.
        if rgx.endswith(('\\Z', '\\z')):
            rgx = rgx[:-2]
        if rgx.startswith('(?s:') and rgx.endswith(')'):
            rgx = rgx[4:-1]
        return rgx

    @staticmethod
    def _parse_patterns(source_directory):
        """Parse all ignore lines into two ordered mappings of regex text to pattern type.

        :param source_directory: Directory that is searched for a `.slimignore` file.
        :return: `(excludes, includes)`; each maps regex text to 'f' (matches any node) or 'd' (matches
            directories only). Lines starting with `!` populate `includes`.
        """
        # pylint: disable=too-many-locals, too-many-branches, too-many-statements
        path_sep = SlimIgnore._path_sep
        translate = fnmatch.translate
        strip_regex = SlimIgnore._strip_regex
        # Within a single path segment a glob `*` must not cross a path separator
        segment_wildcard = r'[^' + SlimIgnore._escaped_path_sep + ']*'

        excludes = OrderedDict()
        includes = OrderedDict()

        for line in SlimIgnore._read_lines(source_directory):

            # A backslash-quoted run of trailing spaces collapses to one significant space; otherwise trailing
            # whitespace (including the newline) is dropped
            line, subn = SlimIgnore._trailing_spaces.subn(r' ', line)

            if subn == 0:
                line = line.rstrip()

            # Detect negation BEFORE removing backslash quoting so that a quoted leading '!' (i.e., '\!name')
            # denotes a literal '!' rather than an include pattern
            if line.startswith('!'):
                if len(line) == 1:
                    continue  # blank line
                target = includes
                line = line[1:]
            else:
                target = excludes

            line = SlimIgnore._backslash_quoted_characters.sub(r'\1', line)

            if line.endswith('/**'):
                line = line[:-2]  # 'foo/**' is treated like the directory pattern 'foo/'

            if not line.endswith('/'):
                pattern_type = 'f'
            elif len(line) > 1:
                pattern_type = 'd'
                line = line[:-1]
            else:
                continue  # blank line

            names = path.normcase(path.normpath(line)).split(path.sep)
            first_name = names[0]

            if len(names) == 1:
                # Pattern matches an unanchored file or directory node name (e.g., 'foo' or 'ba*r')
                # NOTE(review): unlike the multi-segment branch below, no end-of-string anchor is appended here,
                # so 'foo' also matches names that merely start with 'foo' -- confirm that this is intended.
                pattern = r'(?:.*' + path_sep + ')?' + strip_regex(translate(first_name))
            else:
                # Pattern matches an anchored or unanchored path segment
                # anchored patterns start with '/' (e.g., '/foo*/bar')
                # unanchored patterns start with '**/' or a node name (e.g., '**/foo/ba*r' or 'foo/ba*r')
                start = 1 if len(first_name) == 0 or first_name == '**' else 0

                for i, name in enumerate(names[start:], start):
                    # Test the raw segment: once translated, '**' is regex text and would never equal '**'
                    if name == '**':
                        names[i] = '.*'
                    else:
                        names[i] = SlimIgnore._wildcard.sub(segment_wildcard, strip_regex(translate(name)))

                pattern = path_sep.join(names[start:]) + r'\Z'

                if first_name == '**':  # TODO: Should the comparison not be len(first_name) == 0 or first_name == '**'
                    pattern = r'(?:.*' + path_sep + ')?' + pattern

            try:
                current_pattern_type = target[pattern]
            except KeyError:
                target[pattern] = pattern_type
            else:
                if pattern_type == 'f' and current_pattern_type == 'd':
                    del target[pattern]  # remove the currently-defined pattern from our targeted dictionary
                    target[pattern] = pattern_type  # add the new pattern to the end of our targeted dictionary

        excludes[r'\.slimignore\Z'] = 'f'

        return excludes, includes

    @staticmethod
    def _read_lines(source_directory):
        """Yield the non-blank, non-comment lines of each ignore file, with leading whitespace removed.

        Files are read in this order: `<source_directory>/.slimignore`, then the `ignore` file under
        `slim_configuration.user_config`, then the one under `slim_configuration.system_config`. A file that does
        not exist is skipped; any other IOError is reported through `SlimLogger.fatal`.
        """
        slimignore_files = (
            path.join(source_directory, '.slimignore'),
            path.join(slim_configuration.user_config, 'ignore'),
            path.join(slim_configuration.system_config, 'ignore'),
        )
        for slimignore_file in slimignore_files:
            try:
                with io.open(slimignore_file, encoding='utf-8') as istream:
                    for line in istream:
                        line = line.lstrip()
                        if len(line) == 0 or line.startswith('#'):
                            continue  # blank- or comment-line
                        yield line
            except IOError as error:
                if error.errno != errno.ENOENT:  # anything other than "no such file" is an error
                    SlimLogger.fatal('Could not open ', encode_filename(slimignore_file), ': ', error)

    # endregion

    class Item(object):
        """Adapts a path string or a TarInfo-like object so it can be matched against compiled patterns."""

        def __init__(self, slim_ignore, item):
            """Compute the item's name relative to the source root.

            :param slim_ignore: The owning `SlimIgnore`; supplies the source root path and basename.
            :param item: A path string, or any object with a `name` attribute and an `isdir` method.
            """
            self._underlying_item = item
            item_type = type(item)

            if item_type is string:
                # NOTE: We break from gitignore semantics by using path.isdir. It returns True for symbolic links to
                # directories. We do this because we follow links when creating source packages; replacing symbolic
                # links with the content of the link target.
                self._isdir = path.isdir
                name = path.normcase(path.abspath(item))
                root = slim_ignore._source_pathname  # pylint: disable=protected-access
            else:
                # Expectation: We've got a TarInfo object. That said, we'll accept any object with name and isdir attrs
                self._isdir = item_type.isdir
                name = path.normcase(item.name)
                root = slim_ignore._source_basename  # pylint: disable=protected-access

            self._name = SlimIgnore.Item._relative_name(root, name)

        @staticmethod
        def _relative_name(root, name):
            """Return the part of `name` that follows its common prefix with `root`, or `None` if nothing follows.

            `None` is returned (rather than passed on to path.normcase, which rejects it on Python 3) when the
            common prefix covers all of `name`; `match` treats that as the source root itself.
            """
            # NOTE(review): path.commonprefix compares character-by-character, not by path component, so a sibling
            # whose name merely starts with the root's name shares a longer prefix -- confirm callers never pass one.
            common_prefix = path.commonprefix([root, name])
            if len(common_prefix) == len(name):
                return None
            return name[len(common_prefix) + 1:]  # + 1 skips the path separator that follows the prefix

        def __str__(self):
            # The source root has no relative name; return an empty string because __str__ must not return None
            return '' if self._name is None else self._name

        def match(self, pattern):
            """Return True if this item matches `pattern`, a regex built by `SlimIgnore._compile_patterns`.

            An alternative whose group name starts with 'd' only counts when the underlying item is a directory.
            """
            if self._name is None:
                # Expectation: this is the source root directory name (see Item.__init__)
                return False

            match = pattern.match(self._name)

            if match is None:
                return False

            if match.lastgroup[0] == 'd':
                # Directory-only pattern: the item must actually be a directory
                return bool(self._isdir(self._underlying_item))

            return True