You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

224 lines
8.5 KiB

#!/usr/bin/env python
# coding=utf-8
#
# Copyright © Splunk, Inc. All Rights Reserved.
from __future__ import absolute_import, division, print_function, unicode_literals
from builtins import object
from collections import Iterable, OrderedDict # pylint: disable=no-name-in-module
from os import path
import fnmatch
import io
import re
from . _configuration import slim_configuration
from . _encoders import encode_filename
from . logger import *
from . internal import string
# Names exported by a star-import of this module.
__all__ = ['SlimIgnore']
class SlimIgnore(object):
    """Filter items against gitignore-style exclude/include patterns.

    Patterns are read from ``<source_directory>/.slimignore`` and from the ``ignore`` files found under
    ``slim_configuration.user_config`` and ``slim_configuration.system_config``. Lines starting with ``!`` are
    include patterns; every other line is an exclude pattern. An item is dropped when it matches an exclude
    pattern and, if any include patterns exist, does not also match one of them.
    """

    def __init__(self, app_name, source_directory):
        """Read and compile the ignore patterns and build the filter function.

        :param app_name: used to derive the source root path and its base name, against which item names
            are made relative.
        :param source_directory: directory searched for a ``.slimignore`` file.
        """
        excludes, includes = SlimIgnore._parse_patterns(source_directory)
        excludes = SlimIgnore._compile_patterns(excludes)

        if len(includes) == 0:
            def _filter(item):
                value = None if SlimIgnore.Item(self, item).match(excludes) is True else item
                return value
        else:
            includes = SlimIgnore._compile_patterns(includes)

            def _filter(item):
                ignore_item = SlimIgnore.Item(self, item)
                if ignore_item.match(excludes) is False:
                    return item
                # An include pattern rescues an item that an exclude pattern would otherwise drop
                if ignore_item.match(includes) is True:
                    return item
                return None

        # NOTE(review): the source root is derived from app_name, not from source_directory -- confirm that
        # callers pass a value for which path.abspath(app_name) is the directory the items live under.
        self._source_pathname = path.normcase(path.abspath(app_name))
        self._source_basename = path.basename(self._source_pathname)
        self._filter = _filter

    # region Methods

    def filter(self, item):
        """Return `item` if it is not ignored; otherwise return None."""
        return self._filter(item)

    def ifilter(self, iterable):
        """Yield the items of `iterable` that are not ignored."""
        assert isinstance(iterable, Iterable)
        for item in iterable:
            if self._filter(item) is None:
                continue
            yield item

    # endregion

    # region Protected

    _backslash_quoted_characters = re.compile(r'\\(.)')
    _trailing_spaces = re.compile(r'\\ +\n?$')
    _wildcard = re.compile(r'\.\*')
    # Regex fragment matching one literal path separator
    _path_sep = '\\' + path.sep
    # The same fragment with its backslashes doubled so that it survives use inside an re.sub replacement template
    _escaped_path_sep = _path_sep.replace('\\', '\\\\')

    @staticmethod
    def _compile_patterns(ordered_dict):
        """Compile a mapping of regex pattern -> pattern type into a single alternation.

        Each alternative is wrapped in a named group whose name is the pattern type ('f' or 'd') followed by
        the pattern's position, so that `Item.match` can recover the type from `match.lastgroup`.
        """
        pattern = '|'.join('(?P<' + t + string(n) + '>' + p + ')' for n, (p, t) in enumerate(ordered_dict.items()))
        return re.compile(pattern, re.M | re.S | re.U)

    @staticmethod
    def _strip_regex(rgx):
        """Strip the flag/anchor decoration that `fnmatch.translate` wraps around the translated pattern.

        Handles a trailing '(?ms)' flag group, a trailing end-of-string anchor, and an enclosing '(?s:' ... ')'
        group, so the bare expression can be embedded in a larger one.
        """
        # (?m) - multiline mode
        # (?s) - 'dot all' mode: '.' matches every character, including a newline
        # \Z   - match only at the end of the string (newer Python versions spell this anchor \z)
        if rgx.endswith('(?ms)'):
            rgx = rgx[:-5]
        if rgx.endswith(('\\Z', '\\z')):
            rgx = rgx[:-2]
        if rgx.startswith('(?s:') and rgx.endswith(')'):
            rgx = rgx[4:-1]
        return rgx

    @staticmethod
    def _parse_patterns(source_directory):
        """Read every ignore file that applies to `source_directory` and translate its patterns.

        :return: `(excludes, includes)`: two OrderedDict objects mapping regex pattern text to a pattern
            type, 'f' (matches files and directories) or 'd' (matches directories only).
        """
        return SlimIgnore._parse_lines(SlimIgnore._read_lines(source_directory))

    @staticmethod
    def _parse_lines(lines):  # pylint: disable=too-many-locals, too-many-branches
        """Translate an iterable of ignore-file lines into regex patterns.

        :param lines: non-blank, non-comment lines with leading whitespace already removed.
        :return: `(excludes, includes)` as described by `_parse_patterns`. An exclude pattern for
            '.slimignore' itself is always appended.
        """
        path_sep = SlimIgnore._path_sep
        any_prefix = r'(?:.*' + path_sep + ')?'  # optional run of leading directories
        within_segment = r'[^' + SlimIgnore._escaped_path_sep + ']*'  # a wildcard confined to one path segment
        translate = fnmatch.translate
        excludes = OrderedDict()
        includes = OrderedDict()

        for line in lines:
            # A backslash-quoted run of trailing spaces collapses to one space; unquoted trailing space is dropped
            line, subn = SlimIgnore._trailing_spaces.subn(r' ', line)
            if subn == 0:
                line = line.rstrip()
            line = SlimIgnore._backslash_quoted_characters.sub(r'\1', line)

            if not line.startswith('!'):
                target = excludes
            elif len(line) > 1:
                target = includes
                line = line[1:]
            else:
                continue  # blank line

            if line.endswith('/**'):
                line = line[:-2]  # 'foo/**' is treated like the directory pattern 'foo/'

            if not line.endswith('/'):
                pattern_type = 'f'
            elif len(line) > 1:
                pattern_type = 'd'
                line = line[:-1]
            else:
                continue  # blank line

            names = path.normcase(path.normpath(line)).split(path.sep)
            first_name = names[0]

            if len(names) == 1:
                # Pattern matches an unanchored file or directory node name (e.g., 'foo' or 'ba*r')
                # NOTE(review): unlike the multi-segment branch, no end-of-string anchor is appended here, so
                # under re.match the pattern also matches longer paths that merely start with the name.
                # Left unchanged -- confirm this is intended.
                pattern = any_prefix + SlimIgnore._strip_regex(translate(first_name))
            else:
                # Pattern matches an anchored or unanchored path segment
                # anchored patterns start with '/' (e.g., '/foo*/bar')
                # unanchored patterns start with '**/' or a node name (e.g., '**/foo/ba*r' or 'foo/ba*r')
                start = 1 if len(first_name) == 0 or first_name == '**' else 0
                for i, name in enumerate(names[start:], start):
                    # Test the raw segment: once translated, '**' is no longer recognizable and would be
                    # narrowed to a single-segment wildcard like any other '*'
                    if name == '**':
                        names[i] = '.*'
                        continue
                    translated = SlimIgnore._strip_regex(translate(name))
                    names[i] = SlimIgnore._wildcard.sub(within_segment, translated)
                pattern = path_sep.join(names[start:]) + r'\Z'
                if first_name == '**':  # TODO: Should the comparison not be len(first_name) == 0 or first_name == '**'
                    pattern = any_prefix + pattern

            try:
                current_pattern_type = target[pattern]
            except KeyError:
                target[pattern] = pattern_type
            else:
                if pattern_type == 'f' and current_pattern_type == 'd':
                    del target[pattern]  # remove the currently-defined pattern from our targeted dictionary
                    target[pattern] = pattern_type  # add the new pattern to the end of our targeted dictionary

        excludes[r'\.slimignore\Z'] = 'f'
        # excludes[path_sep + r'\.slimignore\Z'] = 'f'
        return excludes, includes

    @staticmethod
    def _read_lines(source_directory):
        """Yield the pattern lines of each ignore file, skipping blank lines and '#' comment lines.

        The files are read in this order: the source directory's '.slimignore', the user 'ignore' file, then
        the system 'ignore' file. A file that does not exist is skipped; any other I/O error is reported
        through `SlimLogger.fatal`.
        """
        import errno  # local import: only needed to name the 'no such file' error code

        for slimignore_file in (
                path.join(source_directory, '.slimignore'),
                path.join(slim_configuration.user_config, 'ignore'),
                path.join(slim_configuration.system_config, 'ignore')):
            try:
                with io.open(slimignore_file, encoding='utf-8') as istream:
                    for line in istream:
                        line = line.lstrip()
                        if len(line) == 0 or line.startswith('#'):
                            continue  # blank- or comment-line
                        yield line
            except IOError as error:
                if error.errno != errno.ENOENT:  # anything other than 'no such file'
                    SlimLogger.fatal('Could not open ', encode_filename(slimignore_file), ': ', error)

    # endregion

    class Item(object):
        """Wraps a candidate item and exposes its name relative to the source root for pattern matching."""

        def __init__(self, slim_ignore, item):
            """Compute the item's name relative to the source root.

            :param slim_ignore: the owning `SlimIgnore`; supplies the source root path and base name.
            :param item: either a path string or an object with `name` and `isdir` attributes.

            The relative name is None when the item's name is no longer than its common prefix with the
            source root, which is expected to mean the item is the source root itself.
            """
            self._underlying_item = item
            item_type = type(item)

            if item_type is string:
                # NOTE: We break from gitignore semantics by using path.isdir. It returns True for symbolic links to
                # directories. We do this because we follow links when creating source packages; replacing symbolic
                # links with the content of the link target.
                self._isdir = path.isdir
                name = path.normcase(path.abspath(item))
                source_pathname = slim_ignore._source_pathname  # pylint: disable=protected-access
                common_prefix = path.commonprefix([source_pathname, name])
                # `name` is already normalized; passing the result through path.normcase again would raise
                # TypeError for the None case on Python 3
                self._name = None if len(common_prefix) == len(name) else name[len(common_prefix) + 1:]
            else:
                # Expectation: We've got a TarInfo object. That said, we'll accept any object with name and isdir attrs
                self._isdir = item_type.isdir
                name = path.normcase(item.name)
                source_basename = slim_ignore._source_basename  # pylint: disable=protected-access
                common_prefix = path.commonprefix([source_basename, name])
                self._name = None if len(common_prefix) == len(name) else name[len(common_prefix) + 1:]

        def __str__(self):
            return self._name

        def match(self, pattern):
            """Return True if this item matches the compiled `pattern`; otherwise False.

            A match produced by a directory-only pattern (group name starting with 'd') counts only when the
            underlying item is a directory.
            """
            if self._name is None:
                # Expectation: this is the source root directory name (see Item.__init__)
                return False
            match = pattern.match(self._name)
            if match is None:
                return False
            group = match.lastgroup
            if group[0] == 'd':
                if not self._isdir(self._underlying_item):
                    return False
            return True