You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
224 lines
8.5 KiB
224 lines
8.5 KiB
#!/usr/bin/env python
|
|
# coding=utf-8
|
|
#
|
|
# Copyright © Splunk, Inc. All Rights Reserved.
|
|
|
|
from __future__ import absolute_import, division, print_function, unicode_literals

from builtins import object
from collections import OrderedDict
from os import path

import errno
import fnmatch
import io
import re

try:
    # The ABC aliases in `collections` were removed in Python 3.10; `collections.abc` is their home on Python 3
    from collections.abc import Iterable
except ImportError:  # Python 2
    from collections import Iterable  # pylint: disable=no-name-in-module

from . _configuration import slim_configuration
from . _encoders import encode_filename
from . logger import *
from . internal import string
|
|
|
|
|
|
__all__ = ['SlimIgnore']
|
|
|
|
|
|
class SlimIgnore(object):
    """Filters items against gitignore-style exclude/include patterns.

    Patterns are read from `<source_directory>/.slimignore` and from the `ignore` files located in the user and
    system configuration directories (see `_read_lines`). A pattern line that starts with '!' is an include
    pattern; every other pattern line is an exclude pattern. A pattern written with a trailing '/' is a directory
    pattern (type 'd') and only matches items for which the item's `isdir` test is true; all other patterns are
    of type 'f' and match any item. `.slimignore` at the source root is always excluded.

    """

    def __init__(self, app_name, source_directory):
        """Parse and compile the ignore patterns that apply to `source_directory`.

        :param app_name: Path of the source root. Its absolute, case-normalized form (and the basename of that
            form) is the prefix stripped from item names before they are matched.
        :param source_directory: Directory that is searched for a `.slimignore` file.

        """
        excludes, includes = SlimIgnore._parse_patterns(source_directory)
        excludes = SlimIgnore._compile_patterns(excludes)  # never empty: '.slimignore' is always excluded

        if len(includes) == 0:
            def _filter(item):
                # Without include patterns an item is kept unless an exclude pattern matches it
                return None if SlimIgnore.Item(self, item).match(excludes) else item
        else:
            includes = SlimIgnore._compile_patterns(includes)

            def _filter(item):
                # An excluded item is still kept when an include pattern matches it
                ignore_item = SlimIgnore.Item(self, item)
                if not ignore_item.match(excludes) or ignore_item.match(includes):
                    return item
                return None

        self._source_pathname = path.normcase(path.abspath(app_name))
        self._source_basename = path.basename(self._source_pathname)
        self._filter = _filter

    # region Methods

    def filter(self, item):
        """Return `item` if it should be kept or `None` if it is ignored.

        :param item: Either a path string or an object with a `name` attribute and an `isdir` method on its type
            (a TarInfo object is what `Item` expects).

        """
        return self._filter(item)

    def ifilter(self, iterable):
        """Generate the items of `iterable` that are not ignored, preserving their order.

        Because this is a generator, the type assertion runs when iteration starts, not when `ifilter` is called.

        """
        assert isinstance(iterable, Iterable)
        for item in iterable:
            if self._filter(item) is not None:
                yield item

    # endregion

    # region Protected

    # A backslash followed by any character; used to unquote backslash-quoted characters
    _backslash_quoted_characters = re.compile(r'\\(.)')

    # A backslash followed by one or more spaces (and an optional newline) at the end of a line
    _trailing_spaces = re.compile(r'\\ +\n?$')

    # The regular expression text '.*' as it appears in the output of fnmatch.translate
    _wildcard = re.compile(r'\.\*')

    # Regular expression text that matches one literal path separator
    _path_sep = '\\' + path.sep

    # _path_sep with its backslashes doubled so that it can be embedded in an re.sub replacement template
    _escaped_path_sep = _path_sep.replace('\\', '\\\\')

    @staticmethod
    def _compile_patterns(ordered_dict):
        """Compile a mapping of regex text to pattern type ('f' or 'd') into a single alternation.

        Each alternative is wrapped in a named group called `<type><index>` (e.g., 'f0', 'd1') so that
        `Item.match` can recover the type of the alternative that matched from `match.lastgroup`.

        :param ordered_dict: Mapping of regular expression text to pattern type, in priority order.
        :return: Compiled regular expression object.

        """
        pattern = '|'.join(
            '(?P<{0}{1}>{2})'.format(pattern_type, index, regex)
            for index, (regex, pattern_type) in enumerate(ordered_dict.items()))
        return re.compile(pattern, re.M | re.S | re.U)

    @staticmethod
    def _strip_regex(rgx):
        """Reduce the output of `fnmatch.translate` to the bare expression for the translated name.

        The decoration that `fnmatch.translate` adds depends on the Python version. All of these are removed:

        * a trailing global-flags group: `...\\Z(?ms)`
        * a trailing end-of-string anchor, spelled either `\\Z` or `\\z`
        * an enclosing scoped-flag group: `(?s:...)`

        :param rgx: Regular expression text produced by `fnmatch.translate`.
        :return: `rgx` without the decoration listed above.

        """
        if rgx.endswith('(?ms)'):
            rgx = rgx[:-5]
        if rgx.endswith(('\\Z', '\\z')):
            rgx = rgx[:-2]
        if rgx.startswith('(?s:') and rgx.endswith(')'):
            rgx = rgx[4:-1]
        return rgx

    @staticmethod
    def _parse_line(line):
        """Translate a single ignore-file line to regular expression text.

        :param line: A non-blank, non-comment line as produced by `_read_lines`.
        :return: `None` if the line holds no pattern (it is just '!' or '/'); otherwise a tuple
            `(is_include, pattern, pattern_type)` where `is_include` is `True` for a line that starts with '!',
            `pattern` is regular expression text and `pattern_type` is 'd' for a line that ends with '/' and 'f'
            for any other line.

        """
        # Trailing white space is dropped unless it is quoted with a backslash, in which case one space is kept
        line, subn = SlimIgnore._trailing_spaces.subn(r' ', line)
        if subn == 0:
            line = line.rstrip()

        line = SlimIgnore._backslash_quoted_characters.sub(r'\1', line)

        is_include = line.startswith('!')
        if is_include:
            if len(line) == 1:
                return None  # blank line
            line = line[1:]

        if line.endswith('/**'):
            line = line[:-2]  # 'foo/**' is handled as the directory pattern 'foo/'

        if not line.endswith('/'):
            pattern_type = 'f'
        elif len(line) > 1:
            pattern_type = 'd'
            line = line[:-1]
        else:
            return None  # blank line

        path_sep = SlimIgnore._path_sep
        strip_regex = SlimIgnore._strip_regex
        translate = fnmatch.translate

        names = path.normcase(path.normpath(line)).split(path.sep)
        first_name = names[0]

        if len(names) == 1:
            # Pattern matches an unanchored file or directory node name (e.g., 'foo' or 'ba*r')
            # NOTE(review): unlike the multi-segment branch no r'\Z' is appended here and patterns are applied with
            # re.match, so 'foo' also matches names that merely start with 'foo' (e.g., 'foobar'). Confirm that
            # this is intended before relying on it.
            return is_include, r'(?:.*' + path_sep + ')?' + strip_regex(translate(first_name)), pattern_type

        # Pattern matches a path segment. A leading '/' produces an empty first name and a leading '**/' produces
        # a first name of '**'; in both cases the first name is dropped from the pattern.
        start = 1 if len(first_name) == 0 or first_name == '**' else 0
        single_segment_wildcard = r'[^' + SlimIgnore._escaped_path_sep + ']*'
        segments = []

        for name in names[start:]:
            if name == '**':
                # This test must be made on the raw name: once translated by fnmatch, a name never equals '**'
                segments.append('.*')  # crosses path separators
            else:
                # Within a single name a wildcard must not cross a path separator
                segments.append(SlimIgnore._wildcard.sub(single_segment_wildcard, strip_regex(translate(name))))

        pattern = path_sep.join(segments) + r'\Z'

        if first_name == '**':  # TODO: Should the comparison not be len(first_name) == 0 or first_name == '**'
            pattern = r'(?:.*' + path_sep + ')?' + pattern  # match at any depth

        return is_include, pattern, pattern_type

    @staticmethod
    def _parse_patterns(source_directory):
        """Read all ignore files and translate their lines to regular expression text.

        :param source_directory: Directory that is searched for a `.slimignore` file.
        :return: Tuple `(excludes, includes)` of ordered mappings from regular expression text to pattern type
            ('f' or 'd'), ready for `_compile_patterns`. `excludes` always contains an entry for `.slimignore`.

        """
        excludes = OrderedDict()
        includes = OrderedDict()

        for line in SlimIgnore._read_lines(source_directory):
            parsed = SlimIgnore._parse_line(line)
            if parsed is None:
                continue

            is_include, pattern, pattern_type = parsed
            target = includes if is_include else excludes
            current_pattern_type = target.get(pattern)

            if current_pattern_type is None:
                target[pattern] = pattern_type
            elif pattern_type == 'f' and current_pattern_type == 'd':
                # The same expression was first seen as a directory-only pattern; the 'f' pattern matches any item
                # so it replaces the 'd' entry and moves to the end of the targeted dictionary
                del target[pattern]
                target[pattern] = pattern_type

        excludes[r'\.slimignore\Z'] = 'f'
        return excludes, includes

    @staticmethod
    def _read_lines(source_directory):
        """Generate the pattern lines of each ignore file that exists.

        The files are read in this order: `<source_directory>/.slimignore`, then `ignore` in the user configuration
        directory, then `ignore` in the system configuration directory. Leading white space is stripped from each
        line and blank lines and lines starting with '#' are skipped. A file that does not exist is silently
        skipped; any other IOError is reported through `SlimLogger.fatal`.

        """
        slimignore_files = (
            path.join(source_directory, '.slimignore'),
            path.join(slim_configuration.user_config, 'ignore'),
            path.join(slim_configuration.system_config, 'ignore'))

        for slimignore_file in slimignore_files:
            try:
                with io.open(slimignore_file, encoding='utf-8') as istream:
                    for line in istream:
                        line = line.lstrip()
                        if len(line) == 0 or line.startswith('#'):
                            continue  # blank- or comment-line
                        yield line
            except IOError as error:
                if error.errno != errno.ENOENT:  # anything other than "no such file"
                    SlimLogger.fatal('Could not open ', encode_filename(slimignore_file), ': ', error)

    # endregion

    class Item(object):
        """Adapts a path string or a TarInfo-like object for matching against compiled ignore patterns."""

        def __init__(self, slim_ignore, item):
            """Compute the name of `item` relative to the source root of `slim_ignore`.

            :param slim_ignore: The `SlimIgnore` instance that supplies the source root names.
            :param item: A path string or an object with a `name` attribute and an `isdir` method on its type.

            """
            self._underlying_item = item
            item_type = type(item)

            if item_type is string:
                # NOTE: We break from gitignore semantics by using path.isdir. It returns True for symbolic links to
                # directories. We do this because we follow links when creating source packages; replacing symbolic
                # links with the content of the link target.
                self._isdir = path.isdir
                name = path.normcase(path.abspath(item))
                source_name = slim_ignore._source_pathname  # pylint: disable=protected-access
            else:
                # Expectation: We've got a TarInfo object. That said, we'll accept any object with name and isdir attrs
                self._isdir = item_type.isdir
                name = path.normcase(item.name)
                source_name = slim_ignore._source_basename  # pylint: disable=protected-access

            common_prefix = path.commonprefix([source_name, name])

            # The name is None when the item is the source root itself; otherwise it is what follows the common
            # prefix and one separator character. The name is already case-normalized at this point, so no further
            # path.normcase call is made (path.normcase(None) raises TypeError on Python 3).
            self._name = None if len(common_prefix) == len(name) else name[len(common_prefix) + 1:]

        def __str__(self):
            # __str__ must return a string, so the source root (whose name is None) is rendered as ''
            return '' if self._name is None else self._name

        def match(self, pattern):
            """Return `True` if the compiled `pattern` (see `SlimIgnore._compile_patterns`) matches this item.

            A match produced by a directory pattern (group name starting with 'd') only counts when the underlying
            item is a directory.

            """
            if self._name is None:
                # Expectation: this is the source root directory name (see Item.__init__)
                return False

            match = pattern.match(self._name)

            if match is None:
                return False

            if match.lastgroup[0] == 'd' and not self._isdir(self._underlying_item):
                return False

            return True
|