# -*-  indent-tabs-mode:nil;  -*- 
# Copyright (C) 2005-2023 Splunk Inc. All Rights Reserved. 

import datetime
import random

from vim25.mo import ManagedObjectReference
from vim25 import utils
from vim25.connection import Connection

import vim25.inventory as inventory
from vim25.metrics_list_matcher import MetricsListMatcher
from vim25.metrics_cache import MetricsCache

from hydra.six.moves import range

from collections import defaultdict
import math
import re

# Column separator used when rendering perf data tables
SEP = "\t"
# A table chunk is flushed (and its header repeated) once it holds more than this many data lines
HEADER_LIM = 100
# Short per-entity-type prefixes used to build config keys such as "<abbrev>_metric_allowlist"
ENTITY_ABBREV = {
	'HostSystem': 'host',
	'VirtualMachine': 'vm',
	'ResourcePool': 'rp',
	'ClusterComputeResource': 'cluster',
}
# available performance metrics will be queried from every entity of these types
ALL_METRICS_FROM_ALL_ENTITIES_TYPES = ["ClusterComputeResource", "ResourcePool"]

class PerfCollector(Connection):
	"""Class responsible for retrieval and output of performance metrics.

	Instantiated with:
	  config - dict of collection parameters
	  logger - logger instance

	Caches:
	  performance metrics (from queryAvailablePerfMetric)
	  performance counters (from PerfCounterInfo)
	  refresh rates (from queryPerfProviderSummary), keyed by entity type

	The counter and metric caches are dictionaries keyed by self.domain at the root level.
	"""
	def __init__(self, config, logger):
		self.config = config
		self.dbg_info = "[Performance Handler: {task}] [PerfCollector] ".format(task=config['perf_collection_type'])
		self.logger = logger
		self.logger.debug(self.dbg_info + "Instantiating perf collection class for %s (%s)" % (config['perf_collection_type'], str(config['perf_target_hosts'])))
		self._ref_rate_cache = {}
		self._counters_cache = {} # format: {self.domain: {'pcs_by_key': {}, 'pcs_fqname_by_key': {}}}
		self._metrics_cache = {}  # format: {self.domain: MetricsCache(200, 50)}
		self._entity_metric_allow_deny_list_cache = {}
		self._vc_saved_id = None
		# self.logger.debug(self.dbg_info + "Done initializing perf collector")
		# self._update_counters_cache()
		# self.logger.debug(self.dbg_info + "Done updating perf counter caches")


	def update_config(self, newconfig):
		"""
		Updates the config member variable.  Note that target-specific keys
		in the config are expected to be different, so they are is excluded from
		comparison.
		"""
		def config_same(c1, c2):
			exclude_keys = ['perf_target_hosts', 'username', 'target', 'target_type']
			for k in c1:
				if k in exclude_keys: continue
				if c1[k] != c2[k]: return False
			return True
		# If config is different, blow away the caches
		if not config_same(self.config, newconfig):
			self.logger.debug(self.dbg_info + "Found different config, blowing away caches")
			self._counters_cache = {} 
			self._metrics_cache = {} 
		self.config = newconfig
		

	def _update_counters_cache(self):
		"""Checks the current vc ID agains the saved vc_id and if we have changed vcs (or we
		haven't talked to one before) create (or re-create) dictionaries of performance counters 
		keyed by ID.  Returns the vcenter UUID."""
		def populate_counter_dicts():
			pcis = self.perfManager.getPerfCounter().PerfCounterInfo
			pcs_by_key = {}
			pcs_fqname_by_key = {}
			for pc in pcis:
				pcs_by_key[pc.key] = pc
				pcs_fqname_by_key[pc.key] = self._get_fqname(pc)
			return {'pcs_by_key': pcs_by_key, 'pcs_fqname_by_key': pcs_fqname_by_key}
			
		if self.domain != self._vc_saved_id:
			self._vc_saved_id = self.domain
			if self.domain not in self._counters_cache:
				self._counters_cache[self.domain] = populate_counter_dicts()
				self.logger.debug(self.dbg_info + "Populated counters cache for domain %s", self.domain)
			self.pcs_by_key = self._counters_cache[self.domain]['pcs_by_key']
			self.pcs_fqname_by_key = self._counters_cache[self.domain]['pcs_fqname_by_key']
		return self._vc_saved_id

		
	def _update_entity_lists(self):
		"""Generates up-to-date entity lists by querying inventory.
		vc_id: Virtual center's UUID, needed as part of the inventory hash

		Returns: dict of entities keyed by entity_type (e.g.
				 'HostSystem'/'VirtualMachine' if this collector does host/vm perf, or 
				 'ResourcePool'/'ClusterComputeResource'

		Entities are represented via MORs; if a MOR is needed from MOID (e.g. 
		to construct a host MOR from the config parameter), one is obtained by
		instantiating vim25.mo.ManagedObjectReference(moid, type)"""
		def get_inventory_by_entity(e):
			if   e == 'HostSystem': return [host_mor] # defined later in the outer function
			elif e == 'VirtualMachine': return self._find_vms_for_host(host_moid)
			elif e == 'ResourcePool': return self._find_rps()
			elif e == 'ClusterComputeResource': return self._find_clusters()
			else: return []
		def update_entities_maybe(e):
			if not self._is_entity_denied(e): 
				entities[e].extend(get_inventory_by_entity(e))
			else:
				self.logger.debug(self.dbg_info + "Entity %s denied for collection" % e)
			
		entities = defaultdict(list)
		if self.config['perf_collection_type'] != "otherperf":
			for host_moid in self.config['perf_target_hosts']:
				host_mor = ManagedObjectReference(value=host_moid, _type="HostSystem")
				update_entities_maybe('HostSystem')
				update_entities_maybe('VirtualMachine')
			self.logger.debug(self.dbg_info + "Updated entity lists: number of hosts: " +
							  "%d; number of VMs: %d" % (len(entities['HostSystem']), len(entities['VirtualMachine'])))
		else:
			update_entities_maybe('ResourcePool')
			update_entities_maybe('ClusterComputeResource')
			self.logger.debug(self.dbg_info + "Updated entity lists: number of rps: " + 
							  "%d; number of clusters: %d" % (len(entities['ResourcePool']), len(entities['ClusterComputeResource'])))
			
		return entities

	def _prepare_metrics_lists(self, entities, vc_id):
		"""Prepares and caches metric lists for this collector's entities.

		Metric lists are created to conform to the allowlist/denylist specifications
		in the config.  Caching is done to ensure that for a given inventory set, 
		the metrics are only retrieved once.

		Returns: metrics as a dict of lists keyed by entity type."""
		inventory_hash = hash(frozenset([vc_id] + [hash(frozenset([y.value for y in vals])) for vals in entities.values()]))
		metrics = {}
		entity_metric_allow_deny_list = {'cluster_metric_allowlist': self.config.get("cluster_metric_allowlist", []),
					   'cluster_metric_denylist' : self.config.get("cluster_metric_denylist", []),
					   'rp_metric_allowlist' : self.config.get("rp_metric_allowlist", []),
					   'rp_metric_denylist' : self.config.get("rp_metric_denylist", [])}

		if self.domain not in self._metrics_cache:
			self._metrics_cache[self.domain] = MetricsCache(2000, 50)
		if self.domain not in self._entity_metric_allow_deny_list_cache:
			self._entity_metric_allow_deny_list_cache[self.domain] = {}
		cache = self._metrics_cache[self.domain]
		if inventory_hash in cache and (entity_metric_allow_deny_list == self._entity_metric_allow_deny_list_cache[self.domain]):
			metrics = cache[inventory_hash]
			self.logger.debug(self.dbg_info + "Got a list of metrics from cache")
		else:
			self.logger.debug(self.dbg_info + "Getting a NEW list of metrics")
			self._entity_metric_allow_deny_list_cache[self.domain] = entity_metric_allow_deny_list
			for entity_type in entities:
				if not entity_type in metrics:
					metrics[entity_type] = self.get_all_metrics(entities[entity_type])
			cache[inventory_hash] = metrics
		return metrics

	def _is_entity_denied(self, e):
		return any(re.search(x, e) is not None for x in self.config['perf_entity_denylist'])
		
	def _get_fqname(self, pc):
		return "_".join(['p', pc.rollupType, pc.groupInfo.key, pc.nameInfo.key, pc.unitInfo.key])
		
	def _aggregate_only(self, entity_type):
		"""Return True when neither an instance denylist nor an instance allowlist
		is configured for the entity type, i.e. only aggregated metrics are wanted."""
		prefix = ENTITY_ABBREV[entity_type]
		if self.config[prefix + '_instance_denylist']:
			return False
		return not self.config[prefix + '_instance_allowlist']


	def _query_refresh_rate(self, entity):
		pps = self.perfManager.queryPerfProviderSummary(entity)
		return pps.refreshRate
		
	def _get_ref_rate_for_entity(self, entity):
		"""Gets the refresh rate for the metrics.  
		
		This value is assumed to be fixed for a given entity type on a given
		collection run.  For instance, if collecting ResourcePool data from
		managed hosts the 'current'/20-sec refresh rate is not available and we have
		to use the 300-second summary roll-up.  However, ResourcePools collected
		from unmanaged hosts only have the 20-second data and NO 300-second summary.
		We never deal with managed and unmanaged hosts in the same collection run,
		so we just cache the highest available refresh rate when we first see 
		an entity of a given type and use that value for the duration of collection."""
		def get_ref_rate():
			pps = self.perfManager.queryPerfProviderSummary(entity)
			if pps.currentSupported:
				rr = pps.refreshRate
			elif pps.summarySupported:
				rr = min([x.samplingPeriod for x in self.perfManager.getHistoricalInterval().PerfInterval])
			else:
				raise Exception("Unable to determine perf collection rate")
			self._ref_rate_cache[entity._type] = rr
			return rr
				
		if entity._type in self._ref_rate_cache:
			return self._ref_rate_cache[entity._type]
		else:
			return get_ref_rate()

	def _query_perf(self, entities, pmids, start_time=None, end_time=None, max_samples=None):
		"""Construct PerfQuerySpec and invoke queryPerf vipython method on the performance manager object.
		
		entities (list of MORs)
		start/end_time are optional; they form a (start, end] half-closed interval
		Returns: list of PerfEntityMetricCSV
		
		Long lists of entities require several calls to queryPerf
		"""
		max_api_call = 0
		matching_metrics_limit = 64
		entities_len = len(entities)
		pmids_len = len(pmids)
		total_call = float(entities_len * pmids_len)
		if total_call < 64:
			NUM_CLUSTER_SINGLE_COLLECTION = 1
		else:
			max_api_call = total_call / matching_metrics_limit
			if not max_api_call == 0 :
				NUM_CLUSTER_SINGLE_COLLECTION = math.ceil(entities_len / max_api_call)
		if not entities or not pmids:
			self.logger.debug(self.dbg_info + "Skipping collection due to empty lists of entities and/or metrics")
			return []
		num_collections = math.ceil(len(entities) / float(NUM_CLUSTER_SINGLE_COLLECTION))
		chunk_size = int(math.ceil(len(entities) / num_collections))
		assert chunk_size >= 0 and chunk_size <= len(entities)
		res = []
		try:
			for i in range(int(num_collections)):
				# python is OK with slice indexes being longer than max list index
				chunk = entities[i * chunk_size : (i + 1) * chunk_size]
				qspecs = [Connection.vim25client.new('PerfQuerySpec', entity=x, metricId=pmids, format= self.config.get('perf_format_type', 'csv'), intervalId=self._get_ref_rate_for_entity(x), 
												startTime=start_time, endTime=end_time) for x in chunk]
				res.extend(self.perfManager.queryPerf(qspecs))
			self.logger.debug(self.dbg_info + "Collected data: collection_type={coll}, entity_type={type} first_entity={first_ent} "
							  "len_in={num_ent} len_out={len_res} start_time={s} "
							  "end_time={e}".format(coll=self.config['perf_collection_type'], type=entities[0]._type, first_ent=entities[0].value,
													num_ent=len(entities), len_res=len(res), s=start_time, e=end_time))
		except Exception as e:
			self.logger.error("Max allowed metrics size of 64 has been exceeded for ClusterComputeResource.")
			raise
		return res

	def _find_vms_for_host(self, host):
		"""Constructs a list of powered-on VMs for a host; returns a list of VM MORs.

		host - value compared against the `.value` of each VM's second property
			(callers in this class pass the host moid)

		The hierarchy collector is destroyed even if reading the results fails."""
		# Get vm list
		hierarchy_collector = inventory.CreateHierarchyCollector(targetConfigObject='PerfInventory', oneTime=True)[1]
		vms_list = []
		try:
			gen_collect_propex = hierarchy_collector.collectPropertiesEx(hierarchy_collector.fSpecList)
			for vms in gen_collect_propex:
				# stop at the first None batch
				if vms is None:
					break
				for x in vms:
					# NOTE(review): propSet[1] is presumably the VM's host reference and
					# propSet[2] its power state -- confirm against the 'PerfInventory' spec
					host_ref = x.propSet[1].val
					if hasattr(host_ref, "value") and host_ref.value == host:
						power_state = x.propSet[2]
						if hasattr(power_state, "val") and power_state.val == "poweredOn":
							vms_list.append(x.obj)
			self.logger.debug("Powered on VMs list=%s host=%s" , vms_list, host)
		finally:
			# Always release the collector, also when iteration raised
			inventory.DestroyHierarchyCollector(hierarchy_collector)
		return vms_list

	def _find_rps(self):
		"""Returns the list of MORs reported by a one-time 'PerfResourcePoolList'
		hierarchy collector.

		The hierarchy collector is destroyed even if reading the results fails."""
		hierarchy_collector = inventory.CreateHierarchyCollector(targetConfigObject='PerfResourcePoolList', oneTime=True)[1]
		rps_list = []
		try:
			gen_collect_propex = hierarchy_collector.collectPropertiesEx(hierarchy_collector.fSpecList)
			for rps in gen_collect_propex:
				# stop at the first None batch
				if rps is None:
					break
				rps_list.extend(x.obj for x in rps)
		finally:
			# Always release the collector, also when iteration raised
			inventory.DestroyHierarchyCollector(hierarchy_collector)
		return rps_list
	
	def _find_clusters(self):
		"""Returns the list of MORs reported by a one-time
		'PerfClusterComputeResourceList' hierarchy collector.

		The hierarchy collector is destroyed even if reading the results fails."""
		hierarchy_collector = inventory.CreateHierarchyCollector(targetConfigObject='PerfClusterComputeResourceList', oneTime=True)[1]
		ccrs_list = []
		try:
			gen_collect_propex = hierarchy_collector.collectPropertiesEx(hierarchy_collector.fSpecList)
			for ccrs in gen_collect_propex:
				# stop at the first None batch
				if ccrs is None:
					break
				ccrs_list.extend(x.obj for x in ccrs)
		finally:
			# Always release the collector, also when iteration raised
			inventory.DestroyHierarchyCollector(hierarchy_collector)
		return ccrs_list

	def _create_mo(self, moid, _type):
		"""Wrap moid/_type in a ManagedObjectReference and return the result of the
		vim25 client's createExactManagedObject for it."""
		mor = ManagedObjectReference(value=moid, _type=_type)
		return Connection.vim25client.createExactManagedObject(mor=mor)

	def _check_format_type(self, format_type):
		'''
			Check if performance performance type, raise exception is not correct.
			@param format_type: specified peformance type in str format
			
			@return: Exception, if it is not supported format, otherwise None
		'''
		if not format_type in ['csv', 'normal']:
			self.logger.error("[Performance Handler] Specify performance format is incorrect. Specify format type either csv or normal.")
			raise Exception("[Performance Handler] Specify performance format is incorrect. Specify format type either csv or normal.")
		
	def _process_timestamps(self, perfdata, format):
		'''
			Get list of timestamps value in %Y-%m-%dT%H:%M:%SZ format of perfdata
			
			@return list of timestamps in %Y-%m-%dT%H:%M:%SZ format
		'''
		if format == 'csv':
			return perfdata.sampleInfoCSV.split(',')[1::2]
		else:
			timestamps = []
			for sampleInfo in perfdata.sampleInfo:
				# same time format as csv type
				timestamps.append(sampleInfo.timestamp.strftime('%Y-%m-%dT%H:%M:%SZ'))
			return timestamps

	def get_all_metrics(self, entities, mode='regex'):
		"""Gets performance metrics for a list of entities provided a given refresh rate and 
		relevant allowlists and denylists.
		
		entities (list of MORs) - these have to be of the SAME TYPE (e.g. all VirtualMachine)
		
		Keyword args:
		mode: MetricsListMatcher mode parameter ["regex" | "verbatim"]

		Returns:
		list of PerfMetricId's

		Implementation notes:
		Empirically, at 20-second collection intervals, two entities of the same type share counterIds 
		this is NOT necessarily true for other collection intervals.   However, different entities
		of the same type will NOT necessarily share instance ids. Thus, available PerfMetricIds, in general, 
		differ from entity to entity.  When getting intance-level data, we then must either specify ALL 
		available instance Ids in the perfMetricIds OR leave the instance string as "*"; this is
		more efficient and is the current approach.
		"""
		def aggregate_instances_maybe(pmids, style): 
			inst_field = {"glob": "*", "aggregate": ""}
			if style not in inst_field:
				raise ValueError("Style must be in {0}".format(list(inst_field)))
			res = []
			aggregate_cids = set()
			for mid in pmids:
				if mid.counterId not in aggregate_cids:
					aggregate_cids.add(mid.counterId)
					mid.instance = inst_field[style]
					res.append(mid)
			return res
		
		m = []
		if not entities: return m
		self.logger.debug(self.dbg_info + "Querying and pruning available metrics")
		# If all the metrics are identical, we can build the list of metrics 
		# based on the first entity in the list only.  However, this assumption turns out to be wrong
		# in general, e.g. if we have empty clusters, they do not have all of the relevant metrics
		# (in particular, they are missing the clusterServices metrics)
		entity = entities[0]
		refresh_rate = self._get_ref_rate_for_entity(entity)
		all_metrics = []
		all_metrics_d = {}
		d_key = lambda m: str(m.counterId) + str(m.instance)
		if entity._type in ALL_METRICS_FROM_ALL_ENTITIES_TYPES:
			for e in entities:
				for m in self.perfManager.queryAvailablePerfMetric(e, intervalId=refresh_rate):
					if d_key(m) not in all_metrics_d: all_metrics_d[d_key(m)] = m
		else:
			# Check Perf Metric for 5 instance to avoid if some bad VM or Host has limited perfConunter
			# See SOLNVMW-3358 for more information
			for e in random.sample(entities, min(len(entities), 5)):
				for m in self.perfManager.queryAvailablePerfMetric(e, intervalId=refresh_rate):
					if d_key(m) not in all_metrics_d: all_metrics_d[d_key(m)] = m
		all_metrics = list(all_metrics_d.values())
		counter_matcher = MetricsListMatcher(self.config[ENTITY_ABBREV[entity._type] + '_metric_allowlist'],
											 self.config[ENTITY_ABBREV[entity._type] + '_metric_denylist'], mode)
		instance_matcher = MetricsListMatcher(self.config[ENTITY_ABBREV[entity._type] + '_instance_allowlist'],
											  self.config[ENTITY_ABBREV[entity._type] + '_instance_denylist'], mode)
		
		pmid_to_fqname = lambda pmid: self.pcs_fqname_by_key[pmid.counterId]
		# Filtering logic: first prune the list of metrics to conform to the allow/denylists
		# Then match against the instance allow/denylists as follows: 
		# - if a metric conforms to the instance allow/denylists, it is included in the collection;
		#   but we must uniquify by counterIds and set instance attributes to "*"
		# - if a metric DOES NOT conform to the instance allow/denylists, we only care about the aggregated
		#   metric for that particular counterId.  Thus, we want to get the "rejected" list for
		#   instance-level collection, set all imstance attributes to "" and uniquify by counterId attribute
		instance_level_metrics = counter_matcher.prune(all_metrics, pmid_to_fqname)
		if entity._type == "HostSystem": self.logger.debug(self.dbg_info + "Total number of metrics: {0}; pruned to all inst-level: {1}".format(len(all_metrics), len(instance_level_metrics)))
		if not self._aggregate_only(entity._type):
			instance_level_metrics, aggregated_metrics = instance_matcher.prune(instance_level_metrics, pmid_to_fqname, return_excluded=True)
			inst = aggregate_instances_maybe(instance_level_metrics, style='glob')
			agg = aggregate_instances_maybe(aggregated_metrics, style='aggregate')
			if entity._type == "HostSystem": self.logger.debug(self.dbg_info + "Final tally: %d inst-level and %d aggr" % (len(inst), len(agg)))
			self.logger.debug(self.dbg_info + "Done querying and pruning available metrics")
			return inst + agg
		else:
			if entity._type == "HostSystem": self.logger.debug(self.dbg_info + "Requesting all metrics as aggregations")
			self.logger.debug(self.dbg_info + "Done querying and pruning available metrics")
			return aggregate_instances_maybe(instance_level_metrics, style='aggregate')


	def get_metric_names(self, metrics):
		"""Get dict of lists of (full_metric_name, instance_name) tuples given the metrics[entity_type' dict"""
		pmid_to_fqname = lambda pmid: self.pcs_fqname_by_key[pmid.counterId]
		res = {}
		for entity_type in metrics:
				res[entity_type] = sorted([(pmid_to_fqname(x), x.instance) for x in metrics[entity_type]],
										  key=lambda x: [x[0].split('_')[i] for i in (1,2,0,3)])
		return res

	def group_perf_data(self, perfdata_array, format):
		"""Parses performance data and returns a nested dict which can be used
		for outputting data in table form. 
		
		Table keys are formed by the (timestamp, group, entity_type) tuples.
		For each table key, the entries include moid, counter instance, and a list of metrics;
		this information is stored in a nested dictionary."""
		res = {}
		for perfdata in perfdata_array: # entities
			if (format == 'csv' and perfdata.sampleInfoCSV is None) or (format == 'normal' and perfdata.sampleInfo is None): 
				self.logger.debug(self.dbg_info + "Missing sample info for entity={0} of type={1}, skipping record".format(
					perfdata.entity.value, perfdata.entity._type))
				continue
			timestamps = timestamps = self._process_timestamps(perfdata, format)
			for pmser in perfdata.value: # counters (group, instance, name)
				if format == 'csv':
					data_values = pmser.value.split(',')
				else:
					# normal format type has value in array format
					# Converting long to str format same as csv format
					data_values = [str(x) for x in pmser.value]
				pc = self.pcs_by_key[pmser.id.counterId]
				fqname = self.pcs_fqname_by_key[pmser.id.counterId]
				group = pc.groupInfo.key
				entity_name = perfdata.entity.value
				# instance value of None or "" means this is an aggregated metric
				inst = pmser.id.instance if pmser.id.instance else "aggregated"
				for tsi in range(len(timestamps)): # times
					# timestamps are returned as UTC: 2013-04-01T23:06:00Z
					ts = timestamps[tsi]
					key = (ts, group, perfdata.entity._type)
					# res[key] contains table data; 
					# res[key][0] is the table (stored as nested dict), res[key][1] holds a set() of headers
					if key not in res: res[key] = ({}, set())
					if fqname not in res[key][1]: res[key][1].add(fqname)
					if entity_name not in res[key][0]: res[key][0][entity_name] = {}
					if inst not in res[key][0][entity_name]: res[key][0][entity_name][inst] = {}
					res[key][0][entity_name][inst][fqname] = data_values[tsi]
		return res


	def output_results(self, grouped_data, output, host):
		"""Takes the output of group_perf_data and an output handler and 
		creates data tables."""
		
		def build_header(headers_list):
			return "%s"*7 % ("moid", SEP, "instance", SEP, "samp_int", SEP, SEP.join(headers_list))
				
		def build_line(entity, inst, samp_int, data, headers_list):
			def retrieve(name):
				# values labelled percent are actually in units of % * 100, so must convert
				div_by_100_tostr = lambda x: str(float(x) / 100) if x else ""
				val = data.get(name, "")
				return div_by_100_tostr(val) if (re.search("percent$", name) is not None) else val
				
			return "%s"*7 % (entity, SEP, inst, SEP, samp_int, SEP,
								   SEP.join([retrieve(fqname) for fqname in headers_list]))

		def build_metadata(ts, host, group, entity_type):
			#Handle the destination index for the data, note that we must handle empty strings and change them to None
			dest_index = self.config.get("perf_index", False)
			if not dest_index:
				dest_index = None
			
			return {'sourcetype': 'vmware:perf:{group}'.format(group=group),
					'source': 'VMPerf:{entity_type}'.format(entity_type=entity_type),
					'host': '{host}'.format(host=host),
					'time': utils.ConvertIsoUtcDate(ts),
					'index': dest_index}

		buf = []
		unbroken = False
		mi_metadata = {}
		for key in grouped_data:
			ts, g, entity_type = key
			samp_int = str(self._ref_rate_cache[entity_type]) # get sampling interval
			mi_metadata = build_metadata(ts, host, g, entity_type)
			headers_list = list(grouped_data[key][1])
			cur_header = build_header(headers_list)
			buf = [ cur_header ]
			linecount = 0
			for entity in grouped_data[key][0]:
				for inst in grouped_data[key][0][entity]:
					buf.append(build_line(entity, inst, samp_int, grouped_data[key][0][entity][inst], headers_list))
					linecount += 1
					if linecount > HEADER_LIM:
						output.sendData('\n'.join(buf), unbroken=unbroken, **mi_metadata)
						if unbroken: output.sendDoneKey(**mi_metadata)
						buf = [ cur_header ]
						linecount = 0
			output.sendData('\n'.join(buf), unbroken=unbroken, **mi_metadata)
			if unbroken: output.sendDoneKey(**mi_metadata)

						
	def run_collection(self, start_time, end_time):
		"""Updates the vc, entity lists, metrics lists; iterates over entities
		by type, calling queryPerf.  Returns a concatenated array of data 
		returned by queryPerf (array entries correspond to entities).

		start_time (datetime) - earliest data timestamp, argument to the queryPerf vipython call
		end_time (datetime) - latest data timestamp, argument to the queryPerf vipython call

		(start, end] form a half-closed interval
		"""
		
		if end_time - start_time < datetime.timedelta(seconds=1):
			start_time = end_time - datetime.timedelta(seconds=1)
			
		perf_data = []
		vc_id = self._update_counters_cache()
		entities = self._update_entity_lists()
		metrics = self._prepare_metrics_lists(entities, vc_id)
		if 'ResourcePool' in metrics and len(metrics['ResourcePool']) > 0: 
			self.logger.warn(self.dbg_info + "Resource pool collection turned on; may cause performance degradation")

		for entity_type in entities:
			self.logger.debug(self.dbg_info + "calling QueryPerf on %s", entity_type)
			perf_data += self._query_perf(entities[entity_type], metrics[entity_type], start_time=start_time, end_time=end_time)
		self.logger.debug(self.dbg_info + "Done grabbing data from vc")
		return perf_data
		
	def collect_performance(self, start_time, end_time, output_handler, host=None):
		"""Kicks off the data collection: updates inventory, metric lists (if need be), queries the VC for data, and formats results.

		start_time (datetime) - earliest data timestamp, argument to the queryPerf vipython call
		end_time (datetime) - latest data timestamp, argument to the queryPerf vipython call
		output_handler - received from the invoking handler, used to direct the output
		host - name of the target collection VC (used primarily to set host field in the output manager)

		(start, end] form a half-closed interval
		"""
		# get format type (default 'csv')
		format_type = self.config.get('perf_format_type', 'csv')
		self._check_format_type(format_type)
		
		if host is None: host = self._vc_saved_id
		self.output_results(self.group_perf_data(self.run_collection(start_time, end_time), format=format_type), output_handler, host)
		self.logger.debug(self.dbg_info + "Successfully collected perf data batch: type={0}".format(self.config['perf_collection_type']))