"""check_aris - ARIS infosystem checks."""

import re
import ldap3
from arcnagios import arcutils
from arcnagios.infosys import ldaputils
from arcnagios.infosys.ldaputils import ldap_entry_dn
from arcnagios.infosys.ldap_nagios_plugin import LDAPNagiosPlugin
from arcnagios.infosys.arcinfosys import NorduGridCluster, NorduGridQueue, \
        glue_class_map, GlueCE, GlueCluster, GlueSubCluster
from arcnagios.nagutils import OK, WARNING, CRITICAL, status_by_name, \
        ServiceUnknown
from arcnagios.utils import counted_noun

class Glue_Validator:
    """Base class for comparing attributes of a Glue object with ARC values.

    ``subject`` is the Glue object whose attributes are read, ``shortname``
    is the label used as prefix in log messages, and ``log`` is the logger
    receiving the comparison results.  ``error_count`` holds the number of
    hard (non-soft) mismatches seen so far.
    """

    def __init__(self, subject, shortname, log):
        self.subject = subject
        self.shortname = shortname
        self.error_count = 0
        self.log = log

    def compare_attribute(self, attrname, arcval, mapping = None, soft = False):
        """Compare the attribute ``attrname`` of the subject with ``arcval``.

        Both values are normalized first: an empty list becomes ``None``, a
        one-element list becomes its element, and a longer list becomes a
        set.  If ``mapping`` is given and the ARC value is truthy, the ARC
        value is translated through ``mapping`` before the comparison; ARC
        values without a translation are logged as a warning and skipped.

        A mismatch is logged as a warning if ``soft`` is true, otherwise it
        is logged as an error and ``error_count`` is incremented.
        """
        def normalize(x):
            # Work around missing single-valuedness restrictions.
            if not isinstance(x, list):
                return x
            if not x:
                return None
            if len(x) == 1:
                return x[0]
            return set(x)

        glueval = normalize(getattr(self.subject, attrname))
        arcval = normalize(arcval)
        if arcval and mapping is not None:
            # A multi-valued ARC attribute normalizes to a set, which is
            # unhashable and would make the membership test below raise
            # TypeError, so it is handled as a value without translation.
            if isinstance(arcval, set) or arcval not in mapping:
                # Some of values for nordugrid-queue-status like "inactive,
                # gridftpd is down" seems more informative than part of a
                # strict enumeration.
                self.log.warning('[%s].%s: Not comparing %r due to unknown '
                                 'mapping.', self.shortname, attrname, arcval)
                return
            arcval = mapping[arcval]

        if glueval == arcval:
            self.log.debug('Checked %s: %s', attrname, glueval)
        elif soft:
            self.log.warning('[%s].%s: %s != %s',
                             self.shortname, attrname, glueval, arcval)
        else:
            self.log.error('[%s].%s: %s != %s',
                           self.shortname, attrname, glueval, arcval)
            self.error_count += 1

class GlueCE_Validator(Glue_Validator):
    """Compares a GlueCE subject with a nordugrid cluster and queue."""

    def compare_with(self, cluster, queue):
        """Check the GlueCE attributes which have an ARC counterpart.

        Each comparison is delegated to ``compare_attribute``, which logs
        the outcome and counts hard mismatches.
        """
        check = self.compare_attribute

        # GlueCEUniqueID - comparison with
        #   '%s?queue=%s'%(cluster.contactstring, queue.name) is disabled.
        check('GlueCEName', queue.name)
        # GlueCEInformationServiceURL - not compared
        check('GlueCEInfoLRMSType', cluster.lrms_type)
        check('GlueCEInfoLRMSVersion', cluster.lrms_version)
        check('GlueCEInfoHostName', cluster.name)
        # GlueCEInfoGatekeeperPort - irrelevant for ARC
        # GlueCEInfoJobmanager - irrelevant for ARC
        contact = '%s?queue=%s'%(cluster.contactstring, queue.name)
        check('GlueCEInfoContactString', contact)
        # GlueCEInfoTotalCPUs (cluster.totalcpus) - deprecated in Glue
        # GlueCEInfoApplicationDir - not in ARC
        # GlueCEInfoDataDir - not in ARC
        # GlueCEDefaultSE - not in ARC
        status_map = {'active': 'Production', 'inactive': 'Closed'}
        check('GlueCEStateStatus', queue.status, mapping = status_map)

        # The running state is skipped since the LDAP entries may have been
        # fetched at different times.  This concerns:
        #   GlueCEStateRunningJobs (queue.running)
        #   GlueCEStateWaitingJobs
        #       (queue.gridqueued + queue.localqueued + queue.prelrmsqueued)
        #   GlueCEStateTotaljobs (waiting + queue.running)
        #   GlueCEStateFreeJobSlots
        #   GlueCEStateFreeCPUs

        # GlueCEStateEstimatedResponseTime - not in ARC
        # GlueCEStateWorstResponseTime - not in ARC
        # GlueCEPolicyMaxWallTime - not yet in ARC
        check('GlueCEPolicyMaxCPUTime', queue.maxcputime)
        # FIXME: Likely a translation issue.
        if queue.maxqueuable is not None:
            check('GlueCEPolicyMaxTotalJobs', queue.maxqueuable)
        check('GlueCEPolicyMaxRunningJobs', queue.maxrunning)
        # GlueCEPolicyPriority - not in ARC
        # GlueCEPolicyAssignedJobSlots - ambiguous mapping
        # FIXME: Likely a translation issue.
        if getattr(cluster, 'acl', None):
            check('GlueCEAccessControlBaseRule', cluster.acl)

class GlueCluster_Validator(Glue_Validator):
    """Compares a GlueCluster subject with a nordugrid cluster."""

    def compare_with(self, cluster):
        """Check the cluster id (hard) and the cluster alias (soft)."""
        self.compare_attribute('GlueClusterUniqueID', cluster.name)
        # A differing alias is only reported as a warning.
        self.compare_attribute('GlueClusterName', cluster.aliasname,
                               soft = True)
        # GlueClusterTmpDir - not in ARC
        # GlueClusterWNTmpDir - not in ARC

class GlueSubCluster_Validator(Glue_Validator):
    """Compares a GlueSubCluster subject with nordugrid entries."""

    def compare_with(self, cluster, corq):
        """Check the GlueSubCluster attributes which have an ARC counterpart.

        ``cluster`` supplies the node access and runtime environments.
        ``corq`` supplies name, CPU count, node memory and architecture;
        callers pass either a cluster or a queue object for it.
        """
        check = self.compare_attribute

        check('GlueSubClusterUniqueID', corq.name)
        check('GlueSubClusterName', corq.name)
        check('GlueSubClusterPhysicalCPUs', corq.totalcpus)
        # GlueSubClusterLogicalCPUs - not in ARC
        # GlueSubClusterLocation* - not in ARC
        # GlueHostOperatingSystem* - not in ARC
        # GlueHostProcessor* - not in ARC
        check('GlueHostRAMSize', corq.nodememory)
        # GlueHostVirtualSize - not in ARC
        access = cluster.nodeaccess
        check('GlueHostNetworkAdapterOutboundIP', 'outbound' in access)
        check('GlueHostNetworkAdapterInboundIP', 'inbound' in access)
        check('GlueHostArchitecturePlatformType', corq.architecture)
        # GlueHostBenchmarkS[IF]00 - Are only the 2000 benchmarks mapped?
        check('GlueHostApplicationSoftwareRunTimeEnvironment',
              cluster.runtimeenvironment)

class Check_aris(LDAPNagiosPlugin):

    main_config_section = 'arcinfosys'

    def __init__(self):
        """Initialize the plugin and register the ARIS command-line options."""
        LDAPNagiosPlugin.__init__(self)

        group = self.argparser.add_argument_group('ARIS Options')

        # Statuses to report when expected entries are absent.
        for flag, dest, text in (
                ('--if-no-clusters', 'if_no_clusters',
                 'Nagios status to report if no cluster is found.'),
                ('--if-no-queues', 'if_no_queues',
                 'Nagios status to report if a cluster has no queues.'),
            ):
            group.add_argument(flag, dest=dest, type=status_by_name,
                               default=WARNING, metavar='NAGIOS-STATUS',
                               help=text)

        # Options which may be repeated to accumulate a list of values.
        for flag, dest, metavar, text in (
                ('--cluster', 'clusters', 'CLUSTER-NAME',
                 'Pass one or more times to check specific clusters.'),
                ('--cluster-test', 'cluster_tests', 'TESTNAME',
                 'Enable a custom test to run against '
                 'nordugrid-cluster entries.'),
                ('--queue-test', 'queue_tests', 'TESTNAME',
                 'Enable a custom test to run against '
                 'nordugrid-queue entries.'),
            ):
            group.add_argument(flag, dest=dest, action='append', default=[],
                               metavar=metavar, help=text)

        # Plain on/off switches.
        for flag, text in (
                ('--enable-glue',
                 'Enable loading and schema-checks of the Glue schema '
                 'entries if present.'),
                ('--compare-glue',
                 'Enable comparison of Glue entries with ARC. '
                 'Only a limited set of attributes are compared. '
                 'Implies --enable-glue.'),
                ('--check-contact',
                 'Try to list the nordugrid-cluster-contactstring URLs. '
                 'This requires a proxy certificate.'),
            ):
            group.add_argument(flag, action='store_true', default=False,
                               help=text)

        self._arcclient = arcutils.ArcClient()

    def parse_args(self, args):
        """Parse ``args`` and let --compare-glue switch on --enable-glue."""
        LDAPNagiosPlugin.parse_args(self, args)
        opts = self.opts
        if opts.compare_glue:
            opts.enable_glue = True

    def custom_verify_regex(self, section, obj):
        """Run a custom regular-expression test defined in ``section``.

        The ``variable`` option names the attribute of ``obj`` to inspect; a
        missing attribute counts as having no values and a non-list value as
        a single value.  For ``critical.pattern`` and then
        ``warning.pattern``, where configured, at least one value must match
        at its start.  Returns ``(code, message)`` for the first pattern no
        value matches, otherwise ``(OK, None)``.
        """
        variable = self.config.get(section, 'variable')
        values = getattr(obj, variable, [])
        if not isinstance(values, list):
            values = [values]

        # The most severe level is examined first.
        for code, level in ((CRITICAL, 'critical'), (WARNING, 'warning')):
            pattern_option = level + '.pattern'
            if not self.config.has_option(section, pattern_option):
                continue
            pattern_source = self.config.get(section, pattern_option)
            regex = re.compile(pattern_source)
            if any(regex.match(value) for value in values):
                continue
            message_option = level + '.message'
            if self.config.has_option(section, message_option):
                return code, self.config.get(section, message_option)
            return code, '%s did not match %s'%(variable, pattern_source)
        return OK, None

    def custom_verify_limit(self, section, obj):
        """Run a custom limit test defined in ``section``.

        The ``value`` option is evaluated as a Python expression with the
        instance attributes of ``obj`` as its namespace.  The result is
        checked against ``critical.min`` and ``critical.max``, then against
        ``warning.min`` and ``warning.max``, where configured.

        Returns ``(code, message)`` for the first violated limit, where the
        message is the ``<level>.message`` option if set and a generated text
        otherwise, or ``(OK, None)`` if no limit is violated.
        """
        expr = self.config.get(section, 'value')
        # eval() inserts a ``__builtins__`` key into the globals mapping it is
        # given, so evaluate against a copy instead of the object's own
        # attribute dictionary.
        # NOTE(review): the expression is taken verbatim from the plugin
        # configuration and executed, so that configuration must be trusted.
        namespace = dict(vars(obj))
        x = eval(expr, namespace) # pylint: disable=eval-used
        for (code, pfx) in [(CRITICAL, 'critical'), (WARNING, 'warning')]:
            msg = None
            if self.config.has_option(section, pfx + '.message'):
                msg = self.config.get(section, pfx + '.message')
            if self.config.has_option(section, pfx + '.min'):
                x_min = self.config.getfloat(section, pfx + '.min')
                if x < x_min:
                    return code, (msg or '%s = %s is below %s limit %s.'
                                         % (expr, x, pfx, x_min))
            if self.config.has_option(section, pfx + '.max'):
                x_max = self.config.getfloat(section, pfx + '.max')
                if x > x_max:
                    return code, (msg or '%s = %s is above %s limit %s'
                                         % (expr, x, pfx, x_max))
        return OK, None

    def custom_verify_obj(self, obj, tests):
        """Run the named custom tests against ``obj``.

        Each test ``T`` is defined by the configuration section
        ``arcinfosys.aris.T``, whose ``type`` option selects between a
        ``limit`` and a ``regex`` test.  A test returning a non-zero status
        code has its message logged as an error and the code merged into the
        Nagios report.

        Raises ``ServiceUnknown`` if the section or its ``type`` option is
        missing, or if the type is not handled.
        """
        # Custom tests.
        for test in tests or []:
            section = 'arcinfosys.aris.%s'%test
            if not self.config.has_section(section):
                raise ServiceUnknown('Missing section %s to define '
                                     'the test %s.'%(section, test))
            if not self.config.has_option(section, 'type'):
                raise ServiceUnknown('The type variable is missing in %s.'
                                     %section)
            typ = self.config.get(section, 'type')
            if typ == 'limit':
                code, msg = self.custom_verify_limit(section, obj)
            elif typ == 'regex':
                code, msg = self.custom_verify_regex(section, obj)
            else:
                raise ServiceUnknown('Unhandled type %s in %s.' % (typ, section))
            if code:
                self.log.error(msg)
                self.nagios_report.update_status_code(code)

    def verify_nordugrid_cluster(self, ent):
        """Validate and do custom checks on a nordugrid-cluster entry.

        Returns the constructed cluster object, or ``None`` after logging
        the problem and raising the status to CRITICAL if the entry fails
        validation.
        """

        try:
            cluster = NorduGridCluster(self._ldap_server, ent)
        except ldaputils.LDAPValidationError as exn:
            self.log.error('Validation of cluster entry %s failed.',
                           ldap_entry_dn(ent))
            self.log.error(str(exn))
            self.nagios_report.update_status_code(CRITICAL)
            return None
        self.debug_dump_obj(cluster, 'nordugrid-cluster entry')

        # Tests from the command line take precedence; otherwise use the
        # first configured list, looked up by cluster name and then by host.
        selected = self.opts.cluster_tests
        if not selected:
            candidates = (
                'cluster_tests[%s]'%cluster.name,
                'cluster_tests[%s]'%self.opts.host,
            )
            for option in candidates:
                if not self.config.has_option('arcinfosys.aris', option):
                    continue
                listing = self.config.get('arcinfosys.aris', option)
                selected = [name.strip() for name in listing.split(',')]
                break
        self.custom_verify_obj(cluster, selected)
        return cluster

    def verify_nordugrid_queue(self, cluster, ent):
        """Validate and do custom checks on a nordugrid-queue entry.

        ``cluster`` is the cluster object the queue belongs to; its name is
        used when looking up configured queue tests.

        Returns the constructed queue object, or ``None`` after logging the
        problem and raising the status to CRITICAL if the entry fails
        validation.
        """

        try:
            queue = NorduGridQueue(self._ldap_server, ent)
        except ldaputils.LDAPValidationError as exn:
            self.log.error('Validation of queue entry %s failed.',
                    ldap_entry_dn(ent))
            self.log.error(str(exn))
            # Only the status code is raised here, with no status message,
            # in the same way as for cluster validation failures.
            self.nagios_report.update_status_code(CRITICAL)
            return None
        self.debug_dump_obj(queue, 'nordugrid-queue entry')

        # Tests from the command line take precedence; otherwise use the
        # first configured list, from the most to the least specific key.
        tests = self.opts.queue_tests
        if not tests:
            for setting in [
                    'queue_tests[%s/%s]'%(cluster.name, queue.name),
                    'queue_tests[%s]'%queue.name,
                    'queue_tests[%s]'%self.opts.host,
                ]:
                if self.config.has_option('arcinfosys.aris', setting):
                    raw_tests = self.config.get('arcinfosys.aris', setting)
                    tests = [test.strip() for test in raw_tests.split(',')]
                    break
        self.custom_verify_obj(queue, tests)
        return queue

    def verify_glue_entry(self, ent, clusters, queues):
        """Schema-check a Glue entry and optionally compare it with ARC.

        ``clusters`` maps cluster names to cluster objects and ``queues``
        maps ``(cluster name, queue name)`` pairs to queue objects.

        The entry is instantiated through the first of its object classes
        found in ``glue_class_map``.  If --compare-glue is in effect, GlueCE,
        GlueCluster and GlueSubCluster objects are further compared with the
        corresponding ARC entries.

        Returns the Glue object, or ``None`` if no object class is known, if
        the schema-check fails, or if the ARC entries needed for the
        comparison are missing.
        """

        # Schema-check and construct an object representation.
        obj = None
        for objcls in ent['objectClass']:
            if objcls in glue_class_map:
                try:
                    obj = glue_class_map[objcls](self._ldap_server, ent)
                    self.log.debug('Schema-checked %s.', ldap_entry_dn(ent))
                except ldaputils.LDAPValidationError as exn:
                    self.log.error('Schema-check failed for %s: %s',
                            ldap_entry_dn(ent), exn)
                    self.nagios_report.update_status_code(CRITICAL)
                break
        if obj is None or not self.opts.compare_glue:
            return obj

        def get_cluster(cluster_name):
            # Look up a cluster, reporting CRITICAL if it is absent.
            if cluster_name not in clusters:
                self.log.error('No cluster %s corresponding to %s.',
                               cluster_name, obj.structural_objectclass)
                self.log.info('Present clusters are %s', ', '.join(clusters))
                self.nagios_report.update_status_code(CRITICAL)
                return None
            return clusters[cluster_name]

        def get_queue(cluster_name, queue_name):
            # Look up a queue, logging an error if it is absent.
            # NOTE(review): unlike get_cluster this does not raise the Nagios
            # status - confirm whether that is intended.
            if (cluster_name, queue_name) not in queues:
                self.log.error('No queue %s/%s corresponding to %s.',
                               cluster_name, queue_name,
                               obj.structural_objectclass)
                # The keys are (cluster, queue) pairs and must be rendered as
                # strings before they can be joined.
                self.log.info('Present queues are %s.',
                              ', '.join('%s/%s' % key for key in queues))
                return None
            return queues[(cluster_name, queue_name)]

        # Compare GlueCE with ARC entries.
        if isinstance(obj, GlueCE):
            cluster = get_cluster(obj.GlueCEInfoHostName)
            if cluster is None:
                return None
            queue = get_queue(obj.GlueCEInfoHostName, obj.GlueCEName)
            if queue is None:
                return None
            self.log.debug('Comparing %s with ARC entries.', queue.name)
            vl = GlueCE_Validator(obj, '%s/%s'%(cluster.name, queue.name),
                                  self.log)
            vl.compare_with(cluster, queue)
            if vl.error_count:
                self.nagios_report.update_status_code(CRITICAL)

        # Compare GlueCluster with ARC entries.
        elif isinstance(obj, GlueCluster):
            cluster = get_cluster(obj.GlueClusterUniqueID)
            if cluster is None:
                return None
            vl = GlueCluster_Validator(obj, cluster.name, self.log)
            vl.compare_with(cluster)
            if vl.error_count:
                self.nagios_report.update_status_code(CRITICAL)

        # Compare GlueSubCluster with ARC entries.
        # NOTE(review): vl.error_count is not consulted in this branch, so
        # attribute mismatches are logged without raising the status -
        # confirm whether that is intended.
        elif isinstance(obj, GlueSubCluster):
            subcluster_id = obj.GlueSubClusterUniqueID
            vl = GlueSubCluster_Validator(obj, subcluster_id, self.log)
            if '/' in subcluster_id:
                # Split on the first slash only, so that further slashes end
                # up in the queue name instead of breaking the unpacking.
                cluster_name, queue_name = subcluster_id.split('/', 1)
                cluster = get_cluster(cluster_name)
                if cluster is None:
                    return None
                queue = get_queue(cluster_name, queue_name)
                if queue is None:
                    return None
                if cluster.homogeniety:
                    self.log.error('Expected inhomogenious cluster for '
                                   'GlueSubClusterUniqueID = %r.',
                                   obj.GlueSubClusterUniqueID)
                    self.nagios_report.update_status_code(CRITICAL)
                vl.compare_with(cluster, queue)
            else:
                cluster = get_cluster(subcluster_id)
                if cluster is None:
                    return None
                if not cluster.homogeniety:
                    self.log.error('Expected homogenious cluster for '
                                   'GlueSubClusterUniqueID = %r.',
                                   obj.GlueSubClusterUniqueID)
                    self.nagios_report.update_status_code(CRITICAL)
                vl.compare_with(cluster, cluster)

        return obj

    def check(self):
        """The entry point for the ARIS probe.

        Searches for nordugrid-cluster entries below the base DN and their
        nordugrid-queue entries, validating each and running any custom
        tests.  If enabled, Glue entries are then schema-checked and
        compared with the ARC entries, and the contact URL of each cluster
        is listed.  Finally a summary status is added and the result of
        ``nagios_exit`` is returned.
        """

        self.prepare_check()

        # Query cluster entries.
        basedn = self.opts.ldap_basedn or 'Mds-Vo-name=local,o=grid'
        if self.opts.clusters:
            filts = ['(nordugrid-cluster-name=%s)'
                        % ldap3.utils.conv.escape_filter_chars(cluster)
                     for cluster in self.opts.clusters]
            filt = '(&(objectClass=nordugrid-cluster)(|%s))' % ''.join(filts)
        else:
            filt = '(objectClass=nordugrid-cluster)'
        sr = self.ldap_search(basedn, filt, search_scope = 'LEVEL')
        cluster_count = len(sr)
        if cluster_count == 0:
            msg = 'No clusters found.'
            if self.opts.if_no_clusters:
                self.nagios_report.update_status(self.opts.if_no_clusters, msg)
            else:
                self.log.warning(msg)

        # Report error if expected entries are missing.
        if self.opts.clusters:
            found_clusters = set()
            # Search results are handled as single subscriptable entries
            # throughout this class, so they are not unpacked as pairs here.
            for ent in sr:
                found_clusters.update(ent['nordugrid-cluster-name'])
            for cluster in self.opts.clusters:
                if cluster not in found_clusters:
                    self.log.error('Missing entry for %s.', cluster)
                    self.nagios_report.update_status_code(CRITICAL)

        # These are indexed for later comparison with Glue entries.
        clusters = {} # indexed by cluster.name
        queues = {}   # indexed by (cluster.name, queue.name)
        for ent in sr:
            # Check nordugrid-cluster entry.
            cluster = self.verify_nordugrid_cluster(ent)
            if cluster is None:
                self.log.warning('Skipping queue checks for invalid cluster '
                                 'entry %s.', ldap_entry_dn(ent))
                continue
            clusters[cluster.name] = cluster

            # Query and check the corresponding queue entries.
            self.log.debug('Checking queues for %s.', cluster.name)
            qbasedn = 'nordugrid-cluster-name=%s,%s'%(cluster.name, basedn)
            qsr = self.ldap_search(qbasedn, '(objectClass=nordugrid-queue)')
            if not qsr:
                msg = 'No queue defined for %s.'%cluster.name
                if self.opts.if_no_queues:
                    self.nagios_report.update_status(
                            self.opts.if_no_queues, msg)
                else:
                    self.log.warning(msg)
            else:
                middleware = getattr(cluster, 'middleware',
                                     ['unknown middleware'])
                self.log.info('- Cluster %r runs %s.',
                              cluster.name, ', '.join(middleware))
                for qent in qsr:
                    queue = self.verify_nordugrid_queue(cluster, qent)
                    if queue is None:
                        # The entry failed validation, which has already
                        # been logged and reported; there is no queue object
                        # to index.
                        continue
                    queues[(cluster.name, queue.name)] = queue
                    status = getattr(queue, 'status', 'unknown')
                    self.log.info('-- Queue %r is %s.', queue.name, status)

        # Check Glue entries if enabled.
        if self.opts.enable_glue:
            glue_counts = {}
            glue_sr = self.ldap_search(
                    'Mds-Vo-name=resource,o=grid', '(objectClass=*)',
                    search_scope = 'LEVEL')
            for ent in glue_sr:
                obj = self.verify_glue_entry(ent, clusters, queues)
                if obj:
                    oc = obj.structural_objectclass
                    glue_counts[oc] = glue_counts.get(oc, 0) + 1
            for oc, count in glue_counts.items():
                self.log.info('Validated %s.',
                              counted_noun(count, '%s entry'%oc, '%s entries'%oc))

        # Try to access the nordugrid-cluster-contactstring if requested.
        if self.opts.check_contact:
            self.require_voms_proxy()
            for cluster in clusters.values():
                try:
                    url = cluster.contactstring
                    xs = self._arcclient.arcls(url).get()
                    self.log.info('%s contains %d entries.', url, len(xs))
                except arcutils.CalledProcessError:
                    self.nagios_report.update_status(CRITICAL,
                        'Contact URL %s is inaccessible.'%url)
                except AttributeError:
                    self.nagios_report.update_status(CRITICAL,
                        'The cluster %s has no contact string.'%cluster.name)

        # Pass a default status and exit.
        try:
            cinfo = ' (%s)' % ', '.join([', '.join(c.middleware)
                                         for c in clusters.values()])
            qinfo = ' (%s)' % ', '.join([q.status for q in queues.values()])
        except AttributeError:
            # Drop both details if any entry lacks the attribute.
            cinfo = ''
            qinfo = ''
        self.nagios_report.update_status(OK,
                '%s%s, %s%s'
                    %(counted_noun(cluster_count, 'cluster'), cinfo,
                      counted_noun(len(queues), 'queue'), qinfo))
        return self.nagios_exit(subject = 'ARIS service')
