sampledoc

Source code for Bcfg2.Server.Lint.MergeFiles

import os
import copy
from difflib import SequenceMatcher
import Bcfg2.Server.Lint
from Bcfg2.Server.Plugins.Cfg import CfgGenerator


class MergeFiles(Bcfg2.Server.Lint.ServerPlugin):
    """ find Probes or Cfg files with multiple similar files that
    might be merged into one """

    def Run(self):
        """ run the Cfg and/or Probes similarity checks, depending on
        which of those plugins are present in the server core """
        if 'Cfg' in self.core.plugins:
            self.check_cfg()
        if 'Probes' in self.core.plugins:
            self.check_probes()

    @classmethod
    def Errors(cls):
        """ return the error names this plugin raises, each mapped to
        the level "warning" """
        return {"merge-cfg": "warning",
                "merge-probes": "warning"}

    def check_cfg(self):
        """ check Cfg for similar files """
        for filename, entryset in self.core.plugins['Cfg'].entries.items():
            # only CfgGenerator entries are compared; every other kind
            # of entry in the entry set is skipped
            candidates = dict((f, e) for f, e in entryset.entries.items()
                              if isinstance(e, CfgGenerator))
            for mset in self.get_similar(candidates):
                self.LintError("merge-cfg",
                               "The following files are similar: %s. "
                               "Consider merging them into a single Genshi "
                               "template." %
                               ", ".join([os.path.join(filename, p)
                                          for p in mset]))

    def check_probes(self):
        """ check Probes for similar files """
        probes = self.core.plugins['Probes'].probes.entries
        for mset in self.get_similar(probes):
            self.LintError("merge-probes",
                           "The following probes are similar: %s. "
                           "Consider merging them into a single probe." %
                           ", ".join(mset))

    def get_similar(self, entries):
        """ Get a list of similar files from the entry dict.

        entries maps a name to an object whose ``data`` attribute is
        compared.  Return value is a list of lists, each of which
        gives the names of files that are similar to one another;
        entries that are not similar to any other entry are left out.

        The cutoff is read from the "threshold" config option and
        defaults to 0.75 when that option is absent. """
        if "threshold" in self.config:
            # accept threshold either as a percent (e.g., "threshold=75") or
            # as a ratio (e.g., "threshold=.75")
            threshold = float(self.config['threshold'])
            if threshold > 1:
                threshold /= 100
        else:
            threshold = 0.75

        rv = []
        elist = list(entries.items())
        while elist:
            # _find_similar() consumes the list it is given, so hand
            # it a copy and keep elist for the next round
            result = self._find_similar(elist.pop(0), copy.copy(elist),
                                        threshold)
            if len(result) > 1:
                # everything in this group has been reported; do not
                # use any of it as the seed of another group
                grouped = set(result)
                elist = [(fname, fdata)
                         for fname, fdata in elist
                         if fname not in grouped]
                rv.append(result)
        return rv

    def _find_similar(self, ftuple, others, threshold):
        """ Find files similar to the one described by ftuple in the
        list of other files.

        ftuple is a tuple of (filename, data); others is a list of
        such tuples, and is emptied by this call.  threshold is a
        float between 0 and 1 that describes how similar two files
        must be to rate as 'similar'.

        Returns a list that starts with the name from ftuple, followed
        by the name of every file in others that is similar to it
        either directly or through a chain of similar files.  Each
        name appears at most once. """
        fname, fdata = ftuple
        rv = [fname]
        while others:
            cname, cdata = others.pop(0)
            seqmatch = SequenceMatcher(None, fdata.data, cdata.data)
            # perform progressively more expensive comparisons
            if (seqmatch.real_quick_ratio() > threshold and
                    seqmatch.quick_ratio() > threshold and
                    seqmatch.ratio() > threshold):
                group = self._find_similar((cname, cdata), copy.copy(others),
                                           threshold)
                rv.extend(group)
                # Drop what the recursion already collected.  Leaving
                # those entries in place would compare them again,
                # list them more than once in the result, and make the
                # number of comparisons grow exponentially when many
                # files are mutually similar.
                found = set(group)
                others[:] = [item for item in others
                             if item[0] not in found]
        return rv