package weka.clusterers;

import cern.colt.matrix.impl.AbstractFormatter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.Random;
import java.util.Vector;
import weka.classifiers.rules.DecisionTableHashKey;
import weka.core.AttributeStats;
import weka.core.Capabilities;
import weka.core.DenseInstance;
import weka.core.EuclideanDistance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.NormalizableDistance;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.SparseInstance;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;

/* loaded from: input_file:lib/weka-dev-3.9.6.jar:weka/clusterers/Canopy.class */
public class Canopy extends RandomizableClusterer implements UpdateableClusterer, NumberOfClustersRequestable, OptionHandler, TechnicalInformationHandler {
    /** Serialization version identifier. */
    private static final long serialVersionUID = 2067574593448223334L;
    /** The canopies; each instance is one canopy (cluster center). */
    protected Instances m_canopies;
    /** Per canopy, a one-element array holding its T2 density (count of instances that fell within T2 of it). */
    protected List<double[]> m_canopyT2Density;
    /**
     * Per canopy, per attribute running statistics: a one-element sum for numeric attributes, and a
     * count per nominal value (plus a trailing slot counting missing values) for nominal attributes.
     */
    protected List<double[][]> m_canopyCenters;
    /** Per canopy, per attribute count of missing values seen for numeric attributes. */
    protected List<double[]> m_canopyNumMissingForNumerics;
    /** Per canopy center, a bit set (packed into longs, 64 canopies per long) of the canopies it is assigned to. */
    protected List<long[]> m_clusterCanopies;
    /** Default user-specified T2 value. */
    public static final double DEFAULT_T2 = -1.0d;
    /** Default user-specified T1 value. */
    public static final double DEFAULT_T1 = -1.25d;
    /** Number of instances passed to updateClusterer() so far. */
    protected int m_instanceCount;
    /** Optional filter applied to every incoming instance before distances are computed; null means no filtering. */
    protected Filter m_missingValuesReplacer;
    /** Copy of the training data kept during batch training; used to top up the canopies when more clusters are requested. */
    protected Instances m_trainingData;
    /** T2 value as supplied by the user. */
    protected double m_userT2 = -1.0d;
    /** T1 value as supplied by the user. */
    protected double m_userT1 = -1.25d;
    /** T1 distance actually used (derived from the user value in buildClusterer()). */
    protected double m_t1 = this.m_userT1;
    /** T2 distance actually used (derived from the user value in buildClusterer()). */
    protected double m_t2 = this.m_userT2;
    /** Periodic pruning is attempted every this many instances in updateClusterer(). */
    protected int m_periodicPruningRate = 10000;
    /** Canopies with a T2 density below this value are removed when pruning. */
    protected double m_minClusterDensity = 2.0d;
    /** Upper bound on the number of candidate canopies held at any one time. */
    protected int m_maxCanopyCandidates = 100;
    /** Whether the most recent pruning pass removed at least one canopy. */
    protected boolean m_didPruneLastTime = true;
    /** Number of clusters requested by the user; negative means no specific number was requested. */
    protected int m_numClustersRequested = -1;
    /** If true, buildClusterer() does not install a ReplaceMissingValues filter. */
    protected boolean m_dontReplaceMissing = false;
    /** Distance function used for all instance-to-canopy distance computations. */
    protected NormalizableDistance m_distanceFunction = new EuclideanDistance();

    /**
     * Returns a human-readable description of this clusterer, followed by the
     * technical reference for the algorithm.
     *
     * @return the description text
     */
    public String globalInfo() {
        // Fixed user-visible typos: "capopy" -> "canopy", "eitherbatch" -> "either batch".
        return "Cluster data using the canopy clustering algorithm, which requires just one pass over the data. "
                + "Can run in either batch or incremental mode. "
                + "Results are generally not as good when running incrementally as the min/max for each numeric attribute is not known in advance. "
                + "Has a heuristic (based on attribute std. deviations), that can be used in batch mode, for setting the T2 distance. "
                + "The T2 distance determines how many canopies (clusters) are formed. "
                + "When the user specifies a specific number (N) of clusters to generate, the algorithm will return the top N canopies (as determined by T2 density) when N < number of canopies (this applies to both batch and incremental learning); "
                + "when N > number of canopies, the difference is made up by selecting training instances randomly (this can only be done when batch training). "
                + "For more information see:\n\n"
                + getTechnicalInformation().toString();
    }

    /**
     * Builds the bibliographic reference for the canopy clustering paper.
     *
     * @return an INPROCEEDINGS entry with author, title, book title, year and pages set
     */
    @Override // weka.core.TechnicalInformationHandler
    public TechnicalInformation getTechnicalInformation() {
        final TechnicalInformation reference = new TechnicalInformation(TechnicalInformation.Type.INPROCEEDINGS);

        reference.setValue(TechnicalInformation.Field.AUTHOR,
                "A. McCallum and K. Nigam and L.H. Ungar");
        reference.setValue(TechnicalInformation.Field.TITLE,
                "Efficient Clustering of High Dimensional Data Sets with Application to Reference Matching");
        reference.setValue(TechnicalInformation.Field.BOOKTITLE,
                "Proceedings of the sixth ACM SIGKDD internation conference on knowledge discovery and data mining ACM-SIAM symposium on Discrete algorithms");
        reference.setValue(TechnicalInformation.Field.YEAR, "2000");
        reference.setValue(TechnicalInformation.Field.PAGES, "169-178");

        return reference;
    }

    /**
     * Declares what kind of data this clusterer accepts: no class attribute,
     * nominal and numeric attributes, and missing values.
     *
     * @return the capabilities of this clusterer
     */
    @Override // weka.clusterers.AbstractClusterer, weka.clusterers.Clusterer, weka.core.CapabilitiesHandler
    public Capabilities getCapabilities() {
        final Capabilities supported = super.getCapabilities();
        // Start from nothing and switch on only what is handled.
        supported.disableAll();

        supported.enable(Capabilities.Capability.NO_CLASS);
        supported.enable(Capabilities.Capability.NOMINAL_ATTRIBUTES);
        supported.enable(Capabilities.Capability.NUMERIC_ATTRIBUTES);
        supported.enable(Capabilities.Capability.MISSING_VALUES);

        return supported;
    }

    /**
     * Lists the command-line options understood by this clusterer, followed by
     * the options of the superclass.
     *
     * <p>Fixes relative to the previous version: the max-candidates option name no
     * longer carries a leading dash (consistent with the other options and with
     * what {@code setOptions} parses), the documented defaults for -N and -t1 now
     * match the actual field defaults (-1 and -1.25), and every option taking an
     * argument shows it in its synopsis.
     *
     * @return an enumeration of all available options
     */
    @Override // weka.clusterers.RandomizableClusterer, weka.clusterers.AbstractClusterer, weka.core.OptionHandler
    public Enumeration<Option> listOptions() {
        Vector<Option> options = new Vector<Option>();
        options.addElement(new Option(
                "\tNumber of clusters.\n\t(default -1 = determined by the T2 distance).",
                "N", 1, "-N <num>"));
        options.addElement(new Option(
                "\tMaximum number of candidate canopies to retain in memory\n"
                        + "\tat any one time. T2 distance plus, data characteristics,\n"
                        + "\twill determine how many candidate canopies are formed before\n"
                        + "\tperiodic and final pruning are performed, which might result\n"
                        + "\tin excess memory consumption. This setting avoids large numbers\n"
                        + "\tof candidate canopies consuming memory. (default = 100)",
                "max-candidates", 1, "-max-candidates <num>"));
        options.addElement(new Option(
                "\tHow often to prune low density canopies. \n\t(default = every 10,000 training instances)",
                "periodic-pruning", 1, "-periodic-pruning <num>"));
        options.addElement(new Option(
                "\tMinimum canopy density, below which a canopy will be pruned\n"
                        + "\tduring periodic pruning. (default = 2 instances)",
                "min-density", 1, "-min-density <num>"));
        options.addElement(new Option(
                "\tThe T2 distance to use. Values < 0 indicate that\n"
                        + "\ta heuristic based on attribute std. deviation should be used to set this.\n"
                        + "\tNote that this heuristic can only be used when batch training\n"
                        + "\t(default = -1.0)",
                "t2", 1, "-t2 <num>"));
        options.addElement(new Option(
                "\tThe T1 distance to use. A value < 0 is taken as a\n"
                        + "\tpositive multiplier for T2. (default = -1.25)",
                "t1", 1, "-t1 <num>"));
        options.addElement(new Option(
                "\tDon't replace missing values with mean/mode when running in batch mode.\n",
                "M", 0, "-M"));
        options.addAll(Collections.list(super.listOptions()));
        return options.elements();
    }

    /**
     * Parses the given option array and applies every recognised option to this
     * clusterer. Options that are absent leave the current setting untouched,
     * except for the -M flag which is always (re)set from the array. Remaining
     * options are handed to the superclass.
     *
     * @param strArr the options to parse
     * @throws Exception if an option value cannot be parsed or is rejected
     */
    @Override // weka.clusterers.RandomizableClusterer, weka.clusterers.AbstractClusterer, weka.core.OptionHandler
    public void setOptions(String[] strArr) throws Exception {
        final String numClusters = Utils.getOption('N', strArr);
        if (!numClusters.isEmpty()) {
            setNumClusters(Integer.parseInt(numClusters));
        }

        final String maxCandidates = Utils.getOption("max-candidates", strArr);
        if (!maxCandidates.isEmpty()) {
            setMaxNumCandidateCanopiesToHoldInMemory(Integer.parseInt(maxCandidates));
        }

        final String pruningRate = Utils.getOption("periodic-pruning", strArr);
        if (!pruningRate.isEmpty()) {
            setPeriodicPruningRate(Integer.parseInt(pruningRate));
        }

        final String minDensity = Utils.getOption("min-density", strArr);
        if (!minDensity.isEmpty()) {
            setMinimumCanopyDensity(Double.parseDouble(minDensity));
        }

        final String t2 = Utils.getOption("t2", strArr);
        if (!t2.isEmpty()) {
            setT2(Double.parseDouble(t2));
        }

        final String t1 = Utils.getOption("t1", strArr);
        if (!t1.isEmpty()) {
            setT1(Double.parseDouble(t1));
        }

        final boolean skipMissingReplacement = Utils.getFlag('M', strArr);
        setDontReplaceMissingValues(skipMissingReplacement);

        super.setOptions(strArr);
    }

    /**
     * Returns the current settings as an option array, with the superclass
     * options appended at the end.
     *
     * @return the option array describing the current configuration
     */
    @Override // weka.clusterers.RandomizableClusterer, weka.clusterers.AbstractClusterer, weka.core.OptionHandler
    public String[] getOptions() {
        List<String> current = new ArrayList<String>();

        current.add("-N");
        current.add(String.valueOf(getNumClusters()));

        current.add("-max-candidates");
        current.add(String.valueOf(getMaxNumCandidateCanopiesToHoldInMemory()));

        current.add("-periodic-pruning");
        current.add(String.valueOf(getPeriodicPruningRate()));

        current.add("-min-density");
        current.add(String.valueOf(getMinimumCanopyDensity()));

        current.add("-t2");
        current.add(String.valueOf(getT2()));

        current.add("-t1");
        current.add(String.valueOf(getT1()));

        if (getDontReplaceMissingValues()) {
            current.add("-M");
        }

        Collections.addAll(current, super.getOptions());
        return current.toArray(new String[current.size()]);
    }

    /**
     * Tests whether two canopy bit sets (packed into longs) have at least one
     * canopy in common.
     *
     * @param jArr the first packed canopy set
     * @param jArr2 the second packed canopy set
     * @return true if some bit is set in both arrays at the same position
     * @throws Exception if the two arrays differ in length
     */
    public static boolean nonEmptyCanopySetIntersection(long[] jArr, long[] jArr2) throws Exception {
        final int words = jArr.length;
        if (words != jArr2.length) {
            throw new Exception("Canopy lists need to be the same length");
        }

        // Equal lengths: an empty pair simply skips the loop and yields false.
        int word = 0;
        while (word < words) {
            long shared = jArr[word] & jArr2[word];
            if (shared != 0) {
                return true;
            }
            word++;
        }
        return false;
    }

    /**
     * Marks canopy {@code i} as assigned in a packed bit set (64 canopies per long).
     *
     * @param jArr the packed bit set to update in place
     * @param i the index of the canopy to mark
     */
    private static void updateCanopyAssignment(long[] jArr, int i) {
        // The mask must be built from a long literal: with an int "1" the shift
        // distance is reduced modulo 32, so canopies 32..63 of each word would set
        // the wrong bit (and bit 31 would sign-extend into the upper half).
        long mask = 1L << (i % 64);
        jArr[i / 64] |= mask;
    }

    /**
     * Computes the set of canopies the given instance belongs to. An instance
     * belongs to every canopy whose center lies within the T1 distance; if no
     * canopy is that close it is assigned to the nearest one instead.
     *
     * <p>If a missing-values filter is set, the instance is passed through it first.
     *
     * @param instance the instance to assign
     * @return a bit set packed into longs (64 canopies per long) with one bit per assigned canopy;
     *         all zero when there are no canopies
     * @throws Exception if filtering or the distance computation fails
     */
    public long[] assignCanopies(Instance instance) throws Exception {
        if (this.m_missingValuesReplacer != null) {
            this.m_missingValuesReplacer.input(instance);
            instance = this.m_missingValuesReplacer.output();
        }
        long[] assigned = new long[(this.m_canopies.size() / 64) + 1];
        double nearestDistance = Double.MAX_VALUE;
        int nearestIndex = -1;
        boolean withinT1OfAny = false;
        for (int c = 0; c < this.m_canopies.numInstances(); c++) {
            double distance = this.m_distanceFunction.distance(instance, this.m_canopies.instance(c));
            if (distance < nearestDistance) {
                nearestDistance = distance;
                nearestIndex = c;
            }
            if (distance < this.m_t1) {
                updateCanopyAssignment(assigned, c);
                withinT1OfAny = true;
            }
        }
        // Fall back to the nearest canopy, but only if one exists: with no canopies
        // nearestIndex is still -1 and must not be turned into a (bogus) bit.
        if (!withinT1OfAny && nearestIndex >= 0) {
            updateCanopyAssignment(assigned, nearestIndex);
        }
        return assigned;
    }

    /**
     * Folds one instance into the running statistics of a canopy.
     *
     * <p>Numeric attributes accumulate a sum in a one-element array (missing values
     * are counted separately in {@code dArr2}); nominal attributes accumulate a
     * count per value, with the last slot counting missing values. Per-attribute
     * arrays are allocated lazily the first time an attribute is seen.
     *
     * @param instance the instance to add
     * @param dArr per-attribute statistics of the canopy, updated in place
     * @param dArr2 per-attribute missing counts for numeric attributes, updated in place
     */
    protected void updateCanopyCenter(Instance instance, double[][] dArr, double[] dArr2) {
        for (int att = 0; att < instance.numAttributes(); att++) {
            final boolean numeric = instance.attribute(att).isNumeric();
            if (!numeric && !instance.attribute(att).isNominal()) {
                // Other attribute types carry no center statistics.
                continue;
            }

            if (dArr[att].length == 0) {
                dArr[att] = numeric ? new double[1] : new double[instance.attribute(att).numValues() + 1];
            }

            final boolean missing = instance.isMissing(att);
            if (numeric) {
                if (missing) {
                    dArr2[att] += 1.0d;
                } else {
                    dArr[att][0] += instance.value(att);
                }
            } else {
                final int slot = missing ? dArr[att].length - 1 : (int) instance.value(att);
                dArr[att][slot] += 1.0d;
            }
        }
    }

    /**
     * Incorporates a single training instance.
     *
     * <p>The instance is absorbed by the first canopy whose center is within the T2
     * distance (increasing that canopy's density and center statistics). If no
     * canopy is that close and the candidate limit has not been reached, the
     * instance becomes a new canopy. Before processing, low-density candidates are
     * pruned every {@code m_periodicPruningRate} instances.
     *
     * @param instance the instance to add
     * @throws Exception if filtering or the distance computation fails
     */
    @Override // weka.clusterers.UpdateableClusterer
    public void updateClusterer(Instance instance) throws Exception {
        // A pruning rate of zero disables periodic pruning instead of failing
        // with a division by zero in the modulo below.
        if (this.m_instanceCount > 0
                && this.m_periodicPruningRate != 0
                && this.m_instanceCount % this.m_periodicPruningRate == 0) {
            pruneCandidateCanopies();
        }
        this.m_instanceCount++;

        if (this.m_missingValuesReplacer != null) {
            this.m_missingValuesReplacer.input(instance);
            instance = this.m_missingValuesReplacer.output();
        }
        this.m_distanceFunction.update(instance);

        // Absorb into the first canopy within T2, if any.
        for (int c = 0; c < this.m_canopies.numInstances(); c++) {
            if (this.m_distanceFunction.distance(instance, this.m_canopies.instance(c)) < this.m_t2) {
                this.m_canopyT2Density.get(c)[0] += 1.0d;
                updateCanopyCenter(instance, this.m_canopyCenters.get(c), this.m_canopyNumMissingForNumerics.get(c));
                return;
            }
        }

        // Not covered by any canopy: start a new one unless the candidate limit is reached.
        if (this.m_canopies.numInstances() >= this.m_maxCanopyCandidates) {
            return;
        }
        this.m_canopies.add(instance);
        this.m_canopyT2Density.add(new double[]{1.0d});
        double[][] center = new double[instance.numAttributes()][0];
        double[] numMissing = new double[instance.numAttributes()];
        updateCanopyCenter(instance, center, numMissing);
        this.m_canopyCenters.add(center);
        this.m_canopyNumMissingForNumerics.add(numMissing);
    }

    /**
     * Removes every candidate canopy whose T2 density is below the minimum
     * cluster density.
     *
     * <p>The pass is skipped when the previous pass removed nothing and the
     * candidate set is exactly at its maximum size. Removal is done by moving the
     * last element into the vacated slot, walking from the end so that elements
     * moved forward have already been examined.
     */
    protected void pruneCandidateCanopies() {
        final boolean atCapacity = m_canopies.size() == m_maxCanopyCandidates;
        if (!m_didPruneLastTime && atCapacity) {
            return;
        }

        m_didPruneLastTime = false;
        for (int idx = m_canopies.numInstances() - 1; idx >= 0; idx--) {
            final double density = m_canopyT2Density.get(idx)[0];
            if (!(density < m_minClusterDensity)) {
                continue;
            }

            // Density list: drop the tail and, unless idx was the tail, move it into idx.
            final double[] tailDensity = m_canopyT2Density.remove(m_canopyT2Density.size() - 1);
            if (idx < m_canopyT2Density.size()) {
                m_canopyT2Density.set(idx, tailDensity);
            }
            if (getDebug()) {
                System.err.println("Pruning a candidate canopy with density: " + density);
            }
            m_didPruneLastTime = true;

            // Same swap-with-last removal for the center statistics ...
            final double[][] tailCenter = m_canopyCenters.remove(m_canopyCenters.size() - 1);
            if (idx < m_canopyCenters.size()) {
                m_canopyCenters.set(idx, tailCenter);
            }

            // ... the missing-value counts ...
            final double[] tailMissing = m_canopyNumMissingForNumerics.remove(m_canopyNumMissingForNumerics.size() - 1);
            if (idx < m_canopyNumMissingForNumerics.size()) {
                m_canopyNumMissingForNumerics.set(idx, tailMissing);
            }

            // ... and the canopy instances themselves.
            final int lastCanopy = m_canopies.numInstances() - 1;
            if (idx != lastCanopy) {
                m_canopies.swap(idx, lastCanopy);
            }
            m_canopies.delete(m_canopies.numInstances() - 1);
        }
    }

    /**
     * Computes a cluster membership distribution for the instance. Each canopy
     * receives the score {@code 1 / (1 + distance)} and the scores are then
     * normalized.
     *
     * @param instance the instance to score
     * @return the normalized membership values, one per cluster
     * @throws Exception if there are no canopies, or filtering/distance computation fails
     */
    @Override // weka.clusterers.AbstractClusterer, weka.clusterers.Clusterer
    public double[] distributionForInstance(Instance instance) throws Exception {
        final boolean noCanopies = m_canopies == null || m_canopies.size() == 0;
        if (noCanopies) {
            throw new Exception("No canopies available to cluster with!");
        }

        double[] membership = new double[numberOfClusters()];

        Instance toScore = instance;
        if (m_missingValuesReplacer != null) {
            m_missingValuesReplacer.input(toScore);
            toScore = m_missingValuesReplacer.output();
        }

        int c = 0;
        while (c < m_canopies.numInstances()) {
            double distance = m_distanceFunction.distance(toScore, m_canopies.instance(c));
            membership[c] = 1.0d / (1.0d + distance);
            c++;
        }

        Utils.normalize(membership);
        return membership;
    }

    /**
     * Rebuilds the per-canopy assignment list by assigning every canopy center
     * to the canopies it falls into. A failure for one center is printed and
     * that center is skipped.
     */
    private void assignCanopiesToCanopyCenters() {
        List<long[]> assignments = new ArrayList<long[]>();
        m_clusterCanopies = assignments;

        final int count = m_canopies.size();
        for (int c = 0; c < count; c++) {
            long[] assigned;
            try {
                assigned = assignCanopies(m_canopies.instance(c));
            } catch (Exception e) {
                e.printStackTrace();
                continue;
            }
            assignments.add(assigned);
        }
    }

    /**
     * Adjusts the final set of canopies to the number of clusters requested by
     * the user, then recomputes the canopy assignments of the centers and
     * replaces the retained training data with an empty header-only set.
     *
     * <ul>
     * <li>No specific number requested (negative): canopies are left as they are.</li>
     * <li>More canopies than requested: only the canopies with the largest values
     * in {@code dArr} are kept, together with their statistics.</li>
     * <li>Fewer canopies than requested and training data is available: randomly
     * chosen training instances that are not already canopies are added until the
     * requested number is reached or the training data is exhausted.</li>
     * </ul>
     *
     * @param dArr one value per current canopy, used to rank the canopies
     *             (updateFinished() passes the T2 densities)
     */
    protected void adjustCanopies(double[] dArr) {
        // No specific number of clusters requested: keep everything.
        if (this.m_numClustersRequested < 0) {
            assignCanopiesToCanopyCenters();
            this.m_trainingData = new Instances(this.m_canopies, 0);
            return;
        }
        if (this.m_canopies.numInstances() > this.m_numClustersRequested) {
            // Too many canopies: walk the sort order from its end (largest values)
            // and keep the requested number.
            int[] stableSort = Utils.stableSort(dArr);
            Instances instances = new Instances(this.m_canopies, 0);
            int i = 0;
            int length = stableSort.length - 1;
            while (i < this.m_numClustersRequested) {
                instances.add(this.m_canopies.instance(stableSort[length]));
                i++;
                length--;
            }
            this.m_canopies = instances;
            // Carry over the statistics of the retained canopies in the same order.
            ArrayList arrayList = new ArrayList();
            ArrayList arrayList2 = new ArrayList();
            ArrayList arrayList3 = new ArrayList();
            int i2 = 0;
            int length2 = stableSort.length - 1;
            while (i2 < instances.numInstances()) {
                arrayList.add(this.m_canopyCenters.get(stableSort[length2]));
                arrayList2.add(this.m_canopyT2Density.get(stableSort[length2]));
                arrayList3.add(this.m_canopyNumMissingForNumerics.get(stableSort[length2]));
                i2++;
                length2--;
            }
            this.m_canopyCenters = arrayList;
            this.m_canopyT2Density = arrayList2;
            this.m_canopyNumMissingForNumerics = arrayList3;
        } else if (this.m_canopies.numInstances() < this.m_numClustersRequested && this.m_trainingData != null && this.m_trainingData.numInstances() > 0) {
            // Too few canopies: top up with randomly selected training instances.
            Random random = new Random(getSeed());
            // Discard the first ten draws of the generator.
            for (int i3 = 0; i3 < 10; i3++) {
                random.nextInt();
            }
            // Hash keys of the instances that are already canopies, to avoid duplicates.
            HashMap hashMap = new HashMap();
            DecisionTableHashKey decisionTableHashKey = null;
            for (int i4 = 0; i4 < this.m_canopies.numInstances(); i4++) {
                try {
                    decisionTableHashKey = new DecisionTableHashKey(this.m_canopies.instance(i4), this.m_canopies.numAttributes(), true);
                    hashMap.put(decisionTableHashKey, null);
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            // Draw without replacement: pick an index in [0, numInstances], use it,
            // then swap it to position numInstances so it is outside the shrinking range.
            for (int numInstances = this.m_trainingData.numInstances() - 1; numInstances >= 0; numInstances--) {
                int nextInt = random.nextInt(numInstances + 1);
                try {
                    decisionTableHashKey = new DecisionTableHashKey(this.m_trainingData.instance(nextInt), this.m_trainingData.numAttributes(), true);
                } catch (Exception e2) {
                    // NOTE(review): on failure the key from the previous iteration is reused below.
                    e2.printStackTrace();
                }
                if (!hashMap.containsKey(decisionTableHashKey)) {
                    // New canopy seeded from this training instance, with density 1.
                    Instance instance = this.m_trainingData.instance(nextInt);
                    this.m_canopies.add(instance);
                    this.m_canopyT2Density.add(new double[]{1.0d});
                    double[][] dArr2 = new double[instance.numAttributes()][0];
                    double[] dArr3 = new double[instance.numAttributes()];
                    updateCanopyCenter(instance, dArr2, dArr3);
                    this.m_canopyCenters.add(dArr2);
                    this.m_canopyNumMissingForNumerics.add(dArr3);
                    hashMap.put(decisionTableHashKey, null);
                }
                this.m_trainingData.swap(numInstances, nextInt);
                if (this.m_canopies.numInstances() == this.m_numClustersRequested) {
                    break;
                }
            }
        }
        assignCanopiesToCanopyCenters();
        // The full training data is no longer needed; keep only the header.
        this.m_trainingData = new Instances(this.m_canopies, 0);
    }

    /**
     * Finalizes training: prunes low-density candidates, replaces every canopy
     * by its computed center (mean of the non-missing values for numeric
     * attributes, most frequent value for nominal ones, missing when missing
     * dominates), weights each center by its T2 density, and finally adjusts
     * the canopies to the requested number of clusters.
     *
     * <p>Does nothing when there are no canopies.
     */
    @Override // weka.clusterers.UpdateableClusterer
    public void updateFinished() {
        if (m_canopies == null || m_canopies.numInstances() == 0) {
            return;
        }
        pruneCandidateCanopies();

        final int numAttributes = m_canopies.numAttributes();
        double[] densities = new double[m_canopies.size()];

        for (int c = 0; c < m_canopies.numInstances(); c++) {
            final double density = m_canopyT2Density.get(c)[0];
            final double[][] stats = m_canopyCenters.get(c);
            final double[] numMissing = m_canopyNumMissingForNumerics.get(c);

            double[] center = new double[numAttributes];
            for (int a = 0; a < numAttributes; a++) {
                if (m_canopies.attribute(a).isNumeric()) {
                    // Mean over the non-missing values; missing if every value was missing.
                    center[a] = numMissing[a] == density
                            ? Utils.missingValue()
                            : stats[a][0] / (density - numMissing[a]);
                } else if (m_canopies.attribute(a).isNominal()) {
                    // Mode; the last slot counts missing values.
                    final int mode = Utils.maxIndex(stats[a]);
                    center[a] = mode == stats[a].length - 1 ? Utils.missingValue() : mode;
                }
            }

            Instance replacement;
            if (m_canopies.instance(c) instanceof SparseInstance) {
                replacement = new SparseInstance(1.0d, center);
            } else {
                replacement = new DenseInstance(1.0d, center);
            }
            m_canopies.set(c, replacement);
            m_canopies.instance(c).setWeight(density);
            densities[c] = density;
        }

        adjustCanopies(densities);
    }

    /**
     * Initializes the distance function with the given data, first passing the
     * data through the missing-values filter when one is set.
     *
     * @param instances the data to initialize the distance function with
     * @throws Exception if filtering fails
     */
    public void initializeDistanceFunction(Instances instances) throws Exception {
        Instances prepared = m_missingValuesReplacer == null
                ? instances
                : Filter.useFilter(instances, m_missingValuesReplacer);
        m_distanceFunction.setInstances(prepared);
    }

    /**
     * Sets the T2 distance from a heuristic over the attributes: every nominal
     * attribute contributes 0.25, every numeric attribute with more than two
     * non-missing values and a positive range contributes half its standard
     * deviation divided by its range. T2 becomes the square root of the total,
     * provided that is positive; otherwise T2 is left unchanged.
     *
     * @param instances the training data to derive the statistics from
     * @throws Exception declared for subclasses; not thrown by this implementation's own logic
     */
    protected void setT2T1BasedOnStdDev(Instances instances) throws Exception {
        double total = 0.0d;

        final int numAttributes = instances.numAttributes();
        for (int a = 0; a < numAttributes; a++) {
            if (instances.attribute(a).isNominal()) {
                total += 0.25d;
                continue;
            }
            if (!instances.attribute(a).isNumeric()) {
                continue;
            }

            AttributeStats stats = instances.attributeStats(a);
            if (instances.numInstances() - stats.missingCount <= 2) {
                continue;
            }

            final double stdDev = stats.numericStats.stdDev;
            final double min = stats.numericStats.min;
            final double max = stats.numericStats.max;
            if (Utils.isMissingValue(stdDev) || !(max - min > 0.0d)) {
                continue;
            }
            total += (0.5d * stdDev) / (max - min);
        }

        final double heuristicT2 = Math.sqrt(total);
        if (heuristicT2 > 0.0d) {
            m_t2 = heuristicT2;
        }
    }

    /**
     * Builds the clusterer.
     *
     * <p>With a non-empty dataset this runs in batch mode: missing values are
     * replaced (unless disabled), the data is shuffled, T2 is optionally derived
     * from the data, and every instance is fed through
     * {@link #updateClusterer(Instance)} before {@link #updateFinished()} is called.
     * With an empty dataset only the structures are initialized so that instances
     * can be supplied incrementally afterwards.
     *
     * <p>Changes relative to the previous version: the caller's dataset is never
     * shuffled in place, the instance counter and pruning flag are reset so that a
     * rebuild does not inherit state from the previous run, and a pruning rate of
     * zero no longer causes a division by zero in the debug progress output.
     *
     * @param instances the training data, or an empty (header-only) set for incremental training
     * @throws Exception if filtering or clustering fails
     */
    @Override // weka.clusterers.AbstractClusterer, weka.clusterers.Clusterer
    public void buildClusterer(Instances instances) throws Exception {
        this.m_t1 = this.m_userT1;
        this.m_t2 = this.m_userT2;
        if (instances.numInstances() == 0 && this.m_userT2 < 0.0d) {
            System.err.println("The heuristic for setting T2 based on std. dev. can't be used when running in incremental mode. Using default of 1.0.");
            this.m_t2 = 1.0d;
        }

        // Reset per-run state so that building twice gives the same result as building once.
        this.m_instanceCount = 0;
        this.m_didPruneLastTime = true;
        this.m_canopyT2Density = new ArrayList<double[]>();
        this.m_canopyCenters = new ArrayList<double[][]>();
        this.m_canopyNumMissingForNumerics = new ArrayList<double[]>();

        if (instances.numInstances() > 0) {
            if (!this.m_dontReplaceMissing) {
                this.m_missingValuesReplacer = new ReplaceMissingValues();
                this.m_missingValuesReplacer.setInputFormat(instances);
                // useFilter returns a new dataset, so the shuffle below cannot touch the caller's data.
                instances = Filter.useFilter(instances, this.m_missingValuesReplacer);
            } else {
                // No filter means no implicit copy: copy explicitly so that the
                // shuffle below does not reorder the caller's dataset.
                instances = new Instances(instances);
            }
            Random random = new Random(getSeed());
            // Discard the first ten draws of the generator.
            for (int i = 0; i < 10; i++) {
                random.nextInt();
            }
            instances.randomize(random);
            if (this.m_userT2 < 0.0d) {
                setT2T1BasedOnStdDev(instances);
            }
        }

        // A positive user T1 is used as is; otherwise its magnitude is a multiplier for T2.
        this.m_t1 = this.m_userT1 > 0.0d ? this.m_userT1 : (-this.m_userT1) * this.m_t2;
        this.m_distanceFunction.setInstances(instances);
        this.m_canopies = new Instances(instances, 0);
        if (instances.numInstances() > 0) {
            this.m_trainingData = new Instances(instances);
        }

        final boolean reportProgress = getDebug() && this.m_periodicPruningRate != 0;
        for (int i2 = 0; i2 < instances.numInstances(); i2++) {
            if (reportProgress && i2 % this.m_periodicPruningRate == 0) {
                System.err.println("Processed: " + i2);
            }
            updateClusterer(instances.instance(i2));
        }
        updateFinished();
    }

    /**
     * Returns the number of clusters, i.e. the number of canopies currently held.
     *
     * @return the number of canopies
     * @throws Exception declared by the interface
     */
    @Override // weka.clusterers.AbstractClusterer, weka.clusterers.Clusterer
    public int numberOfClusters() throws Exception {
        final int count = m_canopies.numInstances();
        return count;
    }

    /**
     * Sets the filter that incoming instances are passed through before distances are computed.
     *
     * @param filter the filter to use, or null for none
     */
    public void setMissingValuesReplacer(Filter filter) {
        m_missingValuesReplacer = filter;
    }

    /**
     * Returns the canopies held by this clusterer.
     *
     * @return the canopies (may be null if none have been set or built)
     */
    public Instances getCanopies() {
        final Instances canopies = m_canopies;
        return canopies;
    }

    /**
     * Replaces the canopies held by this clusterer.
     *
     * @param instances the canopies to use
     */
    public void setCanopies(Instances instances) {
        m_canopies = instances;
    }

    /**
     * Returns the canopy assignments of the cluster centers.
     *
     * @return one packed bit set per canopy center (may be null)
     */
    public List<long[]> getClusterCanopyAssignments() {
        final List<long[]> assignments = m_clusterCanopies;
        return assignments;
    }

    /**
     * Replaces the canopy assignments of the cluster centers.
     *
     * @param list one packed bit set per canopy center
     */
    public void setClusterCanopyAssignments(List<long[]> list) {
        m_clusterCanopies = list;
    }

    /**
     * Returns the T2 distance actually in use (as opposed to the user-supplied setting).
     *
     * @return the effective T2 distance
     */
    public double getActualT2() {
        final double effectiveT2 = m_t2;
        return effectiveT2;
    }

    /**
     * Returns the T1 distance actually in use (as opposed to the user-supplied setting).
     *
     * @return the effective T1 distance
     */
    public double getActualT1() {
        final double effectiveT1 = m_t1;
        return effectiveT1;
    }

    /**
     * Returns the tip text for the T1 property.
     *
     * @return the tip text
     */
    public String t1TipText() {
        return "The T1 distance to use. "
                + "Values < 0 are taken as a positive multiplier for the T2 distance";
    }

    /**
     * Sets the user-specified T1 value.
     *
     * @param d the T1 value; the effective T1 is derived from it in buildClusterer()
     */
    public void setT1(double d) {
        m_userT1 = d;
    }

    /**
     * Returns the user-specified T1 value.
     *
     * @return the T1 value as set by the user
     */
    public double getT1() {
        final double userT1 = m_userT1;
        return userT1;
    }

    /**
     * Returns the tip text for the T2 property.
     *
     * @return the tip text
     */
    public String t2TipText() {
        // Fixed the missing space in "onlyworks".
        return "The T2 distance to use. Values < 0 indicate that this should be set using a heuristic "
                + "based on attribute standard deviation (note that this only works when batch training)";
    }

    /**
     * Sets the user-specified T2 value.
     *
     * @param d the T2 value; buildClusterer() applies a heuristic when it is negative and data is available
     */
    public void setT2(double d) {
        m_userT2 = d;
    }

    /**
     * Returns the user-specified T2 value.
     *
     * @return the T2 value as set by the user
     */
    public double getT2() {
        final double userT2 = m_userT2;
        return userT2;
    }

    /**
     * Returns the tip text for the number-of-clusters property.
     *
     * @return the tip text
     */
    public String numClustersTipText() {
        return "Set number of clusters. "
                + "-1 means number of clusters is determined by T2 distance";
    }

    /**
     * Sets the number of clusters to produce.
     *
     * @param i the requested number of clusters; a negative value means no specific number
     * @throws Exception declared by the interface
     */
    @Override // weka.clusterers.NumberOfClustersRequestable
    public void setNumClusters(int i) throws Exception {
        m_numClustersRequested = i;
    }

    /**
     * Returns the requested number of clusters.
     *
     * @return the requested number of clusters
     */
    public int getNumClusters() {
        final int requested = m_numClustersRequested;
        return requested;
    }

    /**
     * Returns the tip text for the periodic pruning rate property.
     *
     * @return the tip text
     */
    public String periodicPruningRateTipText() {
        final String tip = "How often to prune low density canopies during training";
        return tip;
    }

    /**
     * Sets how many instances are processed between periodic pruning passes.
     *
     * @param i the pruning interval in instances
     */
    public void setPeriodicPruningRate(int i) {
        m_periodicPruningRate = i;
    }

    /**
     * Returns how many instances are processed between periodic pruning passes.
     *
     * @return the pruning interval in instances
     */
    public int getPeriodicPruningRate() {
        final int rate = m_periodicPruningRate;
        return rate;
    }

    /**
     * Returns the tip text for the minimum canopy density property.
     *
     * @return the tip text
     */
    public String minimumCanopyDensityTipText() {
        return "The minimum T2-based density below which a canopy "
                + "will be pruned during periodic pruning";
    }

    /**
     * Sets the density below which a canopy is removed when pruning.
     *
     * @param d the minimum T2 density
     */
    public void setMinimumCanopyDensity(double d) {
        m_minClusterDensity = d;
    }

    /**
     * Returns the density below which a canopy is removed when pruning.
     *
     * @return the minimum T2 density
     */
    public double getMinimumCanopyDensity() {
        final double minDensity = m_minClusterDensity;
        return minDensity;
    }

    /**
     * Returns the tip text for the maxNumCandidateCanopiesToHoldInMemory property.
     *
     * <p>Every other tip-text method in this class is named
     * {@code <property>TipText}; this one was missing the suffix. The correctly
     * named method is provided here and the old name is kept as a delegate.
     *
     * @return the tip text
     */
    public String maxNumCandidateCanopiesToHoldInMemoryTipText() {
        return "The maximum number of candidate canopies to retain in main memory during training. "
                + "T2 distance and data characteristics determine how many candidate canopies are formed "
                + "before periodic and final pruning are performed. "
                + "There may not be enough memory available if T2 is set too low.";
    }

    /**
     * Legacy name of {@link #maxNumCandidateCanopiesToHoldInMemoryTipText()},
     * retained for backward compatibility.
     *
     * @return the tip text
     */
    public String maxNumCandidateCanopiesToHoldInMemory() {
        return maxNumCandidateCanopiesToHoldInMemoryTipText();
    }

    /**
     * Sets the maximum number of candidate canopies held at any one time.
     *
     * @param i the maximum number of candidates
     */
    public void setMaxNumCandidateCanopiesToHoldInMemory(int i) {
        m_maxCanopyCandidates = i;
    }

    /**
     * Returns the maximum number of candidate canopies held at any one time.
     *
     * @return the maximum number of candidates
     */
    public int getMaxNumCandidateCanopiesToHoldInMemory() {
        final int limit = m_maxCanopyCandidates;
        return limit;
    }

    /**
     * Returns the tip text for the dontReplaceMissingValues property.
     *
     * <p>The previous text described the opposite of what enabling the flag does.
     *
     * @return the tip text
     */
    public String dontReplaceMissingValuesTipText() {
        return "Don't replace missing values globally with mean/mode.";
    }

    /**
     * Sets whether missing-value replacement is skipped in batch mode.
     *
     * @param z true to skip installing the missing-values filter in buildClusterer()
     */
    public void setDontReplaceMissingValues(boolean z) {
        m_dontReplaceMissing = z;
    }

    /**
     * Returns whether missing-value replacement is skipped in batch mode.
     *
     * @return true if the missing-values filter is not installed by buildClusterer()
     */
    public boolean getDontReplaceMissingValues() {
        final boolean skip = m_dontReplaceMissing;
        return skip;
    }

    /**
     * Formats a packed canopy bit set as a comma-separated list of canopy
     * indices enclosed in angle brackets, e.g. {@code " <0,2,70>"}.
     *
     * @param jArr the packed bit set (64 canopies per long, lowest bit first)
     * @return the formatted list; {@code " <>"} when no bit is set
     */
    public static String printSingleAssignment(long[] jArr) {
        StringBuilder sb = new StringBuilder();
        sb.append(" <");
        boolean first = true;
        for (int word = 0; word < jArr.length; word++) {
            long bits = jArr[word];
            int base = word * 64;
            for (int bit = 0; bit < 64; bit++) {
                // The mask must be a long: an int "1 << bit" wraps the shift distance
                // at 32, so bits 32..63 would be missed and bit 31 would sign-extend
                // and match any bit in the upper half.
                if (((1L << bit) & bits) != 0) {
                    if (!first) {
                        sb.append(',');
                    }
                    sb.append(base + bit);
                    first = false;
                }
            }
        }
        sb.append(">");
        return sb.toString();
    }

    /**
     * Formats every canopy on its own line as {@code "Cluster <index>: <instance>"},
     * followed by its canopy assignment when an assignment list of matching size
     * is supplied.
     *
     * @param instances the canopies to print
     * @param list the per-canopy assignments, or null
     * @return the formatted text, one line per canopy
     */
    public static String printCanopyAssignments(Instances instances, List<long[]> list) {
        final StringBuilder out = new StringBuilder();
        final int count = instances.size();
        // Assignments are only printed when there is exactly one per canopy.
        final boolean withAssignments = list != null && list.size() == count;

        for (int c = 0; c < count; c++) {
            out.append("Cluster ").append(c).append(": ");
            out.append(instances.instance(c));
            if (withAssignments) {
                out.append(printSingleAssignment(list.get(c)));
            }
            out.append("\n");
        }
        return out.toString();
    }

    /**
     * Returns a textual description of the clusterer: the T2 and T1 radii
     * followed by the canopies and their assignments.
     *
     * @param z if true, a title and the number of canopies are printed first
     * @return the description, or a fixed message if no canopies exist yet
     */
    public String toString(boolean z) {
        if (this.m_canopies == null) {
            return "No clusterer built yet";
        }
        StringBuilder text = new StringBuilder();
        if (z) {
            text.append("\nCanopy clustering\n=================\n");
            text.append("\nNumber of canopies (cluster centers) found: " + this.m_canopies.numInstances());
        }
        text.append("\nT2 radius: " + String.format("%-10.3f", Double.valueOf(this.m_t2)));
        text.append("\nT1 radius: " + String.format("%-10.3f", Double.valueOf(this.m_t1)));
        // Plain blank-line separator; this used to be taken from an unrelated
        // matrix-formatting constant (cern.colt AbstractFormatter) with the same value.
        text.append("\n\n");
        text.append(printCanopyAssignments(this.m_canopies, this.m_clusterCanopies));
        text.append("\n");
        return text.toString();
    }

    /**
     * Returns the full textual description of the clusterer, including the header.
     *
     * @return the description
     */
    @Override
    public String toString() {
        final boolean includeHeader = true;
        return toString(includeHeader);
    }

    /**
     * Releases the per-canopy training statistics (centers, densities and
     * missing-value counts). The canopies themselves are kept.
     */
    public void cleanUp() {
        m_canopyCenters = null;
        m_canopyT2Density = null;
        m_canopyNumMissingForNumerics = null;
    }

    /**
     * Merges the canopies of several Canopy clusterers into a single clusterer.
     *
     * <p>All canopies of all clusterers are pooled as candidates. Each candidate is
     * absorbed by the first already-accepted canopy within the aggregation T2
     * distance (its weight, missing counts and center statistics are added to that
     * canopy); otherwise it becomes a new accepted canopy. The result is finalized
     * with {@link #updateFinished()}.
     *
     * <p>Note that the statistics arrays of an accepted canopy are reused by
     * reference and accumulated into, so the corresponding arrays of the input
     * clusterers are modified.
     *
     * @param list the clusterers to aggregate; must contain at least one element
     * @param d the T1 distance for the aggregated clusterer
     * @param d2 the T2 distance used for merging and for the aggregated clusterer
     * @param normalizableDistance the distance function to use
     * @param filter the missing-values filter for the aggregated clusterer, or null
     * @param i the number of clusters to request from the aggregated clusterer
     * @return the aggregated clusterer
     */
    public static Canopy aggregateCanopies(List<Canopy> list, double d, double d2, NormalizableDistance normalizableDistance, Filter filter, int i) {
        Instances candidates = new Instances(list.get(0).getCanopies(), 0);
        Instances finalCanopies = new Instances(candidates, 0);
        List<double[][]> finalCenters = new ArrayList<double[][]>();
        List<double[]> finalNumMissing = new ArrayList<double[]>();
        List<double[]> finalDensity = new ArrayList<double[]>();
        List<Instance> finalCanopyList = new ArrayList<Instance>();
        List<double[][]> candidateCenters = new ArrayList<double[][]>();
        List<double[]> candidateNumMissing = new ArrayList<double[]>();

        // Pool the canopies of all clusterers together with their statistics.
        for (Canopy canopy : list) {
            Instances canopies = canopy.getCanopies();
            for (int c = 0; c < canopies.numInstances(); c++) {
                candidates.add(canopies.instance(c));
                candidateCenters.add(canopy.m_canopyCenters.get(c));
                candidateNumMissing.add(canopy.m_canopyNumMissingForNumerics.get(c));
            }
        }

        for (int c = 0; c < candidates.numInstances(); c++) {
            Instance candidate = candidates.instance(c);
            double[][] center = candidateCenters.get(c);
            double[] numMissing = candidateNumMissing.get(c);

            boolean absorbed = false;
            for (int f = 0; f < finalCanopyList.size(); f++) {
                if (normalizableDistance.distance(candidate, finalCanopyList.get(f)) < d2) {
                    double[][] targetCenter = finalCenters.get(f);
                    double[] targetMissing = finalNumMissing.get(f);
                    finalDensity.get(f)[0] += candidate.weight();
                    for (int a = 0; a < candidate.numAttributes(); a++) {
                        targetMissing[a] += numMissing[a];
                        for (int v = 0; v < targetCenter[a].length; v++) {
                            targetCenter[a][v] += center[a][v];
                        }
                    }
                    absorbed = true;
                    // Stop at the first matching canopy. Without this the loop never
                    // advanced past the match and accumulated into it forever.
                    break;
                }
            }

            if (!absorbed) {
                finalCanopyList.add(candidate);
                finalCanopies.add(candidate);
                finalCenters.add(center);
                finalNumMissing.add(numMissing);
                finalDensity.add(new double[]{candidate.weight()});
            }
        }

        Canopy aggregated = new Canopy();
        aggregated.setCanopies(finalCanopies);
        aggregated.setMissingValuesReplacer(filter);
        aggregated.m_distanceFunction = normalizableDistance;
        aggregated.m_canopyCenters = finalCenters;
        aggregated.m_canopyNumMissingForNumerics = finalNumMissing;
        aggregated.m_canopyT2Density = finalDensity;
        aggregated.m_t2 = d2;
        aggregated.m_t1 = d;
        try {
            aggregated.setNumClusters(i);
        } catch (Exception ignored) {
            // Best effort, as before: a rejected cluster count leaves the default in place.
        }
        aggregated.updateFinished();
        return aggregated;
    }

    /**
     * Command-line entry point: runs a new Canopy clusterer with the given options.
     *
     * @param strArr the command-line options
     */
    public static void main(String[] strArr) {
        final Canopy clusterer = new Canopy();
        runClusterer(clusterer, strArr);
    }
}
