package weka.core.converters;

import java.io.BufferedOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import weka.core.Capabilities;
import weka.core.DictionaryBuilder;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.OptionMetadata;
import weka.core.RevisionUtils;
import weka.core.stemmers.NullStemmer;
import weka.core.stemmers.Stemmer;
import weka.core.stopwords.Null;
import weka.core.stopwords.StopwordsHandler;
import weka.core.tokenizers.Tokenizer;

/* loaded from: input_file:weka-stable-3.8.4.jar:weka/core/converters/DictionarySaver.class */
/**
 * Saver that writes a dictionary constructed from string attributes in incoming
 * instances to a destination, either as plain text or in binary serialized form.
 *
 * <p>All dictionary-related options are delegated to an internal
 * {@link DictionaryBuilder}. Both batch ({@link #writeBatch()}) and incremental
 * ({@link #writeIncremental(Instance)}) saving are supported, but the two cannot
 * be mixed on the same saver.
 */
public class DictionarySaver extends AbstractFileSaver implements BatchConverter, IncrementalConverter {
    private static final long serialVersionUID = -19891905988830722L;

    /** Stream used for binary output; transient, so it is null until a destination is set. */
    protected transient OutputStream m_binaryStream;

    /** Builder that accumulates, prunes and writes the dictionary. */
    protected DictionaryBuilder m_dictionaryBuilder = new DictionaryBuilder();

    /** Whether the dictionary is saved in binary serialized form rather than plain text. */
    protected boolean m_dictionaryIsBinary;

    /** Number of instances between periodic prunings (0 = no periodic pruning). */
    private long m_periodicPruningRate;

    /** Creates a saver with its options reset to their defaults. */
    public DictionarySaver() {
        resetOptions();
    }

    /**
     * Returns a short description of this saver.
     *
     * @return the description text
     */
    public String globalInfo() {
        return "Writes a dictionary constructed from string attributes in incoming instances to a destination.";
    }

    /**
     * Sets whether the dictionary is saved as a binary serialized dictionary.
     *
     * @param z true to save in binary form, false for plain text
     */
    @OptionMetadata(displayName = "Save dictionary in binary form", description = "Save as a binary serialized dictionary", commandLineParamName = "binary-dict", commandLineParamSynopsis = "-binary-dict", commandLineParamIsFlag = true, displayOrder = 2)
    public void setSaveBinaryDictionary(boolean z) {
        this.m_dictionaryIsBinary = z;
    }

    /**
     * Returns whether the dictionary is saved as a binary serialized dictionary.
     *
     * @return true if binary output is selected
     */
    public boolean getSaveBinaryDictionary() {
        return this.m_dictionaryIsBinary;
    }

    /**
     * Returns the range of attributes to operate on, as held by the dictionary builder.
     *
     * @return the attribute range string
     */
    public String getAttributeIndices() {
        return this.m_dictionaryBuilder.getAttributeIndices();
    }

    /**
     * Sets the range of attributes to act on: a comma separated list of attribute
     * indices, with "first" and "last" valid values.
     *
     * @param str the attribute range
     */
    @OptionMetadata(displayName = "Range of attributes to operate on", description = "Specify range of attributes to act on. This is a comma separated list of attribute\nindices, with \"first\" and \"last\" valid values.", commandLineParamName = "R", commandLineParamSynopsis = "-R <range>", displayOrder = 4)
    public void setAttributeIndices(String str) {
        this.m_dictionaryBuilder.setAttributeIndices(str);
    }

    /**
     * Returns whether the attribute selection is inverted.
     *
     * @return true if only non-selected attributes are processed
     */
    public boolean getInvertSelection() {
        return this.m_dictionaryBuilder.getInvertSelection();
    }

    /**
     * Sets the attribute selection mode. If false, only selected attributes in the
     * range are worked on; if true, only non-selected attributes are processed.
     *
     * @param z true to invert the selection
     */
    @OptionMetadata(displayName = "Invert selection", description = "Set attributes selection mode. If false, only selected attributes in the range will\nbe worked on. If true, only non-selected attributes will be processed", commandLineParamName = "V", commandLineParamSynopsis = "-V", commandLineParamIsFlag = true, displayOrder = 5)
    public void setInvertSelection(boolean z) {
        this.m_dictionaryBuilder.setInvertSelection(z);
    }

    /**
     * Returns whether tokens are converted to lower case.
     *
     * @return true if tokens are lower-cased
     */
    public boolean getLowerCaseTokens() {
        return this.m_dictionaryBuilder.getLowerCaseTokens();
    }

    /**
     * Sets whether all tokens are converted to lowercase when matching against
     * dictionary entries.
     *
     * @param z true to lower-case tokens
     */
    @OptionMetadata(displayName = "Lower case tokens", description = "Convert all tokens to lowercase when matching against dictionary entries.", commandLineParamName = "L", commandLineParamSynopsis = "-L", commandLineParamIsFlag = true, displayOrder = 10)
    public void setLowerCaseTokens(boolean z) {
        this.m_dictionaryBuilder.setLowerCaseTokens(z);
    }

    /**
     * Sets the stemming algorithm to use. A null argument installs a
     * {@link NullStemmer} instead.
     *
     * @param stemmer the stemmer to use, or null
     */
    @OptionMetadata(displayName = "Stemmer to use", description = "The stemming algorithm (classname plus parameters) to use.", commandLineParamName = "stemmer", commandLineParamSynopsis = "-stemmer <spec>", displayOrder = 11)
    public void setStemmer(Stemmer stemmer) {
        this.m_dictionaryBuilder.setStemmer(stemmer != null ? stemmer : new NullStemmer());
    }

    /**
     * Returns the stemming algorithm in use.
     *
     * @return the stemmer held by the dictionary builder
     */
    public Stemmer getStemmer() {
        return this.m_dictionaryBuilder.getStemmer();
    }

    /**
     * Sets the stopwords handler to use. A null argument installs the
     * {@link Null} handler instead.
     *
     * @param stopwordsHandler the handler to use, or null
     */
    @OptionMetadata(displayName = "Stop words handler", description = "The stopwords handler to use (default = Null)", commandLineParamName = "stopwords-handler", commandLineParamSynopsis = "-stopwords-handler <spec>", displayOrder = 12)
    public void setStopwordsHandler(StopwordsHandler stopwordsHandler) {
        this.m_dictionaryBuilder.setStopwordsHandler(stopwordsHandler != null ? stopwordsHandler : new Null());
    }

    /**
     * Returns the stopwords handler in use.
     *
     * @return the stopwords handler held by the dictionary builder
     */
    public StopwordsHandler getStopwordsHandler() {
        return this.m_dictionaryBuilder.getStopwordsHandler();
    }

    /**
     * Sets the tokenizing algorithm to use.
     *
     * @param tokenizer the tokenizer to use
     */
    @OptionMetadata(displayName = "Tokenizer", description = "The tokenizing algorithm (classname plus parameters) to use.\n(default: weka.core.tokenizers.WordTokenizer)", commandLineParamName = "tokenizer", commandLineParamSynopsis = "-tokenizer <spec>", displayOrder = 13)
    public void setTokenizer(Tokenizer tokenizer) {
        this.m_dictionaryBuilder.setTokenizer(tokenizer);
    }

    /**
     * Returns the tokenizing algorithm in use.
     *
     * @return the tokenizer held by the dictionary builder
     */
    public Tokenizer getTokenizer() {
        return this.m_dictionaryBuilder.getTokenizer();
    }

    /**
     * Returns the periodic pruning rate.
     *
     * @return the number of instances between prunings (0 = no periodic pruning)
     */
    public long getPeriodicPruning() {
        return this.m_periodicPruningRate;
    }

    /**
     * Sets the periodic pruning rate: the dictionary is pruned every {@code j}
     * instances (0 means no periodic pruning).
     *
     * <p>The value is recorded here and also forwarded to the dictionary builder;
     * without the forwarding the option would be stored but never take effect,
     * because the builder is the component that processes the instances.
     *
     * @param j the number of instances between prunings
     */
    @OptionMetadata(displayName = "Periodic pruning rate", description = "Prune the dictionary every x instances\n(default = 0 - i.e. no periodic pruning)", commandLineParamName = "P", commandLineParamSynopsis = "-P <integer>", displayOrder = 14)
    public void setPeriodicPruning(long j) {
        this.m_periodicPruningRate = j;
        this.m_dictionaryBuilder.setPeriodicPruning(j);
    }

    /**
     * Returns the number of words to attempt to keep.
     *
     * @return the number of words to keep
     */
    public int getWordsToKeep() {
        return this.m_dictionaryBuilder.getWordsToKeep();
    }

    /**
     * Sets the number of words (per class if there is a class attribute assigned)
     * to attempt to keep.
     *
     * @param i the number of words to keep
     */
    @OptionMetadata(displayName = "Number of words to attempt to keep", description = "The number of words (per class if there is a class attribute assigned) to attempt to keep.", commandLineParamName = "W", commandLineParamSynopsis = "-W <integer>", displayOrder = 15)
    public void setWordsToKeep(int i) {
        this.m_dictionaryBuilder.setWordsToKeep(i);
    }

    /**
     * Returns the minimum term frequency used when pruning the dictionary.
     *
     * @return the minimum term frequency
     */
    public int getMinTermFreq() {
        return this.m_dictionaryBuilder.getMinTermFreq();
    }

    /**
     * Sets the minimum term frequency to use when pruning the dictionary.
     *
     * @param i the minimum term frequency
     */
    @OptionMetadata(displayName = "Minimum term frequency", description = "The minimum term frequency to use when pruning the dictionary\n(default = 1).", commandLineParamName = "M", commandLineParamSynopsis = "-M <integer>", displayOrder = 16)
    public void setMinTermFreq(int i) {
        this.m_dictionaryBuilder.setMinTermFreq(i);
    }

    /**
     * Returns whether per-class operation is disabled.
     *
     * @return true if limits are not enforced on a per-class basis
     */
    public boolean getDoNotOperateOnPerClassBasis() {
        return this.m_dictionaryBuilder.getDoNotOperateOnPerClassBasis();
    }

    /**
     * Sets whether the maximum number of words and the minimum term frequency are
     * enforced over the documents in all classes rather than on a per-class basis.
     *
     * @param z true to disable per-class operation
     */
    @OptionMetadata(displayName = "Do not operate on a per-class basis", description = "If this is set, the maximum number of words and the\nminimum term frequency is not enforced on a per-class\nbasis but based on the documents in all the classes\n(even if a class attribute is set).", commandLineParamName = "O", commandLineParamSynopsis = "-O", commandLineParamIsFlag = true, displayOrder = 17)
    public void setDoNotOperateOnPerClassBasis(boolean z) {
        this.m_dictionaryBuilder.setDoNotOperateOnPerClassBasis(z);
    }

    /**
     * Sets whether the dictionary is sorted alphabetically.
     *
     * @param z true to sort the dictionary
     */
    @OptionMetadata(displayName = "Sort dictionary", description = "Sort the dictionary alphabetically", commandLineParamName = "sort", commandLineParamSynopsis = "-sort", commandLineParamIsFlag = true, displayOrder = 18)
    public void setKeepDictionarySorted(boolean z) {
        this.m_dictionaryBuilder.setSortDictionary(z);
    }

    /**
     * Returns whether the dictionary is sorted alphabetically.
     *
     * @return true if the dictionary is sorted
     */
    public boolean getKeepDictionarySorted() {
        return this.m_dictionaryBuilder.getSortDictionary();
    }

    /**
     * Returns the capabilities of this saver: the inherited capabilities plus
     * nominal, numeric, date and string attributes and classes, missing values,
     * missing class values, and data without a class.
     *
     * @return the capabilities
     */
    @Override // weka.core.converters.AbstractSaver, weka.core.CapabilitiesHandler
    public Capabilities getCapabilities() {
        Capabilities capabilities = super.getCapabilities();
        capabilities.enable(Capabilities.Capability.NOMINAL_ATTRIBUTES);
        capabilities.enable(Capabilities.Capability.NUMERIC_ATTRIBUTES);
        capabilities.enable(Capabilities.Capability.DATE_ATTRIBUTES);
        capabilities.enable(Capabilities.Capability.STRING_ATTRIBUTES);
        capabilities.enable(Capabilities.Capability.MISSING_VALUES);
        capabilities.enable(Capabilities.Capability.NOMINAL_CLASS);
        capabilities.enable(Capabilities.Capability.NUMERIC_CLASS);
        capabilities.enable(Capabilities.Capability.DATE_CLASS);
        capabilities.enable(Capabilities.Capability.STRING_CLASS);
        capabilities.enable(Capabilities.Capability.MISSING_CLASS_VALUES);
        capabilities.enable(Capabilities.Capability.NO_CLASS);
        return capabilities;
    }

    /**
     * Returns a description of the file type written by this saver.
     *
     * @return the file description
     */
    @Override // weka.core.converters.AbstractFileSaver, weka.core.converters.FileSourcedConverter
    public String getFileDescription() {
        return "Plain text or binary serialized dictionary files created from text in string attributes";
    }

    /**
     * Processes one instance incrementally. A non-null instance is added to the
     * dictionary; a null instance signals the end of the data, at which point the
     * dictionary is finalized and written and the structure and writer are reset.
     *
     * @param instance the instance to process, or null to finish and save
     * @throws IOException if batch and incremental saving are mixed, if no
     *         structure is available, or if building or saving the dictionary fails
     */
    @Override // weka.core.converters.AbstractSaver, weka.core.converters.Saver
    public void writeIncremental(Instance instance) throws IOException {
        int writeMode = getWriteMode();
        Instances structure = getInstances();

        // Retrieval modes 1 and 0 are rejected here (presumably Saver.BATCH and
        // Saver.NONE - confirm against the Saver interface).
        if (getRetrieval() == 1 || getRetrieval() == 0) {
            throw new IOException("Batch and incremental saving cannot be mixed.");
        }

        // Write mode 1 is the state in which the structure has not yet been
        // examined (presumably AbstractSaver.WAIT - confirm).
        if (writeMode == 1) {
            if (structure == null) {
                setWriteMode(2);
                if (instance != null) {
                    throw new IOException("Structure (header Information) has to be set in advance");
                }
            } else {
                setWriteMode(3);
            }
            writeMode = getWriteMode();
        }

        // Write mode 2: nothing can be written, so the operation is cancelled.
        if (writeMode == 2) {
            cancel();
        }

        // Write mode 3: the structure is known, so the builder is (re)initialised
        // before switching to the writing state (mode 0).
        if (writeMode == 3) {
            this.m_dictionaryBuilder.reset();
            try {
                this.m_dictionaryBuilder.setup(structure);
                setWriteMode(0);
                writeMode = getWriteMode();
            } catch (Exception e) {
                throw new IOException(e);
            }
        }

        if (writeMode == 0) {
            if (structure == null) {
                throw new IOException("No instances information available.");
            }
            if (instance != null) {
                this.m_dictionaryBuilder.processInstance(instance);
                return;
            }
            // A null instance marks the end of the stream: write the dictionary out.
            finalizeAndSaveDictionary();
            resetStructure();
            resetWriter();
        }
    }

    /**
     * Builds a dictionary from all instances currently held by this saver and
     * writes it to the configured destination, or to standard out when neither a
     * file nor a writer is available.
     *
     * @throws IOException if there are no instances, if batch and incremental
     *         saving are mixed, or if building or saving the dictionary fails
     */
    @Override // weka.core.converters.AbstractSaver, weka.core.converters.Saver
    public void writeBatch() throws IOException {
        Instances data = getInstances();
        if (data == null) {
            throw new IOException("No instances to save");
        }
        // Retrieval mode 2 is rejected here (presumably Saver.INCREMENTAL - confirm).
        if (getRetrieval() == 2) {
            throw new IOException("Batch and incremental saving cannot be mixed.");
        }
        setRetrieval(1);
        setWriteMode(0);

        this.m_dictionaryBuilder.reset();
        try {
            this.m_dictionaryBuilder.setup(data);
            int total = data.numInstances();
            for (int i = 0; i < total; i++) {
                this.m_dictionaryBuilder.processInstance(data.instance(i));
            }
        } catch (Exception e) {
            throw new IOException(e);
        }

        boolean wroteToStdOut = finalizeAndSaveDictionary();
        setWriteMode(1);
        if (!wroteToStdOut) {
            // The writer is only released when a real destination was used.
            resetWriter();
            setWriteMode(2);
        }
    }

    /**
     * Finalizes the dictionary and writes it to the current destination.
     *
     * <p>An {@link IOException} raised while saving is propagated unchanged so that
     * its message stays visible to the caller; any other exception is wrapped in
     * an {@link IOException}.
     *
     * @return true if the dictionary was written to standard out, false if it was
     *         written to the binary stream or the writer
     * @throws IOException if binary output is requested but no suitable destination
     *         is available, or if finalizing or saving the dictionary fails
     */
    private boolean finalizeAndSaveDictionary() throws IOException {
        try {
            this.m_dictionaryBuilder.finalizeDictionary();
            if (retrieveFile() == null && getWriter() == null) {
                if (getSaveBinaryDictionary()) {
                    throw new IOException("Can't output binary dictionary to standard out!");
                }
                // NOTE(review): System.out selects the OutputStream overload of
                // saveDictionary - confirm that this overload emits plain text.
                this.m_dictionaryBuilder.saveDictionary(System.out);
                return true;
            }
            if (getSaveBinaryDictionary()) {
                // m_binaryStream is transient and cleared by resetWriter(), so it can
                // be null here; fail with a clear message instead of a wrapped NPE.
                if (this.m_binaryStream == null) {
                    throw new IOException("No binary output stream available - a destination has to be set before saving a binary dictionary.");
                }
                this.m_dictionaryBuilder.saveDictionary(this.m_binaryStream);
            } else {
                this.m_dictionaryBuilder.saveDictionary(getWriter());
            }
            return false;
        } catch (IOException e) {
            throw e;
        } catch (Exception e) {
            throw new IOException(e);
        }
    }

    /** Resets the options to their defaults and sets the file extension to ".dict". */
    @Override // weka.core.converters.AbstractFileSaver, weka.core.converters.AbstractSaver
    public void resetOptions() {
        super.resetOptions();
        setFileExtension(".dict");
    }

    /** Resets the writer and discards the binary output stream. */
    @Override // weka.core.converters.AbstractFileSaver
    public void resetWriter() {
        super.resetWriter();
        this.m_binaryStream = null;
    }

    /**
     * Sets the destination stream. In addition to the inherited handling, the
     * stream is wrapped in a {@link BufferedOutputStream} for binary output.
     *
     * @param outputStream the stream to write to
     * @throws IOException if the superclass fails to set the destination
     */
    @Override // weka.core.converters.AbstractFileSaver, weka.core.converters.AbstractSaver, weka.core.converters.Saver
    public void setDestination(OutputStream outputStream) throws IOException {
        super.setDestination(outputStream);
        this.m_binaryStream = new BufferedOutputStream(outputStream);
    }

    /**
     * Returns the revision string.
     *
     * @return the revision extracted from the revision keyword
     */
    @Override // weka.core.RevisionHandler
    public String getRevision() {
        return RevisionUtils.extract("$Revision: 12690 $");
    }

    /**
     * Runs this saver from the command line.
     *
     * @param strArr the command-line options
     */
    public static void main(String[] strArr) {
        runFileSaver(new DictionarySaver(), strArr);
    }
}
