package meka.core.converters;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Vector;
import weka.core.Attribute;
import weka.core.CommandlineRunnable;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.Utils;
import weka.core.converters.AbstractLoader;
import weka.core.converters.BatchConverter;
import weka.core.json.JSONInstances;
import weka.gui.beans.xml.XMLBeans;

/* loaded from: input_file:lib/meka-1.9.7.jar:meka/core/converters/MultiLabelTextDirectoryLoader.class */
/**
 * Batch loader that turns a two-level directory tree of text files into a
 * multi-label dataset.
 *
 * <p>Each first-level sub-directory becomes one nominal class attribute (named
 * via {@link #createClassAttributeName(String)}); each second-level
 * sub-directory becomes one label of that attribute. The files found below the
 * second level are the documents; a document is identified by its file name
 * only, so the same name appearing under several class directories is treated
 * as one document. See {@link #globalInfo()} for a worked example.
 *
 * <p>Incremental loading is not supported; see {@link #getNextInstance(Instances)}.
 */
public class MultiLabelTextDirectoryLoader extends AbstractLoader implements BatchConverter, OptionHandler, CommandlineRunnable {
    private static final long serialVersionUID = 2592118773712247647L;

    /** Name of the optional string attribute that stores the document's file name. */
    public static final String FILE_ID = "file-ID";

    /** Name of the string attribute that stores the document's content. */
    public static final String TEXT = "text";

    /** Cached header; {@code null} until {@link #getStructure()} builds it, cleared by {@link #reset()}. */
    protected Instances m_structure = null;

    /** Root directory to load from; defaults to the JVM's {@code user.dir}. */
    protected File m_sourceFile = new File(System.getProperty("user.dir"));

    /** Debug flag as set through {@code -D}. */
    protected boolean m_Debug = false;

    /** Whether the {@link #FILE_ID} attribute is added to the header and filled in. */
    protected boolean m_OutputFilename = false;

    /** Character set name used for reading documents; empty or {@code null} means the platform default. */
    protected String m_charSet = "";

    /** Creates a loader pointing at the current working directory. */
    public MultiLabelTextDirectoryLoader() {
        // NOTE(review): 0 is presumably the "no retrieval mode chosen yet" constant of the Loader API - confirm.
        setRetrieval(0);
    }

    /**
     * Returns the description of this loader, including an example layout.
     *
     * @return the description text
     */
    public String globalInfo() {
        return "Loads text files in a directory.\nFirst sub-directory level is used for the class attribute names, the second level for the labels for each class attribute.\nDue to the multi-label nature, documents need to be copied into multiple sub-directories. The loader uses the file's name (no path) as a unique ID to identify the copies.\n\nExample:\n/text-dataset\n  /class1\n    /0\n      3.txt\n      5.txt\n    /1\n      1.txt\n      2.txt\n      4.txt\n /class2\n    /0\n      1.txt\n      4.txt\n    /1\n      2.txt\n      3.txt\n      5.txt\n\nWill generate something like this:\n\n@relation 'example: -C 2'\n\n@attribute @@class-class1@@ {0,1}\n@attribute @@class-class2@@ {0,1}\n@attribute file-ID string\n@attribute text string\n\n@data\n1,0,1.txt,'file 1\\n'\n1,1,2.txt,'file 2\\n'\n0,1,3.txt,'file 3\\n'\n1,0,4.txt,'file 4\\n'\n0,1,5.txt,'file 5\\n'\n";
    }

    /**
     * Lists the command-line options understood by {@link #setOptions(String[])}.
     *
     * @return an enumeration of the available options
     */
    @Override // weka.core.OptionHandler
    public Enumeration<Option> listOptions() {
        Vector<Option> options = new Vector<Option>();
        options.add(new Option("\tEnables debug output.\n\t(default: off)", "D", 0, "-D"));
        options.add(new Option("\tStores the filename in an additional attribute.\n\t(default: off)", "F", 0, "-F"));
        // "-dir" consumes one argument (the directory), so it is declared with an argument count of 1.
        options.add(new Option("\tThe directory to work on.\n\t(default: current directory)", "dir", 1, "-dir <directory>"));
        options.add(new Option("\tThe character set to use, e.g UTF-8.\n\t(default: use the default character set)", "charset", 1, "-charset <charset name>"));
        return options.elements();
    }

    /**
     * Parses {@code -D}, {@code -F}, {@code -dir <directory>} and
     * {@code -charset <charset name>}.
     *
     * @param strArr the options to parse
     * @throws Exception if an option is malformed or the directory does not exist
     */
    @Override // weka.core.OptionHandler
    public void setOptions(String[] strArr) throws Exception {
        setDebug(Utils.getFlag("D", strArr));
        setOutputFilename(Utils.getFlag("F", strArr));

        // An absent "-dir" yields an empty string; fall back to the documented
        // default (current directory) instead of failing on new File("").
        String dir = Utils.getOption("dir", strArr);
        if (dir.length() > 0) {
            setDirectory(new File(dir));
        } else {
            setDirectory(new File(System.getProperty("user.dir")));
        }

        String charset = Utils.getOption("charset", strArr);
        this.m_charSet = (charset.length() > 0) ? charset : "";
    }

    /**
     * Returns the current configuration as command-line options.
     *
     * @return the options, always including {@code -dir}
     */
    @Override // weka.core.OptionHandler
    public String[] getOptions() {
        List<String> options = new ArrayList<String>();
        if (getDebug()) {
            options.add("-D");
        }
        if (getOutputFilename()) {
            options.add("-F");
        }
        options.add("-dir");
        options.add(getDirectory().getAbsolutePath());
        if (this.m_charSet != null && this.m_charSet.length() > 0) {
            options.add("-charset");
            options.add(this.m_charSet);
        }
        return options.toArray(new String[0]);
    }

    /**
     * Returns the tip text for the charSet property.
     *
     * @return the tip text
     */
    public String charSetTipText() {
        return "The character set to use when reading text files (eg UTF-8) - leave blank to use the default character set.";
    }

    /**
     * Sets the character set used when reading documents.
     *
     * @param str the character set name; empty or {@code null} selects the platform default
     */
    public void setCharSet(String str) {
        this.m_charSet = str;
    }

    /**
     * Returns the character set used when reading documents.
     *
     * @return the character set name as last set
     */
    public String getCharSet() {
        return this.m_charSet;
    }

    /**
     * Sets the debug flag.
     *
     * @param z the new flag value
     */
    public void setDebug(boolean z) {
        this.m_Debug = z;
    }

    /**
     * Returns the debug flag.
     *
     * @return the flag value
     */
    public boolean getDebug() {
        return this.m_Debug;
    }

    /**
     * Returns the tip text for the debug property.
     *
     * @return the tip text
     */
    public String debugTipText() {
        return "Whether to print additional debug information to the console.";
    }

    /**
     * Sets whether the file name is stored in the {@link #FILE_ID} attribute.
     * Resets the loader because this changes the header layout.
     *
     * @param z {@code true} to add the attribute
     */
    public void setOutputFilename(boolean z) {
        this.m_OutputFilename = z;
        reset();
    }

    /**
     * Returns whether the file name is stored in the {@link #FILE_ID} attribute.
     *
     * @return {@code true} if the attribute is added
     */
    public boolean getOutputFilename() {
        return this.m_OutputFilename;
    }

    /**
     * Returns the tip text for the outputFilename property.
     *
     * @return the tip text
     */
    public String outputFilenameTipText() {
        return "Whether to store the filename in an additional attribute.";
    }

    /**
     * Returns a short description of what this loader reads.
     *
     * @return the description
     */
    public String getFileDescription() {
        return "Directories";
    }

    /**
     * Returns the directory to load from, as an absolute path.
     *
     * @return a new {@link File} for the absolute path of the source directory
     */
    public File getDirectory() {
        return new File(this.m_sourceFile.getAbsolutePath());
    }

    /**
     * Sets the directory to load from; delegates to {@link #setSource(File)}.
     *
     * @param file the directory
     * @throws IOException if the directory is {@code null} or does not exist
     */
    public void setDirectory(File file) throws IOException {
        setSource(file);
    }

    /** Discards the cached header and resets the retrieval mode. */
    @Override // weka.core.converters.AbstractLoader, weka.core.converters.Loader
    public void reset() {
        this.m_structure = null;
        setRetrieval(0);
    }

    /**
     * Resets the loader and sets the directory to load from.
     *
     * @param file the directory
     * @throws IOException if {@code file} is {@code null}, does not exist or is not a directory
     */
    @Override // weka.core.converters.AbstractLoader, weka.core.converters.Loader
    public void setSource(File file) throws IOException {
        reset();
        if (file == null) {
            throw new IOException("Source directory object is null!");
        }
        this.m_sourceFile = file;
        if (!file.exists() || !file.isDirectory()) {
            throw new IOException("Directory '" + file + "' not found");
        }
    }

    /**
     * Builds the attribute name for a first-level directory.
     *
     * @param str the directory name
     * @return the name wrapped as {@code @@class-<name>@@}
     */
    protected String createClassAttributeName(String str) {
        return "@@class-" + str + "@@";
    }

    /**
     * Returns the header of the dataset, building and caching it on first use.
     *
     * <p>The header holds one nominal attribute per first-level sub-directory
     * (sorted by name, labels sorted as well), then the optional
     * {@link #FILE_ID} string attribute, then the {@link #TEXT} string
     * attribute. The relation name is the sanitized directory path followed by
     * {@code ": -C <number of class attributes>"}.
     *
     * @return the cached header
     * @throws IOException if no source directory is available
     */
    @Override // weka.core.converters.AbstractLoader, weka.core.converters.Loader
    public Instances getStructure() throws IOException {
        File root = requireDirectory();
        if (this.m_structure == null) {
            List<String> classNames = subDirectoryNames(root);
            ArrayList<Attribute> attributes = new ArrayList<Attribute>();
            for (String className : classNames) {
                List<String> labels = subDirectoryNames(new File(root, className));
                attributes.add(new Attribute(createClassAttributeName(className), labels));
            }
            // A null value list is what the original code passes for the string attributes.
            if (this.m_OutputFilename) {
                attributes.add(new Attribute(FILE_ID, (List<String>) null));
            }
            attributes.add(new Attribute(TEXT, (List<String>) null));

            // NOTE(review): SPARSE_SEPARATOR looks like a constant the decompiler
            // substituted for a literal separator character - confirm its value.
            String relationName = root.getAbsolutePath()
                    .replaceAll("/", "_")
                    .replaceAll("\\\\", "_")
                    .replaceAll(JSONInstances.SPARSE_SEPARATOR, "_")
                    + ": -C " + classNames.size();
            this.m_structure = new Instances(relationName, attributes, 0);
            this.m_structure.setClassIndex(this.m_structure.numAttributes() - 1);
        }
        return this.m_structure;
    }

    /**
     * Loads every document below the source directory into a dataset.
     *
     * <p>Documents are processed in order of their file name. For each class
     * attribute the value is the label directory that contains the document;
     * if no label directory of a class contains it, the value stays missing.
     * A document that cannot be read is reported on {@code System.err} and
     * skipped. The rows are added to a copy of the header, so the object
     * returned by {@link #getStructure()} does not accumulate rows across calls.
     *
     * @return the loaded dataset
     * @throws IOException if no source directory is available
     */
    @Override // weka.core.converters.AbstractLoader, weka.core.converters.Loader
    public Instances getDataSet() throws IOException {
        File root = requireDirectory();
        Instances data = new Instances(getStructure(), 0);

        // file name -> first file found with that name (used to read the content)
        Map<String, File> filesById = new HashMap<String, File>();
        // class directory -> label directory -> names of the files stored there
        Map<String, Map<String, Set<String>>> membership = new HashMap<String, Map<String, Set<String>>>();

        for (String className : subDirectoryNames(root)) {
            File classDir = new File(root, className);
            Map<String, Set<String>> idsByLabel = new HashMap<String, Set<String>>();
            membership.put(className, idsByLabel);
            for (String label : subDirectoryNames(classDir)) {
                Set<String> ids = new HashSet<String>();
                idsByLabel.put(label, ids);
                File[] documents = new File(classDir, label).listFiles();
                if (documents == null) {
                    // listFiles() returns null on I/O error; treat as an empty label directory.
                    continue;
                }
                for (File document : documents) {
                    String id = document.getName();
                    if (!filesById.containsKey(id)) {
                        filesById.put(id, document);
                    }
                    ids.add(id);
                }
            }
        }

        List<String> sortedIds = new ArrayList<String>(filesById.keySet());
        Collections.sort(sortedIds);
        for (String id : sortedIds) {
            try {
                double[] values = new double[data.numAttributes()];
                Arrays.fill(values, Utils.missingValue());

                Attribute textAttribute = data.attribute(TEXT);
                values[textAttribute.index()] = textAttribute.addStringValue(readText(filesById.get(id)));

                if (this.m_OutputFilename) {
                    Attribute idAttribute = data.attribute(FILE_ID);
                    values[idAttribute.index()] = idAttribute.addStringValue(id);
                }

                for (Map.Entry<String, Map<String, Set<String>>> classEntry : membership.entrySet()) {
                    Attribute classAttribute = data.attribute(createClassAttributeName(classEntry.getKey()));
                    // The first label whose directory holds this document wins;
                    // the loop simply ends when no label matches.
                    for (Map.Entry<String, Set<String>> labelEntry : classEntry.getValue().entrySet()) {
                        if (labelEntry.getValue().contains(id)) {
                            values[classAttribute.index()] = classAttribute.indexOfValue(labelEntry.getKey());
                            break;
                        }
                    }
                }
                data.add(new DenseInstance(1.0d, values));
            } catch (Exception e) {
                // Best effort: report the document and continue with the next one.
                System.err.println("Failed to process file: " + id + ", " + filesById.get(id));
                e.printStackTrace();
            }
        }
        return data;
    }

    /**
     * Not supported; this loader only works in batch mode.
     *
     * @param instances ignored
     * @return never returns normally
     * @throws IOException always
     */
    @Override // weka.core.converters.AbstractLoader, weka.core.converters.Loader
    public Instance getNextInstance(Instances instances) throws IOException {
        throw new IOException("MultiLabelTextDirectoryLoader can't read data sets incrementally.");
    }

    /**
     * Returns the revision string.
     *
     * @return the revision extracted from the embedded revision marker
     */
    @Override // weka.core.RevisionHandler
    public String getRevision() {
        return RevisionUtils.extract("$Revision: 12184 $");
    }

    /** No-op hook executed before {@link #run(Object, String[])}. */
    @Override // weka.core.CommandlineRunnable
    public void preExecution() throws Exception {
    }

    /** No-op hook executed after {@link #run(Object, String[])}. */
    @Override // weka.core.CommandlineRunnable
    public void postExecution() throws Exception {
    }

    /**
     * Configures the given loader from the options and prints the loaded
     * dataset to {@code System.out}; prints the usage to {@code System.err}
     * when no options are given.
     *
     * @param obj the loader to run, must be a {@code MultiLabelTextDirectoryLoader}
     * @param strArr the command-line options
     * @throws IllegalArgumentException if {@code obj} is not a {@code MultiLabelTextDirectoryLoader}
     */
    @Override // weka.core.CommandlineRunnable
    public void run(Object obj, String[] strArr) throws IllegalArgumentException {
        if (!(obj instanceof MultiLabelTextDirectoryLoader)) {
            throw new IllegalArgumentException("Object to execute is not a MultiLabelTextDirectoryLoader!");
        }
        MultiLabelTextDirectoryLoader loader = (MultiLabelTextDirectoryLoader) obj;

        if (strArr.length == 0) {
            System.err.println("\nUsage:\n\tMultiLabelTextDirectoryLoader [options]\n\nOptions:\n");
            Enumeration<Option> options = new MultiLabelTextDirectoryLoader().listOptions();
            while (options.hasMoreElements()) {
                Option option = options.nextElement();
                System.err.println(option.synopsis());
                System.err.println(option.description());
            }
            System.err.println();
            return;
        }

        try {
            loader.setOptions(strArr);
            System.out.println(loader.getDataSet());
        } catch (Exception e) {
            System.err.println("Failed to set options: " + Utils.arrayToString(strArr));
            e.printStackTrace();
        }
    }

    /**
     * Command-line entry point.
     *
     * @param strArr the command-line options, see {@link #listOptions()}
     */
    public static void main(String[] strArr) {
        MultiLabelTextDirectoryLoader loader = new MultiLabelTextDirectoryLoader();
        loader.run(loader, strArr);
    }

    /** Returns the source directory or fails with the "not specified" error when none is available. */
    private File requireDirectory() throws IOException {
        File directory = (this.m_sourceFile == null) ? null : getDirectory();
        if (directory == null) {
            throw new IOException("No directory/source has been specified");
        }
        return directory;
    }

    /** Returns the sorted names of the direct sub-directories of {@code parent}; empty if it cannot be listed. */
    private static List<String> subDirectoryNames(File parent) {
        List<String> names = new ArrayList<String>();
        String[] entries = parent.list();
        if (entries != null) {
            for (String entry : entries) {
                if (new File(parent, entry).isDirectory()) {
                    names.add(entry);
                }
            }
        }
        Collections.sort(names);
        return names;
    }

    /** Reads the whole file as text using the configured character set, closing the stream afterwards. */
    private String readText(File file) throws IOException {
        boolean platformDefault = this.m_charSet == null || this.m_charSet.length() == 0;
        // The stream is a separate resource so it is closed even if the charset name is rejected.
        try (FileInputStream stream = new FileInputStream(file);
             BufferedReader reader = new BufferedReader(
                     platformDefault ? new InputStreamReader(stream) : new InputStreamReader(stream, this.m_charSet))) {
            StringBuilder text = new StringBuilder();
            char[] buffer = new char[8192];
            int count;
            while ((count = reader.read(buffer)) != -1) {
                text.append(buffer, 0, count);
            }
            return text.toString();
        }
    }
}
