Classification using Weka – Part 3

Classification using Weka – Part 3

If you haven’t read the previous parts of this article:


As you may remember, the .arff configuration example presented in the previous part of the article had a @DATA section with a list of feature samples. Let’s create a class that contains the feature values and an optional class value. The class value is optional because, when we perform classification, we have the feature values but not the class value; it is going to be determined by the classifier.

/**
 * Immutable sample of feature values together with an optional class value.
 *
 * <p>The class value is present for labelled samples and absent for samples that
 * still have to be classified.
 */
public class FeatureSample {

    /** Immutable copy of the feature values passed to the constructor. */
    @NotNull
    private final List<Double> features;

    /** Class value of the sample; absent when the sample is not labelled. */
    @NotNull
    private final Optional<Integer> classValue;

    /**
     * Creates a sample without a class value.
     *
     * @param features feature values of the sample
     */
    public FeatureSample(@NotNull List<Double> features) {
        this(features, Optional.<Integer>absent());
    }

    /**
     * Creates a sample with the given feature values and class value.
     *
     * @param features   feature values of the sample; the list is copied
     * @param classValue class value of the sample, or an absent value if unknown
     */
    public FeatureSample(@NotNull List<Double> features,
                         @NotNull Optional<Integer> classValue) {
        this.features = ImmutableList.copyOf(features);
        this.classValue = classValue;
    }

    /**
     * @return the feature values of this sample
     */
    @NotNull
    public List<Double> getFeatures() {
        return features;
    }

    /**
     * @return the class value of this sample, absent when the sample is not labelled
     */
    @NotNull
    public Optional<Integer> getClassValue() {
        return classValue;
    }

}

Let’s create a class that contains all the information used in the .arff format.

public class Configuration {

    private static final String WRONG_NUMBER_OF_FEATURES = “Number of features is not equal to attributes”;
    private static final String CONFIGURATION_NAME_MUST_BE_SPECIFIED = “Configuration name must be specified.”;

    public static class Builder {
        private String name;
        private List < String > attributes = new ArrayList < > ();
        private List < Integer > allowedClassValues = new ArrayList < > ();
        private List < FeatureSample > instances = new ArrayList < > ();

        public Builder name(@NotNull String name) {
            this.name = name;
            return this;
        }

        public Builder attribute(@NotNull String attr) {
            attributes.add(attr);
            return this;
        }

        public Builder classAttribute(@NotNull String attr, @NotNull List < Integer > allowedClassValues) {
            attribute(attr);
            this.allowedClassValues = allowedClassValues;
            return this;
        }

        public Builder instance(@NotNull List < Double > features, int classVal) {
            if (features.size() != attributes.size()– 1) {
                throw new IllegalArgumentException(WRONG_NUMBER_OF_FEATURES);
            }
            instances.add(new FeatureSample(features, Optional.of(classVal)));
            return this;
        }

        public Configuration build() {
            return new Configuration(name, attributes, allowedClassValues, instances);
        }

    }

    private final String name;

    private final List < String > attributes;
    private final List < Integer > allowedClassValues;
    private final List < FeatureSample > instances;

    private String text;

    public Configuration(@NotNull String name,
        @NotNull List < String > attributes,
        @NotNull List < Integer > allowedClassValues,
        @NotNull List < FeatureSample > instances) {
        if (name.isEmpty()) {
            throw new IllegalArgumentException(CONFIGURATION_NAME_MUST_BE_SPECIFIED);
        }

        this.name = name;
        this.attributes = ImmutableList.copyOf(attributes);
        this.allowedClassValues = ImmutableList.copyOf(allowedClassValues);
        this.instances = ImmutableList.copyOf(instances);
    }

    public List < Integer > getAllowedClassValues() {
        return allowedClassValues;
    }

    @NotNull
    public String prepareText() {
        String title = String.format(“@relation % s”, name);

        List < String > attributesLst = new ArrayList < > ();
        for (int i = 0; i < attributes.size()– 1; i++) {
            attributesLst.add(String.format(“@attribute % s numeric”, attributes.get(i)));
        }

        String classAllowedValues = StringUtils.join(allowedClassValues, “, “);
        String classAttr = “@attribute” + attributes.get(attributes.size()– 1) + ”{”+classAllowedValues + “
        }”;

        List < String > instancesLst = new ArrayList < > ();
        for (FeatureSample instance: instances) {
            List < String > sampleValues = new ArrayList < > ();
            for (Double val: instance.getFeatures()) {
                sampleValues.add(val != null ? val.toString() : “ ? ”);
            }
            sampleValues.add(instance.getClassValue().get().toString());
            String joined = StringUtils.join(sampleValues, “, ”);
            instancesLst.add(joined);
        }
        String attributes = StringUtils.join(attributesLst, System.lineSeparator());
        String instances = StringUtils.join(instancesLst, System.lineSeparator());

        return title + System.lineSeparator() +
            attributes + System.lineSeparator() +
            classAttr + System.lineSeparator() + System.lineSeparator() + “@data” + System.lineSeparator() +
            instances;
    }

    @Override
    public String toString() {
        if (text == null) {
            text = prepareText();
        }
        return text;
    }

}

In the next part of the article, we’ll create an easy-to-use wrapper for Weka classifiers.

Leave a Reply

Your email address will not be published. Required fields are marked *