First commit
This commit is contained in:
commit
360e507bd2
11 changed files with 1216 additions and 0 deletions
180
README.markdown
Normal file
180
README.markdown
Normal file
|
@ -0,0 +1,180 @@
|
|||
# clj-ml
|
||||
|
||||
A machine learning library for Clojure built on top of Weka and friends
|
||||
|
||||
## Usage
|
||||
|
||||
* I/O of data
|
||||
|
||||
Loading and saving data:
|
||||
|
||||
(use 'clj-ml.io)
|
||||
|
||||
; Loading data from an ARFF file, XRFF and CSV are also supported
|
||||
(def ds (load-instances :arff "file:///Applications/weka-3-6-2/data/iris.arff"))
|
||||
|
||||
; Saving data in a different format
|
||||
(save-instances :csv "file:///Users/antonio.garrote/Desktop/iris.csv" ds)
|
||||
|
||||
* Working with datasets
|
||||
|
||||
(use 'clj-ml.data)
|
||||
|
||||
; Defining a dataset
|
||||
(def ds (make-dataset ; name of the dataset
|
||||
"name"
|
||||
; two numeric attributes and one nominal
|
||||
[:length :width {:kind [:good :bad]}]
|
||||
; initial data
|
||||
[ [12 34 :good]
|
||||
[24 53 :bad] ]))
|
||||
|
||||
ds
|
||||
>#<ClojureInstances @relation name
|
||||
>
|
||||
>@attribute length numeric
|
||||
>@attribute width numeric
|
||||
>@attribute kind {good,bad}
|
||||
>
|
||||
>@data
|
||||
>12,34,good
|
||||
>24,53,bad>
|
||||
|
||||
; Using datasets like sequences
|
||||
(dataset-seq ds)
|
||||
>(#<Instance 12,34,good> #<Instance 24,53,bad>)
|
||||
|
||||
; Transforming instances into maps or vectors
|
||||
(instance-to-map (first (dataset-seq ds)))
|
||||
>{:kind :good, :width 34.0, :length 12.0}
|
||||
(instance-to-vector (dataset-at ds 0))
|
||||
|
||||
* Filtering datasets
|
||||
|
||||
(use 'clj-ml.filters)
|
||||
|
||||
(def ds (load-instances :arff
|
||||
"file:///Applications/weka-3-6-2/data/iris.arff"))
|
||||
|
||||
; Discretizing a numeric attribute using an unsupervised filter
|
||||
(def discretize (make-filter :unsupervised-discretize
|
||||
{:dataset ds
|
||||
:attributes [0 2]}))
|
||||
|
||||
(def filtered-ds (filter-process discretize ds))
|
||||
|
||||
* Using classifiers
|
||||
|
||||
(use 'clj-ml.classifiers)
|
||||
|
||||
; Building a classifier using a C4.5 decision tree
|
||||
(def classifier (make-classifier :decission-tree :c45))
|
||||
|
||||
; We set the class attribute for the loaded dataset
|
||||
(dataset-set-class ds 4)
|
||||
|
||||
; Training the classifier
|
||||
(classifier-train classifier ds)
|
||||
>#<J48 J48 pruned tree
|
||||
>------------------
|
||||
>
|
||||
>petalwidth <= 0.6: Iris-setosa (50.0)
|
||||
>petalwidth > 0.6
|
||||
>| petalwidth <= 1.7
|
||||
>| | petallength <= 4.9: Iris-versicolor (48.0/1.0)
|
||||
>| | petallength > 4.9
|
||||
>| | | petalwidth <= 1.5: Iris-virginica (3.0)
|
||||
>| | | petalwidth > 1.5: Iris-versicolor (3.0/1.0)
|
||||
>| petalwidth > 1.7: Iris-virginica (46.0/1.0)
|
||||
>
|
||||
>Number of Leaves : 5
|
||||
>
|
||||
>Size of the tree : 9
|
||||
|
||||
|
||||
; We evaluate the classifier using a test dataset
|
||||
; last parameter should be a different test dataset, here we are using the same
|
||||
(def evaluation (classifier-evaluate classifier :dataset ds ds))
|
||||
>=== Confusion Matrix ===
|
||||
>
|
||||
> a b c <-- classified as
|
||||
> 50 0 0 | a = Iris-setosa
|
||||
> 0 49 1 | b = Iris-versicolor
|
||||
> 0 2 48 | c = Iris-virginica
|
||||
>
|
||||
>=== Summary ===
|
||||
>
|
||||
>Correctly Classified Instances 147 98 %
|
||||
>Incorrectly Classified Instances 3 2 %
|
||||
>Kappa statistic 0.97
|
||||
>Mean absolute error 0.0233
|
||||
>Root mean squared error 0.108
|
||||
>Relative absolute error 5.2482 %
|
||||
>Root relative squared error 22.9089 %
|
||||
>Total Number of Instances 150
|
||||
|
||||
(:kappa evaluation)
|
||||
>0.97
|
||||
(:root-mean-squared-error evaluation)
|
||||
>0.10799370769526968
|
||||
(:precision evaluation)
|
||||
>{:Iris-setosa 1.0, :Iris-versicolor 0.9607843137254902, :Iris-virginica
|
||||
0.9795918367346939}
|
||||
|
||||
; The classifier can also be evaluated using cross-validation
|
||||
(classifier-evaluate classifier :cross-validation ds 10)
|
||||
>=== Confusion Matrix ===
|
||||
>
|
||||
> a b c <-- classified as
|
||||
> 49 1 0 | a = Iris-setosa
|
||||
> 0 47 3 | b = Iris-versicolor
|
||||
> 0 4 46 | c = Iris-virginica
|
||||
>
|
||||
>=== Summary ===
|
||||
>
|
||||
>Correctly Classified Instances 142 94.6667 %
|
||||
>Incorrectly Classified Instances 8 5.3333 %
|
||||
>Kappa statistic 0.92
|
||||
>Mean absolute error 0.0452
|
||||
>Root mean squared error 0.1892
|
||||
>Relative absolute error 10.1707 %
|
||||
>Root relative squared error 40.1278 %
|
||||
>Total Number of Instances 150
|
||||
|
||||
; A trained classifier can be used to classify new instances
|
||||
(def to-classify (make-instance ds
|
||||
{:class :Iris-versicolor,
|
||||
:petalwidth 0.2,
|
||||
:petallength 1.4,
|
||||
:sepalwidth 3.5,
|
||||
:sepallength 5.1}))
|
||||
(classifier-classify classifier to-classify)
|
||||
> 0.0
|
||||
(classifier-label classifier to-classify)
|
||||
>#<Instance 5.1,3.5,1.4,0.2,Iris-setosa>
|
||||
|
||||
|
||||
; The classifiers can be saved and restored later
|
||||
(use 'clj-ml.utils)
|
||||
|
||||
(serialize-to-file classifier
|
||||
"/Users/antonio.garrote/Desktop/classifier.bin")
|
||||
|
||||
## Installation
|
||||
|
||||
In order to install the library you must first install Leiningen.
|
||||
You should also download the Weka 3.6.2 jar from the official weka homepage.
|
||||
If maven complains about not finding weka, follow its instructions to install
|
||||
the jar manually.
|
||||
|
||||
### To install from source
|
||||
|
||||
* git clone the project
|
||||
* $ lein deps
|
||||
* $ lein compile
|
||||
* $ lein compile-java
|
||||
* $ lein uberjar
|
||||
|
||||
## License
|
||||
|
||||
MIT License
|
8
project.clj
Normal file
8
project.clj
Normal file
|
@ -0,0 +1,8 @@
|
|||
;; Leiningen project definition for clj-ml: version, Java source settings
;; and library dependencies.
(defproject clj-ml "0.0.3-SNAPSHOT"
|
||||
:description "Machine Learning library for Clojure"
|
||||
;; Directory holding the Java sources shipped with the project
;; (src/java/cljml/ClojureInstances.java).
:java-source-path "src/java"
|
||||
;; NOTE(review): presumably read by the lein-javac plugin listed below -- confirm
:javac-fork "true"
|
||||
:dependencies [[org.clojure/clojure "1.1.0"]
|
||||
[org.clojure/clojure-contrib "1.1.0"]
|
||||
[lein-javac "0.0.2-SNAPSHOT"]
|
||||
;; The README notes this jar may need to be installed manually.
[weka/weka "3.6.2"]])
|
188
src/clj_ml/classifiers.clj
Normal file
188
src/clj_ml/classifiers.clj
Normal file
|
@ -0,0 +1,188 @@
|
|||
;;
|
||||
;; Building, training and evaluating classifiers
|
||||
;; @author Antonio Garrote
|
||||
;;
|
||||
|
||||
(ns clj-ml.classifiers
|
||||
(:use [clj-ml utils data])
|
||||
(:import (java.util Date Random)
|
||||
(weka.classifiers.trees J48)
|
||||
(weka.classifiers.bayes NaiveBayes)
|
||||
(weka.classifiers.functions MultilayerPerceptron)
|
||||
(weka.classifiers Evaluation)))
|
||||
|
||||
|
||||
;; Setting up classifier options
|
||||
|
||||
;; Every method starts from a vector holding a single empty string, appends
;; the boolean switches found in the user map, then the valued parameters,
;; and finally turns the result into a Java array.

(defmulti make-classifier-options
  "Builds the array of options for a classifier. Dispatches on the pair
   [kind algorithm]; the third argument is the map of user settings."
  (fn [kind algorithm _] [kind algorithm]))

(defmethod make-classifier-options [:decission-tree :c45]
  [kind algorithm map]
  (->> [""]
       (check-options {:unpruned "-U"
                       :reduced-error-pruning "-R"
                       :only-binary-splits "-B"
                       :no-raising "-S"
                       :no-cleanup "-L"
                       :laplace-smoothing "-A"}
                      map)
       (check-option-values {:pruning-confidence "-C"
                             :minimum-instances "-M"
                             :pruning-number-folds "-N"
                             :shuffling-random-seed "-Q"}
                            map)
       into-array))

(defmethod make-classifier-options [:bayes :naive]
  [kind algorithm map]
  (->> [""]
       (check-options {:kernel-estimator "-K"
                       :supervised-discretization "-D"
                       :old-format "-O"}
                      map)
       into-array))

(defmethod make-classifier-options [:neural-network :multilayer-perceptron]
  [kind algorithm map]
  (->> [""]
       (check-options {:no-nominal-to-binary "-B"
                       :no-numeric-normalization "-C"
                       :no-normalization "-I"
                       :no-reset "-R"
                       :learning-rate-decay "-D"}
                      map)
       (check-option-values {:learning-rate "-L"
                             :momentum "-M"
                             :epochs "-N"
                             :percentage-validation-set "-V"
                             :seed "-S"
                             :threshold-number-errors "-E"}
                            map)
       into-array))
|
||||
|
||||
|
||||
;; Building classifiers
|
||||
|
||||
(defmacro make-classifier-m
  "Expands into code that instantiates classifier-class, configures it with
   the options built by make-classifier-options for kind/algorithm and
   returns the configured instance.

   options may be an option map, nil/empty, or the rest-argument sequence
   of a variadic caller, in which case its first element is taken as the
   option map."
  ([kind algorithm classifier-class options]
     `(let [raw-options# ~options ; evaluate the argument form only once
            ;; Callers pass their `& options` seq; looking keys up in that
            ;; seq directly always yields nil, so unwrap the map first.
            options-read# (cond
                            (map? raw-options#) raw-options#
                            (empty? raw-options#) {}
                            :else (first raw-options#))
            classifier# (new ~classifier-class)
            opts# (make-classifier-options ~kind ~algorithm options-read#)]
        (.setOptions classifier# opts#)
        classifier#)))
|
||||
|
||||
(defmulti make-classifier
  "Creates a new classifier for the given kind, algorithm and options.
   Dispatches on the pair [kind algorithm]."
  (fn [kind algorithm & _] [kind algorithm]))

;; C4.5 decision tree, backed by Weka's J48
(defmethod make-classifier [:decission-tree :c45]
  [kind algorithm & options]
  (make-classifier-m kind algorithm J48 options))

;; Naive Bayes
(defmethod make-classifier [:bayes :naive]
  [kind algorithm & options]
  (make-classifier-m kind algorithm NaiveBayes options))

;; Multilayer perceptron
(defmethod make-classifier [:neural-network :multilayer-perceptron]
  [kind algorithm & options]
  (make-classifier-m kind algorithm MultilayerPerceptron options))
|
||||
|
||||
|
||||
;; Training classifiers
|
||||
|
||||
(defn classifier-train
  "Builds the classifier from the given training dataset and returns the
   same (now trained) classifier."
  [classifier dataset]
  (doto classifier
    (.buildClassifier dataset)))
|
||||
|
||||
|
||||
;; Evaluating classifiers
|
||||
|
||||
(defn- try-metric
  "Calls the zero-argument function f and returns its result. If f throws
   an Exception, returns {:nan message} with the exception message."
  [f]
  (try
    (f)
    (catch Exception failure
      {:nan (.getMessage failure)})))
|
||||
|
||||
(defn- try-multiple-values-metric
  "Given a map of key -> index, returns a map of key -> (f index)."
  [class-values f]
  (reduce (fn [results [label index]]
            (assoc results label (f index)))
          {}
          class-values))
|
||||
|
||||
(defn- collect-evaluation-results
  "Prints the confusion matrix and the summary of the evaluation and
   returns a map with every collected statistic. Per-class statistics are
   themselves maps keyed like class-values. A metric that throws is
   reported as {:nan message}. The evaluation object itself is returned
   under :evaluation-object."
  [class-values evaluation]
  (println (.toMatrixString evaluation))
  (println "=== Summary ===")
  (println (.toSummaryString evaluation))
  (let [per-class (fn [metric]
                    (try-multiple-values-metric
                     class-values
                     (fn [i] (try-metric #(metric i)))))]
    {:correct (try-metric #(.correct evaluation))
     :incorrect (try-metric #(.incorrect evaluation))
     :unclassified (try-metric #(.unclassified evaluation))
     :percentage-correct (try-metric #(.pctCorrect evaluation))
     :percentage-incorrect (try-metric #(.pctIncorrect evaluation))
     :percentage-unclassified (try-metric #(.pctUnclassified evaluation))
     :error-rate (try-metric #(.errorRate evaluation))
     :mean-absolute-error (try-metric #(.meanAbsoluteError evaluation))
     :relative-absolute-error (try-metric #(.relativeAbsoluteError evaluation))
     :root-mean-squared-error (try-metric #(.rootMeanSquaredError evaluation))
     :root-relative-squared-error (try-metric #(.rootRelativeSquaredError evaluation))
     :correlation-coefficient (try-metric #(.correlationCoefficient evaluation))
     :average-cost (try-metric #(.avgCost evaluation))
     :kappa (try-metric #(.kappa evaluation))
     :kb-information (try-metric #(.KBInformation evaluation))
     :kb-mean-information (try-metric #(.KBMeanInformation evaluation))
     :kb-relative-information (try-metric #(.KBRelativeInformation evaluation))
     :sf-entropy-gain (try-metric #(.SFEntropyGain evaluation))
     :sf-mean-entropy-gain (try-metric #(.SFMeanEntropyGain evaluation))
     :roc-area (per-class #(.areaUnderROC evaluation %))
     :false-positive-rate (per-class #(.falsePositiveRate evaluation %))
     :false-negative-rate (per-class #(.falseNegativeRate evaluation %))
     :f-measure (per-class #(.fMeasure evaluation %))
     :precision (per-class #(.precision evaluation %))
     :recall (per-class #(.recall evaluation %))
     :evaluation-object evaluation}))
|
||||
|
||||
(defmulti classifier-evaluate
  "Evaluates a trained classifier using the provided dataset or
   cross-validation. Dispatches on mode (:dataset or :cross-validation)."
  (fn [classifier mode & _] mode))

(defn- evaluate-with
  "Creates an Evaluation for training-data, reads the class values, runs
   run-fn on the evaluation and collects the resulting statistics."
  [training-data run-fn]
  (let [evaluation (Evaluation. training-data)
        class-values (dataset-class-values training-data)]
    (run-fn evaluation)
    (collect-evaluation-results class-values evaluation)))

;; evaluation-data: training dataset followed by the test dataset
(defmethod classifier-evaluate :dataset
  [classifier mode & evaluation-data]
  (let [training-data (nth evaluation-data 0)
        test-data (nth evaluation-data 1)]
    (evaluate-with training-data
                   (fn [evaluation]
                     (.evaluateModel evaluation classifier test-data (into-array []))))))

;; evaluation-data: dataset followed by the number of folds; the random
;; generator is seeded with the current time
(defmethod classifier-evaluate :cross-validation
  [classifier mode & evaluation-data]
  (let [training-data (nth evaluation-data 0)
        folds (nth evaluation-data 1)]
    (evaluate-with training-data
                   (fn [evaluation]
                     (.crossValidateModel evaluation
                                          classifier
                                          training-data
                                          folds
                                          (Random. (.getTime (Date.)))
                                          (into-array []))))))
|
||||
|
||||
|
||||
;; Classifying instances
|
||||
|
||||
(defn classifier-classify
  "Returns the result of calling .classifyInstance on the classifier with
   the given instance."
  [classifier instance]
  (. classifier (classifyInstance instance)))
|
||||
|
||||
(defn classifier-label
  "Classifies the instance and stores the result as its class value.
   Returns the labelled instance."
  [classifier instance]
  (instance-set-class instance
                      (classifier-classify classifier instance)))
|
228
src/clj_ml/data.clj
Normal file
228
src/clj_ml/data.clj
Normal file
|
@ -0,0 +1,228 @@
|
|||
;;
|
||||
;; Manipulation of datasets and instances
|
||||
;; @author Antonio Garrote
|
||||
;;
|
||||
|
||||
(ns clj-ml.data
|
||||
(:use [clj-ml utils])
|
||||
(:import (weka.core Instance Instances FastVector Attribute)
|
||||
(cljml ClojureInstances)))
|
||||
|
||||
|
||||
;; Construction of individual data and datasets
|
||||
|
||||
(defn attribute-name-at-
  "Returns the name of the attribute found at position pos of a dataset
   or an instance."
  [dataset-or-instance pos]
  (-> dataset-or-instance
      (.attribute pos)
      .name))
|
||||
|
||||
(defn- index-attr
  "Returns the position of the attribute named attr (keyword or string)
   in a dataset or instance. Throws an Exception when no attribute has
   that name."
  [dataset-or-instance attr]
  (let [max (.numAttributes dataset-or-instance)
        attrs (key-to-str attr)]
    (loop [c 0]
      (cond
        ;; (Exception. msg) builds the exception; the previous
        ;; (.Exception msg) was a method call on the message string.
        (= c max) (throw (Exception. (str "Attribute " attrs " not found")))
        (= attrs (attribute-name-at- dataset-or-instance c)) c
        :else (recur (inc c))))))
|
||||
|
||||
(defn make-instance
  "Creates a new instance attached to dataset from a collection of values.
   Each element is handled as follows:
     - a keyword or string is stored as a nominal value at the element's
       position,
     - a sequential pair [attribute value] is stored at the attribute
       found by name (this is what the entries of a map look like),
     - anything else is coerced to double and stored at the element's
       position.
   weight defaults to 1."
  ([dataset vector]
     (make-instance dataset 1 vector))
  ([dataset weight vector]
     (let [inst (Instance. (count vector))
           nominal? (fn [x] (or (keyword? x) (string? x)))]
       (.setDataset inst dataset)
       (loop [remaining vector
              pos 0]
         (if (empty? remaining)
           (do
             (.setWeight inst (double weight))
             inst)
           (let [entry (first remaining)]
             (cond
               ;; nominal entry given as keyword or string
               (nominal? entry)
               (.setValue inst pos (key-to-str entry))

               ;; [attribute value] pair
               (sequential? entry)
               (let [attr-name (key-to-str (nth entry 0))
                     value (nth entry 1)
                     attr-index (index-attr inst attr-name)]
                 (if (nominal? value)
                   (.setValue inst attr-index (key-to-str value))
                   (.setValue inst attr-index (double value))))

               ;; plain numeric entry
               :else
               (.setValue inst pos (double entry)))
             (recur (rest remaining)
                    (inc pos))))))))
|
||||
|
||||
|
||||
(defn- parse-attributes
  "Builds a FastVector of Attribute objects from attribute descriptions.
   A description is either a bare name (keyword or string) or a
   single-entry map name -> spec. When spec is sequential, its elements
   become the labels of the attribute; otherwise spec is passed as is to
   the two-argument Attribute constructor."
  [attributes]
  (let [parsed (FastVector. (count attributes))]
    (doseq [att attributes]
      (.addElement
       parsed
       (if (map? att)
         (let [attr-name (key-to-str (first (keys att)))
               spec (first (vals att))]
           (if (sequential? spec)
             (let [labels (FastVector.)]
               (doseq [label spec]
                 (.addElement labels (key-to-str label)))
               (Attribute. attr-name labels))
             (Attribute. attr-name spec)))
         (Attribute. (key-to-str att)))))
    parsed))
|
||||
|
||||
(defn make-dataset
  "Creates a new dataset. When capacity-or-values is sequential, every
   element is turned into an instance (with the given weight, 1 by
   default) and added to a ClojureInstances dataset; otherwise it is used
   as the capacity of an empty Instances dataset. The class index is set
   to the last attribute."
  ([name attributes capacity-or-values]
     (make-dataset name attributes 1 capacity-or-values))
  ([name attributes weight capacity-or-values]
     (let [relation (key-to-str name)
           attribute-info (parse-attributes attributes)
           ds (if (sequential? capacity-or-values)
                ;; initial data provided: create and fill the dataset
                (let [dataset (ClojureInstances. relation
                                                 attribute-info
                                                 (count capacity-or-values))]
                  (doseq [row capacity-or-values]
                    (.add dataset (make-instance dataset weight row)))
                  dataset)
                ;; only a capacity provided: empty dataset
                (Instances. relation attribute-info capacity-or-values))]
       ;; by default the class is the last attribute of the dataset
       (.setClassIndex ds (- (.numAttributes ds) 1))
       ds)))
|
||||
|
||||
;; dataset information
|
||||
|
||||
(defn dataset-class-values
  "Returns a map from each label of the dataset's class attribute (as a
   keyword) to the index of that label."
  [dataset]
  (let [class-attr (.classAttribute dataset)]
    (reduce (fn [acum label]
              (assoc acum (keyword label) (.indexOfValue class-attr label)))
            {}
            (enumeration-seq (.enumerateValues class-attr)))))
|
||||
|
||||
(defn dataset-values-at
  "Returns a map from each label of the attribute at position pos (as a
   keyword) to the index of that label, or :not-nominal when the
   attribute does not enumerate any values."
  [dataset-or-instance pos]
  (let [attr (.attribute dataset-or-instance pos)
        labels (.enumerateValues attr)]
    (if (nil? labels)
      :not-nominal
      (reduce (fn [acum label]
                (assoc acum (keyword label) (.indexOfValue attr label)))
              {}
              (enumeration-seq labels)))))
|
||||
|
||||
;; manipulation of instances
|
||||
|
||||
(defn instance-set-class
  "Sets the class value of the instance to pos (delegates to
   .setClassValue) and returns the instance."
  ;; The docstring must precede the argument vector to be attached to the var.
  [instance pos]
  (.setClassValue instance pos)
  instance)
|
||||
|
||||
(defn instance-get-class
  "Returns the class value of the instance (delegates to .classValue)."
  ;; The docstring must precede the argument vector to be attached to the var.
  [instance]
  (.classValue instance))
|
||||
|
||||
(defn instance-value-at
  "Returns the value of the attribute at position pos of the instance.
   For a nominal attribute the matching label is returned as a keyword,
   or nil when the stored value matches no label index. For any other
   attribute the raw value is returned."
  [instance pos]
  (let [attr (.attribute instance pos)
        raw (.value instance pos)]
    (if (.isNominal attr)
      ;; `some` stops at the end of the label map, so an unmatched value
      ;; yields nil instead of looping forever. `==` compares numerically,
      ;; so a label index matches the stored value regardless of the
      ;; numeric types involved.
      (some (fn [[label index]]
              (when (== index raw) label))
            (dataset-values-at instance pos))
      raw)))
|
||||
|
||||
(defn instance-to-vector
  "Returns a vector with the value of every position of the instance, in
   order, as produced by instance-value-at."
  [instance]
  (vec (map (fn [pos] (instance-value-at instance pos))
            (range (.numValues instance)))))
|
||||
|
||||
(defn instance-to-map
  "Returns a map from attribute name (as a keyword) to the value of that
   attribute in the instance, as produced by instance-value-at."
  [instance]
  (reduce (fn [acum pos]
            (assoc acum
              (keyword (.name (.attribute instance pos)))
              (instance-value-at instance pos)))
          {}
          (range (.numValues instance))))
|
||||
|
||||
|
||||
;; manipulation of datasets
|
||||
|
||||
(defn dataset-seq
  "Returns a Clojure sequence over the instances of the dataset. A dataset
   that is not a ClojureInstances is first copied into one, since that
   wrapper is the one providing iteration."
  ;; The docstring must precede the argument vector to be attached to the var.
  [dataset]
  (if (instance? ClojureInstances dataset)
    (seq dataset)
    (seq (new ClojureInstances dataset))))
|
||||
|
||||
(defn dataset-set-class
  "Sets the index of the attribute that is the class of the dataset and
   returns the dataset."
  ;; The docstring must precede the argument vector to be attached to the var.
  [dataset pos]
  (.setClassIndex dataset pos)
  dataset)
|
||||
|
||||
(defn dataset-count
  "Returns the number of instances in a dataset."
  ;; The docstring must precede the argument vector to be attached to the var.
  [dataset]
  (.numInstances dataset))
|
||||
|
||||
(defn dataset-add
  "Adds a new instance to a dataset and returns the dataset. Either a
   weka.core.Instance (added as is) or a collection of values (converted
   with make-instance, using weight, 1 by default) can be passed."
  ([dataset vector]
     (dataset-add dataset 1 vector))
  ([dataset weight vector]
     ;; instance? also accepts subclasses of Instance, which an exact class
     ;; comparison would wrongly send through make-instance.
     (if (instance? weka.core.Instance vector)
       (.add dataset vector)
       (.add dataset (make-instance dataset weight vector)))
     dataset))
|
||||
|
||||
(defn dataset-extract-at
  "Deletes the instance at position pos from the dataset and returns the
   deleted instance."
  [dataset pos]
  (let [removed (.instance dataset pos)]
    (.delete dataset pos)
    removed))
|
||||
|
||||
(defn dataset-at
  "Returns the instance stored at position pos of the dataset; the
   dataset is left untouched."
  [dataset pos]
  (. dataset (instance pos)))
|
||||
|
||||
(defn dataset-pop
  "Deletes the first instance of the dataset and returns it."
  [dataset]
  (let [head (.instance dataset 0)]
    (.delete dataset 0)
    head))
|
106
src/clj_ml/filters.clj
Normal file
106
src/clj_ml/filters.clj
Normal file
|
@ -0,0 +1,106 @@
|
|||
;;
|
||||
;; Data processing of data with different filtering algorithms
|
||||
;; @author Antonio Garrote
|
||||
;;
|
||||
|
||||
(ns clj-ml.filters
|
||||
(:use [clj-ml data utils])
|
||||
(:import (weka.filters Filter)))
|
||||
|
||||
|
||||
|
||||
;; Options for the filters
|
||||
|
||||
(defn- attribute-range-options
  "Returns [\"-R\" \"i,j,...\"] for the 0-based indices found under
   :attributes in options, shifted to 1-based. Returns an empty vector
   when no attributes are given, so that no -R option is emitted."
  [options]
  (let [cols (get options :attributes)]
    (if (empty? cols)
      []
      ["-R" (apply str (interpose "," (map inc cols)))])))

(defmulti make-filter-options
  "Creates the array of options for a filter. Dispatches on kind; the
   second argument is the map of user settings."
  (fn [kind map] kind))

;; The array type is given explicitly so that an empty option list still
;; produces an array of strings.

(defmethod make-filter-options :supervised-discretize
  [kind map]
  (->> (attribute-range-options map)
       (check-options {:invert "-V"
                       :binary "-D"
                       :better-encoding "-E"
                       :kononenko "-K"}
                      map)
       (into-array String)))

(defmethod make-filter-options :unsupervised-discretize
  [kind map]
  (->> (attribute-range-options map)
       (check-options {:unset-class "-unset-class-temporarily"
                       :binary "-D"
                       :better-encoding "-E"
                       :equal-frequency "-F"
                       :optimize "-O"}
                      map)
       (check-option-values {:number-bins "-B"
                             :weight-bins "-M"}
                            map)
       (into-array String)))

(defmethod make-filter-options :supervised-nominal-to-binary
  [kind map]
  (->> [""]
       (check-options {:also-binary "-N"
                       :for-each-nominal "-A"}
                      map)
       (into-array String)))

(defmethod make-filter-options :unsupervised-nominal-to-binary
  [kind map]
  (->> (attribute-range-options map)
       (check-options {:invert "-V"
                       :also-binary "-N"
                       :for-each-nominal "-A"}
                      map)
       (into-array String)))
|
||||
|
||||
|
||||
;; Creation of filters
|
||||
|
||||
(defmacro make-filter-m
  "Expands into code that instantiates filter-class, configures it with
   the options built by make-filter-options for kind, sets the dataset
   found under :dataset in options as its input format and returns the
   filter."
  [kind options filter-class]
  `(let [options# ~options ; evaluate the argument form only once
         filter# (new ~filter-class)
         dataset# (get options# :dataset)
         opts# (make-filter-options ~kind options#)]
     (.setOptions filter# opts#)
     (.setInputFormat filter# dataset#)
     filter#))
|
||||
|
||||
(defmulti make-filter
  "Creates a filter for datasets. Dispatches on kind; options is the map
   of settings, which also carries the dataset under :dataset."
  (fn [kind _] kind))

(defmethod make-filter :supervised-discretize
  [kind options]
  (make-filter-m kind options weka.filters.supervised.attribute.Discretize))

(defmethod make-filter :unsupervised-discretize
  [kind options]
  (make-filter-m kind options weka.filters.unsupervised.attribute.Discretize))

(defmethod make-filter :supervised-nominal-to-binary
  [kind options]
  (make-filter-m kind options weka.filters.supervised.attribute.NominalToBinary))

(defmethod make-filter :unsupervised-nominal-to-binary
  [kind options]
  (make-filter-m kind options weka.filters.unsupervised.attribute.NominalToBinary))
|
||||
|
||||
;; Processing the filtering of data
|
||||
|
||||
(defn filter-process
  "Runs dataset through filter with Filter/useFilter and returns the
   resulting dataset."
  [filter dataset]
  (. Filter (useFilter dataset filter)))
|
73
src/clj_ml/io.clj
Normal file
73
src/clj_ml/io.clj
Normal file
|
@ -0,0 +1,73 @@
|
|||
;;
|
||||
;; Storing and reading data from different formats
|
||||
;; @author Antonio Garrote
|
||||
;;
|
||||
|
||||
(ns clj-ml.io
|
||||
(:import (weka.core.converters CSVLoader ArffLoader XRFFLoader)
|
||||
(weka.core.converters CSVSaver ArffSaver XRFFSaver)
|
||||
(java.io File)
|
||||
(java.net URL URI)))
|
||||
|
||||
|
||||
;; Loading of instances
|
||||
|
||||
(defmulti load-instances
  "Load instances from different data sources. Dispatches on kind
   (:arff, :xrff or :csv)."
  (fn [kind _] kind))

(defmacro m-load-instances
  "Expands into code that points loader at source and returns the loaded
   dataset. A String source is turned into a URL, a java.io.File is used
   directly; for any other source the loader is left unconfigured."
  [loader source]
  `(do
     (condp = (class ~source)
       java.lang.String (.setSource ~loader (new URL ~source))
       java.io.File (.setFile ~loader ~source)
       nil)
     (.getDataSet ~loader)))

(defmethod load-instances :arff
  [kind source]
  (let [loader (ArffLoader.)]
    (m-load-instances loader source)))

(defmethod load-instances :xrff
  [kind source]
  (let [loader (XRFFLoader.)]
    (m-load-instances loader source)))

(defmethod load-instances :csv
  [kind source]
  (let [loader (CSVLoader.)]
    (m-load-instances loader source)))
|
||||
|
||||
|
||||
;; Saving of instances
|
||||
|
||||
(defmulti save-instances
  "Save instances into different data destinations. Dispatches on kind
   (:arff, :xrff or :csv)."
  (fn [kind _ _] kind))

(defmacro m-save-instances
  "Expands into code that points saver at destiny, hands it the instances
   and writes them in one batch. A String destiny is parsed as a URI
   naming a file, a java.io.File is used directly; for any other destiny
   the saver's file is left unset."
  [saver destiny instances]
  `(do
     (condp = (class ~destiny)
       java.lang.String (.setFile ~saver (new File (new URI ~destiny)))
       java.io.File (.setFile ~saver ~destiny)
       nil)
     (.setInstances ~saver ~instances)
     (.writeBatch ~saver)))

(defmethod save-instances :arff
  [kind destiny instances]
  (let [saver (ArffSaver.)]
    (m-save-instances saver destiny instances)))

(defmethod save-instances :xrff
  [kind destiny instances]
  (let [saver (XRFFSaver.)]
    (m-save-instances saver destiny instances)))

(defmethod save-instances :csv
  [kind destiny instances]
  (let [saver (CSVSaver.)]
    (m-save-instances saver destiny instances)))
|
||||
|
97
src/clj_ml/utils.clj
Normal file
97
src/clj_ml/utils.clj
Normal file
|
@ -0,0 +1,97 @@
|
|||
;;
|
||||
;; Common utilities and functions
|
||||
;; @author Antonio Garrote
|
||||
;;
|
||||
|
||||
(ns clj-ml.utils
|
||||
(:import (java.io ObjectOutputStream ByteArrayOutputStream
|
||||
ByteArrayInputStream ObjectInputStream
|
||||
FileOutputStream FileInputStream)))
|
||||
|
||||
|
||||
(defn key-to-str
  "Returns k unchanged when it is a string; otherwise returns the printed
   form of k without its first character (which drops the leading colon
   of a keyword)."
  [k]
  (if (string? k)
    k
    (subs (str k) 1)))
|
||||
|
||||
;; Manipulation of array of options
|
||||
|
||||
(defn check-option
  "Appends flag to the option vector opts when the key val is switched on
   in map. Returns opts unchanged when the key is absent, nil or false."
  ;; The docstring must precede the argument vector to be attached to the var.
  [opts val flag map]
  ;; A truthiness test (rather than nil?) keeps {:some-flag false} from
  ;; switching the flag on.
  (if (get map val)
    (conj opts flag)
    opts))
|
||||
|
||||
(defn check-option-value
  "Appends flag followed by the string form of the value stored under the
   key val in map to the option vector opts. Returns opts unchanged when
   the key is absent or nil."
  ;; The docstring must precede the argument vector to be attached to the var.
  [opts val flag map]
  (let [val-in-map (get map val)]
    (if (nil? val-in-map)
      opts
      (conj opts flag (str val-in-map)))))
|
||||
|
||||
|
||||
(defn check-options
  "Runs check-option for every key -> flag entry of opts-map against the
   user supplied args-map, starting from the option vector tmp. Entries
   are processed in the iteration order of opts-map. Returns the extended
   option vector."
  ;; The docstring must precede the argument vector to be attached to the var.
  [opts-map args-map tmp]
  (reduce (fn [acum [k flag]]
            (check-option acum k flag args-map))
          tmp
          opts-map))
|
||||
|
||||
(defn check-option-values
  "Runs check-option-value for every key -> flag entry of opts-map against
   the user supplied args-map, starting from the option vector tmp.
   Entries are processed in the iteration order of opts-map. Returns the
   extended option vector."
  ;; The docstring must precede the argument vector to be attached to the var.
  [opts-map args-map tmp]
  (reduce (fn [acum [k flag]]
            (check-option-value acum k flag args-map))
          tmp
          opts-map))
|
||||
|
||||
;; Serializing classifiers
|
||||
|
||||
(defn serialize
  "Serializes obj with Java object serialization and returns the
   resulting byte array."
  [obj]
  (let [buffer (ByteArrayOutputStream.)]
    (doto (ObjectOutputStream. buffer)
      (.writeObject obj)
      (.close))
    (.toByteArray buffer)))
|
||||
|
||||
(defn deserialize
  "Reads back one object from a byte array holding Java serialized data."
  [bytes]
  (let [in (ObjectInputStream. (ByteArrayInputStream. bytes))
        restored (.readObject in)]
    (.close in)
    restored))
|
||||
|
||||
(defn serialize-to-file
  "Writes obj to the file at path using Java object serialization and
   returns path."
  [obj path]
  ;; with-open closes both streams even when writing throws
  (with-open [fs (FileOutputStream. path)
              os (ObjectOutputStream. fs)]
    (.writeObject os obj))
  path)
|
||||
|
||||
(defn deserialize-from-file
  "Reads back one object from the file at path, which must hold Java
   serialized data, and returns it."
  [path]
  ;; with-open closes both streams even when reading throws
  (with-open [fs (FileInputStream. path)
              is (ObjectInputStream. fs)]
    (.readObject is)))
|
64
src/java/cljml/ClojureInstances.java
Normal file
64
src/java/cljml/ClojureInstances.java
Normal file
|
@ -0,0 +1,64 @@
|
|||
package cljml;
|
||||
|
||||
import clojure.lang.ISeq;
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
import weka.core.FastVector;
|
||||
import weka.core.Instance;
|
||||
import weka.core.Instances;
|
||||
/**
|
||||
* A wrapper around Weka's Instances class to add some Clojure behavior.
|
||||
*
|
||||
* @author Antonio Garrote
|
||||
*/
|
||||
|
||||
class ClojureInstancesIterator implements Iterator<Instance> {
|
||||
private Instances instances;
|
||||
private int counter;
|
||||
|
||||
public ClojureInstancesIterator(Instances insts) {
|
||||
this.instances = insts;
|
||||
this.counter = 0;
|
||||
}
|
||||
public boolean hasNext() {
|
||||
return counter < instances.numInstances();
|
||||
}
|
||||
|
||||
public Instance next() {
|
||||
Instance next = instances.instance(counter);
|
||||
counter++;
|
||||
return next;
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
instances.delete(counter - 1);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
public class ClojureInstances extends weka.core.Instances implements Iterable<weka.core.Instance>{
|
||||
public ClojureInstances(Instances dataset) {
|
||||
super(dataset);
|
||||
}
|
||||
|
||||
public ClojureInstances(Instances dataset, int capacity) {
|
||||
super(dataset,capacity);
|
||||
}
|
||||
|
||||
public ClojureInstances(Instances source, int first, int toCopy) {
|
||||
super(source, first, toCopy);
|
||||
}
|
||||
|
||||
public ClojureInstances(java.io.Reader reader) throws IOException {
|
||||
super(reader);
|
||||
}
|
||||
|
||||
public ClojureInstances(java.lang.String name, FastVector attInfo, int capacity) {
|
||||
super(name, attInfo, capacity);
|
||||
}
|
||||
|
||||
public Iterator<Instance> iterator() {
|
||||
return new ClojureInstancesIterator(this);
|
||||
}
|
||||
|
||||
}
|
66
test/clj_ml/classifiers_test.clj
Normal file
66
test/clj_ml/classifiers_test.clj
Normal file
|
@ -0,0 +1,66 @@
|
|||
(ns clj-ml.classifiers-test
|
||||
(:use [clj-ml classifiers data] :reload-all)
|
||||
(:use [clojure.test]))
|
||||
|
||||
(deftest make-classifiers-options-c45
  ;; Builds the option array for a :c45 decision tree with ten settings and
  ;; checks its first fifteen slots, one assertion per slot.
  (let [options  (make-classifier-options :decission-tree :c45 {:unpruned true :reduced-error-pruning true :only-binary-splits true :no-raising true
                                                                :no-cleanup true :laplace-smoothing true :pruning-confidence 0.12 :minimum-instances 10
                                                                :pruning-number-folds 5 :shuffling-random-seed 1})
        expected ["" "-U" "-R" "-B" "-S" "-L" "-A"
                  "-C" "0.12"
                  "-M" "10"
                  "-N" "5"
                  "-Q" "1"]]
    (dotimes [slot (count expected)]
      (is (= (aget options slot)
             (nth expected slot))))))
|
||||
|
||||
|
||||
(deftest make-classifier-c45
  ;; A :c45 decision tree created without options must be a Weka J48 instance.
  (is (= (class (make-classifier :decission-tree :c45))
         weka.classifiers.trees.J48)))
|
||||
|
||||
(deftest train-classifier-c45
  ;; Smoke test: the only assertion is a constant, so this passes whenever
  ;; building and training the classifier complete without throwing.
  (let [tree         (make-classifier :decission-tree :c45)
        training-set (clj-ml.data/make-dataset "test"
                                               [:a :b {:c [:m :n]}]
                                               [[1 2 :m] [4 5 :m]])]
    (classifier-train tree training-set)
    (is true)))
|
||||
|
||||
(deftest classifier-evaluate-dataset
  ;; Trains on one dataset, evaluates against a second one and checks that
  ;; the evaluation result has 26 keys.
  (let [tree         (make-classifier :decission-tree :c45)
        training-set (clj-ml.data/make-dataset "test" [:a :b {:c [:m :n]}] [[1 2 :m] [4 5 :m]])
        testing-set  (clj-ml.data/make-dataset "test" [:a :b {:c [:m :n]}] [[4 1 :n] [4 5 :m]])
        ;; Training happens inside the bindings purely for its side effect.
        _            (classifier-train tree training-set)
        evaluation   (classifier-evaluate tree :dataset training-set testing-set)]
    (is (= 26 (count (keys evaluation))))))
|
||||
|
||||
|
||||
(deftest classifier-evaluate-cross-validation
  ;; Trains a classifier, runs :cross-validation with the argument 2 on the
  ;; same dataset and checks that the evaluation result has 26 keys.
  (let [tree       (make-classifier :decission-tree :c45)
        data       (clj-ml.data/make-dataset "test" [:a :b {:c [:m :n]}] [[1 2 :m] [4 5 :m]])
        ;; Training happens inside the bindings purely for its side effect.
        _          (classifier-train tree data)
        evaluation (classifier-evaluate tree :cross-validation data 2)]
    (is (= 26 (count (keys evaluation))))))
|
105
test/clj_ml/data_test.clj
Normal file
105
test/clj_ml/data_test.clj
Normal file
|
@ -0,0 +1,105 @@
|
|||
(ns clj-ml.data-test
|
||||
(:use [clj-ml.data] :reload-all)
|
||||
(:use [clojure.test]))
|
||||
|
||||
(deftest make-instance-num
  ;; An instance built from [1 2] against a two-attribute dataset is a Weka
  ;; Instance holding the values 1.0 and 2.0.
  (let [ds  (make-dataset :test
                          [:a :b]
                          1)
        row (make-instance ds [1 2])]
    (is (= (class row) weka.core.Instance))
    (is (= 2 (.numValues row)))
    (is (= 1.0 (.value row 0)))
    (is (= 2.0 (.value row 1)))))
|
||||
|
||||
(deftest make-instance-ord
  ;; Same as the numeric case, but the second attribute is declared with the
  ;; labels :b1/:b2 and is read back through .stringValue as "b1".
  (let [ds  (make-dataset :test
                          [:a {:b [:b1 :b2]}]
                          1)
        row (make-instance ds [1 :b1])]
    (is (= (class row) weka.core.Instance))
    (is (= 2 (.numValues row)))
    (is (= 1.0 (.value row 0)))
    (is (= "b1" (.stringValue row 1)))))
|
||||
|
||||
(deftest dataset-default-class
  ;; A freshly made two-attribute dataset reports class index 1
  ;; (presumably "last attribute" -- confirm against make-dataset).
  (let [ds (make-dataset :test
                         [:a :b]
                         2)]
    (is (= 1 (.classIndex ds)))))
|
||||
|
||||
(deftest dataset-change-class
  ;; The class index starts at 1 and becomes 0 on the value returned by
  ;; (dataset-set-class ds 0).
  (let [ds (make-dataset :test
                         [:a :b]
                         2)]
    (is (= 1 (.classIndex ds)))
    (let [updated (dataset-set-class ds 0)]
      (is (= 0 (.classIndex updated))))))
|
||||
|
||||
(deftest dataset-count-1
  ;; After adding a single row to an initially data-less dataset,
  ;; dataset-count reports 1.
  (let [ds (make-dataset :test
                         [:a :b]
                         2)]
    (dataset-add ds [1 2])
    (is (= 1 (dataset-count ds)))))
|
||||
|
||||
(deftest dataset-add-1
  ;; Adding a plain vector: the dataset's last instance carries its values.
  (let [ds (make-dataset :test
                         [:a :b]
                         2)]
    (dataset-add ds [1 2])
    (let [added (.lastInstance ds)]
      (is (= 1.0 (.value added 0)))
      (is (= 2.0 (.value added 1))))))
|
||||
|
||||
(deftest dataset-add-2
  ;; Adding an already-built instance (rather than a vector): the dataset's
  ;; last instance carries the same values.
  (let [ds  (make-dataset :test
                          [:a :b]
                          2)
        row (make-instance ds [1 2])]
    (dataset-add ds row)
    (let [added (.lastInstance ds)]
      (is (= 1.0 (.value added 0)))
      (is (= 2.0 (.value added 1))))))
|
||||
|
||||
(deftest dataset-extract-at-1
  ;; dataset-extract-at on index 0 hands back the stored row and leaves the
  ;; dataset with zero instances.
  (let [ds (make-dataset :test
                         [:a :b]
                         2)]
    (dataset-add ds [1 2])
    ;; First confirm the row really went in ...
    (let [added (.lastInstance ds)]
      (is (= 1.0 (.value added 0)))
      (is (= 2.0 (.value added 1))))
    ;; ... then pull it back out.
    (let [extracted (dataset-extract-at ds 0)]
      (is (= 0 (.numInstances ds)))
      (is (= 1.0 (.value extracted 0)))
      (is (= 2.0 (.value extracted 1))))))
|
||||
|
||||
(deftest dataset-pop-1
  ;; dataset-pop hands back the stored row and leaves the dataset with zero
  ;; instances.
  (let [ds (make-dataset :test
                         [:a :b]
                         2)]
    (dataset-add ds [1 2])
    ;; First confirm the row really went in ...
    (let [added (.lastInstance ds)]
      (is (= 1.0 (.value added 0)))
      (is (= 2.0 (.value added 1))))
    ;; ... then pop it.
    (let [popped (dataset-pop ds)]
      (is (= 0 (.numInstances ds)))
      (is (= 1.0 (.value popped 0)))
      (is (= 2.0 (.value popped 1))))))
|
||||
|
||||
(deftest dataset-seq-1
  ;; dataset-seq must return something that satisfies sequential?.
  (let [ds   (make-dataset :test [:a :b {:c [:e :f]}] [[1 2 :e] [3 4 :f]])
        rows (dataset-seq ds)]
    (is (sequential? rows))))
|
||||
|
||||
|
||||
(deftest working-sequences
  ;; Round trip: a dataset seeded from one map row and one vector row is
  ;; turned into maps, and those maps seed a second dataset of the same size.
  (let [ds (make-dataset "test" [:a :b {:c [:d :e]}] [{:a 1 :b 2 :c :d} [4 5 :e]])]
    (is (= 2 (dataset-count ds)))
    (let [row-maps (for [row (dataset-seq ds)]
                     (instance-to-map row))]
      (is (= 2 (count row-maps)))
      (is (= 1.0 (:a (first row-maps))))
      (let [rebuilt (make-dataset "test" [:a :b {:c [:d :e]}] row-maps)]
        (is (= 2 (dataset-count rebuilt)))))))
|
101
test/clj_ml/filters_test.clj
Normal file
101
test/clj_ml/filters_test.clj
Normal file
|
@ -0,0 +1,101 @@
|
|||
(ns clj-ml.filters-test
|
||||
(:use [clj-ml.filters] :reload-all)
|
||||
(:use [clojure.test]))
|
||||
|
||||
(deftest make-filter-options-supervised-discretize
  ;; Checks the first six slots of the option array built for
  ;; :supervised-discretize. The settings map also carries a :nonexitent
  ;; key for which no slot is asserted.
  (let [options  (make-filter-options :supervised-discretize {:attributes [1 2] :invert true :binary true :better-encoding true :kononenko true :nonexitent true})
        expected ["-R" "2,3" "-V" "-D" "-E" "-K"]]
    (dotimes [slot (count expected)]
      (is (= (aget options slot)
             (nth expected slot))))))
|
||||
|
||||
(deftest make-filter-options-unsupervised-discretize
  ;; Checks the first ten slots of the option array built for
  ;; :unsupervised-discretize.
  ;;
  ;; The settings map used to list :better-encoding twice. A duplicated key
  ;; in a map literal adds nothing and is rejected by the reader in newer
  ;; Clojure versions, so it now appears only once.
  (let [options (make-filter-options :unsupervised-discretize {:attributes [1 2] :binary true
                                                               :better-encoding true :equal-frequency true :optimize true
                                                               :number-bins 4 :weight-bins 1})]
    (is (= (aget options 0)
           "-R"))
    (is (= (aget options 1)
           "2,3"))
    (is (= (aget options 2)
           "-D"))
    (is (= (aget options 3)
           "-E"))
    (is (= (aget options 4)
           "-F"))
    (is (= (aget options 5)
           "-O"))
    (is (= (aget options 6)
           "-B"))
    (is (= (aget options 7)
           "4"))
    (is (= (aget options 8)
           "-M"))
    (is (= (aget options 9)
           "1"))))
|
||||
|
||||
(deftest make-filter-options-supervised-nominal-to-binary
  ;; Checks the first three slots of the option array built for
  ;; :supervised-nominal-to-binary; slot 0 is expected to be the empty string.
  (let [options  (make-filter-options :supervised-nominal-to-binary {:also-binary true :for-each-nominal true})
        expected ["" "-N" "-A"]]
    (dotimes [slot (count expected)]
      (is (= (aget options slot)
             (nth expected slot))))))
|
||||
|
||||
(deftest make-filter-options-unsupervised-nominal-to-binary
  ;; Checks the first five slots of the option array built for
  ;; :unsupervised-nominal-to-binary.
  (let [options  (make-filter-options :unsupervised-nominal-to-binary {:attributes [1,2] :also-binary true :for-each-nominal true :invert true})
        expected ["-R" "2,3" "-V" "-N" "-A"]]
    (dotimes [slot (count expected)]
      (is (= (aget options slot)
             (nth expected slot))))))
|
||||
|
||||
(deftest make-filter-discretize-sup
  ;; :supervised-discretize must yield Weka's supervised Discretize filter.
  (let [data        (clj-ml.data/make-dataset :test [:a :b {:c [:g :m]}]
                                              [[1 2 :g]
                                               [2 3 :m]
                                               [4 5 :g]])
        made-filter (make-filter :supervised-discretize {:dataset data :attributes [0]})]
    (is (= weka.filters.supervised.attribute.Discretize
           (class made-filter)))))
|
||||
|
||||
(deftest make-filter-discretize-unsup
  ;; :unsupervised-discretize must yield Weka's unsupervised Discretize filter.
  (let [data        (clj-ml.data/make-dataset :test [:a :b {:c [:g :m]}]
                                              [[1 2 :g]
                                               [2 3 :m]
                                               [4 5 :g]])
        made-filter (make-filter :unsupervised-discretize {:dataset data :attributes [0]})]
    (is (= weka.filters.unsupervised.attribute.Discretize
           (class made-filter)))))
|
||||
|
||||
(deftest make-filter-nominal-to-binary-sup
  ;; :supervised-nominal-to-binary must yield Weka's supervised
  ;; NominalToBinary filter.
  (let [data        (clj-ml.data/make-dataset :test [:a :b {:c [:g :m]}]
                                              [[1 2 :g]
                                               [2 3 :m]
                                               [4 5 :g]])
        made-filter (make-filter :supervised-nominal-to-binary {:dataset data})]
    (is (= weka.filters.supervised.attribute.NominalToBinary
           (class made-filter)))))
|
||||
|
||||
(deftest make-filter-nominal-to-binary-unsup
  ;; :unsupervised-nominal-to-binary must yield Weka's unsupervised
  ;; NominalToBinary filter.
  (let [data        (clj-ml.data/make-dataset :test [:a :b {:c [:g :m]}]
                                              [[1 2 :g]
                                               [2 3 :m]
                                               [4 5 :g]])
        made-filter (make-filter :unsupervised-nominal-to-binary {:dataset data :attributes [2]})]
    (is (= weka.filters.unsupervised.attribute.NominalToBinary
           (class made-filter)))))
|
Loading…
Reference in a new issue