2010-02-28 12:14:17 +00:00
|
|
|
(ns clj-ml.classifiers-test
|
|
|
|
(:use [clj-ml classifiers data] :reload-all)
|
2012-01-05 22:12:14 +00:00
|
|
|
(:use clojure.test midje.sweet))
|
2010-02-28 12:14:17 +00:00
|
|
|
|
2013-07-17 03:29:45 +00:00
|
|
|
(deftest make-classifiers-options-ibk
  ;; The IBk option map must translate to exactly these option strings;
  ;; their relative order is not checked (:in-any-order).
  (fact
    (make-classifier-options
      :lazy :ibk
      {:inverse-weighted    true
       :similarity-weighted true
       :no-normalization    true
       :num-neighbors       3})
    => (just ["-I" "-F" "-N" "-K" "3"] :in-any-order)))
|
|
|
|
|
|
|
|
(deftest make-classifier-ibk
  ;; Requesting :lazy :ibk must produce an object whose concrete class is
  ;; Weka's IBk (exact class match, not an instance? check).
  (let [ibk          (make-classifier :lazy :ibk)
        actual-class (class ibk)]
    (is (= actual-class weka.classifiers.lazy.IBk))))
|
|
|
|
|
|
|
|
(deftest train-classifier-ibk
  ;; Smoke test: the only assertion is (is true), so this passes as long as
  ;; building the dataset and training the IBk classifier do not throw.
  (let [ibk      (make-classifier :lazy :ibk)
        ;; doto applies dataset-set-class for its side effect and keeps the
        ;; dataset object itself, exactly like a bare statement would.
        training (doto (make-dataset "test"
                                     [:a :b {:c [:m :n]}]
                                     [[1 2 :m]
                                      [4 5 :m]])
                   (dataset-set-class 2))]
    (classifier-train ibk training)
    (is true)))
|
2010-03-07 17:22:04 +00:00
|
|
|
|
2010-02-28 12:14:17 +00:00
|
|
|
(deftest make-classifiers-options-c45
  ;; The C4.5 option map must translate to exactly these option strings;
  ;; their relative order is not checked (:in-any-order).
  (fact
    (make-classifier-options
      :decision-tree :c45
      {:unpruned              true
       :reduced-error-pruning true
       :only-binary-splits    true
       :no-raising            true
       :no-cleanup            true
       :laplace-smoothing     true
       :pruning-confidence    0.12
       :minimum-instances     10
       :pruning-number-folds  5
       :random-seed           1})
    => (just ["-U" "-R" "-B" "-S" "-L" "-A"
              "-C" "0.12"
              "-M" "10"
              "-N" "5"
              "-Q" "1"]
             :in-any-order)))
|
2010-02-28 12:14:17 +00:00
|
|
|
|
|
|
|
(deftest make-classifier-c45
  ;; Requesting :decision-tree :c45 must produce an object whose concrete
  ;; class is Weka's J48.
  (let [c45          (make-classifier :decision-tree :c45)
        actual-class (class c45)]
    (is (= actual-class weka.classifiers.trees.J48))))
|
|
|
|
|
|
|
|
(deftest train-classifier-c45
  ;; Smoke test: the only assertion is (is true), so this passes as long as
  ;; building the dataset and training the C4.5 classifier do not throw.
  (let [c45      (make-classifier :decision-tree :c45)
        ;; doto applies dataset-set-class for its side effect and keeps the
        ;; dataset object itself.
        training (doto (make-dataset "test"
                                     [:a :b {:c [:m :n]}]
                                     [[1 2 :m]
                                      [4 5 :m]])
                   (dataset-set-class 2))]
    (classifier-train c45 training)
    (is true)))
|
|
|
|
|
2010-03-03 13:46:08 +00:00
|
|
|
(deftest make-classifier-bayes
  ;; A naive Bayes classifier built with the kernel-estimator and old-format
  ;; switches must report "-K" followed by "-O" among its options.
  (fact
    (let [bayes-settings {:kernel-estimator true :old-format true}
          naive-bayes    (make-classifier :bayes :naive bayes-settings)
          reported-flags (vec (.getOptions naive-bayes))]
      reported-flags => (contains ["-K" "-O"]))))
|
2010-03-03 13:46:08 +00:00
|
|
|
|
|
|
|
(deftest make-classifier-bayes-updateable
  ;; With {:updateable true} the factory must hand back the updateable
  ;; naive Bayes class (exact class match).
  (let [updateable-bayes (make-classifier :bayes :naive {:updateable true})
        actual-class     (class updateable-bayes)]
    (is (= actual-class weka.classifiers.bayes.NaiveBayesUpdateable))))
|
|
|
|
|
|
|
|
(deftest train-classifier-bayes
  ;; Smoke test: the only assertion is (is true), so this passes as long as
  ;; building the dataset and training the naive Bayes classifier do not throw.
  (let [naive-bayes (make-classifier :bayes :naive {:kernel-estimator true :old-format true})
        ;; doto applies dataset-set-class for its side effect and keeps the
        ;; dataset object itself.
        training    (doto (make-dataset "test"
                                        [:a :b {:c [:m :n]}]
                                        [[1 2 :m]
                                         [4 5 :m]])
                      (dataset-set-class 2))]
    (classifier-train naive-bayes training)
    (is true)))
|
|
|
|
|
2010-02-28 12:14:17 +00:00
|
|
|
(deftest classifier-evaluate-dataset
  ;; Trains C4.5 on one dataset, evaluates it against a second one and
  ;; checks that the evaluation result exposes 28 keys.
  (let [c45          (make-classifier :decision-tree :c45)
        training-set (make-dataset "test" [:a :b {:c [:m :n]}] [[1 2 :m] [4 5 :m]])
        held-out-set (make-dataset "test" [:a :b {:c [:m :n]}] [[4 1 :n] [4 5 :m]])]
    ;; Same sequence as before: both class indices are set, then training,
    ;; then evaluation.
    (dataset-set-class training-set 2)
    (dataset-set-class held-out-set 2)
    (classifier-train c45 training-set)
    (let [evaluation (classifier-evaluate c45 :dataset training-set held-out-set)]
      (is (= 28 (-> evaluation keys count))))))
|
2010-02-28 12:14:17 +00:00
|
|
|
|
2010-03-07 17:22:04 +00:00
|
|
|
(deftest make-classifier-svm-smo-polykernel
  ;; An SMO classifier configured with a :polynomic kernel function must
  ;; report a PolyKernel from .getKernel.
  (let [kernel-spec {:kernel-function {:polynomic {:exponent 2.0}}}
        smo         (make-classifier :support-vector-machine :smo kernel-spec)]
    (is (= weka.classifiers.functions.supportVector.PolyKernel
           (-> smo .getKernel class)))))
|
|
|
|
|
2010-02-28 12:14:17 +00:00
|
|
|
(deftest classifier-evaluate-cross-validation
  ;; Trains C4.5, runs a 2-fold cross-validation on the same dataset and
  ;; checks that the evaluation result exposes 28 keys.
  (let [c45     (make-classifier :decision-tree :c45)
        ;; doto applies dataset-set-class for its side effect and keeps the
        ;; dataset object itself.
        samples (doto (make-dataset "test"
                                    [:a :b {:c [:m :n]}]
                                    [[1 2 :m]
                                     [4 5 :m]])
                  (dataset-set-class 2))]
    (classifier-train c45 samples)
    (let [evaluation (classifier-evaluate c45 :cross-validation samples 2)]
      (is (= 28 (-> evaluation keys count))))))
|
2010-03-03 13:46:08 +00:00
|
|
|
|
2013-06-21 05:00:55 +00:00
|
|
|
(deftest classifier-evaluate-cross-validation-grid
  ;; Runs a 2-fold cross-validation with the :libsvm-grid classifier (no
  ;; explicit training call here) and checks the result exposes 28 keys.
  (let [grid-svm (make-classifier :support-vector-machine :libsvm-grid)
        ;; doto applies dataset-set-class for its side effect and keeps the
        ;; dataset object itself.
        samples  (doto (make-dataset "test"
                                     [:a :b {:c [:m :n]}]
                                     [[1 2 :m]
                                      [4 5 :m]])
                   (dataset-set-class 2))
        evaluation (classifier-evaluate grid-svm :cross-validation samples 2)]
    (is (= 28 (-> evaluation keys count)))))
|
|
|
|
|
2013-08-06 07:41:12 +00:00
|
|
|
(deftest test-classifier-classify
  ;; After training on a dataset whose rows are all labelled :m, classifying
  ;; an instance with its class marked missing must yield :m.
  (let [c45       (make-classifier :decision-tree :c45)
        ;; The dataset used below is whatever dataset-set-class returns.
        samples   (dataset-set-class
                    (make-dataset "test" [:a :b {:c [:m :n]}] [[1 2 :m] [4 5 :m]])
                    2)
        first-row (first (dataset-seq samples))
        unlabeled (instance-set-class-missing first-row)]
    (classifier-train c45 samples)
    (is (= :m (classifier-classify c45 unlabeled)))))
|
|
|
|
|
|
|
|
(deftest test-classifier-label
  ;; An instance with its class marked missing reports nil as its class;
  ;; after training and calling classifier-label on it, it must report :m.
  (let [c45       (make-classifier :decision-tree :c45)
        ;; The dataset used below is whatever dataset-set-class returns.
        samples   (dataset-set-class
                    (make-dataset "test" [:a :b {:c [:m :n]}] [[1 2 :m] [4 5 :m]])
                    2)
        first-row (first (dataset-seq samples))
        unlabeled (instance-set-class-missing first-row)]
    ;; Precondition: no class value before labelling.
    (is (nil? (instance-get-class unlabeled)))
    (classifier-train c45 samples)
    ;; Called for its effect on the instance; the return value is ignored.
    (classifier-label c45 unlabeled)
    (is (= :m (instance-get-class unlabeled)))))
|
|
|
|
|
2010-03-03 13:46:08 +00:00
|
|
|
(deftest update-updateable-classifier
  ;; Smoke test: the only assertion is (is true), so this passes as long as
  ;; training and then updating the updateable naive Bayes classifier (first
  ;; with a whole dataset, then with a single instance) do not throw.
  (let [updateable-bayes (make-classifier :bayes :naive {:updateable true})
        ;; doto applies dataset-set-class for its side effect and keeps the
        ;; dataset object itself; the class index is set before the extra
        ;; instance is created, as before.
        samples          (doto (make-dataset "test"
                                             [:a :b {:c [:m :n]}]
                                             [[1 2 :m]
                                              [4 5 :m]])
                           (dataset-set-class 2))
        extra-row        (make-instance samples {:a 56 :b 45 :c :m})]
    (classifier-train updateable-bayes samples)
    (classifier-update updateable-bayes samples)
    (classifier-update updateable-bayes extra-row)
    (is true)))
|