Added k-nearest neighbor classifier (:lazy :ibk)

This commit is contained in:
Joshua Eckroth 2013-07-16 23:29:45 -04:00
parent 26a9d69c05
commit 3ead98c527
2 changed files with 52 additions and 0 deletions

View file

@ -64,6 +64,7 @@
(:import (java.util Date Random)
(hr.irb.fastRandomForest FastRandomForest)
(weka.core Instance Instances)
(weka.classifiers.lazy IBk)
(weka.classifiers.trees J48 RandomForest M5P)
(weka.classifiers.meta LogitBoost AdditiveRegression RotationForest)
(weka.classifiers.bayes NaiveBayes NaiveBayesUpdateable)
@ -78,6 +79,15 @@
"Creates the right parameters for a classifier. Returns the parameters as a Clojure vector."
(fn [kind algorithm map] [kind algorithm]))
;; Option builder for the k-nearest-neighbor classifier (:lazy :ibk).
;;
;; `m` is the user-supplied options map. Two groups of keys are translated
;; into command-line style switches:
;;   - presence flags:  :inverse-weighted    -> "-I"
;;                      :similarity-weighted -> "-F"
;;                      :no-normalization    -> "-N"
;;   - valued option:   :num-neighbors       -> "-K" (switch plus its value)
;;
;; The result of `check-options` is handed to `check-option-values` as its
;; final argument, so both groups end up in one combined collection.
;; `kind` and `algorithm` are only used for dispatch and are ignored here.
(defmethod make-classifier-options [:lazy :ibk]
  [kind algorithm m]
  (check-option-values
   m
   {:num-neighbors "-K"}
   ;; flags collected first, then extended with the valued options above
   (check-options m
                  {:inverse-weighted "-I"
                   :similarity-weighted "-F"
                   :no-normalization "-N"})))
(defmethod make-classifier-options [:decision-tree :c45]
([kind algorithm m]
(->> (check-options m
@ -285,6 +295,7 @@
The classifiers currently supported are:
- :lazy :ibk
- :decision-tree :c45
- :decision-tree :boosted-stump
- :decision-tree :boosted-decision-tree
@ -304,6 +315,25 @@
This is the description of the supported classifiers and the accepted
option parameters for each of them:
* :lazy :ibk
K-nearest neighbor classification.
Parameters:
- :inverse-weighted
Neighbors will be weighted by the inverse of their distance when voting. (default equal weighting)
Sample value: true
- :similarity-weighted
Neighbors will be weighted by their similarity when voting. (default equal weighting)
Sample value: true
- :no-normalization
Turns off normalization.
Sample value: true
- :num-neighbors
Sets the number of nearest neighbors to use in prediction. (default 1)
Sample value: 3
* :decision-tree :c45
A classifier building a pruned or unpruned C 4.5 decision tree using
@ -430,6 +460,10 @@
"
(fn [kind algorithm & options] [kind algorithm]))
;; Constructor for the k-nearest-neighbor classifier (:lazy :ibk).
;; Delegates to `make-classifier-with`, passing the Weka IBk class together
;; with the dispatch keys and whatever optional arguments the caller gave.
(defmethod make-classifier [:lazy :ibk]
  [kind algorithm & options]
  (make-classifier-with kind algorithm IBk options))
;; Constructor for the C4.5 decision tree classifier (:decision-tree :c45).
;; Delegates to `make-classifier-with`, passing the Weka J48 class together
;; with the dispatch keys and whatever optional arguments the caller gave.
(defmethod make-classifier [:decision-tree :c45]
  [kind algorithm & options]
  (make-classifier-with kind algorithm J48 options))

View file

@ -2,6 +2,24 @@
(:use [clj-ml classifiers data] :reload-all)
(:use clojure.test midje.sweet))
;; With every supported IBk key set, the option builder must emit the three
;; flag switches plus "-K" followed by the neighbor count; ordering of the
;; emitted elements is not checked.
(deftest make-classifiers-options-ibk
  (fact
    (make-classifier-options
     :lazy :ibk
     {:inverse-weighted true :similarity-weighted true :no-normalization true :num-neighbors 3})
    => (just ["-I" "-F" "-N" "-K" "3"] :in-any-order)))
;; Building a :lazy :ibk classifier with no options must yield an object
;; whose concrete class is exactly Weka's IBk.
(deftest make-classifier-ibk
  (let [knn (make-classifier :lazy :ibk)]
    (is (= weka.classifiers.lazy.IBk
           (class knn)))))
;; Smoke test: trains an IBk classifier on a tiny two-row dataset.
;; The final assertion is unconditional, so the test only fails when one of
;; the preceding calls throws.
(deftest train-classifier-ibk
  (let [dataset (clj-ml.data/make-dataset "test" [:a :b {:c [:m :n]}] [[1 2 :m] [4 5 :m]])
        knn (make-classifier :lazy :ibk)]
    ;; attribute index 2 (:c, the nominal one) is designated as the class
    (clj-ml.data/dataset-set-class dataset 2)
    (classifier-train knn dataset)
    (is true)))
(deftest make-classifiers-options-c45
(fact