Merge branch 'master' of github.com:leadtune/clj-ml

This commit is contained in:
Ben Mabey 2010-12-14 16:45:49 -07:00
commit dc6abbcce4
2 changed files with 82 additions and 17 deletions

View file

@ -6,7 +6,7 @@
(ns #^{:author "Antonio Garrote <antoniogarrote@gmail.com>"}
clj-ml.classifiers
"This namespace contains several functions for building classifiers using different
classification algorithms: Bayes networks, multilayer perceptron, decission tree or
classification algorithms: Bayes networks, multilayer perceptron, decision tree or
support vector machines are available. Some of these classifiers have incremental
versions so they can be built without having all the dataset instances in memory.
@ -17,8 +17,8 @@
(use 'clj-ml.classifiers)
; Building a classifier using a C4.5 decission tree
(def *classifier* (make-classifier :decission-tree :c45))
; Building a classifier using a C4.5 decision tree
(def *classifier* (make-classifier :decision-tree :c45))
; We set the class attribute for the loaded dataset.
; *dataset* is supposed to contain a set of instances.
@ -63,9 +63,10 @@
(:use [clj-ml utils data kernel-functions])
(:import (java.util Date Random)
(weka.core Instance Instances)
(weka.classifiers.trees J48)
(weka.classifiers.trees J48 RandomForest M5P)
(weka.classifiers.meta LogitBoost)
(weka.classifiers.bayes NaiveBayes NaiveBayesUpdateable)
(weka.classifiers.functions MultilayerPerceptron SMO LinearRegression Logistic)
(weka.classifiers.functions MultilayerPerceptron SMO LinearRegression Logistic PaceRegression)
(weka.classifiers Classifier Evaluation)))
@ -76,7 +77,7 @@
"Creates the right parameters for a classifier. Returns the parameters as a Clojure vector."
(fn [kind algorithm map] [kind algorithm]))
(defmethod make-classifier-options [:decission-tree :c45]
(defmethod make-classifier-options [:decision-tree :c45]
([kind algorithm m]
(->> (check-options m
{:unpruned "-U"
@ -112,13 +113,15 @@
:epochs "-N"
:percentage-validation-set "-V"
:random-seed "-S"
:threshold-number-errors "-E"}))))
:threshold-number-errors "-E"
:hidden-layers-string "-H"}))))
(defmethod make-classifier-options [:support-vector-machine :smo]
([kind algorithm m]
(->> (check-options m {:fit-logistic-models "-M"})
(check-option-values m
{:complexity-constant "-C"
:normalize "-N"
:tolerance "-L"
:epsilon-roundoff "-P"
:folds-for-cross-validation "-V"
@ -140,6 +143,47 @@
{:max-iterations "-S"
:ridge "-R"}))))
;; Option builder for the [:regression :pace] classifier.
;;   kind, algorithm - the dispatch pair (not used in the body)
;;   m               - map of user-supplied options
;; The keys in the first map are handled by check-options and the keys in the
;; second map by check-option-values; the result of the former is handed to the
;; latter as its trailing argument.
;; NOTE(review): presumably the first group become bare switches and the second
;; group flag/value pairs -- confirm against clj-ml.utils.
(defmethod make-classifier-options [:regression :pace]
  [kind algorithm m]
  (let [switches (check-options m {:debug "-D"})]
    (check-option-values m
                         {:threshold "-S"
                          :estimator "-E"}
                         switches)))
;; Option builder for the [:decision-tree :boosted-stump] classifier.
;;   kind, algorithm - the dispatch pair (not used in the body)
;;   m               - map of user-supplied options
;; :debug and :resampling go through check-options; every other key goes through
;; check-option-values, which receives the check-options result as its trailing
;; argument. The entries of each map are kept in their original order.
;; NOTE(review): presumably the order of the emitted options follows the order
;; of these map entries -- confirm before reordering them.
(defmethod make-classifier-options [:decision-tree :boosted-stump]
  [kind algorithm m]
  (let [switches (check-options m {:debug "-D"
                                   :resampling "-Q"})]
    (check-option-values m
                         {:weak-learning-class "-W"
                          :num-iterations "-I"
                          :random-seed "-S"
                          :percentage-weight-mass "-P"
                          :folds-for-cross-validation "-F"
                          :runs-for-cross-validation "-R"
                          :log-likelihood-improvement-threshold "-L"
                          :shrinkage-parameter "-H"}
                         switches)))
;; Option builder for the [:decision-tree :random-forest] classifier.
;;   kind, algorithm - the dispatch pair (not used in the body)
;;   m               - map of user-supplied options
;; :debug is handled by check-options; the remaining keys are handled by
;; check-option-values, which receives the check-options result as its trailing
;; argument.
(defmethod make-classifier-options [:decision-tree :random-forest]
  [kind algorithm m]
  (let [switches (check-options m {:debug "-D"})]
    (check-option-values m
                         {:num-trees-in-forest "-I"
                          :num-features-to-consider "-K"
                          :random-seed "-S"
                          :depth "-depth"}
                         switches)))
;; Option builder for the [:decision-tree :m5p] classifier.
;;   kind, algorithm - the dispatch pair (not used in the body)
;;   m               - map of user-supplied options
;; :unsmoothed-predictions, :regression and :unpruned are handled by
;; check-options; :minimum-instances is handled by check-option-values, which
;; receives the check-options result as its trailing argument.
;; Note that :unpruned maps to "-N" here, not "-U" ("-U" is used for
;; :unsmoothed-predictions in this method).
(defmethod make-classifier-options [:decision-tree :m5p]
  [kind algorithm m]
  (let [switches (check-options m {:unsmoothed-predictions "-U"
                                   :regression "-R"
                                   :unpruned "-N"})]
    (check-option-values m {:minimum-instances "-M"} switches)))
;; Building classifiers
@ -156,15 +200,20 @@
"Creates a new classifier for the given kind algorithm and options.
The first argument identifies the kind of classifier and the second
argument the algorithm to use, e.g. :decission-tree :c45.
argument the algorithm to use, e.g. :decision-tree :c45.
The classifiers currently supported are:
- :decission-tree :c45
- :decision-tree :c45
- :decision-tree :boosted-stump
- :decision-tree :m5p
- :decision-tree :random-forest
- :bayes :naive
- :neural-network :multilayer-perceptron
- :support-vector-machine :smo
- :regression :linear
- :regression :logistic
- :regression :pace
Optionally, a map of options can also be passed as an argument with
a set of classifier specific options.
@ -172,9 +221,9 @@
This is the description of the supported classifiers and the accepted
option parameters for each of them:
* :decission-tree :c45
* :decision-tree :c45
A classifier building a pruned or unpruned C 4.5 decission tree using
A classifier building a pruned or unpruned C 4.5 decision tree using
Weka's J48 implementation.
Parameters:
@ -294,7 +343,7 @@
"
(fn [kind algorithm & options] [kind algorithm]))
(defmethod make-classifier [:decission-tree :c45]
;; [:decision-tree :c45] is backed by the imported J48 class; construction is
;; delegated to make-classifier-with, which receives the rest-args `options`
;; sequence unchanged.
(defmethod make-classifier [:decision-tree :c45]
  [kind algorithm & options]
  (make-classifier-with kind algorithm J48 options))
@ -333,6 +382,22 @@
([kind algorithm & options]
(make-classifier-with kind algorithm Logistic options)))
;; [:regression :pace] is backed by the imported PaceRegression class;
;; construction is delegated to make-classifier-with, which receives the
;; rest-args `options` sequence unchanged.
(defmethod make-classifier [:regression :pace]
  [kind algorithm & options]
  (make-classifier-with kind algorithm PaceRegression options))
;; [:decision-tree :boosted-stump] is backed by the imported LogitBoost class;
;; construction is delegated to make-classifier-with, which receives the
;; rest-args `options` sequence unchanged.
(defmethod make-classifier [:decision-tree :boosted-stump]
  [kind algorithm & options]
  (make-classifier-with kind algorithm LogitBoost options))
;; [:decision-tree :random-forest] is backed by the imported RandomForest class;
;; construction is delegated to make-classifier-with, which receives the
;; rest-args `options` sequence unchanged.
(defmethod make-classifier [:decision-tree :random-forest]
  [kind algorithm & options]
  (make-classifier-with kind algorithm RandomForest options))
;; [:decision-tree :m5p] is backed by the imported M5P class; construction is
;; delegated to make-classifier-with, which receives the rest-args `options`
;; sequence unchanged. The dispatch keyword is lowercase :m5p.
(defmethod make-classifier [:decision-tree :m5p]
  [kind algorithm & options]
  (make-classifier-with kind algorithm M5P options))
;; Training classifiers
(defn classifier-train

View file

@ -4,19 +4,19 @@
(deftest make-classifiers-options-c45
(let [options (make-classifier-options :decission-tree :c45 {:unpruned true :reduced-error-pruning true :only-binary-splits true :no-raising true
(let [options (make-classifier-options :decision-tree :c45 {:unpruned true :reduced-error-pruning true :only-binary-splits true :no-raising true
:no-cleanup true :laplace-smoothing true :pruning-confidence 0.12 :minimum-instances 10
:pruning-number-folds 5 :random-seed 1})]
(is (= options ["-U" "-R" "-B" "-S" "-L" "-A" "-C" "0.12" "-M" "10" "-N" "5" "-Q" "1"]))))
(deftest make-classifier-c45
(let [c (make-classifier :decission-tree :c45)]
(let [c (make-classifier :decision-tree :c45)]
(is (= (class c)
weka.classifiers.trees.J48))))
(deftest train-classifier-c45
(let [c (make-classifier :decission-tree :c45)
(let [c (make-classifier :decision-tree :c45)
ds (clj-ml.data/make-dataset "test" [:a :b {:c [:m :n]}] [[1 2 :m] [4 5 :m]])]
(clj-ml.data/dataset-set-class ds 2)
(classifier-train c ds)
@ -42,7 +42,7 @@
(deftest classifier-evaluate-dataset
(let [c (make-classifier :decission-tree :c45)
(let [c (make-classifier :decision-tree :c45)
ds (clj-ml.data/make-dataset "test" [:a :b {:c [:m :n]}] [[1 2 :m] [4 5 :m]])
tds (clj-ml.data/make-dataset "test" [:a :b {:c [:m :n]}] [[4 1 :n] [4 5 :m]])
_ (clj-ml.data/dataset-set-class ds 2)
@ -58,7 +58,7 @@
(deftest classifier-evaluate-cross-validation
(let [c (make-classifier :decission-tree :c45)
(let [c (make-classifier :decision-tree :c45)
ds (clj-ml.data/make-dataset "test" [:a :b {:c [:m :n]}] [[1 2 :m] [4 5 :m]])
_ (clj-ml.data/dataset-set-class ds 2)
_ (classifier-train c ds)