adds RemoveUseless filter wrapper and does some test refactoring

This commit is contained in:
Ben Mabey 2010-10-29 15:55:29 -06:00
parent 32617b4c24
commit 497d65dffc
3 changed files with 81 additions and 71 deletions

View file

@ -96,6 +96,10 @@
cols-val-a)]
(into-array cols-val-b))))
;; Builds the option array for the :remove-useless-attributes filter.
;; The only recognised key is :max-variance, which is associated with the
;; "-M" flag and handed to check-option-values for expansion.
(defmethod make-filter-options :remove-useless-attributes
  ([kind map]
     ;; Request a String[] explicitly: an untyped into-array derives the array
     ;; class from the first element, so an empty option list would otherwise
     ;; produce an Object[] instead of an (empty) String[].
     (into-array String (check-option-values {:max-variance "-M"} map))))
(defmethod make-filter-options :select-append-attributes
([kind map]
(let [cols (get map :attributes)
@ -135,6 +139,7 @@
- :supervised-nominal-to-binary
- :unsupervised-nominal-to-binary
- :remove-attributes
- :remove-useless-attributes
- :select-append-attributes
- :project-attributes
@ -236,6 +241,19 @@
- :attributes
Indices of the attributes to remove. Sample value: [1 2 3]
* :remove-useless-attributes
Remove attributes that do not vary at all or that vary too much. All constant
attributes are deleted automatically, along with any that exceed the maximum percentage
of variance parameter. The maximum variance test is only applied to nominal attributes.
Parameters:
- :max-variance
Maximum variance percentage allowed (default 99).
Note: percentage, not decimal. e.g. 89 not 0.89
If you pass in a decimal Weka silently sets it to 0.0.
* :select-append-attributes
Append a copy of the selected columns at the end of the dataset.
@ -269,7 +287,6 @@
([kind options]
(make-filter-m kind options weka.filters.supervised.attribute.Discretize)))
;; :unsupervised-discretize -> delegates to make-filter-m with Weka's
;; unsupervised Discretize filter class.
(defmethod make-filter :unsupervised-discretize
  [kind options]
  (make-filter-m kind
                 options
                 weka.filters.unsupervised.attribute.Discretize))
@ -286,6 +303,10 @@
([kind options]
(make-filter-m kind options weka.filters.unsupervised.attribute.Remove)))
;; :remove-useless-attributes -> delegates to make-filter-m with Weka's
;; RemoveUseless filter class.
(defmethod make-filter :remove-useless-attributes
  [kind options]
  (make-filter-m kind
                 options
                 weka.filters.unsupervised.attribute.RemoveUseless))
;; :select-append-attributes -> delegates to make-filter-m with Weka's
;; Copy filter class.
(defmethod make-filter :select-append-attributes
  [kind options]
  (make-filter-m kind
                 options
                 weka.filters.unsupervised.attribute.Copy))

View file

@ -73,30 +73,34 @@
opts
(conj (conj opts flag) (str val-in-map)))))
; TODO: Raise a helpful exception when the keys don't match up with the provided flags.
(defn check-options
  "Checks the presence of a set of options for a filter.

  opts-map associates each option key with the value that check-option
  receives for it (presumably the command-line flag); args-map is the option
  map supplied by the caller. Every key of opts-map is threaded through
  check-option, starting from the accumulator tmp (an empty vector when the
  two-argument arity is used), and the final accumulator is returned."
  ([opts-map args-map] (check-options opts-map args-map []))
  ([opts-map args-map tmp]
     ;; Fold over the keys in the same order the map yields them, letting
     ;; check-option decide what (if anything) to append for each one.
     (reduce (fn [acum k]
               (check-option acum k (get opts-map k) args-map))
             tmp
             (keys opts-map))))
(defn check-option-values
  "Checks the presence of a set of options with value for a filter.

  opts-map associates each option key with the value that check-option-value
  receives for it (e.g. {:max-variance \"-M\"}); args-map is the option map
  supplied by the caller. Every key of opts-map is threaded through
  check-option-value, starting from the accumulator tmp (an empty vector when
  the two-argument arity is used), and the final accumulator is returned."
  ([opts-map args-map] (check-option-values opts-map args-map []))
  ;; The accumulator is named tmp, as in check-options, rather than val so it
  ;; does not shadow clojure.core/val.
  ([opts-map args-map tmp]
     (reduce (fn [acum k]
               (check-option-value acum k (get opts-map k) args-map))
             tmp
             (keys opts-map))))
;; Serializing classifiers

View file

@ -4,72 +4,57 @@
;; Verifies the positional option array built for :supervised-discretize.
;; NOTE(review): :nonexitent looks like a deliberately unknown key; the
;; assertions below only cover indices 0-5 -- confirm the intent.
(deftest make-filter-options-supervised-discretize
  (let [options (make-filter-options :supervised-discretize {:attributes [1 2] :invert true :binary true :better-encoding true :kononenko true :nonexitent true})]
    ;; `are` wraps every row in `is` itself, so the template is the bare
    ;; predicate; nesting another `is` would report each row twice.
    (are [index expected-flag] (= (aget options index) expected-flag)
         0 "-R"
         1 "2,3"
         2 "-V"
         3 "-D"
         4 "-E"
         5 "-K")))
;; Verifies the positional option array built for :unsupervised-discretize,
;; including the two value-carrying options (:number-bins and :weight-bins).
(deftest make-filter-options-unsupervised-discretize
  (let [options (make-filter-options :unsupervised-discretize {:attributes [1 2] :binary true
                                                               :better-encoding true :equal-frequency true :optimize true
                                                               :number-bins 4 :weight-bins 1})]
    ;; `are` wraps every row in `is` itself, so the template is the bare
    ;; predicate; nesting another `is` would report each row twice.
    (are [index expected-flag] (= (aget options index) expected-flag)
         0 "-R"
         1 "2,3"
         2 "-D"
         3 "-E"
         4 "-F"
         5 "-O"
         6 "-B"
         7 "4"
         8 "-M"
         9 "1")))
;; Verifies the positional option array built for
;; :supervised-nominal-to-binary; the first slot is expected to be the empty
;; string.
(deftest make-filter-options-supervised-nominal-to-binary
  (let [options (make-filter-options :supervised-nominal-to-binary {:also-binary true :for-each-nominal true})]
    ;; `are` wraps every row in `is` itself, so the template is the bare
    ;; predicate; nesting another `is` would report each row twice.
    (are [index expected-flag] (= (aget options index) expected-flag)
         0 ""
         1 "-N"
         2 "-A")))
;; Verifies the positional option array built for
;; :unsupervised-nominal-to-binary.
(deftest make-filter-options-unsupervised-nominal-to-binary
  (let [options (make-filter-options :unsupervised-nominal-to-binary {:attributes [1,2] :also-binary true :for-each-nominal true :invert true})]
    ;; `are` wraps every row in `is` itself, so the template is the bare
    ;; predicate; nesting another `is` would report each row twice.
    (are [index expected-flag] (= (aget options index) expected-flag)
         0 "-R"
         1 "2,3"
         2 "-V"
         3 "-N"
         4 "-A")))
;; Builds a RemoveUseless filter through make-filter and checks that the
;; :max-variance option reached the underlying Weka object.
(deftest make-filter-remove-useless-attributes
  (let [ds (clj-ml.data/make-dataset :foo [:a] [[1] [2]])
        ;; Named useless-filter rather than filter to avoid shadowing
        ;; clojure.core/filter.
        useless-filter (make-filter :remove-useless-attributes {:dataset-format ds :max-variance 95})]
    ;; Weka exposes the setting as a double, so compare numerically with ==;
    ;; (= 95.0 95) is false on Clojure 1.3 and later.
    (is (== 95 (.getMaximumVariancePercentageAllowed useless-filter)))))
;; Checks that make-filter returns an instance of Weka's supervised Discretize
;; class for the :supervised-discretize kind.
(deftest make-filter-discretize-sup
  (let [ds (clj-ml.data/make-dataset :test [:a :b {:c [:g :m]}]
                                     [[1 2 :g]
                                      [2 3 :m]
                                      [4 5 :g]])
        ;; Called once, for its effect on ds only; the result is not used.
        _ (clj-ml.data/dataset-set-class ds 2)
        f (make-filter :supervised-discretize {:dataset-format ds :attributes [0]})]
    (is (= weka.filters.supervised.attribute.Discretize
           (class f)))))