adds RemoveUseless filter wrapper and does some test refactoring
This commit is contained in:
parent
32617b4c24
commit
497d65dffc
3 changed files with 81 additions and 71 deletions
|
@ -96,6 +96,10 @@
|
|||
cols-val-a)]
|
||||
(into-array cols-val-b))))
|
||||
|
||||
;; Builds the Weka option array for the :remove-useless-attributes filter.
;;
;; kind - the dispatch keyword (:remove-useless-attributes); unused here.
;; map  - user-supplied options; only :max-variance is translated, to the
;;        "-M" flag followed by its value rendered as a string.
;;
;; Returns a String[] of options.
(defmethod make-filter-options :remove-useless-attributes
  ([kind map]
     ;; The component type is given explicitly: when no recognised option is
     ;; present the option vector is empty, and an untyped `into-array` of an
     ;; empty collection produces Object[] rather than String[].
     (into-array String (check-option-values {:max-variance "-M"} map))))
|
||||
|
||||
(defmethod make-filter-options :select-append-attributes
|
||||
([kind map]
|
||||
(let [cols (get map :attributes)
|
||||
|
@ -135,6 +139,7 @@
|
|||
- :supervised-nominal-to-binary
|
||||
- :unsupervised-nominal-to-binary
|
||||
- :remove-attributes
|
||||
- :remove-useless-attributes
|
||||
- :select-append-attributes
|
||||
- :project-attributes
|
||||
|
||||
|
@ -236,6 +241,19 @@
|
|||
- :attributes
|
||||
Index of the attributes to remove. Sample value: [1 2 3]
|
||||
|
||||
* :remove-useless-attributes
|
||||
|
||||
Remove attributes that do not vary at all or that vary too much. All constant
|
||||
attributes are deleted automatically, along with any that exceed the maximum percentage
|
||||
of variance parameter. The maximum variance test is only applied to nominal attributes.
|
||||
|
||||
Parameters:
|
||||
|
||||
- :max-variance
|
||||
Maximum variance percentage allowed (default 99).
|
||||
Note: this is a percentage, not a fraction, e.g. 89 rather than 0.89.
|
||||
If you pass in a fraction such as 0.89, Weka silently sets it to 0.0.
|
||||
|
||||
* :select-append-attributes
|
||||
|
||||
Append a copy of the selected columns at the end of the dataset.
|
||||
|
@ -269,7 +287,6 @@
|
|||
([kind options]
|
||||
(make-filter-m kind options weka.filters.supervised.attribute.Discretize)))
|
||||
|
||||
|
||||
;; :unsupervised-discretize -- delegates construction to make-filter-m using
;; Weka's unsupervised attribute Discretize filter class.
(defmethod make-filter :unsupervised-discretize
  [kind options]
  (make-filter-m kind
                 options
                 weka.filters.unsupervised.attribute.Discretize))
|
||||
|
@ -286,6 +303,10 @@
|
|||
([kind options]
|
||||
(make-filter-m kind options weka.filters.unsupervised.attribute.Remove)))
|
||||
|
||||
;; :remove-useless-attributes -- delegates construction to make-filter-m using
;; Weka's unsupervised attribute RemoveUseless filter class.
(defmethod make-filter :remove-useless-attributes
  [kind options]
  (make-filter-m kind
                 options
                 weka.filters.unsupervised.attribute.RemoveUseless))
|
||||
|
||||
;; :select-append-attributes -- delegates construction to make-filter-m using
;; Weka's unsupervised attribute Copy filter class.
(defmethod make-filter :select-append-attributes
  [kind options]
  (make-filter-m kind
                 options
                 weka.filters.unsupervised.attribute.Copy))
|
||||
|
|
|
@ -73,30 +73,34 @@
|
|||
opts
|
||||
(conj (conj opts flag) (str val-in-map)))))
|
||||
|
||||
|
||||
(defn check-options [opts-map args-map tmp]
|
||||
; TODO: Raise a helpful exception when the keys don't match up with the provided flags.
|
||||
(defn check-options
|
||||
"Checks the presence of a set of options for a filter"
|
||||
(loop [rem (keys opts-map)
|
||||
acum tmp]
|
||||
(if (empty? rem)
|
||||
acum
|
||||
(let [k (first rem)
|
||||
vk (get opts-map k)
|
||||
rst (rest rem)]
|
||||
(recur rst
|
||||
(check-option acum k vk args-map))))))
|
||||
([opts-map args-map] (check-options opts-map args-map []))
|
||||
( [opts-map args-map tmp]
|
||||
(loop [rem (keys opts-map)
|
||||
acum tmp]
|
||||
(if (empty? rem)
|
||||
acum
|
||||
(let [k (first rem)
|
||||
vk (get opts-map k)
|
||||
rst (rest rem)]
|
||||
(recur rst
|
||||
(check-option acum k vk args-map)))))))
|
||||
|
||||
(defn check-option-values [opts-map args-map tmp]
|
||||
(defn check-option-values
|
||||
"Checks the presence of a set of options with value for a filter"
|
||||
(loop [rem (keys opts-map)
|
||||
acum tmp]
|
||||
(if (empty? rem)
|
||||
acum
|
||||
(let [k (first rem)
|
||||
vk (get opts-map k)
|
||||
rst (rest rem)]
|
||||
(recur rst
|
||||
(check-option-value acum k vk args-map))))))
|
||||
([opts-map args-map] (check-option-values opts-map args-map []))
|
||||
([opts-map args-map val]
|
||||
(loop [rem (keys opts-map)
|
||||
acum val]
|
||||
(if (empty? rem)
|
||||
acum
|
||||
(let [k (first rem)
|
||||
vk (get opts-map k)
|
||||
rst (rest rem)]
|
||||
(recur rst
|
||||
(check-option-value acum k vk args-map)))))))
|
||||
|
||||
;; Serializing classifiers
|
||||
|
||||
|
|
|
@ -4,72 +4,57 @@
|
|||
|
||||
(deftest make-filter-options-supervised-discretize
|
||||
(let [options (make-filter-options :supervised-discretize {:attributes [1 2] :invert true :binary true :better-encoding true :kononenko true :nonexitent true})]
|
||||
(is (= (aget options 0)
|
||||
"-R"))
|
||||
(is (= (aget options 1)
|
||||
"2,3"))
|
||||
(is (= (aget options 2)
|
||||
"-V"))
|
||||
(is (= (aget options 3)
|
||||
"-D"))
|
||||
(is (= (aget options 4)
|
||||
"-E"))
|
||||
(is (= (aget options 5)
|
||||
"-K"))))
|
||||
(are [index expected-flag] (is (= (aget options index) expected-flag))
|
||||
0 "-R"
|
||||
1 "2,3"
|
||||
2 "-V"
|
||||
3 "-D"
|
||||
4 "-E"
|
||||
5 "-K")))
|
||||
|
||||
(deftest make-filter-options-unsupervised-discretize
|
||||
(let [options (make-filter-options :unsupervised-discretize {:attributes [1 2] :binary true
|
||||
:better-encoding true :equal-frequency true :optimize true
|
||||
:number-bins 4 :weight-bins 1})]
|
||||
(is (= (aget options 0)
|
||||
"-R"))
|
||||
(is (= (aget options 1)
|
||||
"2,3"))
|
||||
(is (= (aget options 2)
|
||||
"-D"))
|
||||
(is (= (aget options 3)
|
||||
"-E"))
|
||||
(is (= (aget options 4)
|
||||
"-F"))
|
||||
(is (= (aget options 5)
|
||||
"-O"))
|
||||
(is (= (aget options 6)
|
||||
"-B"))
|
||||
(is (= (aget options 7)
|
||||
"4"))
|
||||
(is (= (aget options 8)
|
||||
"-M"))
|
||||
(is (= (aget options 9)
|
||||
"1"))))
|
||||
(are [index expected-flag] (is (= (aget options index) expected-flag))
|
||||
0 "-R"
|
||||
1 "2,3"
|
||||
2 "-D"
|
||||
3 "-E"
|
||||
4 "-F"
|
||||
5 "-O"
|
||||
6 "-B"
|
||||
7 "4"
|
||||
8 "-M"
|
||||
9 "1")))
|
||||
|
||||
(deftest make-filter-options-supervised-nominal-to-binary
|
||||
(let [options (make-filter-options :supervised-nominal-to-binary {:also-binary true :for-each-nominal true})]
|
||||
(is (= (aget options 0)
|
||||
""))
|
||||
(is (= (aget options 1)
|
||||
"-N"))
|
||||
(is (= (aget options 2)
|
||||
"-A"))))
|
||||
(are [index expected-flag] (is (= (aget options index) expected-flag))
|
||||
0 ""
|
||||
1 "-N"
|
||||
2 "-A")))
|
||||
|
||||
(deftest make-filter-options-unsupervised-nominal-to-binary
|
||||
(let [options (make-filter-options :unsupervised-nominal-to-binary {:attributes [1,2] :also-binary true :for-each-nominal true :invert true})]
|
||||
(is (= (aget options 0)
|
||||
"-R"))
|
||||
(is (= (aget options 1)
|
||||
"2,3"))
|
||||
(is (= (aget options 2)
|
||||
"-V"))
|
||||
(is (= (aget options 3)
|
||||
"-N"))
|
||||
(is (= (aget options 4)
|
||||
"-A"))))
|
||||
(are [index expected-flag] (is (= (aget options index) expected-flag))
|
||||
0 "-R"
|
||||
1 "2,3"
|
||||
2 "-V"
|
||||
3 "-N"
|
||||
4 "-A")))
|
||||
|
||||
;; Checks that the :max-variance option given to make-filter is applied to the
;; resulting :remove-useless-attributes filter instance.
(deftest make-filter-remove-useless-attributes
  (let [ds (clj-ml.data/make-dataset :foo [:a] [[1] [2]])
        ;; Named `useless-filter` so the local does not shadow clojure.core/filter.
        useless-filter (make-filter :remove-useless-attributes
                                    {:dataset-format ds :max-variance 95})]
    ;; `==` compares by numeric value. Weka's getter returns a double, and on
    ;; Clojure 1.3+ `(= 95.0 95)` is false because `=` does not treat
    ;; floating-point and integer numbers as equal.
    (is (== 95 (.getMaximumVariancePercentageAllowed useless-filter)))))
|
||||
|
||||
(deftest make-filter-discretize-sup
|
||||
(let [ds (clj-ml.data/make-dataset :test [:a :b {:c [:g :m]}]
|
||||
[ [1 2 :g]
|
||||
[2 3 :m]
|
||||
[4 5 :g]])
|
||||
foo1(clj-ml.data/dataset-set-class ds 2)
|
||||
_ (clj-ml.data/dataset-set-class ds 2)
|
||||
f (make-filter :supervised-discretize {:dataset-format ds :attributes [0]})]
|
||||
(is (= weka.filters.supervised.attribute.Discretize
|
||||
(class f)))))
|
||||
|
|
Loading…
Reference in a new issue