Changed resample filter to resample-unsupervised, and added resample-supervised.

This commit is contained in:
Joshua Eckroth 2013-03-22 12:35:24 -04:00
parent 94878fa57d
commit 05cf721ec5
2 changed files with 73 additions and 11 deletions

View file

@ -192,12 +192,19 @@
(deffilter remove-useless-attributes)
(defmethod make-filter-options :resample
(defmethod make-filter-options :resample-unsupervised
([kind m]
(->> (check-option-values m {:seed "-S" :size-percent "-Z"})
(check-options m {:no-replacement "-no-replacement" :invert "-V"}))))
(deffilter resample)
(deffilter resample-unsupervised)
;; Option builder for the :resample-supervised filter.
;; The valued options (:seed -> "-S", :size-percent -> "-Z", :bias -> "-B")
;; are collected first; that result is then passed to check-options as its
;; final argument together with the flag options
;; (:no-replacement -> "-no-replacement", :invert -> "-V").
(defmethod make-filter-options :resample-supervised
  [kind m]
  (let [valued-opts (check-option-values m {:seed "-S" :size-percent "-Z" :bias "-B"})]
    (check-options m {:no-replacement "-no-replacement" :invert "-V"} valued-opts)))

;; Registers the supervised resample filter through the deffilter macro.
(deffilter resample-supervised)
(defmethod make-filter-options :select-append-attributes
([kind m]
@ -235,7 +242,8 @@
:remove-percentage weka.filters.unsupervised.instance.RemovePercentage
:remove-range weka.filters.unsupervised.instance.RemoveRange
:remove-useless-attributes weka.filters.unsupervised.attribute.RemoveUseless
:resample weka.filters.unsupervised.instance.Resample
:resample-unsupervised weka.filters.unsupervised.instance.Resample
:resample-supervised weka.filters.supervised.instance.Resample
:select-append-attributes weka.filters.unsupervised.attribute.Copy
:project-attributes weka.filters.unsupervised.attribute.Remove})
@ -258,7 +266,8 @@
- :remove-percentage
- :remove-range
- :remove-useless-attributes
- :resample
- :resample-unsupervised
- :resample-supervised
- :select-append-attributes
- :project-attributes
- :clj-streamable
@ -432,7 +441,7 @@
Note: percentage, not decimal. e.g. 89 not 0.89
If you pass in a decimal Weka silently sets it to 0.0.
* :resample
* :resample-unsupervised
\"Produces a random subsample of a dataset using either sampling
with replacement or without replacement. The original dataset
@ -455,6 +464,38 @@
- :invert
Inverts the selection; can only be true if :no-replacement is true (boolean)
* :resample-supervised
\"Produces a random subsample of a dataset using either sampling
with replacement or without replacement. The original dataset
must fit entirely in memory. The number of instances in the
generated dataset may be specified. The dataset must have a
nominal class attribute. If not, use the unsupervised
version. The filter can be made to maintain the class
distribution in the subsample, or to bias the class distribution
toward a uniform distribution. When used in batch mode (i.e. in
the FilteredClassifier), subsequent batches are NOT resampled.\"
-- from Weka JavaDoc.
Parameters:
- :seed
Random number seed (integer)
- :size-percent
\"The size of the output dataset, as a percentage of
the input dataset (default 100)\" (integer)
- :bias \"Bias factor towards uniform class distribution. 0 =
distribution in input data -- 1 = uniform
distribution. (default 0)\" (number between 0 and 1)
- :no-replacement
Sample without replacement when true; default is false, i.e., with replacement (boolean)
- :invert
Inverts the selection; can only be true if :no-replacement is true (boolean)
* :select-append-attributes
Append a copy of the selected columns at the end of the dataset.

View file

@ -34,12 +34,20 @@
filter (make-filter :remove-useless-attributes {:dataset-format ds :max-variance 95})]
(is (== (.getMaximumVariancePercentageAllowed filter) 95))))
(deftest make-filter-resample
(deftest make-filter-resample-unsupervised
(fact
(let [ds (load-instances :arff "http://repository.seasr.org/Datasets/UCI/arff/iris.arff")
options (make-filter-options :resample {:dataset-format ds :seed 10 :size-percent 50 :no-replacement true :invert true})]
options (make-filter-options :resample-unsupervised
{:dataset-format ds :seed 10 :size-percent 50 :no-replacement true :invert true})]
options => (just ["-S" "10" "-Z" "50" "-V" "-no-replacement"] :in-any-order))))
;; Checks the options produced by make-filter-options for the supervised
;; Resample filter: every supplied key must show up as its Weka flag.
;; The test is named after make-filter-options because another deftest in this
;; file is also called make-filter-resample-supervised; two deftests with the
;; same name in one namespace redefine the same var, so only the later
;; definition would actually run.
(deftest make-filter-options-resample-supervised
  (fact
    (let [ds (load-instances :arff "http://repository.seasr.org/Datasets/UCI/arff/iris.arff")
          options (make-filter-options :resample-supervised
                                       {:dataset-format ds :seed 10 :size-percent 50
                                        :no-replacement true :invert true :bias 1})]
      ;; Order of the flags is not significant, hence :in-any-order.
      options => (just ["-S" "10" "-Z" "50" "-V" "-no-replacement" "-B" "1"] :in-any-order))))
(deftest make-filter-discretize-sup
(let [ds (make-dataset :test [:a :b {:c [:g :m]}]
[ [1 2 :g]
@ -92,12 +100,19 @@
(is (= weka.filters.unsupervised.attribute.Reorder
(class f)))))
(deftest make-filter-reorder-attributes
(deftest make-filter-resample-unsupervised
(let [ds (load-instances :arff "http://repository.seasr.org/Datasets/UCI/arff/iris.arff")
f (make-filter :resample {:dataset-format ds :seed 10 :size-percent 50 :replacement true})]
f (make-filter :resample-unsupervised {:dataset-format ds :seed 10 :size-percent 50 :replacement true})]
(is (= weka.filters.unsupervised.instance.Resample
(class f)))))
;; make-filter with :resample-supervised must yield an instance of Weka's
;; supervised Resample filter. The iris data gets its class attribute set
;; before being used as the dataset format.
(deftest make-filter-resample-supervised
  (let [iris (-> (load-instances :arff "http://repository.seasr.org/Datasets/UCI/arff/iris.arff")
                 (dataset-set-class :class))
        ;; NOTE(review): :replacement is not one of the keys mapped by the
        ;; :resample-supervised option builder (:seed, :size-percent, :bias,
        ;; :no-replacement, :invert), so it presumably has no effect -- confirm.
        resampler (make-filter :resample-supervised
                               {:dataset-format iris :seed 10 :size-percent 50 :replacement true :bias 1})]
    (is (= weka.filters.supervised.instance.Resample
           (class resampler)))))
(deftest make-filter-remove-attributes
(let [ds (make-dataset :test [:a :b {:c [:g :m]}]
[ [1 2 :g]
@ -178,9 +193,15 @@
(is (= (str ds (str (make-dataset :test [{:s nil} :n {:class [:yes :no]}]
[["Hello" 55 :yes] ["World" -100 :no]])))))))
(deftest make-apply-filter-resample
(deftest make-apply-filter-resample-unsupervised
(let [ds (load-instances :arff "http://repository.seasr.org/Datasets/UCI/arff/iris.arff")
ds2 (make-apply-filter :resample {:seed 10 :size-percent 50 :replacement true} ds)]
ds2 (make-apply-filter :resample-unsupervised {:seed 10 :size-percent 50 :replacement true} ds)]
(is (= 75 (dataset-count ds2)))))
;; Applies the supervised resample filter at 50% size to the iris data (class
;; attribute set to :class) and expects 75 instances in the result.
(deftest make-apply-filter-resample-supervised
  (let [iris (-> (load-instances :arff "http://repository.seasr.org/Datasets/UCI/arff/iris.arff")
                 (dataset-set-class :class))
        ;; NOTE(review): :replacement is not one of the keys mapped by the
        ;; :resample-supervised option builder, so it presumably has no
        ;; effect -- confirm.
        resampled (make-apply-filter :resample-supervised
                                     {:seed 10 :size-percent 50 :replacement true :bias 1}
                                     iris)]
    (is (= 75 (dataset-count resampled)))))
(deftest make-apply-filters-test