Changed resample filter to resample-unsupervised, and added resample-supervised.
This commit is contained in:
parent
94878fa57d
commit
05cf721ec5
2 changed files with 73 additions and 11 deletions
|
@ -192,12 +192,19 @@
|
|||
|
||||
(deffilter remove-useless-attributes)
|
||||
|
||||
(defmethod make-filter-options :resample
|
||||
(defmethod make-filter-options :resample-unsupervised
|
||||
([kind m]
|
||||
(->> (check-option-values m {:seed "-S" :size-percent "-Z"})
|
||||
(check-options m {:no-replacement "-no-replacement" :invert "-V"}))))
|
||||
|
||||
(deffilter resample)
|
||||
(deffilter resample-unsupervised)
|
||||
|
||||
;; Option builder for the supervised Resample filter.
;; The flags that carry a value (-S seed, -Z size-percent, -B bias) are
;; produced first; that result is then handed to check-options as its last
;; argument, which handles the value-less switches (-no-replacement, -V).
(defmethod make-filter-options :resample-supervised
  ([kind m]
     (check-options m
                    {:no-replacement "-no-replacement" :invert "-V"}
                    (check-option-values m {:seed "-S" :size-percent "-Z" :bias "-B"}))))

;; Generates the filter-specific helper for :resample-supervised.
(deffilter resample-supervised)
|
||||
|
||||
(defmethod make-filter-options :select-append-attributes
|
||||
([kind m]
|
||||
|
@ -235,7 +242,8 @@
|
|||
:remove-percentage weka.filters.unsupervised.instance.RemovePercentage
|
||||
:remove-range weka.filters.unsupervised.instance.RemoveRange
|
||||
:remove-useless-attributes weka.filters.unsupervised.attribute.RemoveUseless
|
||||
:resample weka.filters.unsupervised.instance.Resample
|
||||
:resample-unsupervised weka.filters.unsupervised.instance.Resample
|
||||
:resample-supervised weka.filters.supervised.instance.Resample
|
||||
:select-append-attributes weka.filters.unsupervised.attribute.Copy
|
||||
:project-attributes weka.filters.unsupervised.attribute.Remove})
|
||||
|
||||
|
@ -258,7 +266,8 @@
|
|||
- :remove-percentage
|
||||
- :remove-range
|
||||
- :remove-useless-attributes
|
||||
- :resample
|
||||
- :resample-unsupervised
|
||||
- :resample-supervised
|
||||
- :select-append-attributes
|
||||
- :project-attributes
|
||||
- :clj-streamable
|
||||
|
@ -432,7 +441,7 @@
|
|||
Note: percentage, not decimal. e.g. 89 not 0.89
|
||||
If you pass in a decimal Weka silently sets it to 0.0.
|
||||
|
||||
* :resample
|
||||
* :resample-unsupervised
|
||||
|
||||
\"Produces a random subsample of a dataset using either sampling
|
||||
with replacement or without replacement. The original dataset
|
||||
|
@ -455,6 +464,38 @@
|
|||
- :invert
|
||||
Inverts the selection; can only be true if :no-replacement is true (boolean)
|
||||
|
||||
* :resample-supervised
|
||||
|
||||
\"Produces a random subsample of a dataset using either sampling
|
||||
with replacement or without replacement. The original dataset
|
||||
must fit entirely in memory. The number of instances in the
|
||||
generated dataset may be specified. The dataset must have a
|
||||
nominal class attribute. If not, use the unsupervised
|
||||
version. The filter can be made to maintain the class
|
||||
distribution in the subsample, or to bias the class distribution
|
||||
toward a uniform distribution. When used in batch mode (i.e. in
|
||||
the FilteredClassifier), subsequent batches are NOT resampled.\"
|
||||
-- from Weka JavaDoc.
|
||||
|
||||
Parameters:
|
||||
|
||||
- :seed
|
||||
Random number seed (integer)
|
||||
|
||||
- :size-percent
|
||||
\"The size of the output dataset, as a percentage of
|
||||
the input dataset (default 100)\" (integer)
|
||||
|
||||
- :bias \"Bias factor towards uniform class distribution. 0 =
|
||||
distribution in input data -- 1 = uniform
|
||||
distribution. (default 0)\" (number between 0 and 1)
|
||||
|
||||
- :no-replacement
|
||||
When true, sample without replacement; default is false, i.e., with replacement (boolean)
|
||||
|
||||
- :invert
|
||||
Inverts the selection; can only be true if :no-replacement is true (boolean)
|
||||
|
||||
* :select-append-attributes
|
||||
|
||||
Append a copy of the selected columns at the end of the dataset.
|
||||
|
|
|
@ -34,12 +34,20 @@
|
|||
filter (make-filter :remove-useless-attributes {:dataset-format ds :max-variance 95})]
|
||||
(is (== (.getMaximumVariancePercentageAllowed filter) 95))))
|
||||
|
||||
(deftest make-filter-resample
|
||||
(deftest make-filter-resample-unsupervised
|
||||
(fact
|
||||
(let [ds (load-instances :arff "http://repository.seasr.org/Datasets/UCI/arff/iris.arff")
|
||||
options (make-filter-options :resample {:dataset-format ds :seed 10 :size-percent 50 :no-replacement true :invert true})]
|
||||
options (make-filter-options :resample-unsupervised
|
||||
{:dataset-format ds :seed 10 :size-percent 50 :no-replacement true :invert true})]
|
||||
options => (just ["-S" "10" "-Z" "50" "-V" "-no-replacement"] :in-any-order))))
|
||||
|
||||
;; Checks the option vector that make-filter-options produces for the
;; supervised Resample filter.
;; Named make-filter-options-... on purpose: another deftest called
;; make-filter-resample-supervised is defined later in this test file, and two
;; deftests sharing a name bind the same var, so only the later one would run.
(deftest make-filter-options-resample-supervised
  (fact
   (let [ds (load-instances :arff "http://repository.seasr.org/Datasets/UCI/arff/iris.arff")
         options (make-filter-options :resample-supervised
                                      {:dataset-format ds
                                       :seed 10
                                       :size-percent 50
                                       :no-replacement true
                                       :invert true
                                       :bias 1})]
     ;; Order of the flags is not part of the contract, hence :in-any-order.
     options => (just ["-S" "10" "-Z" "50" "-V" "-no-replacement" "-B" "1"] :in-any-order))))
|
||||
|
||||
(deftest make-filter-discretize-sup
|
||||
(let [ds (make-dataset :test [:a :b {:c [:g :m]}]
|
||||
[ [1 2 :g]
|
||||
|
@ -92,12 +100,19 @@
|
|||
(is (= weka.filters.unsupervised.attribute.Reorder
|
||||
(class f)))))
|
||||
|
||||
(deftest make-filter-reorder-attributes
|
||||
(deftest make-filter-resample-unsupervised
|
||||
(let [ds (load-instances :arff "http://repository.seasr.org/Datasets/UCI/arff/iris.arff")
|
||||
f (make-filter :resample {:dataset-format ds :seed 10 :size-percent 50 :replacement true})]
|
||||
f (make-filter :resample-unsupervised {:dataset-format ds :seed 10 :size-percent 50 :replacement true})]
|
||||
(is (= weka.filters.unsupervised.instance.Resample
|
||||
(class f)))))
|
||||
|
||||
;; make-filter with :resample-supervised must instantiate Weka's supervised
;; Resample filter. The class attribute is set on the dataset before the
;; filter is built.
;; The option map only carries keys that the :resample-supervised option
;; builder reads; replacement is controlled by :no-replacement, and leaving it
;; out keeps the default (sampling with replacement).
(deftest make-filter-resample-supervised
  (let [iris (load-instances :arff "http://repository.seasr.org/Datasets/UCI/arff/iris.arff")
        ds (dataset-set-class iris :class)
        f (make-filter :resample-supervised {:dataset-format ds :seed 10 :size-percent 50 :bias 1})]
    (is (= weka.filters.supervised.instance.Resample
           (class f)))))
|
||||
|
||||
(deftest make-filter-remove-attributes
|
||||
(let [ds (make-dataset :test [:a :b {:c [:g :m]}]
|
||||
[ [1 2 :g]
|
||||
|
@ -178,9 +193,15 @@
|
|||
(is (= (str ds (str (make-dataset :test [{:s nil} :n {:class [:yes :no]}]
|
||||
[["Hello" 55 :yes] ["World" -100 :no]])))))))
|
||||
|
||||
(deftest make-apply-filter-resample
|
||||
(deftest make-apply-filter-resample-unsupervised
|
||||
(let [ds (load-instances :arff "http://repository.seasr.org/Datasets/UCI/arff/iris.arff")
|
||||
ds2 (make-apply-filter :resample {:seed 10 :size-percent 50 :replacement true} ds)]
|
||||
ds2 (make-apply-filter :resample-unsupervised {:seed 10 :size-percent 50 :replacement true} ds)]
|
||||
(is (= 75 (dataset-count ds2)))))
|
||||
|
||||
;; Applies the supervised Resample filter at :size-percent 50 and checks the
;; size of the resulting dataset.
;; The expected count of 75 is presumably half of the iris dataset's
;; instances -- confirm against the ARFF file if it ever changes.
;; Only keys read by the :resample-supervised option builder are passed;
;; omitting :no-replacement keeps the default (sampling with replacement).
(deftest make-apply-filter-resample-supervised
  (let [iris (load-instances :arff "http://repository.seasr.org/Datasets/UCI/arff/iris.arff")
        ds (dataset-set-class iris :class)
        ds2 (make-apply-filter :resample-supervised {:seed 10 :size-percent 50 :bias 1} ds)]
    (is (= 75 (dataset-count ds2)))))
|
||||
|
||||
(deftest make-apply-filters-test
|
||||
|
|
Loading…
Reference in a new issue