Added :replace-missing-values filter and updated readme.
This commit is contained in:
parent
a18cbbae19
commit
ddace20320
2 changed files with 24 additions and 0 deletions
|
@ -27,6 +27,7 @@ A machine learning library for Clojure built on top of Weka and friends.
|
||||||
* String to word vector
|
* String to word vector
|
||||||
* Attribute manipulation (reorder, add, remove range, remove percentage, etc.)
|
* Attribute manipulation (reorder, add, remove range, remove percentage, etc.)
|
||||||
* Resample (supervised, unsupervised)
|
* Resample (supervised, unsupervised)
|
||||||
|
* Replace missing values with mean (numeric attributes) or mode (nominal attributes)
|
||||||
|
|
||||||
* Classifiers
|
* Classifiers
|
||||||
* k-Nearest neighbor
|
* k-Nearest neighbor
|
||||||
|
@ -550,6 +551,8 @@ user> titanicds
|
||||||
...
|
...
|
||||||
>
|
>
|
||||||
|
|
||||||
|
user> (def titanicds (make-apply-filter :replace-missing-values {} titanicds))
|
||||||
|
|
||||||
user> (def titanicds (make-apply-filter :remove-attributes
|
user> (def titanicds (make-apply-filter :remove-attributes
|
||||||
{:attributes [:PassengerId :Name :Ticket :Cabin]}
|
{:attributes [:PassengerId :Name :Ticket :Cabin]}
|
||||||
titanicds))
|
titanicds))
|
||||||
|
@ -621,6 +624,7 @@ user> titanic-test-passids
|
||||||
user> (def titanic-testds (->> titanic-testds
|
user> (def titanic-testds (->> titanic-testds
|
||||||
(make-apply-filter :remove-attributes
|
(make-apply-filter :remove-attributes
|
||||||
{:attributes [:PassengerId :Name :Ticket :Cabin]})
|
{:attributes [:PassengerId :Name :Ticket :Cabin]})
|
||||||
|
(make-apply-filter :replace-missing-values {})
|
||||||
(make-apply-filter :add-attribute
|
(make-apply-filter :add-attribute
|
||||||
{:type :nominal :name :Survived
|
{:type :nominal :name :Survived
|
||||||
:column 0 :labels ["0" "1"]})))
|
:column 0 :labels ["0" "1"]})))
|
||||||
|
|
|
@ -206,6 +206,12 @@
|
||||||
|
|
||||||
(deffilter resample-supervised)
|
(deffilter resample-supervised)
|
||||||
|
|
||||||
|
(defmethod make-filter-options :replace-missing-values
|
||||||
|
([kind m]
|
||||||
|
(check-options m {:unset-class-temporarily "-unset-class-temporarily"})))
|
||||||
|
|
||||||
|
(deffilter replace-missing-values)
|
||||||
|
|
||||||
(defmethod make-filter-options :select-append-attributes
|
(defmethod make-filter-options :select-append-attributes
|
||||||
([kind m]
|
([kind m]
|
||||||
(->> (extract-attributes m)
|
(->> (extract-attributes m)
|
||||||
|
@ -245,6 +251,7 @@
|
||||||
:resample-unsupervised weka.filters.unsupervised.instance.Resample
|
:resample-unsupervised weka.filters.unsupervised.instance.Resample
|
||||||
:resample-supervised weka.filters.supervised.instance.Resample
|
:resample-supervised weka.filters.supervised.instance.Resample
|
||||||
:select-append-attributes weka.filters.unsupervised.attribute.Copy
|
:select-append-attributes weka.filters.unsupervised.attribute.Copy
|
||||||
|
:replace-missing-values weka.filters.unsupervised.attribute.ReplaceMissingValues
|
||||||
:project-attributes weka.filters.unsupervised.attribute.Remove})
|
:project-attributes weka.filters.unsupervised.attribute.Remove})
|
||||||
|
|
||||||
|
|
||||||
|
@ -269,6 +276,7 @@
|
||||||
- :resample-unsupervised
|
- :resample-unsupervised
|
||||||
- :resample-supervised
|
- :resample-supervised
|
||||||
- :select-append-attributes
|
- :select-append-attributes
|
||||||
|
- :replace-missing-values
|
||||||
- :project-attributes
|
- :project-attributes
|
||||||
- :clj-streamable
|
- :clj-streamable
|
||||||
- :clj-batch
|
- :clj-batch
|
||||||
|
@ -377,6 +385,7 @@
|
||||||
Parameters:
|
Parameters:
|
||||||
|
|
||||||
- :attributes
|
- :attributes
|
||||||
|
|
||||||
Index of the attributes to be transformed. Sample value: [0 1 2]
|
Index of the attributes to be transformed. Sample value: [0 1 2]
|
||||||
The attributes may also be specified by name: [:some-name, \"another-name\"]
|
The attributes may also be specified by name: [:some-name, \"another-name\"]
|
||||||
- :invert
|
- :invert
|
||||||
|
@ -508,6 +517,17 @@
|
||||||
- :invert
|
- :invert
|
||||||
Invert the selection of the columns. Sample value: true
|
Invert the selection of the columns. Sample value: true
|
||||||
|
|
||||||
|
* :replace-missing-values
|
||||||
|
|
||||||
|
Replaces all missing values for nominal and numeric attributes
|
||||||
|
in a dataset with the modes and means from the training data.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
|
||||||
|
- :unset-class-temporarily
|
||||||
|
Unsets the class index temporarily before the filter is
|
||||||
|
applied to the data. Sample value: true; default: false
|
||||||
|
|
||||||
* :project-attributes
|
* :project-attributes
|
||||||
|
|
||||||
Project some columns from the provided dataset
|
Project some columns from the provided dataset
|
||||||
|
|
Loading…
Reference in a new issue