improves filters API by making filter fns that create and apply filters
This commit is contained in:
parent
3a393ebcf9
commit
b6deb0421e
3 changed files with 77 additions and 16 deletions
|
@ -96,11 +96,16 @@ API documentation can be found [here](http://antoniogarrote.github.com/clj-ml/ind
|
|||
REPL>(def ds (load-instances :arff "file:///Applications/weka-3-6-2/data/iris.arff"))
|
||||
|
||||
REPL>; Discretizing a numeric attribute using an unsupervised filter
|
||||
REPL>(def discretize (make-filter :unsupervised-discretize {:dataset-format ds :attributes [0 2]}))
|
||||
REPL>(def discretize (make-filter :unsupervised-discretize {:dataset-format ds :attributes [:sepallength :petallength]}))
|
||||
|
||||
|
||||
REPL>(def filtered-ds (filter-apply discretize ds))
|
||||
|
||||
REPL>; You can also use the filter's fn directly which will create and apply the filter:
|
||||
REPL>(def filtered-ds (unsupervised-discretize ds {:attributes [:sepallength :petallength]}))
|
||||
REPL>; The above way lends itself to the -> macro and is useful when using multiple filters.
|
||||
|
||||
|
||||
REPL>; The equivalent operation can be done with the ->> macro and make-apply-filter fn:
|
||||
REPL>(def filtered-ds (->> "file:///Applications/weka-3-6-2/data/iris.arff")
|
||||
(load-instances :arff)
|
||||
|
|
|
@ -9,21 +9,33 @@
|
|||
dataset in some way: transforming nominal attributes into binary attributes, removing
|
||||
attributes etc.
|
||||
|
||||
A sample use of the API is shown below:
|
||||
There are a number of ways to use the filtering API. The most straightforward and
|
||||
idiomatic Clojure way is to use the provided filter fns:
|
||||
|
||||
;; *ds* is the dataset where the first attribute is to be removed
|
||||
(def *filter* (make-filter :remove-attributes {:dataset-format *ds* :attributes [:name-of-attr]}))
|
||||
;; ds is the dataset
|
||||
(def ds (make-dataset :test [:a :b {:c [:g :m]}]
|
||||
[ [1 2 :g]
|
||||
[2 3 :m]
|
||||
[4 5 :g]]))
|
||||
(def filtered-ds
|
||||
(-> ds
|
||||
(add-attribute {:type :nominal, :column 1, :name \"pet\", :labels [\"dog\" \"cat\"]})
|
||||
(remove-attributes {:attributes [:a :c]})))
|
||||
|
||||
|
||||
The above functions rely on lower-level fns that create and apply the filters, which you may
|
||||
also use if you need more control over the actual filter objects:
|
||||
|
||||
(def filter (make-filter :remove-attributes {:dataset-format ds :attributes [:a :c]}))
|
||||
|
||||
|
||||
;; We apply the filter to the original data set and obtain the new one
|
||||
(def *filtered-ds* (filter-apply *filter* *ds*))
|
||||
(def filtered-ds (filter-apply filter ds))
|
||||
|
||||
|
||||
The previous sample of code could be rewritten with the make-apply-filter function:
|
||||
|
||||
;; There is no necessity of passing the :dataset-format option, *ds* format is used
|
||||
;; automatically
|
||||
(def *filtered-ds* (make-apply-filter :remove-attributes {:attributes [0]} *ds*))"
|
||||
(def filtered-ds (make-apply-filter :remove-attributes {:attributes [:a :c]} ds))"
|
||||
(:use [clj-ml data utils]
|
||||
[clojure.contrib [def :only [defvar defvar-]]])
|
||||
(:require [clojure.contrib [string :as str]])
|
||||
|
@ -45,6 +57,20 @@
|
|||
(for [attr (:attributes m)]
|
||||
(inc (index-attr (:dataset-format m) attr))))])
|
||||
|
||||
(declare make-apply-filter)
|
||||
;TODO: consider passing in the make-filter-options body here as well in addition to the docstring.
|
||||
(defmacro deffilter
  "Expands to a `defn` named `filter-name` that creates and applies, in one
  call, the filter identified by the keyword of the same name. The work is
  delegated to `make-apply-filter`.

  The generated fn has two arities:
    [ds]         - applies the filter to `ds` with an empty options map
    [ds options] - applies the filter to `ds` with the supplied options map

  The dataset comes first so the generated fns compose with the `->` macro."
  [filter-name]
  (let [filter-kw (keyword filter-name)]
    `(defn ~filter-name
       ([dataset#]
          (make-apply-filter ~filter-kw {} dataset#))
       ([dataset# options#]
          (make-apply-filter ~filter-kw options# dataset#)))))
|
||||
|
||||
|
||||
(defmethod make-filter-options :supervised-discretize
|
||||
([kind m]
|
||||
(->> (extract-attributes m)
|
||||
|
@ -53,6 +79,8 @@
|
|||
:better-encoding "-E"
|
||||
:kononenko "-K"}))))
|
||||
|
||||
(deffilter supervised-discretize)
|
||||
|
||||
(defmethod make-filter-options :unsupervised-discretize
|
||||
([kind m]
|
||||
(->> (extract-attributes m)
|
||||
|
@ -64,10 +92,15 @@
|
|||
(check-option-values m {:number-bins "-B"
|
||||
:weight-bins "-M"}))))
|
||||
|
||||
(deffilter unsupervised-discretize)
|
||||
|
||||
;; Options for the :supervised-nominal-to-binary filter. The options map is
;; handed to check-options together with the flag table
;; :also-binary -> "-N", :for-each-nominal -> "-A".
(defmethod make-filter-options :supervised-nominal-to-binary
  [kind opts]
  (let [flag-table {:also-binary "-N"
                    :for-each-nominal "-A"}]
    (check-options opts flag-table)))
|
||||
|
||||
|
||||
;; Create-and-apply fn for the :supervised-nominal-to-binary filter whose
;; options are defined just above. (Previously this line re-defined
;; unsupervised-discretize, leaving this filter without its fn.)
(deffilter supervised-nominal-to-binary)
|
||||
|
||||
(defmethod make-filter-options :unsupervised-nominal-to-binary
|
||||
([kind m]
|
||||
(->> (extract-attributes m)
|
||||
|
@ -75,14 +108,14 @@
|
|||
:also-binary "-N"
|
||||
:for-each-nominal "-A"}))))
|
||||
|
||||
(deffilter unsupervised-nominal-to-binary)
|
||||
|
||||
(defmethod make-filter-options :numeric-to-nominal
|
||||
([kind m]
|
||||
(->> (extract-attributes m) (check-options m {:invert "-V"}))))
|
||||
|
||||
(defmethod make-filter-options :add-attribute
|
||||
([kind m]
|
||||
(->> (extract-attributes m)
|
||||
(check-options m {:invert "-V"}))))
|
||||
(deffilter numeric-to-nominal)
|
||||
|
||||
|
||||
(defvar- attribute-types {:numeric "NUM" :nominal "NOM" :string "STR" :date "DAT"}
|
||||
"Mapping of Weka's attribute types from clj-ml keywords to the -T flag's representation.")
|
||||
|
@ -99,20 +132,28 @@
|
|||
:column "-C"
|
||||
:date-format "-F"}))))
|
||||
|
||||
(deffilter add-attribute)
|
||||
|
||||
;; Options for the :remove-attributes filter: the result of
;; extract-attributes is passed as the last argument to check-options,
;; which is given the :invert -> "-V" flag table.
(defmethod make-filter-options :remove-attributes
  [kind opts]
  (let [attribute-args (extract-attributes opts)]
    (check-options opts {:invert "-V"} attribute-args)))
|
||||
|
||||
(deffilter remove-attributes)
|
||||
|
||||
;; Options for the :remove-useless-attributes filter: the options map is
;; handed to check-option-values with the :max-variance -> "-M" table.
(defmethod make-filter-options :remove-useless-attributes
  [kind opts]
  (let [value-flags {:max-variance "-M"}]
    (check-option-values opts value-flags)))
|
||||
|
||||
(deffilter remove-useless-attributes)
|
||||
|
||||
;; Options for the :select-append-attributes filter: the result of
;; extract-attributes is passed as the last argument to check-options,
;; which is given the :invert -> "-V" flag table.
(defmethod make-filter-options :select-append-attributes
  [kind opts]
  (let [attribute-args (extract-attributes opts)]
    (check-options opts {:invert "-V"} attribute-args)))
|
||||
|
||||
(deffilter select-append-attributes)
|
||||
|
||||
(defmethod make-filter-options :project-attributes
|
||||
([kind options]
|
||||
(let [opts (if (nil? (:invert options))
|
||||
|
@ -120,6 +161,10 @@
|
|||
(dissoc options :invert))]
|
||||
(make-filter-options :remove-attributes opts))))
|
||||
|
||||
(deffilter project-attributes)
|
||||
|
||||
(deffilter clj-streamable)
|
||||
(deffilter clj-batch)
|
||||
|
||||
;; Creation of filters
|
||||
|
||||
|
|
|
@ -130,7 +130,7 @@
|
|||
[ [1 2 :g]
|
||||
[2 3 :m]
|
||||
[4 5 :g]])
|
||||
res (make-apply-filter :add-attribute {:type :nominal, :column 1, :name "pet", :labels ["dog" "cat"]} ds)]
|
||||
res (add-attribute ds {:type :nominal, :column 1, :name "pet", :labels ["dog" "cat"]})]
|
||||
(is (= (dataset-format res)
|
||||
[:a {:pet '(:cat :dog)} :b {:c '(:m :g)}]))))
|
||||
|
||||
|
@ -145,6 +145,17 @@
|
|||
(is (= (dataset-format res)
|
||||
[{:pet '(:cat :dog)} :b]))))
|
||||
|
||||
;; Chains two generated filter fns with -> and checks the format of the
;; resulting dataset.
(deftest using-regular-filter-fns-with-threading
  (let [rows     [[1 2 :g]
                  [2 3 :m]
                  [4 5 :g]]
        ds       (make-dataset :test [:a :b {:c [:g :m]}] rows)
        pet-attr {:type :nominal, :column 1, :name "pet", :labels ["dog" "cat"]}
        dropped  {:attributes [:a :c]}
        res      (-> ds
                     (add-attribute pet-attr)
                     (remove-attributes dropped))
        expected [{:pet '(:cat :dog)} :b]]
    (is (= (dataset-format res) expected))))
|
||||
|
||||
(deftest make-apply-filter-clj-streamable
|
||||
(let [ds (make-dataset :test [:a :b {:c [:g :m]}]
|
||||
[ [1 2 :g]
|
||||
|
@ -189,9 +200,9 @@
|
|||
(#(weka.core.Instance. 1 (into-array Double/TYPE %)))
|
||||
add-instance))
|
||||
result))
|
||||
res (make-apply-filter :clj-batch
|
||||
{:process add-max-diff-values
|
||||
:determine-dataset-format add-max-diff-attr} ds)]
|
||||
res (clj-batch ds
|
||||
{:process add-max-diff-values
|
||||
:determine-dataset-format add-max-diff-attr})]
|
||||
(is (= [{:a 1 :max-diff 3}
|
||||
{:a 2 :max-diff 2}
|
||||
{:a 4 :max-diff 0}]
|
||||
|
|
Loading…
Reference in a new issue