Fixed indentation.

This commit is contained in:
Joshua Eckroth 2013-07-31 06:50:59 -04:00
parent 2945f082bb
commit db70ee980f
16 changed files with 469 additions and 469 deletions

View file

@ -16,40 +16,40 @@
"Creates the right parameters for a weka object. Returns a clojure vector." "Creates the right parameters for a weka object. Returns a clojure vector."
(fn [kind map] kind)) (fn [kind map] kind))
;TODO: consider passing in the make-filter-options body here as well in addition to the docstring. ;TODO: consider passing in the make-filter-options body here as well in addition to the docstring.
#_(defmacro defsearch #_(defmacro defsearch
"Defines the filter's fn that creates a fn to make and apply the filter." "Defines the filter's fn that creates a fn to make and apply the filter."
[filter-name] [filter-name]
(let [search-keyword (keyword filter-name)] (let [search-keyword (keyword filter-name)]
`(do `(do
(defn ~search-name (defn ~search-name
([ds#] ([ds#]
(make-apply-filter ~filter-keyword {} ds#)) (make-apply-filter ~filter-keyword {} ds#))
([ds# attributes#] ([ds# attributes#]
(make-apply-filter ~filter-keyword attributes# ds#)))))) (make-apply-filter ~filter-keyword attributes# ds#))))))
(defmethod make-obj-options :greedy (defmethod make-obj-options :greedy
;; -C ;; -C
;; Use conservative forward search ;; Use conservative forward search
;; ;;
;; -B ;; -B
;; Use a backward search instead of a ;; Use a backward search instead of a
;; forward one. ;; forward one.
;; ;;
;; -P <start set> ;; -P <start set>
;; Specify a starting set of attributes. ;; Specify a starting set of attributes.
;; Eg. 1,3,5-7. ;; Eg. 1,3,5-7.
;; ;;
;; -R ;; -R
;; Produce a ranked list of attributes. ;; Produce a ranked list of attributes.
;; ;;
;; -T <threshold> ;; -T <threshold>
;; Specify a threshold by which attributes ;; Specify a threshold by which attributes
;; may be discarded from the ranking. ;; may be discarded from the ranking.
;; Use in conjunction with -R ;; Use in conjunction with -R
;; ;;
;; -N <num to select> ;; -N <num to select>
;; Specify number of attributes to select ;; Specify number of attributes to select
([kind m] ([kind m]
(let [weka-opts (->> (extract-attributes "-P" :starting-attributes) (let [weka-opts (->> (extract-attributes "-P" :starting-attributes)
@ -58,11 +58,11 @@
(check-option-values m (check-option-values m
{:threshold "-T" {:threshold "-T"
:num-attributes "-N"}))] :num-attributes "-N"}))]
(case (m :direction) (case (m :direction)
:forward weka-opts :forward weka-opts
:conservative-forward (conj weka-opts "-C") :conservative-forward (conj weka-opts "-C")
:backward (conj weka-opts "-B") :backward (conj weka-opts "-B")
weka-opts)))) weka-opts))))
;; Sketch of what would be nice to have... ;; Sketch of what would be nice to have...
@ -76,45 +76,45 @@
;; -C :direction ...) ;; -C :direction ...)
(defmethod make-obj-options :linear-forward (defmethod make-obj-options :linear-forward
;; LinearForwardSelection: ;; LinearForwardSelection:
;; ;;
;; Extension of BestFirst. Takes a restricted number of k attributes into account. Fixed-set selects a fixed number k of attributes, whereas k is increased in each step when fixed-width is selected. The search uses either the initial ordering to select the top k attributes, or performs a ranking (with the same evaluator the search uses later on). The search direction can be forward, or floating forward selection (with optional backward search steps). ;; Extension of BestFirst. Takes a restricted number of k attributes into account. Fixed-set selects a fixed number k of attributes, whereas k is increased in each step when fixed-width is selected. The search uses either the initial ordering to select the top k attributes, or performs a ranking (with the same evaluator the search uses later on). The search direction can be forward, or floating forward selection (with optional backward search steps).
;; ;;
;; For more information see: ;; For more information see:
;; ;;
;; Martin Guetlein (2006). Large Scale Attribute Selection Using Wrappers. Freiburg, Germany. ;; Martin Guetlein (2006). Large Scale Attribute Selection Using Wrappers. Freiburg, Germany.
;; ;;
;; Valid options are: ;; Valid options are:
;; ;;
;; -P <start set> ;; -P <start set>
;; Specify a starting set of attributes. ;; Specify a starting set of attributes.
;; Eg. 1,3,5-7. ;; Eg. 1,3,5-7.
;; ;;
;; -D <0 = forward selection | 1 = floating forward selection> ;; -D <0 = forward selection | 1 = floating forward selection>
;; Forward selection method. (default = 0). ;; Forward selection method. (default = 0).
;; ;;
;; -N <num> ;; -N <num>
;; Number of non-improving nodes to ;; Number of non-improving nodes to
;; consider before terminating search. ;; consider before terminating search.
;; ;;
;; -I ;; -I
;; Perform initial ranking to select the ;; Perform initial ranking to select the
;; top-ranked attributes. ;; top-ranked attributes.
;; ;;
;; -K <num> ;; -K <num>
;; Number of top-ranked attributes that are ;; Number of top-ranked attributes that are
;; taken into account by the search. ;; taken into account by the search.
;; ;;
;; -T <0 = fixed-set | 1 = fixed-width> ;; -T <0 = fixed-set | 1 = fixed-width>
;; Type of Linear Forward Selection (default = 0). ;; Type of Linear Forward Selection (default = 0).
;; ;;
;; -S <num> ;; -S <num>
;; Size of lookup cache for evaluated subsets. ;; Size of lookup cache for evaluated subsets.
;; Expressed as a multiple of the number of ;; Expressed as a multiple of the number of
;; attributes in the data set. (default = 1) ;; attributes in the data set. (default = 1)
;; ;;
;; -Z ;; -Z
;; verbose on/off ;; verbose on/off
([kind m] ([kind m]
(let [weka-opts (->> (let [weka-opts (->>
(extract-attributes "-P" :starting-attributes) (extract-attributes "-P" :starting-attributes)
@ -123,11 +123,11 @@
{:num-non-inproving "-N" {:num-non-inproving "-N"
:num-attrs-in-search "-K" :num-attrs-in-search "-K"
:subset-eval-cache-size "-S"}))] :subset-eval-cache-size "-S"}))]
(conj weka-opts "-D" (case (m :direction) (conj weka-opts "-D" (case (m :direction)
:backward "0" :backward "0"
:forward "1" :forward "1"
:bi-directional "2" :bi-directional "2"
"1")) "1"))
))) )))
(defmethod make-obj-options :best-first (defmethod make-obj-options :best-first
@ -155,79 +155,79 @@
;; Size of lookup cache for evaluated subsets. ;; Size of lookup cache for evaluated subsets.
;; Expressed as a multiple of the number of ;; Expressed as a multiple of the number of
;; attributes in the data set. (default = 1) ;; attributes in the data set. (default = 1)
([kind m] ([kind m]
(let [weka-opts (->> (extract-attributes "-P" :starting-attributes) (let [weka-opts (->> (extract-attributes "-P" :starting-attributes)
(check-option-values m (check-option-values m
{:num-non-inproving "-N" {:num-non-inproving "-N"
:subset-eval-cache-size "-S"}))] :subset-eval-cache-size "-S"}))]
(conj weka-opts "-D" (case (m :direction) (conj weka-opts "-D" (case (m :direction)
:backward "0" :backward "0"
:forward "1" :forward "1"
:bi-directional "2" :bi-directional "2"
"1"))))) "1")))))
(defmethod make-obj-options :genetic (defmethod make-obj-options :genetic
;; GeneticSearch: ;; GeneticSearch:
;; ;;
;; Performs a search using the simple genetic algorithm described in Goldberg (1989). ;; Performs a search using the simple genetic algorithm described in Goldberg (1989).
;; ;;
;; For more information see: ;; For more information see:
;; ;;
;; David E. Goldberg (1989). Genetic algorithms in search, optimization and machine learning. Addison-Wesley. ;; David E. Goldberg (1989). Genetic algorithms in search, optimization and machine learning. Addison-Wesley.
;; ;;
;; BibTeX: ;; BibTeX:
;; ;;
;; @book{Goldberg1989, ;; @book{Goldberg1989,
;; author = {David E. Goldberg}, ;; author = {David E. Goldberg},
;; publisher = {Addison-Wesley}, ;; publisher = {Addison-Wesley},
;; title = {Genetic algorithms in search, optimization and machine learning}, ;; title = {Genetic algorithms in search, optimization and machine learning},
;; year = {1989}, ;; year = {1989},
;; ISBN = {0201157675} ;; ISBN = {0201157675}
;; } ;; }
;; ;;
;; ;;
;; Valid options are: ;; Valid options are:
;; ;;
;; -P <start set> ;; -P <start set>
;; Specify a starting set of attributes. ;; Specify a starting set of attributes.
;; Eg. 1,3,5-7. If supplied, the starting set becomes ;; Eg. 1,3,5-7. If supplied, the starting set becomes
;; one member of the initial random ;; one member of the initial random
;; population. ;; population.
;; ;;
;; -Z <population size> ;; -Z <population size>
;; Set the size of the population (even number). ;; Set the size of the population (even number).
;; (default = 20). ;; (default = 20).
;; ;;
;; -G <number of generations> ;; -G <number of generations>
;; Set the number of generations. ;; Set the number of generations.
;; (default = 20) ;; (default = 20)
;; ;;
;; -C <probability of crossover> ;; -C <probability of crossover>
;; Set the probability of crossover. ;; Set the probability of crossover.
;; (default = 0.6) ;; (default = 0.6)
;; ;;
;; -M <probability of mutation> ;; -M <probability of mutation>
;; Set the probability of mutation. ;; Set the probability of mutation.
;; (default = 0.033) ;; (default = 0.033)
;; ;;
;; -R <report frequency> ;; -R <report frequency>
;; Set frequency of generation reports. ;; Set frequency of generation reports.
;; e.g, setting the value to 5 will ;; e.g, setting the value to 5 will
;; report every 5th generation ;; report every 5th generation
;; (default = number of generations) ;; (default = number of generations)
;; ;;
;; -S <seed> ;; -S <seed>
;; Set the random number seed. ;; Set the random number seed.
;; (default = 1) ;; (default = 1)
([kind m] ([kind m]
(->> (extract-attributes "-P" :starting-attributes) (->> (extract-attributes "-P" :starting-attributes)
(check-option-values m (check-option-values m
{:population-size "-Z" {:population-size "-Z"
:num-generations "-G" :num-generations "-G"
:crossover-prob "-C" :crossover-prob "-C"
:mutation-prob "-M" :mutation-prob "-M"
:report-freq "-R" :report-freq "-R"
:random-seed "-S"})))) :random-seed "-S"}))))
(defmethod make-obj-options :cfs-subset-eval (defmethod make-obj-options :cfs-subset-eval
;; CfsSubsetEval : ;; CfsSubsetEval :
@ -258,37 +258,37 @@
;; ;;
;; -L ;; -L
;; Don't include locally predictive attributes. ;; Don't include locally predictive attributes.
([kind m] ([kind m]
(check-options m (check-options m
{:treat-missing-vals-separate "-M" {:treat-missing-vals-separate "-M"
:ignore-locally-predictive-attrs "-L"}))) :ignore-locally-predictive-attrs "-L"})))
(defn attribute-eval-options [m] (defn attribute-eval-options [m]
;; Valid options are: ;; Valid options are:
;; ;;
;; -M ;; -M
;; treat missing values as a separate value. ;; treat missing values as a separate value.
;; ;;
;; -B ;; -B
;; just binarize numeric attributes instead ;; just binarize numeric attributes instead
;; of properly discretizing them. ;; of properly discretizing them.
(check-options m (check-options m
{:treat-missing-vals-separate "-M" {:treat-missing-vals-separate "-M"
:binarize-numeric-attrs "-B"})) :binarize-numeric-attrs "-B"}))
(defmethod make-obj-options :info-gain (defmethod make-obj-options :info-gain
;; InfoGainAttributeEval : ;; InfoGainAttributeEval :
;; ;;
;; Evaluates the worth of an attribute by measuring the information gain with respect to the class. ;; Evaluates the worth of an attribute by measuring the information gain with respect to the class.
;; ;;
;; InfoGain(Class,Attribute) = H(Class) - H(Class | Attribute). ;; InfoGain(Class,Attribute) = H(Class) - H(Class | Attribute).
([kind m] ([kind m]
(attribute-eval-options m))) (attribute-eval-options m)))
(defmethod make-obj-options :chi-squared (defmethod make-obj-options :chi-squared
;; ChiSquaredAttributeEval : ;; ChiSquaredAttributeEval :
;; ;;
;; Evaluates the worth of an attribute by computing the value of the chi-squared statistic with respect to the class. ;; Evaluates the worth of an attribute by computing the value of the chi-squared statistic with respect to the class.
([kind m] ([kind m]
(attribute-eval-options m))) (attribute-eval-options m)))
@ -298,8 +298,8 @@
;; ;;
;; GainR(Class, Attribute) = (H(Class) - H(Class | Attribute)) / H(Attribute). ;; GainR(Class, Attribute) = (H(Class) - H(Class | Attribute)) / H(Attribute).
([kind m] ([kind m]
(check-options m (check-options m
{:treat-missing-vals-separate "-M"}))) {:treat-missing-vals-separate "-M"})))
(defmethod make-obj-options :symmetrical-uncert (defmethod make-obj-options :symmetrical-uncert
@ -310,67 +310,67 @@
;; SymmU(Class, Attribute) = 2 * (H(Class) - H(Class | Attribute)) / H(Class) + H(Attribute). ;; SymmU(Class, Attribute) = 2 * (H(Class) - H(Class | Attribute)) / H(Class) + H(Attribute).
;; ;;
([kind m] ([kind m]
(check-options m (check-options m
{:treat-missing-vals-separate "-M"}))) {:treat-missing-vals-separate "-M"})))
(defmethod make-obj-options :relief (defmethod make-obj-options :relief
;; ReliefFAttributeEval : ;; ReliefFAttributeEval :
;; ;;
;; Evaluates the worth of an attribute by repeatedly sampling an instance and considering the value of the given attribute for the nearest instance of the same and different class. Can operate on both discrete and continuous class data. ;; Evaluates the worth of an attribute by repeatedly sampling an instance and considering the value of the given attribute for the nearest instance of the same and different class. Can operate on both discrete and continuous class data.
;; -M <num instances> ;; -M <num instances>
;; Specify the number of instances to ;; Specify the number of instances to
;; sample when estimating attributes. ;; sample when estimating attributes.
;; If not specified, then all instances ;; If not specified, then all instances
;; will be used. ;; will be used.
;; ;;
;; -D <seed> ;; -D <seed>
;; Seed for randomly sampling instances. ;; Seed for randomly sampling instances.
;; (Default = 1) ;; (Default = 1)
;; ;;
;; -K <number of neighbours> ;; -K <number of neighbours>
;; Number of nearest neighbours (k) used ;; Number of nearest neighbours (k) used
;; to estimate attribute relevances ;; to estimate attribute relevances
;; (Default = 10). ;; (Default = 10).
;; ;;
;; -W ;; -W
;; Weight nearest neighbours by distance ;; Weight nearest neighbours by distance
;; ;;
;; -A <num> ;; -A <num>
;; Specify sigma value (used in an exp ;; Specify sigma value (used in an exp
;; function to control how quickly ;; function to control how quickly
;; weights for more distant instances ;; weights for more distant instances
;; decrease. Use in conjunction with -W. ;; decrease. Use in conjunction with -W.
;; Sensible value=1/5 to 1/10 of the ;; Sensible value=1/5 to 1/10 of the
;; number of nearest neighbours. ;; number of nearest neighbours.
;; (Default = 2) ;; (Default = 2)
([kind m] ([kind m]
(->> (extract-attributes "-P" :starting-attributes) (->> (extract-attributes "-P" :starting-attributes)
(check-options {:weight "-W"}) (check-options {:weight "-W"})
(check-option-values m (check-option-values m
{:num-instances "-M" {:num-instances "-M"
:random-seed "-D" :random-seed "-D"
:number-of-neighbors "-K" :number-of-neighbors "-K"
:weight-sigma "-A"})))) :weight-sigma "-A"}))))
(defmethod make-obj-options :ranker (defmethod make-obj-options :ranker
;; Ranker : ;; Ranker :
;; ;;
;; Ranks attributes by their individual evaluations. Use in conjunction with attribute evaluators (ReliefF, GainRatio, Entropy etc). ;; Ranks attributes by their individual evaluations. Use in conjunction with attribute evaluators (ReliefF, GainRatio, Entropy etc).
;; ;;
;; Valid options are: ;; Valid options are:
;; ;;
;; -P <start set> ;; -P <start set>
;; Specify a starting set of attributes. ;; Specify a starting set of attributes.
;; Eg. 1,3,5-7. ;; Eg. 1,3,5-7.
;; Any starting attributes specified are ;; Any starting attributes specified are
;; ignored during the ranking. ;; ignored during the ranking.
;; ;;
;; -T <threshold> ;; -T <threshold>
;; Specify a threshold by which attributes ;; Specify a threshold by which attributes
;; may be discarded from the ranking. ;; may be discarded from the ranking.
;; ;;
;; -N <num to select> ;; -N <num to select>
;; Specify number of attributes to select ;; Specify number of attributes to select
([kind m] ([kind m]
(->> (extract-attributes "-P" :starting-attributes) (->> (extract-attributes "-P" :starting-attributes)
(check-option-values m (check-option-values m
@ -380,26 +380,26 @@
(defmethod make-obj-options :one-R (defmethod make-obj-options :one-R
;; OneRAttributeEval : ;; OneRAttributeEval :
;; ;;
;; Evaluates the worth of an attribute by using the OneR classifier. ;; Evaluates the worth of an attribute by using the OneR classifier.
;; ;;
;; Valid options are: ;; Valid options are:
;; ;;
;; -S <seed> ;; -S <seed>
;; Random number seed for cross validation ;; Random number seed for cross validation
;; (default = 1) ;; (default = 1)
;; ;;
;; -F <folds> ;; -F <folds>
;; Number of folds for cross validation ;; Number of folds for cross validation
;; (default = 10) ;; (default = 10)
;; ;;
;; -D ;; -D
;; Use training data for evaluation rather than cross validation ;; Use training data for evaluation rather than cross validation
;; ;;
;; -B <minimum bucket size> ;; -B <minimum bucket size>
;; Minimum number of objects in a bucket ;; Minimum number of objects in a bucket
;; (passed on to OneR, default = 6) ;; (passed on to OneR, default = 6)
([kind m] ([kind m]
(->> (check-options m {:use-training-data-for-eval "-D"}) (->> (check-options m {:use-training-data-for-eval "-D"})
(check-option-values m (check-option-values m

View file

@ -234,44 +234,44 @@
(defmethod make-classifier-options [:decision-tree :random-forest] (defmethod make-classifier-options [:decision-tree :random-forest]
([kind algorithm m] ([kind algorithm m]
(->> (->>
(check-options m {:debug "-D"}) (check-options m {:debug "-D"})
(check-option-values m (check-option-values m
{:num-trees-in-forest "-I" {:num-trees-in-forest "-I"
:num-features-to-consider "-K" :num-features-to-consider "-K"
:random-seed "-S" :random-seed "-S"
:depth "-depth"})))) :depth "-depth"}))))
(defmethod make-classifier-options [:decision-tree :fast-random-forest] (defmethod make-classifier-options [:decision-tree :fast-random-forest]
([kind algorithm m] ([kind algorithm m]
(->> (->>
(check-options m {:debug "-D"}) (check-options m {:debug "-D"})
(check-option-values m (check-option-values m
{:num-trees-in-forest "-I" {:num-trees-in-forest "-I"
:num-features-to-consider "-K" :num-features-to-consider "-K"
:random-seed "-S" :random-seed "-S"
:depth "-depth"})))) :depth "-depth"}))))
(defmethod make-classifier-options [:decision-tree :rotation-forest] (defmethod make-classifier-options [:decision-tree :rotation-forest]
([kind algorithm m] ([kind algorithm m]
(->> (->>
(check-options m {:debug "-D"}) (check-options m {:debug "-D"})
(check-option-values m (check-option-values m
{:num-iterations "-I" {:num-iterations "-I"
:use-number-of-groups "-N" :use-number-of-groups "-N"
:min-attribute-group-size "-G" :min-attribute-group-size "-G"
:max-attribute-group-size "-H" :max-attribute-group-size "-H"
:percentage-of-instances-to-remove "-P" :percentage-of-instances-to-remove "-P"
:filter "-F" :filter "-F"
:random-seed "-S" :random-seed "-S"
:weak-learning-class "-W"})))) :weak-learning-class "-W"}))))
(defmethod make-classifier-options [:decision-tree :m5p] (defmethod make-classifier-options [:decision-tree :m5p]
([kind algorithm m] ([kind algorithm m]
(->> (->>
(check-options m {:unsmoothed-predictions "-U" (check-options m {:unsmoothed-predictions "-U"
:regression "-R" :regression "-R"
:unpruned "-N"}) :unpruned "-N"})
(check-option-values m {:minimum-instances "-M"})))) (check-option-values m {:minimum-instances "-M"}))))
@ -281,11 +281,11 @@
(defn make-classifier-with (defn make-classifier-with
#^{:skip-wiki true} #^{:skip-wiki true}
[kind algorithm ^Class classifier-class options] [kind algorithm ^Class classifier-class options]
(let [options-read (if (empty? options) {} (first options)) (let [options-read (if (empty? options) {} (first options))
^Classifier classifier (.newInstance classifier-class) ^Classifier classifier (.newInstance classifier-class)
opts (into-array String (make-classifier-options kind algorithm options-read))] opts (into-array String (make-classifier-options kind algorithm options-read))]
(.setOptions classifier opts) (.setOptions classifier opts)
classifier)) classifier))
(defmulti make-classifier (defmulti make-classifier
"Creates a new classifier for the given kind algorithm and options. "Creates a new classifier for the given kind algorithm and options.
@ -486,14 +486,14 @@
opts (into-array String (make-classifier-options :support-vector-machine :smo options-read))] opts (into-array String (make-classifier-options :support-vector-machine :smo options-read))]
(.setOptions classifier opts) (.setOptions classifier opts)
(when (not (empty? (get options-read :kernel-function))) (when (not (empty? (get options-read :kernel-function)))
;; We have to setup a different kernel function ;; We have to setup a different kernel function
(let [kernel (get options-read :kernel-function) (let [kernel (get options-read :kernel-function)
real-kernel (if (map? kernel) real-kernel (if (map? kernel)
(make-kernel-function (first (keys kernel)) (make-kernel-function (first (keys kernel))
(first (vals kernel))) (first (vals kernel)))
kernel)] kernel)]
(.setKernel classifier real-kernel))) (.setKernel classifier real-kernel)))
classifier))) classifier)))
(defmethod make-classifier [:support-vector-machine :spegasos] (defmethod make-classifier [:support-vector-machine :spegasos]
([kind algorithm & options] ([kind algorithm & options]

View file

@ -28,14 +28,14 @@
(defmethod make-clusterer-options :k-means (defmethod make-clusterer-options :k-means
([kind m] ([kind m]
(let [cols-val (check-options m {:display-standard-deviation "-V" (let [cols-val (check-options m {:display-standard-deviation "-V"
:replace-missing-values "-M" :replace-missing-values "-M"
:preserve-instances-order "-O"} :preserve-instances-order "-O"}
[""]) [""])
cols-val-a (check-option-values m {:number-clusters "-N" cols-val-a (check-option-values m {:number-clusters "-N"
:random-seed "-S" :random-seed "-S"
:number-iterations "-I"} :number-iterations "-I"}
cols-val)] cols-val)]
(into-array cols-val-a)))) (into-array cols-val-a))))
(defmethod make-clusterer-options :cobweb (defmethod make-clusterer-options :cobweb
@ -44,7 +44,7 @@
:cutoff "-C" :cutoff "-C"
:random-seed "-S"} :random-seed "-S"}
[""])] [""])]
(into-array cols-val-a)))) (into-array cols-val-a))))
(defmethod make-clusterer-options :expectation-maximization (defmethod make-clusterer-options :expectation-maximization
@ -54,7 +54,7 @@
:minimum-standard-deviation "-M" :minimum-standard-deviation "-M"
:random-seed "-S"} :random-seed "-S"}
[""])] [""])]
(into-array cols-val-a)))) (into-array cols-val-a))))
;; Building clusterers ;; Building clusterers
@ -244,7 +244,7 @@
training-data training-data
folds folds
(new Random (.getTime (new Date))))] (new Random (.getTime (new Date))))]
{:log-likelihood log-likelihood}))) {:log-likelihood log-likelihood})))
;; Clustering collections ;; Clustering collections

View file

@ -136,7 +136,7 @@
(make-instance dataset 1 vector)) (make-instance dataset 1 vector))
([dataset weight vector] ([dataset weight vector]
(let [^Instance inst (new Instance (let [^Instance inst (new Instance
(count vector))] (count vector))]
(do (.setDataset inst dataset) (do (.setDataset inst dataset)
(loop [vs vector (loop [vs vector
c 0] c 0]
@ -208,17 +208,17 @@
(let [index-class-attribute (if (keyword? class-attribute) (let [index-class-attribute (if (keyword? class-attribute)
(loop [c 0 (loop [c 0
acum attributes] acum attributes]
(if (= (let [at (first acum)] (if (= (let [at (first acum)]
(if (map? at) (if (map? at)
(first (keys at)) (first (keys at))
at)) at))
class-attribute) class-attribute)
c c
(if (= c (count attributes)) (if (= c (count attributes))
(throw (new Exception "provided class attribute not found")) (throw (new Exception "provided class attribute not found"))
(recur (+ c 1) (recur (+ c 1)
(rest acum))))) (rest acum)))))
class-attribute)] class-attribute)]
(.setClassIndex ds index-class-attribute))) (.setClassIndex ds index-class-attribute)))
ds))) ds)))
@ -243,7 +243,7 @@
"Returns map of the labels (possible values) for the given nominal attribute as the keys "Returns map of the labels (possible values) for the given nominal attribute as the keys
with the values being the attributes index. " with the values being the attributes index. "
[^Attribute attr] [^Attribute attr]
(let [values (enumeration-seq (.enumerateValues attr))] (let [values (enumeration-seq (.enumerateValues attr))]
(if (empty? values) (if (empty? values)
:not-nominal :not-nominal
(reduce (fn [m ^String val] (reduce (fn [m ^String val]
@ -274,14 +274,14 @@
(defn dataset-format (defn dataset-format
"Returns the definition of the attributes of this dataset" "Returns the definition of the attributes of this dataset"
[dataset] [dataset]
(reduce (reduce
(fn [so-far ^Attribute attr] (fn [so-far ^Attribute attr]
(conj so-far (conj so-far
(if (.isNominal attr) (if (.isNominal attr)
{(keyword-name attr) (map keyword (enumeration-seq (.enumerateValues attr)))} {(keyword-name attr) (map keyword (enumeration-seq (.enumerateValues attr)))}
(keyword-name attr)))) (keyword-name attr))))
[] []
(attributes dataset))) (attributes dataset)))
(defn headers-only (defn headers-only
"Returns a new weka dataset (Instances) with the same headers as the given one" "Returns a new weka dataset (Instances) with the same headers as the given one"
@ -328,7 +328,7 @@ If the class is nominal then the string value (not keyword) is returned."
(defn instance-get-class (defn instance-get-class
"Get the index of the class attribute for this instance" "Get the index of the class attribute for this instance"
[^Instance instance] [^Instance instance]
(.classValue instance)) (.classValue instance))
(defn instance-value-at (defn instance-value-at

View file

@ -51,7 +51,7 @@
(fn [kind map] kind)) (fn [kind map] kind))
(declare make-apply-filter) (declare make-apply-filter)
;TODO: consider passing in the make-filter-options body here as well in addition to the docstring. ;;TODO: consider passing in the make-filter-options body here as well in addition to the docstring.
(defmacro deffilter (defmacro deffilter
"Defines the filter's fn that creates a fn to make and apply the filter." "Defines the filter's fn that creates a fn to make and apply the filter."
[filter-name] [filter-name]
@ -153,10 +153,10 @@
(update-in-when [:labels] (partial str/join ",")) (update-in-when [:labels] (partial str/join ","))
(update-in-when [:column] #(if (number? %) (inc %) %)) (update-in-when [:column] #(if (number? %) (inc %) %))
(check-option-values {:type "-T" (check-option-values {:type "-T"
:labels "-L" :labels "-L"
:name "-N" :name "-N"
:column "-C" :column "-C"
:date-format "-F"})))) :date-format "-F"}))))
(deffilter add-attribute) (deffilter add-attribute)
@ -571,9 +571,9 @@
(let [^OptionHandler f (.newInstance class)] (let [^OptionHandler f (.newInstance class)]
(.setOptions f (into-array String (make-filter-options kind options))) (.setOptions f (into-array String (make-filter-options kind options)))
f) f)
(case kind (case kind
:clj-streamable (ClojureStreamFilter. (:process options) (:determine-dataset-format options)) :clj-streamable (ClojureStreamFilter. (:process options) (:determine-dataset-format options))
:clj-batch (ClojureBatchFilter. (:process options) (:determine-dataset-format options))))] :clj-batch (ClojureBatchFilter. (:process options) (:determine-dataset-format options))))]
(doto filter (.setInputFormat (:dataset-format options))))) (doto filter (.setInputFormat (:dataset-format options)))))
;; Processing the filtering of data ;; Processing the filtering of data
@ -602,7 +602,7 @@
The :dataset-format attribute for the making of the filter will be setup to the The :dataset-format attribute for the making of the filter will be setup to the
dataset passed as an argument if no other value is provided." dataset passed as an argument if no other value is provided."
[filter-options dataset] [filter-options dataset]
;TODO: Consider using Weka's MultiFilter instead.. could be faster for streamable filters. ;TODO: Consider using Weka's MultiFilter instead.. could be faster for streamable filters.
(reduce (reduce
(fn [ds [kind options]] (fn [ds [kind options]]
(make-apply-filter kind options ds)) (make-apply-filter kind options ds))

View file

@ -18,8 +18,8 @@
(defmethod make-kernel-function-options :polynomic (defmethod make-kernel-function-options :polynomic
([kind map] ([kind map]
(let [cols-val (check-option-values map {:cache-size "-C" (let [cols-val (check-option-values map {:cache-size "-C"
:exponent "-E" :exponent "-E"
:use=lower-order-terms "-L"} :use=lower-order-terms "-L"}
[""])] [""])]
(into-array cols-val)))) (into-array cols-val))))

View file

@ -15,7 +15,7 @@
"Sets an option for a filter" "Sets an option for a filter"
(if (get map val) (if (get map val)
(conj opts flag) (conj opts flag)
opts)) opts))
(defn check-option-value [opts val flag map] (defn check-option-value [opts val flag map]
"Sets an option with value for a filter" "Sets an option with value for a filter"
@ -48,7 +48,7 @@
[]))) [])))
; TODO: Raise a helpful exception when the keys don't match up with the provided flags. ;; TODO: Raise a helpful exception when the keys don't match up with the provided flags.
(defn check-options (defn check-options
"Checks the presence of a set of options for a filter" "Checks the presence of a set of options for a filter"
([args-map opts-map] ([args-map opts-map]

View file

@ -100,13 +100,13 @@
the-plot (if (nil? plot) the-plot (if (nil? plot)
(scatter-plot this-val-0 this-val-1 (scatter-plot this-val-0 this-val-1
:title title :title title
:x-label (name (nth cols-names col-0)) :x-label (name (nth cols-names col-0))
:y-label (name (nth cols-names col-1)) :y-label (name (nth cols-names col-1))
:series-label (name (first ks)) :series-label (name (first ks))
:legend legend) :legend legend)
(do (add-points plot this-val-0 this-val-1 :series-label (name (first ks))) (do (add-points plot this-val-0 this-val-1 :series-label (name (first ks)))
plot))] plot))]
(recur the-plot (rest ks)))))))) (recur the-plot (rest ks))))))))
;; visualization of different objects ;; visualization of different objects
@ -129,7 +129,7 @@
(defn dataset-display-attributes [dataset attribute-x attribute-y & visualization-options] (defn dataset-display-attributes [dataset attribute-x attribute-y & visualization-options]
"Displays the distribution of a set of attributes for a dataset" "Displays the distribution of a set of attributes for a dataset"
(let [attr-x (if (keyword? attribute-x) (datset-index-attr dataset attribute-x) attribute-x) (let [attr-x (if (keyword? attribute-x) (datset-index-attr dataset attribute-x) attribute-x)
attr-y (if (keyword? attribute-y) (datset-index-attr dataset attribute-y) attribute-y) attr-y (if (keyword? attribute-y) (datset-index-attr dataset attribute-y) attribute-y)
options-pre (first-or-default visualization-options {}) options-pre (first-or-default visualization-options {})
opts (if (nil? (:visualize options-pre)) (conj options-pre {:visualize true}) options-pre) opts (if (nil? (:visualize options-pre)) (conj options-pre {:visualize true}) options-pre)
@ -171,30 +171,30 @@
;; Things to load to test this from slime ;; Things to load to test this from slime
;(defn load-test-from-slime [] ;(defn load-test-from-slime []
; (do ; (do
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/joda-time-1.6.jar") ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/joda-time-1.6.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/opencsv-2.0.1.jar") ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/opencsv-2.0.1.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/classes/") ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/classes/")
; (add-classpath "file:///Applications/weka-3-6-2/weka.jar") ; (add-classpath "file:///Applications/weka-3-6-2/weka.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/src/") ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/src/")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/incanter-charts-1.0-master-SNAPSHOT.jar") ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/incanter-charts-1.0-master-SNAPSHOT.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/incanter-core-1.0-master-SNAPSHOT.jar") ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/incanter-core-1.0-master-SNAPSHOT.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/incanter-io-1.0-master-SNAPSHOT.jar") ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/incanter-io-1.0-master-SNAPSHOT.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/incanter-processing-1.0-master-SNAPSHOT.jar") ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/incanter-processing-1.0-master-SNAPSHOT.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/incanter-chrono-1.0-master-SNAPSHOT.jar") ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/incanter-chrono-1.0-master-SNAPSHOT.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/incanter-full-1.0-master-SNAPSHOT.jar") ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/incanter-full-1.0-master-SNAPSHOT.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/incanter-mongodb-1.0-master-SNAPSHOT.jar") ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/incanter-mongodb-1.0-master-SNAPSHOT.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/jfreechart-1.0.13.jar") ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/jfreechart-1.0.13.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/parallelcolt-0.7.2.jar") ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/parallelcolt-0.7.2.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/arpack-combo-0.1.jar") ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/arpack-combo-0.1.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/gnujaxp-1.jar") ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/gnujaxp-1.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/clojure-json-1.1-20091229.021828-4.jar") ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/clojure-json-1.1-20091229.021828-4.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/clojure-db-object-0.1.1-20091229.021828-2.jar") ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/clojure-db-object-0.1.1-20091229.021828-2.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/jcommon-1.0.16.jar") ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/jcommon-1.0.16.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/netlib-java-0.9.1.jar") ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/netlib-java-0.9.1.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/processing-core-1.jar") ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/processing-core-1.jar")
; (add-classpath"file:///Users/antonio.garrote/Development/old/clj-ml/lib/congomongo-0.1.1-20091229.021828-1.jar") ; (add-classpath"file:///Users/antonio.garrote/Development/old/clj-ml/lib/congomongo-0.1.1-20091229.021828-1.jar")
; (add-classpath"file:///Users/antonio.garrote/Development/old/clj-ml/lib/mongo-1.0.jar") ; (add-classpath"file:///Users/antonio.garrote/Development/old/clj-ml/lib/mongo-1.0.jar")
; (add-classpath"file:///Users/antonio.garrote/Development/old/clj-ml/lib/mongo-java-driver-1.1.0-20091229.021828-3.jar") ; (add-classpath"file:///Users/antonio.garrote/Development/old/clj-ml/lib/mongo-java-driver-1.1.0-20091229.021828-3.jar")
; )) ; ))

View file

@ -46,11 +46,11 @@
"Similar to update-in, but returns m unmodified if any levels do "Similar to update-in, but returns m unmodified if any levels do
not exist" not exist"
([m [k & ks] f & args] ([m [k & ks] f & args]
(if (contains? m k) (if (contains? m k)
(if ks (if ks
(assoc m k (apply update-in-when (get m k) ks f args)) (assoc m k (apply update-in-when (get m k) ks f args))
(assoc m k (apply f (get m k) args))) (assoc m k (apply f (get m k) args)))
m))) m)))
;; trying metrics ;; trying metrics

View file

@ -20,6 +20,6 @@
[4 5 :g]]) [4 5 :g]])
attrs (select-attributes ds :search (greedy) :evaluator (cfs-subset-eval))] attrs (select-attributes ds :search (greedy) :evaluator (cfs-subset-eval))]
(facts (facts
attrs => [:a :c] attrs => [:a :c]
(-> attrs meta :selector class) => #(isa? weka.attributeSelection.AttributeSelection %)))) (-> attrs meta :selector class) => #(isa? weka.attributeSelection.AttributeSelection %))))

View file

@ -44,9 +44,9 @@
(deftest make-classifier-bayes (deftest make-classifier-bayes
(fact (fact
(let [c (clj-ml.classifiers/make-classifier :bayes :naive {:kernel-estimator true :old-format true}) (let [c (clj-ml.classifiers/make-classifier :bayes :naive {:kernel-estimator true :old-format true})
opts (vec (.getOptions c))] opts (vec (.getOptions c))]
opts => (contains ["-K" "-O"])))) opts => (contains ["-K" "-O"]))))
(deftest make-classifier-bayes-updateable (deftest make-classifier-bayes-updateable
(let [c (clj-ml.classifiers/make-classifier :bayes :naive {:updateable true})] (let [c (clj-ml.classifiers/make-classifier :bayes :naive {:updateable true})]

View file

@ -4,14 +4,14 @@
(deftest make-clusterers-options-k-means (deftest make-clusterers-options-k-means
(fact (fact
(let [options (vec (make-clusterer-options :k-means {:display-standard-deviation true :replace-missing-values true :preserve-instances-order true (let [options (vec (make-clusterer-options :k-means {:display-standard-deviation true :replace-missing-values true :preserve-instances-order true
:number-clusters 3 :random-seed 2 :number-iterations 1}))] :number-clusters 3 :random-seed 2 :number-iterations 1}))]
options => (just ["" "-V" "-M" "-O" "-N" "3" "-S" "2" "-I" "1"] :in-any-order)))) options => (just ["" "-V" "-M" "-O" "-N" "3" "-S" "2" "-I" "1"] :in-any-order))))
(deftest make-clusterers-options-expectation-maximization (deftest make-clusterers-options-expectation-maximization
(fact (fact
(let [options (vec (make-clusterer-options :expectation-maximization {:number-clusters 3 :maximum-iterations 10 :minimum-standard-deviation 0.001 :random-seed 30}))] (let [options (vec (make-clusterer-options :expectation-maximization {:number-clusters 3 :maximum-iterations 10 :minimum-standard-deviation 0.001 :random-seed 30}))]
options => (just ["" "-N" "3" "-I" "10" "-M" "0.001" "-S" "30"] :in-any-order)))) options => (just ["" "-N" "3" "-I" "10" "-M" "0.001" "-S" "30"] :in-any-order))))
(deftest make-and-build-clusterer (deftest make-and-build-clusterer
@ -27,27 +27,27 @@
(deftest test-make-cobweb (deftest test-make-cobweb
(let [ds (make-dataset :test [:a :b] [[1 2] [3 4]]) (let [ds (make-dataset :test [:a :b] [[1 2] [3 4]])
c (make-clusterer :cobweb)] c (make-clusterer :cobweb)]
(clusterer-build c ds) (clusterer-build c ds)
(is true))) (is true)))
(deftest test-update-clusterer-cobweb (deftest test-update-clusterer-cobweb
(let [ds (make-dataset :test [:a :b] []) (let [ds (make-dataset :test [:a :b] [])
c (make-clusterer :cobweb)] c (make-clusterer :cobweb)]
(clusterer-build c ds) (clusterer-build c ds)
(clusterer-update c (clj-ml.data/make-instance ds [1 2])) (clusterer-update c (clj-ml.data/make-instance ds [1 2]))
(is true))) (is true)))
(deftest test-update-clusterer-cobweb-many-instances (deftest test-update-clusterer-cobweb-many-instances
(let [ds (make-dataset :test [:a :b] []) (let [ds (make-dataset :test [:a :b] [])
c (make-clusterer :cobweb) c (make-clusterer :cobweb)
to-update (make-dataset :test [:a :b] [[1 2] [3 4]])] to-update (make-dataset :test [:a :b] [[1 2] [3 4]])]
(clusterer-build c ds) (clusterer-build c ds)
(clusterer-update c to-update) (clusterer-update c to-update)
(is true))) (is true)))
(deftest test-evaluate-clusterer-cross-validation (deftest test-evaluate-clusterer-cross-validation
(let [ds (make-dataset :test [:a :b] [[1 2] [3 4] [5 6]]) (let [ds (make-dataset :test [:a :b] [[1 2] [3 4] [5 6]])
c (make-clusterer :expectation-maximization)] c (make-clusterer :expectation-maximization)]
(clusterer-build c ds) (clusterer-build c ds)
(clusterer-evaluate c :cross-validation ds 2) (clusterer-evaluate c :cross-validation ds 2)
(is true))) (is true)))

View file

@ -7,33 +7,33 @@
[:a :b] [:a :b]
1) 1)
inst (make-instance dataset [1 2])] inst (make-instance dataset [1 2])]
(is (= (class inst) (is (= (class inst)
weka.core.Instance)) weka.core.Instance))
(is (= 2 (.numValues inst))) (is (= 2 (.numValues inst)))
(is (= 1.0 (.value inst 0))) (is (= 1.0 (.value inst 0)))
(is (= 2.0 (.value inst 1))))) (is (= 2.0 (.value inst 1)))))
(deftest make-instance-ord (deftest make-instance-ord
(let [dataset (make-dataset :test (let [dataset (make-dataset :test
[:a {:b [:b1 :b2]}] [:a {:b [:b1 :b2]}]
1) 1)
inst (make-instance dataset [1 :b1])] inst (make-instance dataset [1 :b1])]
(is (= (class inst) (is (= (class inst)
weka.core.Instance)) weka.core.Instance))
(is (= 2 (.numValues inst))) (is (= 2 (.numValues inst)))
(is (= 1.0 (.value inst 0))) (is (= 1.0 (.value inst 0)))
(is (= "b1" (.stringValue inst 1))))) (is (= "b1" (.stringValue inst 1)))))
(deftest make-instance-nils (deftest make-instance-nils
(let [dataset (make-dataset :test (let [dataset (make-dataset :test
[:a :b] [:a :b]
1) 1)
inst (make-instance dataset [1 nil])] inst (make-instance dataset [1 nil])]
(is (= (class inst) (is (= (class inst)
weka.core.Instance)) weka.core.Instance))
(is (= 2 (.numValues inst))) (is (= 2 (.numValues inst)))
(is (= 1.0 (.value inst 0))) (is (= 1.0 (.value inst 0)))
(is (Double/isNaN (.value inst 1))))) (is (Double/isNaN (.value inst 1)))))
(deftest dataset-make-dataset-with-default-class (deftest dataset-make-dataset-with-default-class
(let [ds (clj-ml.data/make-dataset :test [:a :b {:c [:d :e]}] [] {:class :c}) (let [ds (clj-ml.data/make-dataset :test [:a :b {:c [:d :e]}] [] {:class :c})
@ -48,7 +48,7 @@
(let [dataset (make-dataset :test (let [dataset (make-dataset :test
[:a :b] [:a :b]
2) 2)
_ (clj-ml.data/dataset-set-class dataset 1)] _ (clj-ml.data/dataset-set-class dataset 1)]
(is (= 1 (.classIndex dataset))) (is (= 1 (.classIndex dataset)))
(is (= 0 (.classIndex (dataset-set-class dataset 0)))) (is (= 0 (.classIndex (dataset-set-class dataset 0))))
(testing "when a string or symbol is passed in" (testing "when a string or symbol is passed in"

View file

@ -4,25 +4,25 @@
(deftest make-filter-options-supervised-discretize (deftest make-filter-options-supervised-discretize
(fact (fact
(let [options (make-filter-options :supervised-discretize {:attributes [1 2] :invert true :binary true :better-encoding true :kononenko true :nonexitent true})] (let [options (make-filter-options :supervised-discretize {:attributes [1 2] :invert true :binary true :better-encoding true :kononenko true :nonexitent true})]
options => (just ["-R" "2,3" "-V" "-D" "-E" "-K"] :in-any-order)))) options => (just ["-R" "2,3" "-V" "-D" "-E" "-K"] :in-any-order))))
(deftest make-filter-options-unsupervised-discretize (deftest make-filter-options-unsupervised-discretize
(fact (fact
(let [options (make-filter-options :unsupervised-discretize {:attributes [1 2] :binary true (let [options (make-filter-options :unsupervised-discretize {:attributes [1 2] :binary true
:better-encoding true :equal-frequency true :optimize true :better-encoding true :equal-frequency true :optimize true
:number-bins 4 :weight-bins 1})] :number-bins 4 :weight-bins 1})]
options => (just ["-R" "2,3" "-D" "-E" "-F" "-O" "-B" "4" "-M" "1"] :in-any-order)))) options => (just ["-R" "2,3" "-D" "-E" "-F" "-O" "-B" "4" "-M" "1"] :in-any-order))))
(deftest make-filter-options-supervised-nominal-to-binary (deftest make-filter-options-supervised-nominal-to-binary
(fact (fact
(let [options (make-filter-options :supervised-nominal-to-binary {:also-binary true :for-each-nominal true})] (let [options (make-filter-options :supervised-nominal-to-binary {:also-binary true :for-each-nominal true})]
options => (just ["-N" "-A"] :in-any-order)))) options => (just ["-N" "-A"] :in-any-order))))
(deftest make-filter-options-unsupervised-nominal-to-binary (deftest make-filter-options-unsupervised-nominal-to-binary
(fact (fact
(let [options (make-filter-options :unsupervised-nominal-to-binary {:attributes [1,2] :also-binary true :for-each-nominal true :invert true})] (let [options (make-filter-options :unsupervised-nominal-to-binary {:attributes [1,2] :also-binary true :for-each-nominal true :invert true})]
options => (just ["-R" "2,3" "-V" "-N" "-A"] :in-any-order)))) options => (just ["-R" "2,3" "-V" "-N" "-A"] :in-any-order))))
(deftest make-filter-options-string-to-word-vector (deftest make-filter-options-string-to-word-vector
(fact (fact
@ -55,9 +55,9 @@
(deftest make-filter-discretize-sup (deftest make-filter-discretize-sup
(let [ds (make-dataset :test [:a :b {:c [:g :m]}] (let [ds (make-dataset :test [:a :b {:c [:g :m]}]
[ [1 2 :g] [ [1 2 :g]
[2 3 :m] [2 3 :m]
[4 5 :g]]) [4 5 :g]])
_ (dataset-set-class ds 2) _ (dataset-set-class ds 2)
f (make-filter :supervised-discretize {:dataset-format ds :attributes [0]})] f (make-filter :supervised-discretize {:dataset-format ds :attributes [0]})]
(is (= weka.filters.supervised.attribute.Discretize (is (= weka.filters.supervised.attribute.Discretize
@ -65,18 +65,18 @@
(deftest make-filter-discretize-unsup (deftest make-filter-discretize-unsup
(let [ds (make-dataset :test [:a :b {:c [:g :m]}] (let [ds (make-dataset :test [:a :b {:c [:g :m]}]
[ [1 2 :g] [ [1 2 :g]
[2 3 :m] [2 3 :m]
[4 5 :g]]) [4 5 :g]])
f (make-filter :unsupervised-discretize {:dataset-format ds :attributes [0]})] f (make-filter :unsupervised-discretize {:dataset-format ds :attributes [0]})]
(is (= weka.filters.unsupervised.attribute.Discretize (is (= weka.filters.unsupervised.attribute.Discretize
(class f))))) (class f)))))
(deftest make-filter-nominal-to-binary-sup (deftest make-filter-nominal-to-binary-sup
(let [ds (make-dataset :test [:a :b {:c [:g :m]}] (let [ds (make-dataset :test [:a :b {:c [:g :m]}]
[ [1 2 :g] [ [1 2 :g]
[2 3 :m] [2 3 :m]
[4 5 :g]]) [4 5 :g]])
foo1(dataset-set-class ds 2) foo1(dataset-set-class ds 2)
f (make-filter :supervised-nominal-to-binary {:dataset-format ds})] f (make-filter :supervised-nominal-to-binary {:dataset-format ds})]
(is (= weka.filters.supervised.attribute.NominalToBinary (is (= weka.filters.supervised.attribute.NominalToBinary
@ -84,9 +84,9 @@
(deftest make-filter-nominal-to-binary-unsup (deftest make-filter-nominal-to-binary-unsup
(let [ds (make-dataset :test [:a :b {:c [:g :m]}] (let [ds (make-dataset :test [:a :b {:c [:g :m]}]
[ [1 2 :g] [ [1 2 :g]
[2 3 :m] [2 3 :m]
[4 5 :g]]) [4 5 :g]])
f (make-filter :unsupervised-nominal-to-binary {:dataset-format ds :attributes [2]})] f (make-filter :unsupervised-nominal-to-binary {:dataset-format ds :attributes [2]})]
(is (= weka.filters.unsupervised.attribute.NominalToBinary (is (= weka.filters.unsupervised.attribute.NominalToBinary
(class f))))) (class f)))))
@ -123,9 +123,9 @@
(deftest make-filter-remove-attributes (deftest make-filter-remove-attributes
(let [ds (make-dataset :test [:a :b {:c [:g :m]}] (let [ds (make-dataset :test [:a :b {:c [:g :m]}]
[ [1 2 :g] [ [1 2 :g]
[2 3 :m] [2 3 :m]
[4 5 :g]]) [4 5 :g]])
f (make-filter :remove-attributes {:dataset-format ds :attributes [0]})] f (make-filter :remove-attributes {:dataset-format ds :attributes [0]})]
(is (= weka.filters.unsupervised.attribute.Remove (is (= weka.filters.unsupervised.attribute.Remove
(class f))) (class f)))
@ -135,44 +135,44 @@
(deftest make-apply-filter-remove-attributes (deftest make-apply-filter-remove-attributes
(let [ds (make-dataset :test [:a :b {:c [:g :m]}] (let [ds (make-dataset :test [:a :b {:c [:g :m]}]
[ [1 2 :g] [ [1 2 :g]
[2 3 :m] [2 3 :m]
[4 5 :g]]) [4 5 :g]])
res (make-apply-filter :remove-attributes {:attributes [0]} ds)] res (make-apply-filter :remove-attributes {:attributes [0]} ds)]
(is (= (dataset-format res) (is (= (dataset-format res)
[:b {:c '(:g :m)}])))) [:b {:c '(:g :m)}]))))
(deftest remove-precentage-test (deftest remove-precentage-test
(let [ds (make-dataset :test [:a :b {:c [:g :m]}] (let [ds (make-dataset :test [:a :b {:c [:g :m]}]
[ [1 2 :g] [ [1 2 :g]
[2 3 :m] [2 3 :m]
[4 2 :m] [4 2 :m]
[4 5 :g]])] [4 5 :g]])]
(is (= (dataset-count (remove-percentage ds {:percentage 75})) 1)))) (is (= (dataset-count (remove-percentage ds {:percentage 75})) 1))))
(deftest remove-range-test (deftest remove-range-test
(let [ds (make-dataset :test [:a :b {:c [:g :m]}] (let [ds (make-dataset :test [:a :b {:c [:g :m]}]
[ [1 2 :g] [ [1 2 :g]
[2 3 :m] [2 3 :m]
[4 2 :m] [4 2 :m]
[4 5 :g]])] [4 5 :g]])]
(is (= (dataset-count (remove-range ds {:range "first-3"})) 1) (is (= (dataset-count (remove-range ds {:range "first-3"})) 1)
(= (dataset-count (remove-range ds {:range "first-3" :invert true})) 3)))) (= (dataset-count (remove-range ds {:range "first-3" :invert true})) 3))))
(deftest make-apply-filter-numeric-to-nominal (deftest make-apply-filter-numeric-to-nominal
(let [ds (make-dataset :test [:a :b {:c [:g :m]}] (let [ds (make-dataset :test [:a :b {:c [:g :m]}]
[ [1 2 :g] [ [1 2 :g]
[2 3 :m] [2 3 :m]
[4 5 :g]])] [4 5 :g]])]
(testing "when no attributes are specified" (testing "when no attributes are specified"
(is (= (dataset-format (make-apply-filter :numeric-to-nominal {} ds)) (is (= (dataset-format (make-apply-filter :numeric-to-nominal {} ds))
[{:a '(:1 :2 :4)} {:b '(:2 :3 :5)} {:c '(:g :m)}]))) [{:a '(:1 :2 :4)} {:b '(:2 :3 :5)} {:c '(:g :m)}])))
(testing "when attributes are specified by index" (testing "when attributes are specified by index"
(is (= (dataset-format (make-apply-filter :numeric-to-nominal {:attributes [0]} ds)) (is (= (dataset-format (make-apply-filter :numeric-to-nominal {:attributes [0]} ds))
[{:a '(:1 :2 :4)} :b {:c '(:g :m)}]))) [{:a '(:1 :2 :4)} :b {:c '(:g :m)}])))
(testing "when attributes are specified by name" (testing "when attributes are specified by name"
(is (= (dataset-format (make-apply-filter :numeric-to-nominal {:attributes [:b]} ds)) (is (= (dataset-format (make-apply-filter :numeric-to-nominal {:attributes [:b]} ds))
[:a {:b '(:2 :3 :5)} {:c '(:g :m)}]))))) [:a {:b '(:2 :3 :5)} {:c '(:g :m)}])))))
(deftest make-apply-filter-string-to-word-vector (deftest make-apply-filter-string-to-word-vector
(let [ds (make-dataset :test [{:s nil} {:class [:yes :no]}] (let [ds (make-dataset :test [{:s nil} {:class [:yes :no]}]
@ -188,9 +188,9 @@
(deftest make-apply-filter-add-attribute (deftest make-apply-filter-add-attribute
(let [ds (make-dataset :test [:a :b {:c [:g :m]}] (let [ds (make-dataset :test [:a :b {:c [:g :m]}]
[ [1 2 :g] [ [1 2 :g]
[2 3 :m] [2 3 :m]
[4 5 :g]]) [4 5 :g]])
res (add-attribute ds {:type :nominal, :column 1, :name "pet", :labels ["dog" "cat"]})] res (add-attribute ds {:type :nominal, :column 1, :name "pet", :labels ["dog" "cat"]})]
(is (= (dataset-format res) (is (= (dataset-format res)
[:a {:pet '(:dog :cat)} :b {:c '(:g :m)}])))) [:a {:pet '(:dog :cat)} :b {:c '(:g :m)}]))))
@ -218,9 +218,9 @@
(deftest make-apply-filters-test (deftest make-apply-filters-test
(let [ds (make-dataset :test [:a :b {:c [:g :m]}] (let [ds (make-dataset :test [:a :b {:c [:g :m]}]
[ [1 2 :g] [ [1 2 :g]
[2 3 :m] [2 3 :m]
[4 5 :g]]) [4 5 :g]])
res (make-apply-filters res (make-apply-filters
[[:add-attribute {:type :nominal, :column 1, :name "pet", :labels ["dog" "cat"]}] [[:add-attribute {:type :nominal, :column 1, :name "pet", :labels ["dog" "cat"]}]
[:remove-attributes {:attributes [:a :c]}]] ds)] [:remove-attributes {:attributes [:a :c]}]] ds)]
@ -229,9 +229,9 @@
(deftest using-regular-filter-fns-with-threading (deftest using-regular-filter-fns-with-threading
(let [ds (make-dataset :test [:a :b {:c [:g :m]}] (let [ds (make-dataset :test [:a :b {:c [:g :m]}]
[ [1 2 :g] [ [1 2 :g]
[2 3 :m] [2 3 :m]
[4 5 :g]]) [4 5 :g]])
res (-> ds res (-> ds
(add-attribute {:type :nominal, :column 1, :name "pet", :labels ["dog" "cat"]}) (add-attribute {:type :nominal, :column 1, :name "pet", :labels ["dog" "cat"]})
(remove-attributes {:attributes [:a :c]}))] (remove-attributes {:attributes [:a :c]}))]
@ -240,9 +240,9 @@
(deftest make-apply-filter-clj-streamable (deftest make-apply-filter-clj-streamable
(let [ds (make-dataset :test [:a :b {:c [:g :m]}] (let [ds (make-dataset :test [:a :b {:c [:g :m]}]
[ [1 2 :g] [ [1 2 :g]
[2 3 :m] [2 3 :m]
[4 5 :g]]) [4 5 :g]])
rename-attributes (fn [^weka.core.Instances input-format] rename-attributes (fn [^weka.core.Instances input-format]
(doto (weka.core.Instances. input-format 0) (doto (weka.core.Instances. input-format 0)
@ -263,9 +263,9 @@
(deftest make-apply-filter-clj-batch (deftest make-apply-filter-clj-batch
(let [ds (make-dataset :test [:a] (let [ds (make-dataset :test [:a]
[ [1] [ [1]
[2] [2]
[4]]) [4]])
max-diff-attr (weka.core.Attribute. "max-diff") max-diff-attr (weka.core.Attribute. "max-diff")
add-max-diff-attr (fn [^weka.core.Instances input-format] add-max-diff-attr (fn [^weka.core.Instances input-format]
(doto (weka.core.Instances. input-format 0) (doto (weka.core.Instances. input-format 0)
@ -288,4 +288,4 @@
(is (= [{:a 1.0 :max-diff 3.0} (is (= [{:a 1.0 :max-diff 3.0}
{:a 2.0 :max-diff 2.0} {:a 2.0 :max-diff 2.0}
{:a 4.0 :max-diff 0.0}] {:a 4.0 :max-diff 0.0}]
(dataset-as-maps res))))) (dataset-as-maps res)))))

View file

@ -4,18 +4,18 @@
(deftest make-kernel-function-polynomic (deftest make-kernel-function-polynomic
(fact (fact
(let [kernel (clj-ml.kernel-functions/make-kernel-function :polynomic {:exponent 0.3}) (let [kernel (clj-ml.kernel-functions/make-kernel-function :polynomic {:exponent 0.3})
options (vec (.getOptions kernel))] options (vec (.getOptions kernel))]
options => (contains ["-E" "0.3"])))) options => (contains ["-E" "0.3"]))))
(deftest make-kernel-function-radial-basis (deftest make-kernel-function-radial-basis
(fact (fact
(let [kernel (clj-ml.kernel-functions/make-kernel-function :radial-basis {:gamma 0.3}) (let [kernel (clj-ml.kernel-functions/make-kernel-function :radial-basis {:gamma 0.3})
options (vec (.getOptions kernel))] options (vec (.getOptions kernel))]
options => (contains ["-G" "0.3"])))) options => (contains ["-G" "0.3"]))))
(deftest make-kernel-function-string (deftest make-kernel-function-string
(fact (fact
(let [kernel (clj-ml.kernel-functions/make-kernel-function :string {:lambda 0}) (let [kernel (clj-ml.kernel-functions/make-kernel-function :string {:lambda 0})
options (vec (.getOptions kernel))] options (vec (.getOptions kernel))]
options => (contains ["-L" "0.0"])))) options => (contains ["-L" "0.0"]))))

View file

@ -4,4 +4,4 @@
(deftest test-into-fast-vecotor (deftest test-into-fast-vecotor
(is (= ["a" "B" "c"] (is (= ["a" "B" "c"]
(vec (.toArray (into-fast-vector ["a" "B" "c"])))))) (vec (.toArray (into-fast-vector ["a" "B" "c"]))))))