Distance functions and distance function parameters for clusterers
This commit is contained in:
parent
00ff6b1bc4
commit
c37d36e5d3
4 changed files with 93 additions and 2 deletions
|
@ -4,7 +4,7 @@
|
||||||
;;
|
;;
|
||||||
|
|
||||||
(ns clj-ml.clusterers
|
(ns clj-ml.clusterers
|
||||||
(:use [clj-ml utils data]
|
(:use [clj-ml utils data distance-functions]
|
||||||
[incanter charts])
|
[incanter charts])
|
||||||
(:import (java.util Date Random)
|
(:import (java.util Date Random)
|
||||||
(weka.clusterers ClusterEvaluation SimpleKMeans)))
|
(weka.clusterers ClusterEvaluation SimpleKMeans)))
|
||||||
|
@ -37,6 +37,13 @@
|
||||||
clusterer# (new ~clusterer-class)
|
clusterer# (new ~clusterer-class)
|
||||||
opts# (make-clusterer-options ~kind options-read#)]
|
opts# (make-clusterer-options ~kind options-read#)]
|
||||||
(.setOptions clusterer# opts#)
|
(.setOptions clusterer# opts#)
|
||||||
|
(when (not (empty? (get options-read# :distance-function)))
|
||||||
|
(let [dist# (get options-read# :distance-function)
|
||||||
|
real-dist# (if (map? dist#)
|
||||||
|
(make-distance-function (first (keys dist#))
|
||||||
|
(first (vals dist#)))
|
||||||
|
dist#)]
|
||||||
|
(.setDistanceFunction clusterer# real-dist#)))
|
||||||
clusterer#)))
|
clusterer#)))
|
||||||
|
|
||||||
(defmulti make-clusterer
|
(defmulti make-clusterer
|
||||||
|
|
51
src/clj_ml/distance_functions.clj
Normal file
51
src/clj_ml/distance_functions.clj
Normal file
|
@ -0,0 +1,51 @@
|
||||||
|
;;
|
||||||
|
;; Distance functions
|
||||||
|
;; @author Antonio Garrote
|
||||||
|
;;
|
||||||
|
|
||||||
|
(ns clj-ml.distance-functions
|
||||||
|
(:use [clj-ml utils data])
|
||||||
|
(:import (weka.core EuclideanDistance ManhattanDistance ChebyshevDistance)))
|
||||||
|
|
||||||
|
|
||||||
|
;; Setting up distance function options
|
||||||
|
|
||||||
|
(defn- make-distance-function-options
  "Creates the right parameters for a distance function.

  Reads the :attributes entry of `map` (a collection of attribute indices),
  increments every index by one and joins them with commas to build the value
  of the \"-R\" argument (e.g. [0 1 2 3] becomes \"1,2,3,4\"). The result is
  then passed through check-options together with the :invert (\"-V\") and
  :no-normalization (\"-D\") flag table, and returned as a String array
  suitable for .setOptions.

  When :attributes is missing or empty, no \"-R\" argument is emitted at all,
  so the distance function keeps its own default attribute range instead of
  failing with a StringIndexOutOfBoundsException."
  ([map]
     (let [cols (get map :attributes)
           ;; Only build the range argument when there is something to list;
           ;; the previous substring-based trimming blew up on an empty string.
           range-args (if (empty? cols)
                        []
                        ["-R" (apply str (interpose "," (for [c cols] (+ c 1))))])
           ;; check-options comes from clj-ml.utils; presumably it appends the
           ;; flag for every key of the table present in `map` -- confirm there.
           all-args (check-options {:invert "-V"
                                    :no-normalization "-D"}
                                   map
                                   range-args)]
       ;; Explicit element type: an empty collection would otherwise produce an
       ;; Object[] that .setOptions (which takes String[]) cannot accept.
       (into-array String all-args))))
|
||||||
|
|
||||||
|
|
||||||
|
(defmulti make-distance-function
  "Creates a new distance function.

  Dispatches on the first argument (the kind keyword, e.g. :euclidean);
  any further arguments are ignored for dispatch and handed to the method."
  (fn [kind & _] kind))
|
||||||
|
|
||||||
|
|
||||||
|
;; Euclidean distance: instantiates weka.core.EuclideanDistance and applies the
;; option array built from the first extra argument (an options map).
;; first-or-default comes from clj-ml.utils; presumably it yields {} when no
;; options map was supplied -- confirm against that helper.
(defmethod make-distance-function :euclidean
  ([kind & options]
     (doto (new EuclideanDistance)
       (.setOptions (make-distance-function-options (first-or-default options {}))))))
|
||||||
|
|
||||||
|
;; Manhattan distance: instantiates weka.core.ManhattanDistance and applies the
;; option array built from the first extra argument (an options map).
;; first-or-default comes from clj-ml.utils; presumably it yields {} when no
;; options map was supplied -- confirm against that helper.
(defmethod make-distance-function :manhattan
  ([kind & options]
     (doto (new ManhattanDistance)
       (.setOptions (make-distance-function-options (first-or-default options {}))))))
|
||||||
|
|
||||||
|
;; Chebyshev distance: instantiates weka.core.ChebyshevDistance and applies the
;; option array built from the first extra argument (an options map).
;; first-or-default comes from clj-ml.utils; presumably it yields {} when no
;; options map was supplied -- confirm against that helper.
(defmethod make-distance-function :chebyshev
  ([kind & options]
     (doto (new ChebyshevDistance)
       (.setOptions (make-distance-function-options (first-or-default options {}))))))
|
||||||
|
|
|
@ -27,8 +27,12 @@
|
||||||
"1"))))
|
"1"))))
|
||||||
|
|
||||||
|
|
||||||
(deftest make-and-build-classifier
|
(deftest make-and-build-clusterer
|
||||||
(let [ds (make-dataset :test [:a :b] [[1 2] [3 4]])
|
(let [ds (make-dataset :test [:a :b] [[1 2] [3 4]])
|
||||||
c (make-clusterer :k-means)]
|
c (make-clusterer :k-means)]
|
||||||
(clusterer-build c ds)
|
(clusterer-build c ds)
|
||||||
(is (= weka.clusterers.SimpleKMeans (class c)))))
|
(is (= weka.clusterers.SimpleKMeans (class c)))))
|
||||||
|
|
||||||
|
;; Builds a k-means clusterer whose :distance-function option is given as a
;; {kind options} map and checks that the clusterer ends up holding a
;; ManhattanDistance.
(deftest make-clusterer-with-distance
  (let [c (clj-ml.clusterers/make-clusterer :k-means {:distance-function {:manhattan {:attributes [0 1 2]}}})]
    ;; Compare classes: .getDistanceFunction returns an instance, which is
    ;; never = to the Class object itself.
    (is (= weka.core.ManhattanDistance (class (.getDistanceFunction c))))))
|
||||||
|
|
29
test/clj_ml/distance_functions_test.clj
Normal file
29
test/clj_ml/distance_functions_test.clj
Normal file
|
@ -0,0 +1,29 @@
|
||||||
|
(ns clj-ml.distance-functions-test
|
||||||
|
(:use [clj-ml distance-functions] :reload-all)
|
||||||
|
(:use [clojure.test]))
|
||||||
|
|
||||||
|
;; The :euclidean distance function must report the requested attributes back
;; as a "-R" option whose value lists each index incremented by one.
(deftest make-distance-function-euclidean
  (let [requested {:attributes [0 1 2 3]}
        weka-opts (.getOptions (make-distance-function :euclidean requested))]
    (is (= "-R" (aget weka-opts 0)))
    (is (= "1,2,3,4" (aget weka-opts 1)))))
|
||||||
|
|
||||||
|
;; The :manhattan distance function must report the requested attributes back
;; as a "-R" option whose value lists each index incremented by one.
(deftest make-distance-function-manhattan
  (let [requested {:attributes [0 1 2 3]}
        weka-opts (.getOptions (make-distance-function :manhattan requested))]
    (is (= "-R" (aget weka-opts 0)))
    (is (= "1,2,3,4" (aget weka-opts 1)))))
|
||||||
|
|
||||||
|
;; The :chebyshev distance function must report the requested attributes back
;; as a "-R" option whose value lists each index incremented by one.
(deftest make-distance-function-chebyshev
  (let [requested {:attributes [0 1 2 3]}
        weka-opts (.getOptions (make-distance-function :chebyshev requested))]
    (is (= "-R" (aget weka-opts 0)))
    (is (= "1,2,3,4" (aget weka-opts 1)))))
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue