Distance functions and distance function parameters for clusterers

This commit is contained in:
Antonio Garrote 2010-03-02 22:47:35 +01:00
parent 00ff6b1bc4
commit c37d36e5d3
4 changed files with 93 additions and 2 deletions

View file

@ -4,7 +4,7 @@
;;
(ns clj-ml.clusterers
(:use [clj-ml utils data]
(:use [clj-ml utils data distance-functions]
[incanter charts])
(:import (java.util Date Random)
(weka.clusterers ClusterEvaluation SimpleKMeans)))
@ -37,6 +37,13 @@
clusterer# (new ~clusterer-class)
opts# (make-clusterer-options ~kind options-read#)]
(.setOptions clusterer# opts#)
(when (not (empty? (get options-read# :distance-function)))
(let [dist# (get options-read# :distance-function)
real-dist# (if (map? dist#)
(make-distance-function (first (keys dist#))
(first (vals dist#)))
dist#)]
(.setDistanceFunction clusterer# real-dist#)))
clusterer#)))
(defmulti make-clusterer

View file

@ -0,0 +1,51 @@
;;
;; Distance functions
;; @author Antonio Garrote
;;
(ns clj-ml.distance-functions
(:use [clj-ml utils data])
(:import (weka.core EuclideanDistance ManhattanDistance ChebyshevDistance)))
;; Setting up distance function options
(defn- make-distance-function-options
  "Creates the right parameters (a String array) for a distance function.

  `map` is the user supplied options map. Recognised keys:
    :attributes        - collection of 0-based attribute indices. They are
                         emitted as a 1-based, comma separated list after
                         the \"-R\" flag, e.g. [0 1 2] => \"-R\" \"1,2,3\".
    :invert            - looked up by check-options against the \"-V\" flag.
    :no-normalization  - looked up by check-options against the \"-D\" flag.

  When :attributes is missing or empty no \"-R\" option is produced at all
  (previously this case threw a StringIndexOutOfBoundsException), so the
  distance function keeps its own default attribute range.

  Always returns a String[], even when no option is set."
  ([map]
     (let [cols (get map :attributes)
           ;; `for` is used instead of clojure.core/map because the
           ;; parameter named `map` shadows that function in this scope.
           range-opts (if (seq cols)
                        ["-R" (apply str (interpose "," (for [col cols] (inc col))))]
                        [])
           ;; NOTE(review): check-options lives in clj-ml.utils; it is assumed
           ;; to append the flag of every key present in `map` to the vector
           ;; given as third argument -- confirm against its definition.
           all-opts (check-options {:invert "-V"
                                    :no-normalization "-D"}
                                   map
                                   range-opts)]
       ;; An explicit element type is required: an untyped into-array over an
       ;; empty collection yields Object[], which setOptions(String[]) rejects.
       (into-array String all-opts))))
(defmulti make-distance-function
  "Creates a new distance function.

  Dispatches on the first argument, `kind`; any further arguments are
  handed to the selected method but play no part in the dispatch."
  (fn [kind & _] kind))
(defmethod make-distance-function :euclidean
  [kind & options]
  ;; Instantiates a Weka EuclideanDistance, applies the options built from the
  ;; first optional argument and returns the configured instance.
  ;; NOTE(review): first-or-default (clj-ml.utils) presumably yields {} when no
  ;; options argument was passed -- confirm.
  (let [settings (first-or-default options {})]
    (doto (EuclideanDistance.)
      (.setOptions (make-distance-function-options settings)))))
(defmethod make-distance-function :manhattan
  [kind & options]
  ;; Instantiates a Weka ManhattanDistance, applies the options built from the
  ;; first optional argument and returns the configured instance.
  ;; NOTE(review): first-or-default (clj-ml.utils) presumably yields {} when no
  ;; options argument was passed -- confirm.
  (let [settings (first-or-default options {})]
    (doto (ManhattanDistance.)
      (.setOptions (make-distance-function-options settings)))))
(defmethod make-distance-function :chebyshev
  [kind & options]
  ;; Instantiates a Weka ChebyshevDistance, applies the options built from the
  ;; first optional argument and returns the configured instance.
  ;; NOTE(review): first-or-default (clj-ml.utils) presumably yields {} when no
  ;; options argument was passed -- confirm.
  (let [settings (first-or-default options {})]
    (doto (ChebyshevDistance.)
      (.setOptions (make-distance-function-options settings)))))

View file

@ -27,8 +27,12 @@
"1"))))
(deftest make-and-build-classifier
(deftest make-and-build-clusterer
  ;; Creates a :k-means clusterer, builds it on a two-row dataset and checks
  ;; that the resulting object is a Weka SimpleKMeans.
  (let [dataset (make-dataset :test
                              [:a :b]
                              [[1 2]
                               [3 4]])
        kmeans (make-clusterer :k-means)]
    (clusterer-build kmeans dataset)
    (is (= weka.clusterers.SimpleKMeans
           (class kmeans)))))
(deftest make-clusterer-with-distance
  ;; A :distance-function given as {kind options} must be turned into the
  ;; matching Weka distance object and installed on the clusterer.
  (let [c (clj-ml.clusterers/make-clusterer :k-means {:distance-function {:manhattan {:attributes [0 1 2]}}})]
    ;; Compare the *class* of the installed distance function; comparing the
    ;; class object with the instance itself could never be equal.
    (is (= weka.core.ManhattanDistance (class (.getDistanceFunction c))))))

View file

@ -0,0 +1,29 @@
(ns clj-ml.distance-functions-test
(:use [clj-ml distance-functions] :reload-all)
(:use [clojure.test]))
(deftest make-distance-function-euclidean
  ;; Builds a :euclidean distance over attribute indices 0-3 and reads its
  ;; options back: the first two entries must be "-R" and "1,2,3,4".
  (let [euclidean (clj-ml.distance-functions/make-distance-function
                   :euclidean
                   {:attributes [0 1 2 3]})
        reported (.getOptions euclidean)]
    (is (= (aget reported 0) "-R"))
    (is (= (aget reported 1) "1,2,3,4"))))
(deftest make-distance-function-manhattan
  ;; Builds a :manhattan distance over attribute indices 0-3 and reads its
  ;; options back: the first two entries must be "-R" and "1,2,3,4".
  (let [manhattan (clj-ml.distance-functions/make-distance-function
                   :manhattan
                   {:attributes [0 1 2 3]})
        reported (.getOptions manhattan)]
    (is (= (aget reported 0) "-R"))
    (is (= (aget reported 1) "1,2,3,4"))))
(deftest make-distance-function-chebyshev
  ;; Builds a :chebyshev distance over attribute indices 0-3 and reads its
  ;; options back: the first two entries must be "-R" and "1,2,3,4".
  (let [chebyshev (clj-ml.distance-functions/make-distance-function
                   :chebyshev
                   {:attributes [0 1 2 3]})
        reported (.getOptions chebyshev)]
    (is (= (aget reported 0) "-R"))
    (is (= (aget reported 1) "1,2,3,4"))))