diff --git a/src/clj_ml/clusterers.clj b/src/clj_ml/clusterers.clj
index ad457a9..5a4eaba 100644
--- a/src/clj_ml/clusterers.clj
+++ b/src/clj_ml/clusterers.clj
@@ -4,7 +4,7 @@
 ;;
 
 (ns clj-ml.clusterers
-  (:use [clj-ml utils data]
+  (:use [clj-ml utils data distance-functions]
         [incanter charts])
   (:import (java.util Date Random)
            (weka.clusterers ClusterEvaluation SimpleKMeans)))
@@ -37,6 +37,15 @@
             clusterer# (new ~clusterer-class)
             opts# (make-clusterer-options ~kind options-read#)]
         (.setOptions clusterer# opts#)
+        ;; :distance-function may be a {kind options} map or a ready-made
+        ;; distance object; empty? is only safe to call on the former.
+        (when-let [dist# (get options-read# :distance-function)]
+          (when-not (and (coll? dist#) (empty? dist#))
+            (.setDistanceFunction clusterer#
+                                  (if (map? dist#)
+                                    (make-distance-function (first (keys dist#))
+                                                            (first (vals dist#)))
+                                    dist#))))
         clusterer#)))
 
 (defmulti make-clusterer
diff --git a/src/clj_ml/distance_functions.clj b/src/clj_ml/distance_functions.clj
new file mode 100644
index 0000000..4d23e75
--- /dev/null
+++ b/src/clj_ml/distance_functions.clj
@@ -0,0 +1,61 @@
+;;
+;; Distance functions
+;; @author Antonio Garrote
+;;
+
+(ns clj-ml.distance-functions
+  (:use [clj-ml utils data])
+  (:import (weka.core EuclideanDistance ManhattanDistance ChebyshevDistance)))
+
+
+;; Setting up distance function options
+
+(defn- make-distance-function-options
+  "Builds the array of Weka option strings for a distance function.
+   opts may hold :attributes, a collection of 0-based attribute indices that
+   is emitted as a 1-based -R range; the :invert / :no-normalization flag
+   table is handed to check-options together with opts."
+  ([opts]
+     (let [cols (get opts :attributes)
+           ;; Emit -R only when attributes were supplied: taking a substring of
+           ;; the empty accumulator threw when :attributes was missing or empty,
+           ;; e.g. for the {} default the defmethods pass via first-or-default.
+           range-args (if (empty? cols)
+                        []
+                        ["-R" (apply str (interpose "," (map inc cols)))])
+           all-args (check-options {:invert "-V"
+                                    :no-normalization "-D"}
+                                   opts
+                                   range-args)]
+       ;; Name the element type so an empty option list is still a String[].
+       (into-array String all-args))))
+
+
+(defmulti make-distance-function
+  "Creates a new distance function"
+  (fn [kind & options] kind))
+
+
+;; Each method instantiates the Weka distance class for its kind and applies
+;; the options built from (first-or-default options {}).
+(defmethod make-distance-function :euclidean
+  ([kind & options]
+     (let [dist (new EuclideanDistance)
+           opts (make-distance-function-options (first-or-default options {}))]
+       (.setOptions dist opts)
+       dist)))
+
+(defmethod make-distance-function :manhattan
+  ([kind & options]
+     (let [dist (new ManhattanDistance)
+           opts (make-distance-function-options (first-or-default options {}))]
+       (.setOptions dist opts)
+       dist)))
+
+(defmethod make-distance-function :chebyshev
+  ([kind & options]
+     (let [dist (new ChebyshevDistance)
+           opts (make-distance-function-options (first-or-default options {}))]
+       (.setOptions dist opts)
+       dist)))
+
diff --git a/test/clj_ml/clusterers_test.clj b/test/clj_ml/clusterers_test.clj
index d9dd8cc..b80e12d 100644
--- a/test/clj_ml/clusterers_test.clj
+++ b/test/clj_ml/clusterers_test.clj
@@ -27,8 +27,12 @@
                 "1"))))
 
 
-(deftest make-and-build-classifier
+(deftest make-and-build-clusterer
   (let [ds (make-dataset :test [:a :b] [[1 2] [3 4]])
         c (make-clusterer :k-means)]
     (clusterer-build c ds)
     (is (= weka.clusterers.SimpleKMeans (class c)))))
+
+(deftest make-clusterer-with-distance
+  (let [c (clj-ml.clusterers/make-clusterer :k-means {:distance-function {:manhattan {:attributes [0 1 2]}}})]
+    (is (= weka.core.ManhattanDistance (class (.getDistanceFunction c))))))
diff --git a/test/clj_ml/distance_functions_test.clj b/test/clj_ml/distance_functions_test.clj
new file mode 100644
index 0000000..289c6df
--- /dev/null
+++ b/test/clj_ml/distance_functions_test.clj
@@ -0,0 +1,29 @@
+(ns clj-ml.distance-functions-test
+  (:use [clj-ml distance-functions] :reload-all)
+  (:use [clojure.test]))
+
+(deftest make-distance-function-euclidean
+  (let [dist (clj-ml.distance-functions/make-distance-function :euclidean {:attributes [0 1 2 3]})
+        options (.getOptions dist)]
+    (is (= (aget options 0)
+           "-R"))
+    (is (= (aget options 1)
+           "1,2,3,4"))))
+
+(deftest make-distance-function-manhattan
+  (let [dist (clj-ml.distance-functions/make-distance-function :manhattan {:attributes [0 1 2 3]})
+        options (.getOptions dist)]
+    (is (= (aget options 0)
+           "-R"))
+    (is (= (aget options 1)
+           "1,2,3,4"))))
+
+(deftest make-distance-function-chebyshev
+  (let [dist (clj-ml.distance-functions/make-distance-function :chebyshev {:attributes [0 1 2 3]})
+        options (.getOptions dist)]
+    (is (= (aget options 0)
+           "-R"))
+    (is (= (aget options 1)
+           "1,2,3,4"))))
+
+