From b13fd3b4b4d9abf4c6835bc9554efed5d2d7ba25 Mon Sep 17 00:00:00 2001
From: Antonio Garrote
Date: Wed, 3 Mar 2010 16:37:06 +0100
Subject: [PATCH] Updateable clusterers

---
 src/clj_ml/classifiers.clj      |  2 +-
 src/clj_ml/clusterers.clj       | 38 +++++++++++++++++++++++++++++----
 test/clj_ml/clusterers_test.clj | 21 ++++++++++++++++++
 3 files changed, 56 insertions(+), 5 deletions(-)

diff --git a/src/clj_ml/classifiers.clj b/src/clj_ml/classifiers.clj
index 2d07690..91863c7 100644
--- a/src/clj_ml/classifiers.clj
+++ b/src/clj_ml/classifiers.clj
@@ -105,7 +105,7 @@
        classifier)))
 
 (defn classifier-update
-  "If the classifier is updatable it updates the classifier with the given instance or set of instances"
+  "If the classifier is updateable it updates the classifier with the given instance or set of instances"
   ([classifier instance-s]
      (if (is-dataset? instance-s)
        (do (for [i (dataset-seq instance-s)]
diff --git a/src/clj_ml/clusterers.clj b/src/clj_ml/clusterers.clj
index 5a4eaba..b1d89af 100644
--- a/src/clj_ml/clusterers.clj
+++ b/src/clj_ml/clusterers.clj
@@ -7,7 +7,8 @@
   (:use [clj-ml utils data distance-functions]
         [incanter charts])
   (:import (java.util Date Random)
-           (weka.clusterers ClusterEvaluation SimpleKMeans)))
+           (weka.clusterers ClusterEvaluation SimpleKMeans Cobweb)))
+
 
 ;; Setting up clusterer options
 
@@ -29,6 +30,16 @@
                        cols-val)]
        (into-array cols-val-a))))
 
+
+(defmethod make-clusterer-options :cobweb
+  ([kind map]
+     (let [cols-val-a (check-option-values {:acuity "-A"
+                                            :cutoff "-C"
+                                            :random-seed "-S"}
+                                           map
+                                           [""])]
+       (into-array cols-val-a))))
+
 ;; Building clusterers
 
 (defmacro make-clusterer-m
@@ -38,6 +49,7 @@
          opts# (make-clusterer-options ~kind options-read#)]
      (.setOptions clusterer# opts#)
      (when (not (empty? (get options-read# :distance-function)))
+       ;; We have to setup a different distance function
        (let [dist# (get options-read# :distance-function)
              real-dist# (if (map? dist#)
                           (make-distance-function (first (keys dist#))
@@ -55,6 +67,10 @@
   ([kind & options]
      (make-clusterer-m kind SimpleKMeans options)))
 
+(defmethod make-clusterer :cobweb
+  ([kind & options]
+     (make-clusterer-m kind Cobweb options)))
+
 
 ;; Clustering data
 
@@ -63,6 +79,18 @@
   ([clusterer dataset]
      (.buildClusterer clusterer dataset)))
 
+(defn clusterer-update
+  "If the clusterer is updateable it updates the clusterer with the given instance or set of instances (applied eagerly, one by one) and returns the clusterer"
+  ([clusterer instance-s]
+     (if (is-dataset? instance-s)
+       (do (doseq [i (dataset-seq instance-s)]
+             (.updateClusterer clusterer i))
+           (.updateFinished clusterer)
+           clusterer)
+       (do (.updateClusterer clusterer instance-s)
+           (.updateFinished clusterer)
+           clusterer))))
+
 ;; Retrieving information from a clusterer
 
 (defmulti clusterer-info
@@ -74,7 +102,9 @@
   "Accepts a k-means clusterer
    Returns a map with:
     :number-clusters  The number of clusters in the clusterer
-    :centroids        Map with the identifier and the centroid values for each cluster"
+    :centroids        Map with the identifier and the centroid values for each cluster
+    :cluster-sizes    Number of data points classified in each cluster
+    :squared-error    Minimized squared error"
   {:number-clusters (.numberOfClusters clusterer)
    :centroids (second
                (reduce (fn [acum item]
@@ -96,7 +126,7 @@
 ;; Evaluating clusterers
 
 (defn- collect-evaluation-results
-  "Collects all the statistics from the evaluation of a classifier"
+  "Collects all the statistics from the evaluation of a clusterer"
   ([evaluation]
      (do
        (println "hola?")
@@ -121,9 +151,9 @@
                                   evl))]
        (.evaluateClusterer evaluation test-data)
        (println (.clusterResultsToString evaluation))
-;       evaluation)))
        (collect-evaluation-results evaluation))))
 
+
 ;; Clustering collections
 
 (defn clusterer-cluster
diff --git a/test/clj_ml/clusterers_test.clj b/test/clj_ml/clusterers_test.clj
index 63f321b..0c1cdef 100644
--- a/test/clj_ml/clusterers_test.clj
+++ b/test/clj_ml/clusterers_test.clj
@@ -36,3 +36,24 @@
 (deftest make-clusterer-with-distance
   (let [c (clj-ml.clusterers/make-clusterer :k-means {:distance-function {:manhattan {:attributes [0 1 2]}}})]
     (is (= weka.core.ManhattanDistance (class (.getDistanceFunction c))))))
+
+(deftest test-make-cobweb
+  (let [ds (make-dataset :test [:a :b] [[1 2] [3 4]])
+        c (make-clusterer :cobweb)]
+    (clusterer-build c ds)
+    (is true)))
+
+(deftest test-update-clusterer-cobweb
+  (let [ds (make-dataset :test [:a :b] [])
+        c (make-clusterer :cobweb)]
+    (clusterer-build c ds)
+    (clusterer-update c (clj-ml.data/make-instance ds [1 2]))
+    (is true)))
+
+(deftest test-update-clusterer-cobweb-many-instances
+  (let [ds (make-dataset :test [:a :b] [])
+        c (make-clusterer :cobweb)
+        to-update (make-dataset :test [:a :b] [[1 2] [3 4]])]
+    (clusterer-build c ds)
+    (clusterer-update c to-update)
+    (is true)))