Updateable clusterers

This commit is contained in:
Antonio Garrote 2010-03-03 16:37:06 +01:00
parent 2670b037b2
commit b13fd3b4b4
3 changed files with 56 additions and 5 deletions

View file

@ -105,7 +105,7 @@
classifier)))
(defn classifier-update
"If the classifier is updatable it updates the classifier with the given instance or set of instances"
"If the classifier is updateable it updates the classifier with the given instance or set of instances"
([classifier instance-s]
(if (is-dataset? instance-s)
(do (for [i (dataset-seq instance-s)]

View file

@ -7,7 +7,8 @@
(:use [clj-ml utils data distance-functions]
[incanter charts])
(:import (java.util Date Random)
(weka.clusterers ClusterEvaluation SimpleKMeans)))
(weka.clusterers ClusterEvaluation SimpleKMeans Cobweb)))
;; Setting up clusterer options
@ -29,6 +30,16 @@
cols-val)]
(into-array cols-val-a))))
;; Option builder for the :cobweb clusterer kind.
;; Pairs the supported option keys with their command-line style flags
;; (:acuity "-A", :cutoff "-C", :random-seed "-S") and delegates to
;; check-option-values, seeding it with [""]; the result is turned into a
;; Java array.
(defmethod make-clusterer-options :cobweb
  [kind options]
  (into-array
   (check-option-values {:acuity "-A"
                         :cutoff "-C"
                         :random-seed "-S"}
                        options
                        [""])))
;; Building clusterers
(defmacro make-clusterer-m
@ -38,6 +49,7 @@
opts# (make-clusterer-options ~kind options-read#)]
(.setOptions clusterer# opts#)
(when (not (empty? (get options-read# :distance-function)))
;; We have to setup a different distance function
(let [dist# (get options-read# :distance-function)
real-dist# (if (map? dist#)
(make-distance-function (first (keys dist#))
@ -55,6 +67,10 @@
([kind & options]
(make-clusterer-m kind SimpleKMeans options)))
;; Builds a Cobweb clusterer: expands make-clusterer-m with the
;; weka.clusterers Cobweb class and whatever options were supplied.
(defmethod make-clusterer :cobweb
  [kind & options]
  (make-clusterer-m kind Cobweb options))
;; Clustering data
@ -63,6 +79,18 @@
([clusterer dataset]
(.buildClusterer clusterer dataset)))
(defn clusterer-update
  "If the clusterer is updateable it updates the clusterer with the given
   instance or set of instances.

   Arguments:
     clusterer   - a clusterer object exposing .updateClusterer and
                   .updateFinished
     instance-s  - either a single instance or a dataset; when it is a
                   dataset (as decided by is-dataset?) every instance in it
                   is fed to the clusterer in order

   After all instances have been submitted .updateFinished is invoked once.
   Returns the same clusterer object that was passed in."
  ([clusterer instance-s]
     (if (is-dataset? instance-s)
       ;; doseq, not for: `for` builds a lazy sequence, and since its result
       ;; was never consumed the updates were silently skipped.
       (doseq [i (dataset-seq instance-s)]
         (.updateClusterer clusterer i))
       (.updateClusterer clusterer instance-s))
     (.updateFinished clusterer)
     clusterer))
;; Retrieving information from a clusterer
(defmulti clusterer-info
@ -74,7 +102,9 @@
"Accepts a k-means clusterer
Returns a map with:
:number-clusters The number of clusters in the clusterer
:centroids Map with the identifier and the centroid values for each cluster"
:centroids Map with the identifier and the centroid values for each cluster
:cluster-sizes Number of data points classified in each cluster
:squared-error Minimized squared error"
{:number-clusters (.numberOfClusters clusterer)
:centroids (second
(reduce (fn [acum item]
@ -96,7 +126,7 @@
;; Evaluating clusterers
(defn- collect-evaluation-results
"Collects all the statistics from the evaluation of a classifier"
"Collects all the statistics from the evaluation of a clusterer"
([evaluation]
(do
(println "hola?")
@ -121,9 +151,9 @@
evl))]
(.evaluateClusterer evaluation test-data)
(println (.clusterResultsToString evaluation))
; evaluation)))
(collect-evaluation-results evaluation))))
;; Clustering collections
(defn clusterer-cluster

View file

@ -36,3 +36,24 @@
(deftest make-clusterer-with-distance
  ;; A k-means clusterer built with a :manhattan distance-function spec must
  ;; report weka.core.ManhattanDistance as its distance function.
  (let [distance-spec {:manhattan {:attributes [0 1 2]}}
        clusterer (clj-ml.clusterers/make-clusterer :k-means {:distance-function distance-spec})
        installed (.getDistanceFunction clusterer)]
    (is (= weka.core.ManhattanDistance (class installed)))))
(deftest test-make-cobweb
  ;; Smoke test: creating a :cobweb clusterer and building it on a small
  ;; two-attribute dataset must complete without throwing. The final
  ;; assertion is trivially true; only the absence of an exception is checked.
  (let [training-data (make-dataset :test [:a :b] [[1 2] [3 4]])
        cobweb (make-clusterer :cobweb)]
    (clusterer-build cobweb training-data)
    (is true)))
(deftest test-update-clusterer-cobweb
  ;; Smoke test: a :cobweb clusterer built on an empty dataset accepts a
  ;; single-instance update. Only the absence of an exception is checked.
  (let [empty-data (make-dataset :test [:a :b] [])
        cobweb (make-clusterer :cobweb)]
    (clusterer-build cobweb empty-data)
    (let [new-instance (clj-ml.data/make-instance empty-data [1 2])]
      (clusterer-update cobweb new-instance))
    (is true)))
(deftest test-update-clusterer-cobweb-many-instances
  ;; Smoke test: a :cobweb clusterer built on an empty dataset accepts an
  ;; update given as a whole dataset. Only the absence of an exception is
  ;; checked; the assertion does not inspect the clusterer's state.
  (let [empty-data (make-dataset :test [:a :b] [])
        cobweb (make-clusterer :cobweb)
        batch (make-dataset :test [:a :b] [[1 2] [3 4]])]
    (clusterer-build cobweb empty-data)
    (clusterer-update cobweb batch)
    (is true)))