Updateable clusterers

This commit is contained in:
Antonio Garrote 2010-03-03 16:37:06 +01:00
parent 2670b037b2
commit b13fd3b4b4
3 changed files with 56 additions and 5 deletions

View file

@ -105,7 +105,7 @@
classifier))) classifier)))
(defn classifier-update (defn classifier-update
"If the classifier is updatable it updates the classifier with the given instance or set of instances" "If the classifier is updateable it updates the classifier with the given instance or set of instances"
([classifier instance-s] ([classifier instance-s]
(if (is-dataset? instance-s) (if (is-dataset? instance-s)
(do (for [i (dataset-seq instance-s)] (do (for [i (dataset-seq instance-s)]

View file

@ -7,7 +7,8 @@
(:use [clj-ml utils data distance-functions] (:use [clj-ml utils data distance-functions]
[incanter charts]) [incanter charts])
(:import (java.util Date Random) (:import (java.util Date Random)
(weka.clusterers ClusterEvaluation SimpleKMeans))) (weka.clusterers ClusterEvaluation SimpleKMeans Cobweb)))
;; Setting up clusterer options ;; Setting up clusterer options
@ -29,6 +30,16 @@
cols-val)] cols-val)]
(into-array cols-val-a)))) (into-array cols-val-a))))
;; Option builder for the :cobweb clusterer kind.
;; Pairs the supported option keys with their command-line flags
;; (:acuity "-A", :cutoff "-C", :random-seed "-S"), lets check-option-values
;; combine them with the user-supplied option map starting from [""], and
;; returns the result as an array (the form later passed to .setOptions).
(defmethod make-clusterer-options :cobweb
  ([kind map]
     (-> (check-option-values {:acuity "-A"
                               :cutoff "-C"
                               :random-seed "-S"}
                              map
                              [""])
         (into-array))))
;; Building clusterers ;; Building clusterers
(defmacro make-clusterer-m (defmacro make-clusterer-m
@ -38,6 +49,7 @@
opts# (make-clusterer-options ~kind options-read#)] opts# (make-clusterer-options ~kind options-read#)]
(.setOptions clusterer# opts#) (.setOptions clusterer# opts#)
(when (not (empty? (get options-read# :distance-function))) (when (not (empty? (get options-read# :distance-function)))
;; We have to setup a different distance function
(let [dist# (get options-read# :distance-function) (let [dist# (get options-read# :distance-function)
real-dist# (if (map? dist#) real-dist# (if (map? dist#)
(make-distance-function (first (keys dist#)) (make-distance-function (first (keys dist#))
@ -55,6 +67,10 @@
([kind & options] ([kind & options]
(make-clusterer-m kind SimpleKMeans options))) (make-clusterer-m kind SimpleKMeans options)))
;; make-clusterer implementation for the :cobweb kind.
;; Delegates to the make-clusterer-m macro with the imported
;; weka.clusterers.Cobweb class, in the same way the SimpleKMeans method does.
;; `options` collects every argument after `kind` and is forwarded unchanged.
(defmethod make-clusterer :cobweb
([kind & options]
(make-clusterer-m kind Cobweb options)))
;; Clustering data ;; Clustering data
@ -63,6 +79,18 @@
([clusterer dataset] ([clusterer dataset]
(.buildClusterer clusterer dataset))) (.buildClusterer clusterer dataset)))
(defn clusterer-update
  "If the clusterer is updateable, updates the clusterer with the given
   instance or set of instances and returns the clusterer.

   instance-s is either a single instance or a dataset (as decided by
   is-dataset?). For a dataset, every instance produced by dataset-seq is
   passed to .updateClusterer in order. .updateFinished is invoked once after
   all updates have been applied."
  ([clusterer instance-s]
     (if (is-dataset? instance-s)
       ;; doseq rather than for: `for` yields a lazy sequence, and a lazy
       ;; sequence that is thrown away is never realized, so the updates
       ;; would silently not happen.
       (doseq [i (dataset-seq instance-s)]
         (.updateClusterer clusterer i))
       (.updateClusterer clusterer instance-s))
     (.updateFinished clusterer)
     clusterer))
;; Retrieving information from a clusterer ;; Retrieving information from a clusterer
(defmulti clusterer-info (defmulti clusterer-info
@ -74,7 +102,9 @@
"Accepts a k-means clusterer "Accepts a k-means clusterer
Returns a map with: Returns a map with:
:number-clusters The number of clusters in the clusterer :number-clusters The number of clusters in the clusterer
:centroids Map with the identifier and the centroid values for each cluster" :centroids Map with the identifier and the centroid values for each cluster
:cluster-sizes Number of data points classified in each cluster
:squared-error Minimized squared error"
{:number-clusters (.numberOfClusters clusterer) {:number-clusters (.numberOfClusters clusterer)
:centroids (second :centroids (second
(reduce (fn [acum item] (reduce (fn [acum item]
@ -96,7 +126,7 @@
;; Evaluating clusterers ;; Evaluating clusterers
(defn- collect-evaluation-results (defn- collect-evaluation-results
"Collects all the statistics from the evaluation of a classifier" "Collects all the statistics from the evaluation of a clusterer"
([evaluation] ([evaluation]
(do (do
(println "hola?") (println "hola?")
@ -121,9 +151,9 @@
evl))] evl))]
(.evaluateClusterer evaluation test-data) (.evaluateClusterer evaluation test-data)
(println (.clusterResultsToString evaluation)) (println (.clusterResultsToString evaluation))
; evaluation)))
(collect-evaluation-results evaluation)))) (collect-evaluation-results evaluation))))
;; Clustering collections ;; Clustering collections
(defn clusterer-cluster (defn clusterer-cluster

View file

@ -36,3 +36,24 @@
(deftest make-clusterer-with-distance (deftest make-clusterer-with-distance
(let [c (clj-ml.clusterers/make-clusterer :k-means {:distance-function {:manhattan {:attributes [0 1 2]}}})] (let [c (clj-ml.clusterers/make-clusterer :k-means {:distance-function {:manhattan {:attributes [0 1 2]}}})]
(is (= weka.core.ManhattanDistance (class (.getDistanceFunction c)))))) (is (= weka.core.ManhattanDistance (class (.getDistanceFunction c))))))
;; Creating a :cobweb clusterer must yield a weka.clusterers.Cobweb instance
;; that can be built over a small two-attribute dataset without throwing.
(deftest test-make-cobweb
  (let [ds (make-dataset :test [:a :b] [[1 2] [3 4]])
        c  (make-clusterer :cobweb)]
    (clusterer-build c ds)
    (is (= weka.clusterers.Cobweb (class c)))))
;; Updating a Cobweb clusterer (built over an empty dataset) with a single
;; instance must not throw and must hand back the very same clusterer object.
(deftest test-update-clusterer-cobweb
  (let [ds (make-dataset :test [:a :b] [])
        c  (make-clusterer :cobweb)]
    (clusterer-build c ds)
    (is (identical? c (clusterer-update c (clj-ml.data/make-instance ds [1 2]))))))
;; Updating a Cobweb clusterer (built over an empty dataset) with a whole
;; dataset of instances must not throw and must hand back the very same
;; clusterer object.
(deftest test-update-clusterer-cobweb-many-instances
  (let [ds        (make-dataset :test [:a :b] [])
        c         (make-clusterer :cobweb)
        to-update (make-dataset :test [:a :b] [[1 2] [3 4]])]
    (clusterer-build c ds)
    (is (identical? c (clusterer-update c to-update)))))