Updateable clusterers
This commit is contained in:
parent
2670b037b2
commit
b13fd3b4b4
3 changed files with 56 additions and 5 deletions
|
@ -105,7 +105,7 @@
|
||||||
classifier)))
|
classifier)))
|
||||||
|
|
||||||
(defn classifier-update
|
(defn classifier-update
|
||||||
"If the classifier is updatable it updates the classifier with the given instance or set of instances"
|
"If the classifier is updateable it updates the classifier with the given instance or set of instances"
|
||||||
([classifier instance-s]
|
([classifier instance-s]
|
||||||
(if (is-dataset? instance-s)
|
(if (is-dataset? instance-s)
|
||||||
(do (for [i (dataset-seq instance-s)]
|
(do (for [i (dataset-seq instance-s)]
|
||||||
|
|
|
@ -7,7 +7,8 @@
|
||||||
(:use [clj-ml utils data distance-functions]
|
(:use [clj-ml utils data distance-functions]
|
||||||
[incanter charts])
|
[incanter charts])
|
||||||
(:import (java.util Date Random)
|
(:import (java.util Date Random)
|
||||||
(weka.clusterers ClusterEvaluation SimpleKMeans)))
|
(weka.clusterers ClusterEvaluation SimpleKMeans Cobweb)))
|
||||||
|
|
||||||
|
|
||||||
;; Setting up clusterer options
|
;; Setting up clusterer options
|
||||||
|
|
||||||
|
@ -29,6 +30,16 @@
|
||||||
cols-val)]
|
cols-val)]
|
||||||
(into-array cols-val-a))))
|
(into-array cols-val-a))))
|
||||||
|
|
||||||
|
|
||||||
|
;; Option builder for the :cobweb clusterer kind.
;; Translates the recognised keys of the user-supplied options map into
;; their command-line flags (:acuity -> "-A", :cutoff -> "-C",
;; :random-seed -> "-S") via check-option-values, seeding the accumulator
;; with [""], and returns the result as a Java array.
(defmethod make-clusterer-options :cobweb
  [kind map]
  (-> (check-option-values {:acuity "-A"
                            :cutoff "-C"
                            :random-seed "-S"}
                           map
                           [""])
      into-array))
|
||||||
|
|
||||||
;; Building clusterers
|
;; Building clusterers
|
||||||
|
|
||||||
(defmacro make-clusterer-m
|
(defmacro make-clusterer-m
|
||||||
|
@ -38,6 +49,7 @@
|
||||||
opts# (make-clusterer-options ~kind options-read#)]
|
opts# (make-clusterer-options ~kind options-read#)]
|
||||||
(.setOptions clusterer# opts#)
|
(.setOptions clusterer# opts#)
|
||||||
(when (not (empty? (get options-read# :distance-function)))
|
(when (not (empty? (get options-read# :distance-function)))
|
||||||
|
;; We have to setup a different distance function
|
||||||
(let [dist# (get options-read# :distance-function)
|
(let [dist# (get options-read# :distance-function)
|
||||||
real-dist# (if (map? dist#)
|
real-dist# (if (map? dist#)
|
||||||
(make-distance-function (first (keys dist#))
|
(make-distance-function (first (keys dist#))
|
||||||
|
@ -55,6 +67,10 @@
|
||||||
([kind & options]
|
([kind & options]
|
||||||
(make-clusterer-m kind SimpleKMeans options)))
|
(make-clusterer-m kind SimpleKMeans options)))
|
||||||
|
|
||||||
|
;; Builds a Cobweb clusterer: delegates to the make-clusterer-m macro,
;; passing the Cobweb class and the variadic options.
(defmethod make-clusterer :cobweb
  [kind & options]
  (make-clusterer-m kind Cobweb options))
|
||||||
|
|
||||||
|
|
||||||
;; Clustering data
|
;; Clustering data
|
||||||
|
|
||||||
|
@ -63,6 +79,18 @@
|
||||||
([clusterer dataset]
|
([clusterer dataset]
|
||||||
(.buildClusterer clusterer dataset)))
|
(.buildClusterer clusterer dataset)))
|
||||||
|
|
||||||
|
(defn clusterer-update
  "If the clusterer is updateable it updates the clusterer with the given
   instance or set of instances.

   instance-s may be either a dataset (as recognised by is-dataset?), in
   which case every instance returned by dataset-seq is fed to the
   clusterer, or a single instance.

   After the instance(s) have been added, .updateFinished is invoked on the
   clusterer. Returns the clusterer."
  ([clusterer instance-s]
     (if (is-dataset? instance-s)
       ;; doseq is eager; a lazy `for` whose result is discarded would never
       ;; run the .updateClusterer side effects.
       (doseq [i (dataset-seq instance-s)]
         (.updateClusterer clusterer i))
       (.updateClusterer clusterer instance-s))
     ;; Signal the end of the update batch on both paths.
     (.updateFinished clusterer)
     clusterer))
|
||||||
|
|
||||||
;; Retrieving information from a clusterer
|
;; Retrieving information from a clusterer
|
||||||
|
|
||||||
(defmulti clusterer-info
|
(defmulti clusterer-info
|
||||||
|
@ -74,7 +102,9 @@
|
||||||
"Accepts a k-means clusterer
|
"Accepts a k-means clusterer
|
||||||
Returns a map with:
|
Returns a map with:
|
||||||
:number-clusters The number of clusters in the clusterer
|
:number-clusters The number of clusters in the clusterer
|
||||||
:centroids Map with the identifier and the centroid values for each cluster"
|
:centroids Map with the identifier and the centroid values for each cluster
|
||||||
|
:cluster-sizes Number of data points classified in each cluster
|
||||||
|
:squared-error Minimized squared error"
|
||||||
{:number-clusters (.numberOfClusters clusterer)
|
{:number-clusters (.numberOfClusters clusterer)
|
||||||
:centroids (second
|
:centroids (second
|
||||||
(reduce (fn [acum item]
|
(reduce (fn [acum item]
|
||||||
|
@ -96,7 +126,7 @@
|
||||||
;; Evaluating clusterers
|
;; Evaluating clusterers
|
||||||
|
|
||||||
(defn- collect-evaluation-results
|
(defn- collect-evaluation-results
|
||||||
"Collects all the statistics from the evaluation of a classifier"
|
"Collects all the statistics from the evaluation of a clusterer"
|
||||||
([evaluation]
|
([evaluation]
|
||||||
(do
|
(do
|
||||||
(println "hola?")
|
(println "hola?")
|
||||||
|
@ -121,9 +151,9 @@
|
||||||
evl))]
|
evl))]
|
||||||
(.evaluateClusterer evaluation test-data)
|
(.evaluateClusterer evaluation test-data)
|
||||||
(println (.clusterResultsToString evaluation))
|
(println (.clusterResultsToString evaluation))
|
||||||
; evaluation)))
|
|
||||||
(collect-evaluation-results evaluation))))
|
(collect-evaluation-results evaluation))))
|
||||||
|
|
||||||
|
|
||||||
;; Clustering collections
|
;; Clustering collections
|
||||||
|
|
||||||
(defn clusterer-cluster
|
(defn clusterer-cluster
|
||||||
|
|
|
@ -36,3 +36,24 @@
|
||||||
(deftest make-clusterer-with-distance
|
(deftest make-clusterer-with-distance
|
||||||
(let [c (clj-ml.clusterers/make-clusterer :k-means {:distance-function {:manhattan {:attributes [0 1 2]}}})]
|
(let [c (clj-ml.clusterers/make-clusterer :k-means {:distance-function {:manhattan {:attributes [0 1 2]}}})]
|
||||||
(is (= weka.core.ManhattanDistance (class (.getDistanceFunction c))))))
|
(is (= weka.core.ManhattanDistance (class (.getDistanceFunction c))))))
|
||||||
|
|
||||||
|
;; Smoke test: a :cobweb clusterer can be created and built on a small
;; two-attribute dataset.
(deftest test-make-cobweb
  (let [dataset (make-dataset :test [:a :b] [[1 2] [3 4]])
        cobweb (make-clusterer :cobweb)]
    (clusterer-build cobweb dataset)
    ;; No property is asserted; reaching this line means nothing above threw.
    (is true)))
|
||||||
|
|
||||||
|
;; Smoke test: a :cobweb clusterer built on an empty dataset can be updated
;; with a single instance.
(deftest test-update-clusterer-cobweb
  (let [empty-ds (make-dataset :test [:a :b] [])
        cobweb (make-clusterer :cobweb)]
    (clusterer-build cobweb empty-ds)
    (let [single-instance (clj-ml.data/make-instance empty-ds [1 2])]
      (clusterer-update cobweb single-instance))
    ;; No property is asserted; reaching this line means nothing above threw.
    (is true)))
|
||||||
|
|
||||||
|
;; Smoke test: a :cobweb clusterer built on an empty dataset can be updated
;; with a whole dataset of instances.
(deftest test-update-clusterer-cobweb-many-instances
  (let [empty-ds (make-dataset :test [:a :b] [])
        new-instances (make-dataset :test [:a :b] [[1 2] [3 4]])
        cobweb (make-clusterer :cobweb)]
    (clusterer-build cobweb empty-ds)
    (clusterer-update cobweb new-instances)
    ;; No property is asserted; reaching this line means nothing above threw.
    (is true)))
|
||||||
|
|
Loading…
Reference in a new issue