More functions for working with clusters and display of cluster and k-means centroids
This commit is contained in:
parent
5d6d85fc0e
commit
fba9f97845
4 changed files with 154 additions and 15 deletions
|
@ -4,9 +4,10 @@
|
||||||
;;
|
;;
|
||||||
|
|
||||||
(ns clj-ml.clusterers
|
(ns clj-ml.clusterers
|
||||||
(:use [clj-ml utils data])
|
(:use [clj-ml utils data ui]
|
||||||
|
[incanter charts])
|
||||||
(:import (java.util Date Random)
|
(:import (java.util Date Random)
|
||||||
(weka.clusterers SimpleKMeans)))
|
(weka.clusterers ClusterEvaluation SimpleKMeans)))
|
||||||
|
|
||||||
;; Setting up clusterer options
|
;; Setting up clusterer options
|
||||||
|
|
||||||
|
@ -54,3 +55,95 @@
|
||||||
"Applies a clustering algorithm to a set of data"
|
"Applies a clustering algorithm to a set of data"
|
||||||
([clusterer dataset]
|
([clusterer dataset]
|
||||||
(.buildClusterer clusterer dataset)))
|
(.buildClusterer clusterer dataset)))
|
||||||
|
|
||||||
|
;; Retrieving information from a clusterer
|
||||||
|
|
||||||
|
(defmulti clusterer-info
  "Returns information extracted from a clusterer. The contents of the result
   depend on the clustering algorithm; dispatch is on the clusterer's class."
  class)
|
||||||
|
|
||||||
|
;; clusterer-info for k-means clusterers.
;; Accepts a k-means clusterer.
;; Returns a map with:
;;  :number-clusters  The number of clusters in the clusterer
;;  :centroids        Map from a 0-based cluster identifier to the centroid
;;                    values (the elements of dataset-seq over getClusterCentroids)
;;  :cluster-sizes    Map from cluster identifier to the matching entry of getClusterSizes
;;  :squared-error    The value reported by getSquaredError
;; (defmethod has no docstring slot, so this description lives in a comment
;; instead of a string expression that would be evaluated and thrown away.)
(defmethod clusterer-info SimpleKMeans
  ([clusterer]
     (let [n     (.numberOfClusters clusterer)
           sizes (.getClusterSizes clusterer)
           idxs  (range 0 n)]
       {:number-clusters n
        ;; number the centroids 0, 1, 2, ... in the order dataset-seq yields them
        :centroids (zipmap (iterate inc 0)
                           (dataset-seq (.getClusterCentroids clusterer)))
        :cluster-sizes (zipmap idxs (map #(aget sizes %) idxs))
        :squared-error (.getSquaredError clusterer)})))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
;; Evaluating clusterers
|
||||||
|
|
||||||
|
(defmulti clusterer-evaluate
  "Evaluates a trained clusterer. The second argument selects the evaluation
   mode and is the dispatch value; any further arguments are the evaluation
   data required by that mode."
  (fn [_clusterer mode & _evaluation-data] mode))
|
||||||
|
|
||||||
|
;; :dataset mode: evaluates the clusterer against the first extra argument,
;; prints the textual results and returns the ClusterEvaluation object.
(defmethod clusterer-evaluate :dataset
  ([clusterer mode & evaluation-data]
     (let [test-data  (first evaluation-data)
           evaluation (doto (ClusterEvaluation.)
                        (.setClusterer clusterer))]
       (.evaluateClusterer evaluation test-data)
       (println (.clusterResultsToString evaluation))
       evaluation)))
|
||||||
|
|
||||||
|
;; Clustering collections
|
||||||
|
|
||||||
|
(defn clusterer-cluster
  "Builds a new dataset from the given one in which every instance carries an
   extra :class value: the keyword form of the cluster index assigned to it by
   the clusterer. The new dataset's class is set to that added attribute."
  ([clusterer dataset]
     (let [base-attrs     (clj-ml.data/dataset-attributes-definition dataset)
           ;; one nominal label per cluster: :0, :1, ...
           cluster-labels (map #(keyword (str %1))
                               (range 0 (.numberOfClusters clusterer)))
           attributes     (conj base-attrs {:class cluster-labels})
           label-instance (fn [inst]
                            (conj (instance-to-vector inst)
                                  (keyword (str (.clusterInstance clusterer inst)))))
           clustered      (map label-instance (dataset-seq dataset))
           nds            (make-dataset (keyword (str "clustered " (dataset-name dataset)))
                                        attributes
                                        clustered)]
       (dataset-set-class nds (dec (count attributes)))
       nds)))
|
||||||
|
|
||||||
|
;; visualization
|
||||||
|
|
||||||
|
(defmulti clusterer-display-for-attributes
  "Displays a clusterer over a pair of dataset attributes. Dispatches on the
   class of the clusterer. Any arguments after attribute-y are passed through
   to the selected method as visualization options."
  ;; The dispatch fn must accept the optional trailing arguments too, otherwise
  ;; calls that supply visualization options fail with an arity error before
  ;; any method is reached.
  (fn [clusterer dataset attribute-x attribute-y & visualization-options]
    (class clusterer)))
|
||||||
|
|
||||||
|
;; k-means implementation: draws the per-class scatter plot of the dataset for
;; the two attributes and adds one text pointer per centroid.
;; attribute-x / attribute-y may be keywords (resolved to an index) or indexes.
;; An optional trailing options map is forwarded to
;; dataset-display-class-for-attributes; {:visualize false} returns the plot
;; without displaying it. Returns the plot in both cases.
(defmethod clusterer-display-for-attributes SimpleKMeans
  ([clusterer dataset attribute-x attribute-y & visualization-options]
     (let [attr-x (if (keyword? attribute-x) (instance-index-attr dataset attribute-x) attribute-x)
           attr-y (if (keyword? attribute-y) (instance-index-attr dataset attribute-y) attribute-y)
           opts (first-or-default visualization-options {})
           ;; Read the flag from the options map: visualization-options is the
           ;; rest-args seq, and a keyword lookup on a seq always yields nil.
           display? (not (= (get opts :visualize) false))
           ;; Build the base plot without showing it, so the centroid pointers
           ;; are added before anything is displayed.
           plot (dataset-display-class-for-attributes dataset attribute-x attribute-y
                                                      (conj opts {:visualize false}))
           centroids (:centroids (clusterer-info clusterer))]
       (doseq [[k centroid] centroids]
         (let [val-x (instance-value-at centroid attr-x)
               val-y (instance-value-at centroid attr-y)]
           (add-pointer plot val-x val-y :text (str "centroid " k " (" (float val-x) "," (float val-y) ")"))))
       (if display?
         (visualize-plot plot)
         plot))))
|
||||||
|
|
|
@ -4,14 +4,14 @@
|
||||||
;;
|
;;
|
||||||
|
|
||||||
(ns clj-ml.data
|
(ns clj-ml.data
|
||||||
(:use [clj-ml utils])
|
(:use [clj-ml utils ui])
|
||||||
(:import (weka.core Instance Instances FastVector Attribute)
|
(:import (weka.core Instance Instances FastVector Attribute)
|
||||||
(cljml ClojureInstances)))
|
(cljml ClojureInstances)))
|
||||||
|
|
||||||
|
|
||||||
;; Construction of individual data and datasets
|
;; Construction of individual data and datasets
|
||||||
|
|
||||||
(defn attribute-name-at- [dataset-or-instance pos]
|
(defn attribute-name-at [dataset-or-instance pos]
|
||||||
(let [class-attr (.attribute dataset-or-instance pos)]
|
(let [class-attr (.attribute dataset-or-instance pos)]
|
||||||
(.name class-attr)))
|
(.name class-attr)))
|
||||||
|
|
||||||
|
@ -21,10 +21,17 @@
|
||||||
(loop [c 0]
|
(loop [c 0]
|
||||||
(if (= c max)
|
(if (= c max)
|
||||||
(throw (.Exception (str "Attribute " attrs " not found")))
|
(throw (.Exception (str "Attribute " attrs " not found")))
|
||||||
(if (= attrs (attribute-name-at- dataset-or-instance c))
|
(if (= attrs (attribute-name-at dataset-or-instance c))
|
||||||
c
|
c
|
||||||
(recur (+ c 1 )))))))
|
(recur (+ c 1 )))))))
|
||||||
|
|
||||||
|
(defn dataset-index-attr
  "Looks up the index of the attribute attr in a dataset by delegating to index-attr"
  ([dataset attr]
     (index-attr dataset attr)))
|
||||||
|
|
||||||
|
(defn instance-index-attr
  "Looks up the index of the attribute attr in an instance by delegating to index-attr"
  ([instance attr]
     (index-attr instance attr)))
|
||||||
|
|
||||||
|
|
||||||
(defn make-instance
|
(defn make-instance
|
||||||
"Creates a new dataset instance from a vector"
|
"Creates a new dataset instance from a vector"
|
||||||
([dataset vector]
|
([dataset vector]
|
||||||
|
@ -153,6 +160,10 @@
|
||||||
(+ c 1)))
|
(+ c 1)))
|
||||||
acum))))
|
acum))))
|
||||||
|
|
||||||
|
(defn dataset-get-class
  "Returns the index of the class attribute for this dataset"
  [dataset]
  (.classIndex dataset))
|
||||||
|
|
||||||
;; manipulation of instances
|
;; manipulation of instances
|
||||||
|
|
||||||
(defn instance-set-class [instance pos]
|
(defn instance-set-class [instance pos]
|
||||||
|
@ -256,3 +267,19 @@
|
||||||
"Removes and returns the first instance in the dataset"
|
"Removes and returns the first instance in the dataset"
|
||||||
[dataset]
|
[dataset]
|
||||||
(dataset-extract-at dataset 0))
|
(dataset-extract-at dataset 0))
|
||||||
|
|
||||||
|
;; visualization
|
||||||
|
|
||||||
|
(defn dataset-display-numeric-attributes
  "Displays the provided attributes into a box plot.
   attributes is a collection of attribute keywords and/or indexes; keywords
   are resolved to indexes with index-attr. An optional trailing map of
   visualization options is handed to display-object."
  [dataset attributes & visualization-options]
  (let [attr (map #(if (keyword? %1) (index-attr dataset %1) %1) attributes)
        options (first-or-default visualization-options {})]
    (display-object :dataset :boxplot {:dataset dataset :cols attr} options)))
|
||||||
|
|
||||||
|
(defn dataset-display-class-for-attributes
  "Displays how a pair of attributes are distributed for each class.
   attribute-x and attribute-y may be keywords (resolved with index-attr) or
   indexes. Points are grouped by the dataset's class index. An optional
   trailing map of visualization options is handed to display-object."
  [dataset attribute-x attribute-y & visualization-options]
  (let [attr-x (if (keyword? attribute-x) (index-attr dataset attribute-x) attribute-x)
        attr-y (if (keyword? attribute-y) (index-attr dataset attribute-y) attribute-y)
        opts (first-or-default visualization-options {})
        class-index (dataset-get-class dataset)]
    (display-object :dataset :scatter-plot
                    {:dataset dataset :cols [attr-x attr-y] :group-by class-index}
                    opts)))
|
||||||
|
|
|
@ -7,6 +7,12 @@
|
||||||
(:use (incanter core stats charts)
|
(:use (incanter core stats charts)
|
||||||
(clj-ml data utils)))
|
(clj-ml data utils)))
|
||||||
|
|
||||||
|
(defn visualize-plot
  "Prepare a plot to be displayed: clears its background, opens it with view
   and returns the plot."
  [plot]
  (doto plot
    (clear-background)
    (view)))
|
||||||
|
|
||||||
(defmulti display-object
|
(defmulti display-object
|
||||||
"Displays some kind of clj-ml object"
|
"Displays some kind of clj-ml object"
|
||||||
(fn [kind chart data opts] [kind chart]))
|
(fn [kind chart data opts] [kind chart]))
|
||||||
|
@ -24,17 +30,18 @@
|
||||||
{}
|
{}
|
||||||
cols)
|
cols)
|
||||||
title (or (get display-opts :title) (str "Dataset '" (dataset-name dataset) "' Box Plot"))
|
title (or (get display-opts :title) (str "Dataset '" (dataset-name dataset) "' Box Plot"))
|
||||||
legend (or (get display-opts :legend) true)]
|
legend (if (nil? (get display-opts :legend)) true (get display-opts :legend))
|
||||||
|
should-display (get display-opts :visualize)]
|
||||||
(loop [plot nil
|
(loop [plot nil
|
||||||
ks (keys vals-map)]
|
ks (keys vals-map)]
|
||||||
(if (empty? ks)
|
(if (empty? ks)
|
||||||
(do
|
(if should-display
|
||||||
(view plot)
|
(visualize-plot plot)
|
||||||
plot)
|
plot)
|
||||||
(let [this-val (get vals-map (first ks))
|
(let [this-val (get vals-map (first ks))
|
||||||
the-plot (if (nil? plot)
|
the-plot (if (nil? plot)
|
||||||
(box-plot this-val :title title)
|
(box-plot this-val :title title :legend legend :series-label (key-to-str (first ks)))
|
||||||
(do (add-box-plot plot this-val)
|
(do (add-box-plot plot this-val :series-label (key-to-str (first ks)))
|
||||||
plot))]
|
plot))]
|
||||||
(recur the-plot (rest ks))))))))
|
(recur the-plot (rest ks))))))))
|
||||||
|
|
||||||
|
@ -73,12 +80,13 @@
|
||||||
title (or (get display-opts :title) (str "Dataset '" (dataset-name dataset) "' Scatter Plot ("
|
title (or (get display-opts :title) (str "Dataset '" (dataset-name dataset) "' Scatter Plot ("
|
||||||
(key-to-str (nth cols-names col-0)) " vs "
|
(key-to-str (nth cols-names col-0)) " vs "
|
||||||
(key-to-str (nth cols-names col-1)) ")"))
|
(key-to-str (nth cols-names col-1)) ")"))
|
||||||
legend (or (get display-opts :legend) true)]
|
legend (if (nil? (get display-opts :legend)) true (get display-opts :legend))
|
||||||
|
should-display (get display-opts :visualize)]
|
||||||
(loop [plot nil
|
(loop [plot nil
|
||||||
ks (keys folded-points)]
|
ks (keys folded-points)]
|
||||||
(if (empty? ks)
|
(if (empty? ks)
|
||||||
(do
|
(if should-display
|
||||||
(view plot)
|
(visualize-plot plot)
|
||||||
plot)
|
plot)
|
||||||
(let [this-vals (get folded-points (first ks))
|
(let [this-vals (get folded-points (first ks))
|
||||||
this-val-0 (get this-vals col-0)
|
this-val-0 (get this-vals col-0)
|
||||||
|
@ -87,8 +95,10 @@
|
||||||
(scatter-plot this-val-0 this-val-1
|
(scatter-plot this-val-0 this-val-1
|
||||||
:title title
|
:title title
|
||||||
:x-label (key-to-str (nth cols-names col-0))
|
:x-label (key-to-str (nth cols-names col-0))
|
||||||
:y-label (key-to-str (nth cols-names col-1)))
|
:y-label (key-to-str (nth cols-names col-1))
|
||||||
(do (add-points plot this-val-0 this-val-1)
|
:series-label (key-to-str (first ks))
|
||||||
|
:legend legend)
|
||||||
|
(do (add-points plot this-val-0 this-val-1 :series-label (key-to-str (first ks)))
|
||||||
plot))]
|
plot))]
|
||||||
(recur the-plot (rest ks))))))))
|
(recur the-plot (rest ks))))))))
|
||||||
|
|
||||||
|
@ -97,6 +107,8 @@
|
||||||
|
|
||||||
;(defn load-test-from-slime []
|
;(defn load-test-from-slime []
|
||||||
; (do
|
; (do
|
||||||
|
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/joda-time-1.6.jar")
|
||||||
|
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/opencsv-2.0.1.jar")
|
||||||
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/classes/")
|
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/classes/")
|
||||||
; (add-classpath "file:///Applications/weka-3-6-2/weka.jar")
|
; (add-classpath "file:///Applications/weka-3-6-2/weka.jar")
|
||||||
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/src/")
|
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/src/")
|
||||||
|
|
|
@ -17,6 +17,13 @@
|
||||||
(let [sk (str k)]
|
(let [sk (str k)]
|
||||||
(.substring sk 1)))))
|
(.substring sk 1)))))
|
||||||
|
|
||||||
|
(defn first-or-default
  "Gives back the head of col, or default when col has no elements (or is nil)"
  ([col default]
     (if (seq col)
       (first col)
       default)))
|
||||||
|
|
||||||
;; Manipulation of array of options
|
;; Manipulation of array of options
|
||||||
|
|
||||||
(defn check-option [opts val flag map]
|
(defn check-option [opts val flag map]
|
||||||
|
|
Loading…
Reference in a new issue