Visualization of more objects, clustering of collections and good clusterers evaluation
This commit is contained in:
parent
fba9f97845
commit
00ff6b1bc4
6 changed files with 109 additions and 75 deletions
|
@ -103,20 +103,6 @@
|
||||||
|
|
||||||
;; Evaluating classifiers
|
;; Evaluating classifiers
|
||||||
|
|
||||||
(defn- try-metric [f]
|
|
||||||
(try (f)
|
|
||||||
(catch Exception ex {:nan (.getMessage ex)})))
|
|
||||||
|
|
||||||
(defn- try-multiple-values-metric [class-values f]
|
|
||||||
(loop [acum {}
|
|
||||||
ks (keys class-values)]
|
|
||||||
(if (empty? ks)
|
|
||||||
acum
|
|
||||||
(let [index (get class-values (first ks))
|
|
||||||
val (f index)]
|
|
||||||
(recur (conj acum {(first ks) val})
|
|
||||||
(rest ks))))))
|
|
||||||
|
|
||||||
(defn- collect-evaluation-results
|
(defn- collect-evaluation-results
|
||||||
"Collects all the statistics from the evaluation of a classifier"
|
"Collects all the statistics from the evaluation of a classifier"
|
||||||
([class-values evaluation]
|
([class-values evaluation]
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
;;
|
;;
|
||||||
|
|
||||||
(ns clj-ml.clusterers
|
(ns clj-ml.clusterers
|
||||||
(:use [clj-ml utils data ui]
|
(:use [clj-ml utils data]
|
||||||
[incanter charts])
|
[incanter charts])
|
||||||
(:import (java.util Date Random)
|
(:import (java.util Date Random)
|
||||||
(weka.clusterers ClusterEvaluation SimpleKMeans)))
|
(weka.clusterers ClusterEvaluation SimpleKMeans)))
|
||||||
|
@ -88,8 +88,22 @@
|
||||||
|
|
||||||
;; Evaluating clusterers
|
;; Evaluating clusterers
|
||||||
|
|
||||||
|
;; Builds the result map for a clusterer evaluation. Every metric goes through
|
||||||
|
;; try-metric, so a metric that throws is reported by try-metric's fallback
|
||||||
|
;; value instead of aborting the whole evaluation.
|
||||||
|
(defn- collect-evaluation-results
|
||||||
|
  "Collects all the statistics from the evaluation of a clusterer"
|
||||||
|
  ([evaluation]
|
||||||
|
     ;; Print the textual summary of the evaluation results.
|
||||||
|
     (println (.clusterResultsToString evaluation))
|
||||||
|
     ;; :classes-to-clusters maps each cluster index to the entry found at that
|
||||||
|
     ;; index of the array returned by getClassesToClusters (fetched only once).
|
||||||
|
     {:classes-to-clusters (try-metric
|
||||||
|
                            #(let [assignments (.getClassesToClusters evaluation)]
|
||||||
|
                               (into {}
|
||||||
|
                                     (map (fn [i] [i (aget assignments i)])
|
||||||
|
                                          (range (.getNumClusters evaluation))))))
|
||||||
|
      :log-likelihood (try-metric #(.getLogLikelihood evaluation))
|
||||||
|
      ;; The raw evaluation object is kept for callers that need more detail.
|
||||||
|
      :evaluation-object evaluation}))
|
||||||
|
|
||||||
|
|
||||||
(defmulti clusterer-evaluate
|
(defmulti clusterer-evaluate
|
||||||
"Evaluetes a trained clusterer using the provided dataset or cross-validation"
|
"Evaluates a trained clusterer using the provided dataset or cross-validation"
|
||||||
(fn [clusterer mode & evaluation-data] mode))
|
(fn [clusterer mode & evaluation-data] mode))
|
||||||
|
|
||||||
(defmethod clusterer-evaluate :dataset
|
(defmethod clusterer-evaluate :dataset
|
||||||
|
@ -100,7 +114,8 @@
|
||||||
evl))]
|
evl))]
|
||||||
(.evaluateClusterer evaluation test-data)
|
(.evaluateClusterer evaluation test-data)
|
||||||
(println (.clusterResultsToString evaluation))
|
(println (.clusterResultsToString evaluation))
|
||||||
evaluation)))
|
; evaluation)))
|
||||||
|
(collect-evaluation-results evaluation))))
|
||||||
|
|
||||||
;; Clustering collections
|
;; Clustering collections
|
||||||
|
|
||||||
|
@ -117,33 +132,3 @@
|
||||||
clustered)]
|
clustered)]
|
||||||
(dataset-set-class nds (- (count attributes) 1))
|
(dataset-set-class nds (- (count attributes) 1))
|
||||||
nds)))
|
nds)))
|
||||||
|
|
||||||
;; visualization
|
|
||||||
|
|
||||||
(defmulti clusterer-display-for-attributes
|
|
||||||
(fn [clusterer dataset attribute-x attribute-y] (class clusterer)))
|
|
||||||
|
|
||||||
(defmethod clusterer-display-for-attributes SimpleKMeans
|
|
||||||
([clusterer dataset attribute-x attribute-y & visualization-options]
|
|
||||||
(let [attr-x (if (keyword? attribute-x) (instance-index-attr dataset attribute-x) attribute-x)
|
|
||||||
attr-y (if (keyword? attribute-y) (instance-index-attr dataset attribute-y) attribute-y)
|
|
||||||
opts (first-or-default visualization-options {})
|
|
||||||
display? (if (= (get visualization-options :visualize) false)
|
|
||||||
false
|
|
||||||
true)
|
|
||||||
true-opts (conj opts {:visualize false})
|
|
||||||
plot (dataset-display-class-for-attributes dataset attribute-x attribute-y true-opts)
|
|
||||||
info (clusterer-info clusterer)
|
|
||||||
centroids (:centroids info)]
|
|
||||||
(do
|
|
||||||
(loop [ks (keys centroids)]
|
|
||||||
(if (empty? ks)
|
|
||||||
(if display?
|
|
||||||
(visualize-plot plot)
|
|
||||||
plot)
|
|
||||||
(let [k (first ks)
|
|
||||||
centroid (get centroids k)
|
|
||||||
val-x (instance-value-at centroid attr-x)
|
|
||||||
val-y (instance-value-at centroid attr-y)]
|
|
||||||
(add-pointer plot val-x val-y :text (str "centroid " k " (" (float val-x) "," (float val-y) ")"))
|
|
||||||
(recur (rest ks)))))))))
|
|
||||||
|
|
|
@ -4,11 +4,10 @@
|
||||||
;;
|
;;
|
||||||
|
|
||||||
(ns clj-ml.data
|
(ns clj-ml.data
|
||||||
(:use [clj-ml utils ui])
|
(:use [clj-ml utils])
|
||||||
(:import (weka.core Instance Instances FastVector Attribute)
|
(:import (weka.core Instance Instances FastVector Attribute)
|
||||||
(cljml ClojureInstances)))
|
(cljml ClojureInstances)))
|
||||||
|
|
||||||
|
|
||||||
;; Construction of individual data and datasets
|
;; Construction of individual data and datasets
|
||||||
|
|
||||||
(defn attribute-name-at [dataset-or-instance pos]
|
(defn attribute-name-at [dataset-or-instance pos]
|
||||||
|
@ -267,19 +266,3 @@
|
||||||
"Removes and returns the first instance in the dataset"
|
"Removes and returns the first instance in the dataset"
|
||||||
[dataset]
|
[dataset]
|
||||||
(dataset-extract-at dataset 0))
|
(dataset-extract-at dataset 0))
|
||||||
|
|
||||||
;; visualization
|
|
||||||
|
|
||||||
(defn dataset-display-numeric-attributes [dataset attributes & visualization-options]
|
|
||||||
"Displays the provided attributes into a box plot"
|
|
||||||
(let [attr (map #(if (keyword? %1) (index-attr dataset %1) %1) attributes)
|
|
||||||
options (first-or-default visualization-options {})]
|
|
||||||
(display-object :dataset :boxplot {:dataset dataset :cols attr} options)))
|
|
||||||
|
|
||||||
(defn dataset-display-class-for-attributes [dataset attribute-x attribute-y & visualization-options]
|
|
||||||
"Displays how a pair of attributes are distributed for each class"
|
|
||||||
(let [attr-x (if (keyword? attribute-x) (index-attr dataset attribute-x) attribute-x)
|
|
||||||
attr-y (if (keyword? attribute-y) (index-attr dataset attribute-y) attribute-y)
|
|
||||||
opts (first-or-default visualization-options {})
|
|
||||||
class-index (dataset-get-class dataset)]
|
|
||||||
(display-object :dataset :scatter-plot {:dataset dataset :cols [attr-x attr-y] :group-by class-index} opts)))
|
|
||||||
|
|
|
@ -4,8 +4,9 @@
|
||||||
;;
|
;;
|
||||||
|
|
||||||
(ns clj-ml.ui
|
(ns clj-ml.ui
|
||||||
(:use (incanter core stats charts)
|
(:use (clj-ml data utils clusterers)
|
||||||
(clj-ml data utils)))
|
(incanter core stats charts)))
|
||||||
|
|
||||||
|
|
||||||
(defn visualize-plot [plot]
|
(defn visualize-plot [plot]
|
||||||
"Prepare a plot to be displayed"
|
"Prepare a plot to be displayed"
|
||||||
|
@ -55,9 +56,10 @@
|
||||||
col-1 (nth cols 1)
|
col-1 (nth cols 1)
|
||||||
group-by (get dataset-opts :group-by)
|
group-by (get dataset-opts :group-by)
|
||||||
cols-names (dataset-attributes-definition dataset)
|
cols-names (dataset-attributes-definition dataset)
|
||||||
group-vals (dataset-values-at dataset group-by)
|
group-vals (if (nil? group-by) {:no-group-by :no-class} (dataset-values-at dataset group-by))
|
||||||
acum-map (reduce (fn [acum group-val]
|
acum-map (reduce (fn [acum group-val]
|
||||||
(conj acum {(first group-val) (reduce (fn [acum x] (conj acum {x []}))
|
(conj acum {(first group-val)
|
||||||
|
(reduce (fn [acum x] (conj acum {x []}))
|
||||||
{}
|
{}
|
||||||
cols)}))
|
cols)}))
|
||||||
{}
|
{}
|
||||||
|
@ -66,15 +68,16 @@
|
||||||
(let [inst (instance-to-vector instance)
|
(let [inst (instance-to-vector instance)
|
||||||
val-0 (nth inst col-0)
|
val-0 (nth inst col-0)
|
||||||
val-1 (nth inst col-1)
|
val-1 (nth inst col-1)
|
||||||
class (nth inst group-by)]
|
class (if (nil? group-by)
|
||||||
|
:no-group-by
|
||||||
|
(nth inst group-by))]
|
||||||
(merge-with
|
(merge-with
|
||||||
(fn [a b] {col-0 (conj (get a col-0)
|
(fn [a b] {col-0 (conj (get a col-0)
|
||||||
(get b col-0))
|
(get b col-0))
|
||||||
col-1 (conj (get a col-1)
|
col-1 (conj (get a col-1)
|
||||||
(get b col-1))})
|
(get b col-1))})
|
||||||
acum
|
acum
|
||||||
{class {col-0 val-0 col-1 val-1}})
|
{class {col-0 val-0 col-1 val-1}})))
|
||||||
))
|
|
||||||
acum-map
|
acum-map
|
||||||
dataseq)
|
dataseq)
|
||||||
title (or (get display-opts :title) (str "Dataset '" (dataset-name dataset) "' Scatter Plot ("
|
title (or (get display-opts :title) (str "Dataset '" (dataset-name dataset) "' Scatter Plot ("
|
||||||
|
@ -103,6 +106,66 @@
|
||||||
(recur the-plot (rest ks))))))))
|
(recur the-plot (rest ks))))))))
|
||||||
|
|
||||||
|
|
||||||
|
;; visualization of different objects
|
||||||
|
|
||||||
|
;; The docstring precedes the parameter vector so that it is attached to the
|
||||||
|
;; var as documentation instead of being evaluated and discarded.
|
||||||
|
(defn dataset-display-numeric-attributes
|
||||||
|
  "Displays the provided attributes into a box plot"
|
||||||
|
  [dataset attributes & visualization-options]
|
||||||
|
  ;; Attributes given as keywords are resolved with index-attr; anything else is used as given.
|
||||||
|
  (let [attr (map #(if (keyword? %1) (index-attr dataset %1) %1) attributes)
|
||||||
|
        options-pre (first-or-default visualization-options {})
|
||||||
|
        ;; :visualize defaults to true when the caller did not provide it.
|
||||||
|
        options (if (nil? (:visualize options-pre)) (conj options-pre {:visualize true}) options-pre)]
|
||||||
|
    (display-object :dataset :boxplot {:dataset dataset :cols attr} options)))
|
||||||
|
|
||||||
|
;; The docstring precedes the parameter vector so that it is attached to the
|
||||||
|
;; var as documentation instead of being evaluated and discarded.
|
||||||
|
(defn dataset-display-class-for-attributes
|
||||||
|
  "Displays how a pair of attributes are distributed for each class"
|
||||||
|
  [dataset attribute-x attribute-y & visualization-options]
|
||||||
|
  ;; Attributes given as keywords are resolved with index-attr; anything else is used as given.
|
||||||
|
  (let [attr-x (if (keyword? attribute-x) (index-attr dataset attribute-x) attribute-x)
|
||||||
|
        attr-y (if (keyword? attribute-y) (index-attr dataset attribute-y) attribute-y)
|
||||||
|
        options-pre (first-or-default visualization-options {})
|
||||||
|
        ;; :visualize defaults to true when the caller did not provide it.
|
||||||
|
        opts (if (nil? (:visualize options-pre)) (conj options-pre {:visualize true}) options-pre)
|
||||||
|
        ;; The value returned by dataset-get-class is passed on as the :group-by option.
|
||||||
|
        class-index (dataset-get-class dataset)]
|
||||||
|
    (display-object :dataset :scatter-plot {:dataset dataset :cols [attr-x attr-y] :group-by class-index} opts)))
|
||||||
|
|
||||||
|
;; The docstring precedes the parameter vector so that it is attached to the
|
||||||
|
;; var as documentation instead of being evaluated and discarded.
|
||||||
|
(defn dataset-display-attributes
|
||||||
|
  "Displays the distribution of a set of attributes for a dataset"
|
||||||
|
  [dataset attribute-x attribute-y & visualization-options]
|
||||||
|
  ;; Attributes given as keywords are resolved with index-attr; anything else is used as given.
|
||||||
|
  (let [attr-x (if (keyword? attribute-x) (index-attr dataset attribute-x) attribute-x)
|
||||||
|
        attr-y (if (keyword? attribute-y) (index-attr dataset attribute-y) attribute-y)
|
||||||
|
        options-pre (first-or-default visualization-options {})
|
||||||
|
        ;; :visualize defaults to true when the caller did not provide it.
|
||||||
|
        opts (if (nil? (:visualize options-pre)) (conj options-pre {:visualize true}) options-pre)]
|
||||||
|
    ;; No :group-by is passed: the class of the dataset is not looked up or used here.
|
||||||
|
    (display-object :dataset :scatter-plot {:dataset dataset :cols [attr-x attr-y]} opts)))
|
||||||
|
|
||||||
|
|
||||||
|
;; visualization
|
||||||
|
|
||||||
|
;; Dispatches on the class of the clusterer. The dispatch function accepts the
|
||||||
|
;; optional trailing visualization options too; with a fixed arity of four, any
|
||||||
|
;; call that supplied options would fail before reaching a method.
|
||||||
|
(defmulti clusterer-display-for-attributes
|
||||||
|
  (fn [clusterer dataset attribute-x attribute-y & visualization-options] (class clusterer)))
|
||||||
|
|
||||||
|
;; Builds the class plot for two attributes of the dataset and adds one labelled
|
||||||
|
;; pointer per centroid reported by clusterer-info for a SimpleKMeans clusterer.
|
||||||
|
(defmethod clusterer-display-for-attributes SimpleKMeans
|
||||||
|
  ([clusterer dataset attribute-x attribute-y & visualization-options]
|
||||||
|
     (let [attr-x (if (keyword? attribute-x) (instance-index-attr dataset attribute-x) attribute-x)
|
||||||
|
           attr-y (if (keyword? attribute-y) (instance-index-attr dataset attribute-y) attribute-y)
|
||||||
|
           opts (first-or-default visualization-options {})
|
||||||
|
           ;; Read :visualize from the options map: a keyword lookup on the
|
||||||
|
           ;; rest-args sequence always yields nil, which ignored :visualize false.
|
||||||
|
           display? (not= (get opts :visualize) false)
|
||||||
|
           ;; The base plot is requested with :visualize false so the centroids can be added first.
|
||||||
|
           true-opts (conj opts {:visualize false})
|
||||||
|
           plot (dataset-display-class-for-attributes dataset attribute-x attribute-y true-opts)
|
||||||
|
           centroids (:centroids (clusterer-info clusterer))]
|
||||||
|
       ;; Annotate the plot with one pointer per centroid.
|
||||||
|
       (doseq [k (keys centroids)]
|
||||||
|
         (let [centroid (get centroids k)
|
||||||
|
               val-x (instance-value-at centroid attr-x)
|
||||||
|
               val-y (instance-value-at centroid attr-y)]
|
||||||
|
           (add-pointer plot val-x val-y :text (str "centroid " k " (" (float val-x) "," (float val-y) ")"))))
|
||||||
|
       ;; Hand the plot to visualize-plot unless :visualize false was requested; otherwise return it.
|
||||||
|
       (if display?
|
||||||
|
         (visualize-plot plot)
|
||||||
|
         plot))))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
;; Things to load to test this from slime
|
;; Things to load to test this from slime
|
||||||
|
|
||||||
;(defn load-test-from-slime []
|
;(defn load-test-from-slime []
|
||||||
|
|
|
@ -24,6 +24,23 @@
|
||||||
default
|
default
|
||||||
(first col))))
|
(first col))))
|
||||||
|
|
||||||
|
;; trying metrics
|
||||||
|
|
||||||
|
(defn try-metric
|
||||||
|
  "Calls the zero-argument function f and returns its result; if f throws an
|
||||||
|
  Exception, returns a map with the exception message under the :nan key"
|
||||||
|
  [f]
|
||||||
|
  (try
|
||||||
|
    (f)
|
||||||
|
    (catch Exception e
|
||||||
|
      {:nan (.getMessage e)})))
|
||||||
|
|
||||||
|
(defn try-multiple-values-metric
|
||||||
|
  "Applies f to every value stored in the class-values map and returns a map
|
||||||
|
  from each key of class-values to the corresponding result"
|
||||||
|
  [class-values f]
|
||||||
|
  (reduce (fn [results label]
|
||||||
|
            (assoc results label (f (get class-values label))))
|
||||||
|
          {}
|
||||||
|
          (keys class-values)))
|
||||||
|
|
||||||
|
|
||||||
;; Manipulation of array of options
|
;; Manipulation of array of options
|
||||||
|
|
||||||
(defn check-option [opts val flag map]
|
(defn check-option [opts val flag map]
|
||||||
|
|
|
@ -27,7 +27,7 @@
|
||||||
"1"))))
|
"1"))))
|
||||||
|
|
||||||
|
|
||||||
(deftest make-and-buld-classifier
|
(deftest make-and-build-classifier
|
||||||
(let [ds (make-dataset :test [:a :b] [[1 2] [3 4]])
|
(let [ds (make-dataset :test [:a :b] [[1 2] [3 4]])
|
||||||
c (make-clusterer :k-means)]
|
c (make-clusterer :k-means)]
|
||||||
(clusterer-build c ds)
|
(clusterer-build c ds)
|
||||||
|
|
Loading…
Reference in a new issue