Visualization of more objects, clustering of collections and good clusterers evaluation

This commit is contained in:
Antonio Garrote 2010-03-02 21:24:11 +01:00
parent fba9f97845
commit 00ff6b1bc4
6 changed files with 109 additions and 75 deletions

View file

@ -103,20 +103,6 @@
;; Evaluating classifiers ;; Evaluating classifiers
(defn- try-metric
  "Invokes the zero-argument function `f` and returns its result.
  When `f` throws an Exception, returns the map {:nan <exception message>}
  instead of propagating the error."
  [f]
  (try
    (f)
    (catch Exception e
      {:nan (.getMessage e)})))
(defn- try-multiple-values-metric
  "Builds a map from every key of `class-values` to the result of applying
  `f` to the value stored under that key in `class-values`."
  [class-values f]
  (reduce (fn [results label]
            (assoc results label (f (get class-values label))))
          {}
          (keys class-values)))
(defn- collect-evaluation-results (defn- collect-evaluation-results
"Collects all the statistics from the evaluation of a classifier" "Collects all the statistics from the evaluation of a classifier"
([class-values evaluation] ([class-values evaluation]

View file

@ -4,7 +4,7 @@
;; ;;
(ns clj-ml.clusterers (ns clj-ml.clusterers
(:use [clj-ml utils data ui] (:use [clj-ml utils data]
[incanter charts]) [incanter charts])
(:import (java.util Date Random) (:import (java.util Date Random)
(weka.clusterers ClusterEvaluation SimpleKMeans))) (weka.clusterers ClusterEvaluation SimpleKMeans)))
@ -88,8 +88,22 @@
;; Evaluating clusterers ;; Evaluating clusterers
(defn- collect-evaluation-results
  "Collects the statistics from the evaluation of a clusterer.

  Prints the textual cluster results of `evaluation` and returns a map with:
    :classes-to-clusters  map from each index in [0, number of clusters) to
                          the entry at that index of the array returned by
                          .getClassesToClusters
    :log-likelihood       result of .getLogLikelihood
    :evaluation-object    the evaluation object itself
  The two metrics are computed through try-metric."
  ([evaluation]
   (println (.clusterResultsToString evaluation))
   {:classes-to-clusters (try-metric
                          (fn []
                            ;; fetch the array once instead of once per index;
                            ;; `into` is eager, so any failure still happens
                            ;; inside try-metric
                            (let [mapping (.getClassesToClusters evaluation)]
                              (into {}
                                    (map (fn [i] [i (aget mapping i)])
                                         (range 0 (.getNumClusters evaluation)))))))
    :log-likelihood (try-metric #(.getLogLikelihood evaluation))
    :evaluation-object evaluation}))
(defmulti clusterer-evaluate (defmulti clusterer-evaluate
"Evaluetes a trained clusterer using the provided dataset or cross-validation" "Evaluates a trained clusterer using the provided dataset or cross-validation"
(fn [clusterer mode & evaluation-data] mode)) (fn [clusterer mode & evaluation-data] mode))
(defmethod clusterer-evaluate :dataset (defmethod clusterer-evaluate :dataset
@ -100,7 +114,8 @@
evl))] evl))]
(.evaluateClusterer evaluation test-data) (.evaluateClusterer evaluation test-data)
(println (.clusterResultsToString evaluation)) (println (.clusterResultsToString evaluation))
evaluation))) ; evaluation)))
(collect-evaluation-results evaluation))))
;; Clustering collections ;; Clustering collections
@ -117,33 +132,3 @@
clustered)] clustered)]
(dataset-set-class nds (- (count attributes) 1)) (dataset-set-class nds (- (count attributes) 1))
nds))) nds)))
;; visualization
(defmulti clusterer-display-for-attributes
  "Plots a dataset for a pair of attributes and annotates the plot with
  information taken from the clusterer. Dispatches on the class of the
  clusterer."
  ;; the dispatch function has to accept the optional visualization options
  ;; taken by the methods; with a fixed arity of four, passing options fails
  ;; with an arity error before any method is reached
  (fn [clusterer dataset attribute-x attribute-y & visualization-options]
    (class clusterer)))

(defmethod clusterer-display-for-attributes SimpleKMeans
  ([clusterer dataset attribute-x attribute-y & visualization-options]
   (let [attr-x (if (keyword? attribute-x) (instance-index-attr dataset attribute-x) attribute-x)
         attr-y (if (keyword? attribute-y) (instance-index-attr dataset attribute-y) attribute-y)
         opts (first-or-default visualization-options {})
         ;; read the flag from the options map: visualization-options is the
         ;; rest-args sequence, and a keyword lookup on it always yields nil,
         ;; which made {:visualize false} impossible to honour
         display? (not= (get opts :visualize) false)
         ;; the base plot is always requested without visualization so the
         ;; centroid pointers can be added before it is shown
         plot (dataset-display-class-for-attributes dataset attribute-x attribute-y
                                                    (conj opts {:visualize false}))
         centroids (:centroids (clusterer-info clusterer))]
     ;; add one labelled pointer per centroid
     (doseq [k (keys centroids)]
       (let [centroid (get centroids k)
             val-x (instance-value-at centroid attr-x)
             val-y (instance-value-at centroid attr-y)]
         (add-pointer plot val-x val-y
                      :text (str "centroid " k " (" (float val-x) "," (float val-y) ")"))))
     (if display?
       (visualize-plot plot)
       plot))))

View file

@ -4,11 +4,10 @@
;; ;;
(ns clj-ml.data (ns clj-ml.data
(:use [clj-ml utils ui]) (:use [clj-ml utils])
(:import (weka.core Instance Instances FastVector Attribute) (:import (weka.core Instance Instances FastVector Attribute)
(cljml ClojureInstances))) (cljml ClojureInstances)))
;; Construction of individual data and datasets ;; Construction of individual data and datasets
(defn attribute-name-at [dataset-or-instance pos] (defn attribute-name-at [dataset-or-instance pos]
@ -267,19 +266,3 @@
"Removes and returns the first instance in the dataset" "Removes and returns the first instance in the dataset"
[dataset] [dataset]
(dataset-extract-at dataset 0)) (dataset-extract-at dataset 0))
;; visualization
(defn dataset-display-numeric-attributes
  "Displays the provided attributes into a box plot.

  `attributes` is a collection of attribute identifiers; keywords are resolved
  with index-attr against `dataset`, any other value is passed through as is.
  The optional visualization options are reduced with first-or-default, using
  an empty map as the fallback, and forwarded to display-object."
  ;; the docstring must precede the argument vector; placed after it, the
  ;; string is just an expression that is evaluated and thrown away
  [dataset attributes & visualization-options]
  (let [attr (map #(if (keyword? %1) (index-attr dataset %1) %1) attributes)
        options (first-or-default visualization-options {})]
    (display-object :dataset :boxplot {:dataset dataset :cols attr} options)))
(defn dataset-display-class-for-attributes
  "Displays how a pair of attributes are distributed for each class.

  `attribute-x` and `attribute-y` may be keywords, which are resolved with
  index-attr against `dataset`, or any other value, which is used as is.
  The scatter plot is grouped by the value returned by dataset-get-class.
  The optional visualization options are reduced with first-or-default, using
  an empty map as the fallback, and forwarded to display-object."
  ;; the docstring must precede the argument vector; placed after it, the
  ;; string is just an expression that is evaluated and thrown away
  [dataset attribute-x attribute-y & visualization-options]
  (let [attr-x (if (keyword? attribute-x) (index-attr dataset attribute-x) attribute-x)
        attr-y (if (keyword? attribute-y) (index-attr dataset attribute-y) attribute-y)
        opts (first-or-default visualization-options {})
        class-index (dataset-get-class dataset)]
    (display-object :dataset :scatter-plot
                    {:dataset dataset :cols [attr-x attr-y] :group-by class-index}
                    opts)))

View file

@ -4,8 +4,9 @@
;; ;;
(ns clj-ml.ui (ns clj-ml.ui
(:use (incanter core stats charts) (:use (clj-ml data utils clusterers)
(clj-ml data utils))) (incanter core stats charts)))
(defn visualize-plot [plot] (defn visualize-plot [plot]
"Prepare a plot to be displayed" "Prepare a plot to be displayed"
@ -55,9 +56,10 @@
col-1 (nth cols 1) col-1 (nth cols 1)
group-by (get dataset-opts :group-by) group-by (get dataset-opts :group-by)
cols-names (dataset-attributes-definition dataset) cols-names (dataset-attributes-definition dataset)
group-vals (dataset-values-at dataset group-by) group-vals (if (nil? group-by) {:no-group-by :no-class} (dataset-values-at dataset group-by))
acum-map (reduce (fn [acum group-val] acum-map (reduce (fn [acum group-val]
(conj acum {(first group-val) (reduce (fn [acum x] (conj acum {x []})) (conj acum {(first group-val)
(reduce (fn [acum x] (conj acum {x []}))
{} {}
cols)})) cols)}))
{} {}
@ -66,15 +68,16 @@
(let [inst (instance-to-vector instance) (let [inst (instance-to-vector instance)
val-0 (nth inst col-0) val-0 (nth inst col-0)
val-1 (nth inst col-1) val-1 (nth inst col-1)
class (nth inst group-by)] class (if (nil? group-by)
:no-group-by
(nth inst group-by))]
(merge-with (merge-with
(fn [a b] {col-0 (conj (get a col-0) (fn [a b] {col-0 (conj (get a col-0)
(get b col-0)) (get b col-0))
col-1 (conj (get a col-1) col-1 (conj (get a col-1)
(get b col-1))}) (get b col-1))})
acum acum
{class {col-0 val-0 col-1 val-1}}) {class {col-0 val-0 col-1 val-1}})))
))
acum-map acum-map
dataseq) dataseq)
title (or (get display-opts :title) (str "Dataset '" (dataset-name dataset) "' Scatter Plot (" title (or (get display-opts :title) (str "Dataset '" (dataset-name dataset) "' Scatter Plot ("
@ -103,6 +106,66 @@
(recur the-plot (rest ks)))))))) (recur the-plot (rest ks))))))))
;; visualization of different objects
(defn dataset-display-numeric-attributes
  "Displays the provided attributes into a box plot.

  `attributes` is a collection of attribute identifiers; keywords are resolved
  with index-attr against `dataset`, any other value is passed through as is.
  The optional visualization options are reduced with first-or-default, using
  an empty map as the fallback. When the resulting map has no non-nil
  :visualize entry, {:visualize true} is added before the options are
  forwarded to display-object."
  ;; the docstring must precede the argument vector; placed after it, the
  ;; string is just an expression that is evaluated and thrown away
  [dataset attributes & visualization-options]
  (let [attr (map #(if (keyword? %1) (index-attr dataset %1) %1) attributes)
        options-pre (first-or-default visualization-options {})
        options (if (nil? (:visualize options-pre))
                  (conj options-pre {:visualize true})
                  options-pre)]
    (display-object :dataset :boxplot {:dataset dataset :cols attr} options)))
(defn dataset-display-class-for-attributes
  "Displays how a pair of attributes are distributed for each class.

  `attribute-x` and `attribute-y` may be keywords, which are resolved with
  index-attr against `dataset`, or any other value, which is used as is.
  The scatter plot is grouped by the value returned by dataset-get-class.
  The optional visualization options are reduced with first-or-default, using
  an empty map as the fallback. When the resulting map has no non-nil
  :visualize entry, {:visualize true} is added before the options are
  forwarded to display-object."
  ;; the docstring must precede the argument vector; placed after it, the
  ;; string is just an expression that is evaluated and thrown away
  [dataset attribute-x attribute-y & visualization-options]
  (let [attr-x (if (keyword? attribute-x) (index-attr dataset attribute-x) attribute-x)
        attr-y (if (keyword? attribute-y) (index-attr dataset attribute-y) attribute-y)
        options-pre (first-or-default visualization-options {})
        opts (if (nil? (:visualize options-pre))
               (conj options-pre {:visualize true})
               options-pre)
        class-index (dataset-get-class dataset)]
    (display-object :dataset :scatter-plot
                    {:dataset dataset :cols [attr-x attr-y] :group-by class-index}
                    opts)))
(defn dataset-display-attributes
  "Displays the distribution of a pair of attributes for a dataset, without
  grouping the points by class.

  `attribute-x` and `attribute-y` may be keywords, which are resolved with
  index-attr against `dataset`, or any other value, which is used as is.
  The optional visualization options are reduced with first-or-default, using
  an empty map as the fallback. When the resulting map has no non-nil
  :visualize entry, {:visualize true} is added before the options are
  forwarded to display-object."
  ;; the docstring must precede the argument vector; placed after it, the
  ;; string is just an expression that is evaluated and thrown away
  [dataset attribute-x attribute-y & visualization-options]
  ;; no :group-by is passed here, so the class index is not looked up
  (let [attr-x (if (keyword? attribute-x) (index-attr dataset attribute-x) attribute-x)
        attr-y (if (keyword? attribute-y) (index-attr dataset attribute-y) attribute-y)
        options-pre (first-or-default visualization-options {})
        opts (if (nil? (:visualize options-pre))
               (conj options-pre {:visualize true})
               options-pre)]
    (display-object :dataset :scatter-plot {:dataset dataset :cols [attr-x attr-y]} opts)))
;; visualization
(defmulti clusterer-display-for-attributes
  "Plots a dataset for a pair of attributes and annotates the plot with
  information taken from the clusterer. Dispatches on the class of the
  clusterer."
  ;; the dispatch function has to accept the optional visualization options
  ;; taken by the methods; with a fixed arity of four, passing options fails
  ;; with an arity error before any method is reached
  (fn [clusterer dataset attribute-x attribute-y & visualization-options]
    (class clusterer)))

(defmethod clusterer-display-for-attributes SimpleKMeans
  ([clusterer dataset attribute-x attribute-y & visualization-options]
   (let [attr-x (if (keyword? attribute-x) (instance-index-attr dataset attribute-x) attribute-x)
         attr-y (if (keyword? attribute-y) (instance-index-attr dataset attribute-y) attribute-y)
         opts (first-or-default visualization-options {})
         ;; read the flag from the options map: visualization-options is the
         ;; rest-args sequence, and a keyword lookup on it always yields nil,
         ;; which made {:visualize false} impossible to honour
         display? (not= (get opts :visualize) false)
         ;; the base plot is always requested without visualization so the
         ;; centroid pointers can be added before it is shown
         plot (dataset-display-class-for-attributes dataset attribute-x attribute-y
                                                    (conj opts {:visualize false}))
         centroids (:centroids (clusterer-info clusterer))]
     ;; add one labelled pointer per centroid
     (doseq [k (keys centroids)]
       (let [centroid (get centroids k)
             val-x (instance-value-at centroid attr-x)
             val-y (instance-value-at centroid attr-y)]
         (add-pointer plot val-x val-y
                      :text (str "centroid " k " (" (float val-x) "," (float val-y) ")"))))
     (if display?
       (visualize-plot plot)
       plot))))
;; Things to load to test this from slime ;; Things to load to test this from slime
;(defn load-test-from-slime [] ;(defn load-test-from-slime []

View file

@ -24,6 +24,23 @@
default default
(first col)))) (first col))))
;; trying metrics
(defn try-metric
  "Calls the zero-argument function `f` and hands back whatever it returns.
  If the call throws an Exception, the result is {:nan <exception message>}
  rather than a propagated error."
  [f]
  (try
    (f)
    (catch Exception failure
      {:nan (.getMessage failure)})))
(defn try-multiple-values-metric
  "Returns a map that associates each key of `class-values` with the result
  of calling `f` on the value `class-values` holds for that key."
  [class-values f]
  ;; `into` consumes the sequence eagerly, so `f` is called for every key
  ;; before this function returns
  (into {}
        (for [label (keys class-values)]
          [label (f (get class-values label))])))
;; Manipulation of array of options ;; Manipulation of array of options
(defn check-option [opts val flag map] (defn check-option [opts val flag map]

View file

@ -27,7 +27,7 @@
"1")))) "1"))))
(deftest make-and-buld-classifier (deftest make-and-build-classifier
(let [ds (make-dataset :test [:a :b] [[1 2] [3 4]]) (let [ds (make-dataset :test [:a :b] [[1 2] [3 4]])
c (make-clusterer :k-means)] c (make-clusterer :k-means)]
(clusterer-build c ds) (clusterer-build c ds)