Initial support for clustering algorithms
This commit is contained in:
parent
d149acc376
commit
a7990c5373
4 changed files with 137 additions and 4 deletions
53
README
53
README
|
@ -31,6 +31,22 @@ the jar manually.
|
|||
<version>0.0.3-SNAPSHOT</version>
|
||||
</dependency>
|
||||
|
||||
## Supported algorithms
|
||||
|
||||
* Filters
|
||||
- supervised discretize
|
||||
- unsupervised discretize
|
||||
- supervised nominal to binary
|
||||
- unsupervised nominal to binary
|
||||
|
||||
* Classifiers
|
||||
- C4.5 (J4.8)
|
||||
- naive Bayes
|
||||
- multilayer perceptron
|
||||
|
||||
* Clusterers
|
||||
- k-means
|
||||
|
||||
## Usage
|
||||
|
||||
* I/O of data
|
||||
|
@ -48,7 +64,7 @@ the jar manually.
|
|||
REPL>(use 'clj-ml.data)
|
||||
|
||||
REPL>; Defining a dataset
|
||||
REPL>(def ds (make-dataset "name" [:length :width {:kind [:good :bad]}] [12 34 :good] [24 53 :bad] ]))
|
||||
REPL>(def ds (make-dataset "name" [:length :width {:kind [:good :bad]}] [ [12 34 :good] [24 53 :bad] ]))
|
||||
REPL>ds
|
||||
|
||||
#<ClojureInstances @relation name
|
||||
|
@ -193,6 +209,41 @@ the jar manually.
|
|||
REPL>(serialize-to-file classifier
|
||||
REPL> "/Users/antonio.garrote/Desktop/classifier.bin")
|
||||
|
||||
* Using clusterers
|
||||
|
||||
REPL>(use 'clj-ml.clusterers)
|
||||
|
||||
REPL> ; we build a clusterer using k-means and three clusters
|
||||
REPL> (def kmeans (make-clusterer :k-means {:number-clusters 3}))
|
||||
|
||||
REPL> ; we need to remove the class from the dataset to
|
||||
REPL> ; use this clustering algorithm
|
||||
REPL> (dataset-remove-class ds)
|
||||
|
||||
REPL> ; we build the clusters
|
||||
REPL> (clusterer-build kmeans ds)
|
||||
REPL> kmeans
|
||||
|
||||
#<SimpleKMeans
|
||||
kMeans
|
||||
======
|
||||
|
||||
Number of iterations: 3
|
||||
Within cluster sum of squared errors: 7.817456892309574
|
||||
Missing values globally replaced with mean/mode
|
||||
|
||||
Cluster centroids:
|
||||
Cluster#
|
||||
Attribute Full Data 0 1 2
|
||||
(150) (50) (50) (50)
|
||||
==================================================================================
|
||||
sepallength 5.8433 5.936 5.006 6.588
|
||||
sepalwidth 3.054 2.77 3.418 2.974
|
||||
petallength 3.7587 4.26 1.464 5.552
|
||||
petalwidth 1.1987 1.326 0.244 2.026
|
||||
class Iris-setosa Iris-versicolor Iris-setosa Iris-virginica
|
||||
|
||||
|
||||
## License
|
||||
|
||||
MIT License
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
;;
|
||||
;; Data processing of data with different filtering algorithms
|
||||
;; Classifiers
|
||||
;; @author Antonio Garrote
|
||||
;;
|
||||
|
||||
|
@ -69,7 +69,7 @@
|
|||
|
||||
(defmacro make-classifier-m
|
||||
([kind algorithm classifier-class options]
|
||||
`(let [options-read# (if (empty? ~options) {} ~options)
|
||||
`(let [options-read# (if (empty? ~options) {} (first ~options))
|
||||
classifier# (new ~classifier-class)
|
||||
opts# (make-classifier-options ~kind ~algorithm options-read#)]
|
||||
(.setOptions classifier# opts#)
|
||||
|
|
56
src/clj_ml/clusterers.clj
Normal file
56
src/clj_ml/clusterers.clj
Normal file
|
@ -0,0 +1,56 @@
|
|||
;;
|
||||
;; Clusterers
|
||||
;; @author Antonio Garrote
|
||||
;;
|
||||
|
||||
(ns clj-ml.clusterers
|
||||
(:use [clj-ml utils data])
|
||||
(:import (java.util Date Random)
|
||||
(weka.clusterers SimpleKMeans)))
|
||||
|
||||
;; Setting up clusterer options
|
||||
|
||||
(defmulti make-clusterer-options
|
||||
"Creates the right parameters for a clusterer"
|
||||
(fn [kind map] kind))
|
||||
|
||||
(defmethod make-clusterer-options :k-means
|
||||
([kind map]
|
||||
(let [cols-val (check-options {:display-standard-deviation "-V"
|
||||
:replace-missing-values "-M"
|
||||
:preserve-instances-order "-O"}
|
||||
map
|
||||
[""])
|
||||
cols-val-a (check-option-values {:number-clusters "-N"
|
||||
:random-seed "-S"
|
||||
:number-iterations "-I"}
|
||||
map
|
||||
cols-val)]
|
||||
(into-array cols-val-a))))
|
||||
|
||||
;; Building clusterers
|
||||
|
||||
(defmacro make-clusterer-m
|
||||
([kind clusterer-class options]
|
||||
`(let [options-read# (if (empty? ~options) {} (first ~options))
|
||||
clusterer# (new ~clusterer-class)
|
||||
opts# (make-clusterer-options ~kind options-read#)]
|
||||
(.setOptions clusterer# opts#)
|
||||
clusterer#)))
|
||||
|
||||
(defmulti make-clusterer
|
||||
"Creates a new clusterer for the given kind of algorithm and options"
|
||||
(fn [kind & options] kind))
|
||||
|
||||
|
||||
(defmethod make-clusterer :k-means
|
||||
([kind & options]
|
||||
(make-clusterer-m kind SimpleKMeans options)))
|
||||
|
||||
|
||||
;; Clustering data
|
||||
|
||||
(defn clusterer-build
|
||||
"Applies a clustering algorithm to a set of data"
|
||||
([clusterer dataset]
|
||||
(.buildClusterer clusterer dataset)))
|
|
@ -98,12 +98,13 @@
|
|||
;; we haven't received a vector so we create an empty dataset
|
||||
(new Instances (key-to-str name) (parse-attributes attributes) capacity-or-values))]
|
||||
;; by default the class is the last attribute in the dataset
|
||||
(.setClassIndex ds (- (.numAttributes ds) 1))
|
||||
;; (.setClassIndex ds (- (.numAttributes ds) 1))
|
||||
ds)))
|
||||
|
||||
;; dataset information
|
||||
|
||||
(defn dataset-class-values [dataset]
|
||||
"Returns the possible values for the class attribute"
|
||||
(let [class-attr (.classAttribute dataset)
|
||||
values (.enumerateValues class-attr)]
|
||||
(loop [continue (.hasMoreElements values)
|
||||
|
@ -116,6 +117,7 @@
|
|||
acum))))
|
||||
|
||||
(defn dataset-values-at [dataset-or-instance pos]
|
||||
"Returns the possible values for a nominal attribute at the provided position"
|
||||
(let [class-attr (.attribute dataset-or-instance pos)
|
||||
values (.enumerateValues class-attr)]
|
||||
(if (nil? values)
|
||||
|
@ -129,6 +131,24 @@
|
|||
(conj acum {(keyword val) index})))
|
||||
acum)))))
|
||||
|
||||
(defn dataset-attributes-definition [dataset]
|
||||
"Returns the definition of the attributes of this dataset"
|
||||
(let [max (.numAttributes dataset)]
|
||||
(loop [acum []
|
||||
c 0]
|
||||
(if (< c max)
|
||||
(let [attr (.attribute dataset c)
|
||||
index c
|
||||
name (keyword (.name attr))
|
||||
nominal? (.isNominal attr)
|
||||
to-add (if nominal?
|
||||
(let [vals (dataset-values-at dataset index)]
|
||||
{name (keys vals)})
|
||||
name)]
|
||||
(recur (conj acum to-add)
|
||||
(+ c 1)))
|
||||
acum))))
|
||||
|
||||
;; manipulation of instances
|
||||
|
||||
(defn instance-set-class [instance pos]
|
||||
|
@ -192,6 +212,12 @@
|
|||
(do (.setClassIndex dataset pos)
|
||||
dataset))
|
||||
|
||||
(defn dataset-remove-class [dataset]
|
||||
"Removes the class attribute from the dataset"
|
||||
(do
|
||||
(.setClassIndex dataset -1)
|
||||
dataset))
|
||||
|
||||
(defn dataset-count [dataset]
|
||||
"Returns the number of elements in a dataset"
|
||||
(.numInstances dataset))
|
||||
|
|
Loading…
Reference in a new issue