Removed UI code (Weka can do that better) and some other unused or broken dependencies.
This commit is contained in:
parent
65a851341b
commit
dcf6534ea4
4 changed files with 2 additions and 222 deletions
|
@ -5,14 +5,10 @@
|
||||||
:url "http://opensource.org/licenses/MIT"}
|
:url "http://opensource.org/licenses/MIT"}
|
||||||
:url "https://github.com/joshuaeckroth/clj-ml"
|
:url "https://github.com/joshuaeckroth/clj-ml"
|
||||||
:dependencies [[org.clojure/clojure "1.4.0"]
|
:dependencies [[org.clojure/clojure "1.4.0"]
|
||||||
[incanter/incanter-core "1.4.1"]
|
|
||||||
[incanter/incanter-charts "1.4.1"]
|
|
||||||
[nz.ac.waikato.cms.weka/weka-stable "3.6.9"]
|
[nz.ac.waikato.cms.weka/weka-stable "3.6.9"]
|
||||||
[tw.edu.ntu.csie/libsvm "3.1"]
|
[tw.edu.ntu.csie/libsvm "3.1"]
|
||||||
[org.jsoup/jsoup "1.7.2"]
|
|
||||||
[org.clojure/data.xml "0.0.7"]
|
[org.clojure/data.xml "0.0.7"]
|
||||||
[org.apache.lucene/lucene-analyzers-common "4.3.0"]
|
[org.apache.lucene/lucene-analyzers-common "4.3.0"]]
|
||||||
[org.clojars.chapmanb/fast-random-forest "0.98"]]
|
|
||||||
:profiles {:dev
|
:profiles {:dev
|
||||||
{:plugins [[lein-midje "2.0.0"]]
|
{:plugins [[lein-midje "2.0.0"]]
|
||||||
:dependencies [[midje "1.4.0"]]}}
|
:dependencies [[midje "1.4.0"]]}}
|
||||||
|
|
|
@ -62,7 +62,6 @@
|
||||||
"
|
"
|
||||||
(:use [clj-ml utils data kernel-functions options-utils])
|
(:use [clj-ml utils data kernel-functions options-utils])
|
||||||
(:import (java.util Date Random)
|
(:import (java.util Date Random)
|
||||||
(hr.irb.fastRandomForest FastRandomForest)
|
|
||||||
(weka.core Instance Instances)
|
(weka.core Instance Instances)
|
||||||
(weka.classifiers.lazy IBk)
|
(weka.classifiers.lazy IBk)
|
||||||
(weka.classifiers.trees J48 RandomForest M5P)
|
(weka.classifiers.trees J48 RandomForest M5P)
|
||||||
|
@ -241,16 +240,6 @@
|
||||||
:random-seed "-S"
|
:random-seed "-S"
|
||||||
:depth "-depth"}))))
|
:depth "-depth"}))))
|
||||||
|
|
||||||
(defmethod make-classifier-options [:decision-tree :fast-random-forest]
|
|
||||||
([kind algorithm m]
|
|
||||||
(->>
|
|
||||||
(check-options m {:debug "-D"})
|
|
||||||
(check-option-values m
|
|
||||||
{:num-trees-in-forest "-I"
|
|
||||||
:num-features-to-consider "-K"
|
|
||||||
:random-seed "-S"
|
|
||||||
:depth "-depth"}))))
|
|
||||||
|
|
||||||
(defmethod make-classifier-options [:decision-tree :rotation-forest]
|
(defmethod make-classifier-options [:decision-tree :rotation-forest]
|
||||||
([kind algorithm m]
|
([kind algorithm m]
|
||||||
(->>
|
(->>
|
||||||
|
@ -536,10 +525,6 @@
|
||||||
([kind algorithm & options]
|
([kind algorithm & options]
|
||||||
(make-classifier-with kind algorithm RandomForest options)))
|
(make-classifier-with kind algorithm RandomForest options)))
|
||||||
|
|
||||||
(defmethod make-classifier [:decision-tree :fast-random-forest]
|
|
||||||
([kind algorithm & options]
|
|
||||||
(make-classifier-with kind algorithm FastRandomForest options)))
|
|
||||||
|
|
||||||
(defmethod make-classifier [:decision-tree :rotation-forest]
|
(defmethod make-classifier [:decision-tree :rotation-forest]
|
||||||
([kind algorithm & options]
|
([kind algorithm & options]
|
||||||
(make-classifier-with kind algorithm RotationForest options)))
|
(make-classifier-with kind algorithm RotationForest options)))
|
||||||
|
|
|
@ -16,7 +16,6 @@
|
||||||
(:require [clojure.string :as str])
|
(:require [clojure.string :as str])
|
||||||
(:require [clojure.set :as set])
|
(:require [clojure.set :as set])
|
||||||
(:use [clojure.java.io :only [file]])
|
(:use [clojure.java.io :only [file]])
|
||||||
(:import [org.jsoup Jsoup])
|
|
||||||
(:import (weka.core Instance Instances FastVector Attribute)
|
(:import (weka.core Instance Instances FastVector Attribute)
|
||||||
(cljml ClojureInstances)))
|
(cljml ClojureInstances)))
|
||||||
|
|
||||||
|
@ -535,7 +534,7 @@ split immediately you can use do-split-dataset."
|
||||||
ds (make-dataset
|
ds (make-dataset
|
||||||
:docs [{:class [:no :yes]} {:title nil} {:fulltext nil}]
|
:docs [{:class [:no :yes]} {:title nil} {:fulltext nil}]
|
||||||
(for [doc docs-keep-n]
|
(for [doc docs-keep-n]
|
||||||
(let [orig-fulltext (.text (Jsoup/parse (or (:fulltext doc) (:extracted doc) "")))
|
(let [orig-fulltext (:fulltext doc "")
|
||||||
fulltext (str/replace orig-fulltext #"\s+" " ")
|
fulltext (str/replace orig-fulltext #"\s+" " ")
|
||||||
fulltext-fixed (str/replace fulltext #"[^ \w\d]" "")
|
fulltext-fixed (str/replace fulltext #"[^ \w\d]" "")
|
||||||
title (str/replace (:title doc "") #"[^ \w\d]" "")
|
title (str/replace (:title doc "") #"[^ \w\d]" "")
|
||||||
|
|
|
@ -1,200 +0,0 @@
|
||||||
;;
|
|
||||||
;; User interface utilities
|
|
||||||
;; @author Antonio Garrote
|
|
||||||
;;
|
|
||||||
|
|
||||||
(ns #^{:author "Antonio Garrote <antoniogarrote@gmail.com>"}
|
|
||||||
clj-ml.ui
|
|
||||||
"Namespace containing functions for plotting classifiers, clusterers and data sets."
|
|
||||||
(:use (clj-ml data utils clusterers)
|
|
||||||
(incanter core stats charts))
|
|
||||||
(:import (weka.clusterers ClusterEvaluation SimpleKMeans)))
|
|
||||||
|
|
||||||
|
|
||||||
(defn visualize-plot [plot]
|
|
||||||
"Prepare a plot to be displayed"
|
|
||||||
(do (clear-background plot)
|
|
||||||
(view plot)
|
|
||||||
plot))
|
|
||||||
|
|
||||||
(defmulti display-object
|
|
||||||
"Displays some kind of clj-ml object"
|
|
||||||
(fn [kind chart data opts] [kind chart]))
|
|
||||||
|
|
||||||
(defmethod display-object [:dataset :boxplot]
|
|
||||||
([kind chart dataset-opts display-opts]
|
|
||||||
(let [dataset (get dataset-opts :dataset)
|
|
||||||
dataseq (dataset-seq dataset)
|
|
||||||
cols (get dataset-opts :cols)
|
|
||||||
cols-names (dataset-format dataset)
|
|
||||||
vals-map (reduce (fn [acum col]
|
|
||||||
(let [name (name (nth cols-names col))
|
|
||||||
vals (map #(nth (instance-to-vector %1) col) dataseq)]
|
|
||||||
(conj acum {name vals})))
|
|
||||||
{}
|
|
||||||
cols)
|
|
||||||
title (or (get display-opts :title) (str "Dataset '" (dataset-name dataset) "' Box Plot"))
|
|
||||||
legend (if (nil? (get display-opts :legend)) true (get display-opts :legend))
|
|
||||||
should-display (get display-opts :visualize)]
|
|
||||||
(loop [plot nil
|
|
||||||
ks (keys vals-map)]
|
|
||||||
(if (empty? ks)
|
|
||||||
(if should-display
|
|
||||||
(visualize-plot plot)
|
|
||||||
plot)
|
|
||||||
(let [this-val (get vals-map (first ks))
|
|
||||||
the-plot (if (nil? plot)
|
|
||||||
(box-plot this-val :title title :legend legend :series-label (name (first ks)))
|
|
||||||
(do (add-box-plot plot this-val :series-label (name (first ks)))
|
|
||||||
plot))]
|
|
||||||
(recur the-plot (rest ks))))))))
|
|
||||||
|
|
||||||
|
|
||||||
(defmethod display-object [:dataset :scatter-plot]
|
|
||||||
([kind chart dataset-opts display-opts]
|
|
||||||
(let [dataset (get dataset-opts :dataset)
|
|
||||||
dataseq (dataset-seq dataset)
|
|
||||||
cols (get dataset-opts :cols)
|
|
||||||
col-0 (nth cols 0)
|
|
||||||
col-1 (nth cols 1)
|
|
||||||
group-by (get dataset-opts :group-by)
|
|
||||||
cols-names (dataset-format dataset)
|
|
||||||
group-vals (if (nil? group-by) {:no-group-by :no-class} (dataset-values-at dataset group-by))
|
|
||||||
acum-map (reduce (fn [acum group-val]
|
|
||||||
(conj acum {(first group-val)
|
|
||||||
(reduce (fn [acum x] (conj acum {x []}))
|
|
||||||
{}
|
|
||||||
cols)}))
|
|
||||||
{}
|
|
||||||
group-vals)
|
|
||||||
folded-points (reduce (fn [acum instance]
|
|
||||||
(let [inst (instance-to-vector instance)
|
|
||||||
val-0 (nth inst col-0)
|
|
||||||
val-1 (nth inst col-1)
|
|
||||||
class (if (nil? group-by)
|
|
||||||
:no-group-by
|
|
||||||
(nth inst group-by))]
|
|
||||||
(merge-with
|
|
||||||
(fn [a b] {col-0 (conj (get a col-0)
|
|
||||||
(get b col-0))
|
|
||||||
col-1 (conj (get a col-1)
|
|
||||||
(get b col-1))})
|
|
||||||
acum
|
|
||||||
{class {col-0 val-0 col-1 val-1}})))
|
|
||||||
acum-map
|
|
||||||
dataseq)
|
|
||||||
title (or (get display-opts :title) (str "Dataset '" (dataset-name dataset) "' Scatter Plot ("
|
|
||||||
(name (nth cols-names col-0)) " vs "
|
|
||||||
(name (nth cols-names col-1)) ")"))
|
|
||||||
legend (if (nil? (get display-opts :legend)) true (get display-opts :legend))
|
|
||||||
should-display (get display-opts :visualize)]
|
|
||||||
(loop [plot nil
|
|
||||||
ks (keys folded-points)]
|
|
||||||
(if (empty? ks)
|
|
||||||
(if should-display
|
|
||||||
(visualize-plot plot)
|
|
||||||
plot)
|
|
||||||
(let [this-vals (get folded-points (first ks))
|
|
||||||
this-val-0 (get this-vals col-0)
|
|
||||||
this-val-1 (get this-vals col-1)
|
|
||||||
the-plot (if (nil? plot)
|
|
||||||
(scatter-plot this-val-0 this-val-1
|
|
||||||
:title title
|
|
||||||
:x-label (name (nth cols-names col-0))
|
|
||||||
:y-label (name (nth cols-names col-1))
|
|
||||||
:series-label (name (first ks))
|
|
||||||
:legend legend)
|
|
||||||
(do (add-points plot this-val-0 this-val-1 :series-label (name (first ks)))
|
|
||||||
plot))]
|
|
||||||
(recur the-plot (rest ks))))))))
|
|
||||||
|
|
||||||
|
|
||||||
;; visualization of different objects
|
|
||||||
|
|
||||||
(defn dataset-display-numeric-attributes [dataset attributes & visualization-options]
|
|
||||||
"Displays the provided attributes into a box plot"
|
|
||||||
(let [attr (map #(if (keyword? %1) (dataset-index-attr dataset %1) %1) attributes)
|
|
||||||
options-pre (first-or-default visualization-options {})
|
|
||||||
options (if (nil? (:visualize options-pre)) (conj options-pre {:visualize true}) options-pre)]
|
|
||||||
(display-object :dataset :boxplot {:dataset dataset :cols attr} options)))
|
|
||||||
|
|
||||||
(defn dataset-display-class-for-attributes [dataset attribute-x attribute-y & visualization-options]
|
|
||||||
"Displays how a pair of attributes are distributed for each class"
|
|
||||||
(let [attr-x (if (keyword? attribute-x) (dataset-index-attr dataset attribute-x) attribute-x)
|
|
||||||
attr-y (if (keyword? attribute-y) (dataset-index-attr dataset attribute-y) attribute-y)
|
|
||||||
options-pre (first-or-default visualization-options {})
|
|
||||||
opts (if (nil? (:visualize options-pre)) (conj options-pre {:visualize true}) options-pre)
|
|
||||||
class-index (dataset-get-class dataset)]
|
|
||||||
(display-object :dataset :scatter-plot {:dataset dataset :cols [attr-x attr-y] :group-by class-index} opts)))
|
|
||||||
|
|
||||||
(defn dataset-display-attributes [dataset attribute-x attribute-y & visualization-options]
|
|
||||||
"Displays the distribution of a set of attributes for a dataset"
|
|
||||||
(let [attr-x (if (keyword? attribute-x) (datset-index-attr dataset attribute-x) attribute-x)
|
|
||||||
attr-y (if (keyword? attribute-y) (datset-index-attr dataset attribute-y) attribute-y)
|
|
||||||
options-pre (first-or-default visualization-options {})
|
|
||||||
opts (if (nil? (:visualize options-pre)) (conj options-pre {:visualize true}) options-pre)
|
|
||||||
class-index (dataset-get-class dataset)]
|
|
||||||
(display-object :dataset :scatter-plot {:dataset dataset :cols [attr-x attr-y]} opts)))
|
|
||||||
|
|
||||||
|
|
||||||
;; visualization
|
|
||||||
|
|
||||||
(defmulti clusterer-display-for-attributes
|
|
||||||
(fn [clusterer dataset attribute-x attribute-y] (class clusterer)))
|
|
||||||
|
|
||||||
(defmethod clusterer-display-for-attributes SimpleKMeans
|
|
||||||
([clusterer dataset attribute-x attribute-y & visualization-options]
|
|
||||||
(let [attr-x (if (keyword? attribute-x) (dataset-index-attr dataset attribute-x) attribute-x)
|
|
||||||
attr-y (if (keyword? attribute-y) (dataset-index-attr dataset attribute-y) attribute-y)
|
|
||||||
opts (first-or-default visualization-options {})
|
|
||||||
display? (if (= (get visualization-options :visualize) false)
|
|
||||||
false
|
|
||||||
true)
|
|
||||||
true-opts (conj opts {:visualize false})
|
|
||||||
plot (dataset-display-class-for-attributes dataset attribute-x attribute-y true-opts)
|
|
||||||
info (clusterer-info clusterer)
|
|
||||||
centroids (:centroids info)]
|
|
||||||
(do
|
|
||||||
(loop [ks (keys centroids)]
|
|
||||||
(if (empty? ks)
|
|
||||||
(if display?
|
|
||||||
(visualize-plot plot)
|
|
||||||
plot)
|
|
||||||
(let [k (first ks)
|
|
||||||
centroid (get centroids k)
|
|
||||||
val-x (instance-value-at centroid attr-x)
|
|
||||||
val-y (instance-value-at centroid attr-y)]
|
|
||||||
(add-pointer plot val-x val-y :text (str "centroid " k " (" (float val-x) "," (float val-y) ")"))
|
|
||||||
(recur (rest ks)))))))))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
;; Things to load to test this from slime
|
|
||||||
|
|
||||||
;(defn load-test-from-slime []
|
|
||||||
; (do
|
|
||||||
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/joda-time-1.6.jar")
|
|
||||||
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/opencsv-2.0.1.jar")
|
|
||||||
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/classes/")
|
|
||||||
; (add-classpath "file:///Applications/weka-3-6-2/weka.jar")
|
|
||||||
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/src/")
|
|
||||||
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/incanter-charts-1.0-master-SNAPSHOT.jar")
|
|
||||||
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/incanter-core-1.0-master-SNAPSHOT.jar")
|
|
||||||
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/incanter-io-1.0-master-SNAPSHOT.jar")
|
|
||||||
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/incanter-processing-1.0-master-SNAPSHOT.jar")
|
|
||||||
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/incanter-chrono-1.0-master-SNAPSHOT.jar")
|
|
||||||
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/incanter-full-1.0-master-SNAPSHOT.jar")
|
|
||||||
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/incanter-mongodb-1.0-master-SNAPSHOT.jar")
|
|
||||||
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/jfreechart-1.0.13.jar")
|
|
||||||
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/parallelcolt-0.7.2.jar")
|
|
||||||
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/arpack-combo-0.1.jar")
|
|
||||||
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/gnujaxp-1.jar")
|
|
||||||
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/clojure-json-1.1-20091229.021828-4.jar")
|
|
||||||
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/clojure-db-object-0.1.1-20091229.021828-2.jar")
|
|
||||||
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/jcommon-1.0.16.jar")
|
|
||||||
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/netlib-java-0.9.1.jar")
|
|
||||||
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/processing-core-1.jar")
|
|
||||||
; (add-classpath"file:///Users/antonio.garrote/Development/old/clj-ml/lib/congomongo-0.1.1-20091229.021828-1.jar")
|
|
||||||
; (add-classpath"file:///Users/antonio.garrote/Development/old/clj-ml/lib/mongo-1.0.jar")
|
|
||||||
; (add-classpath"file:///Users/antonio.garrote/Development/old/clj-ml/lib/mongo-java-driver-1.1.0-20091229.021828-3.jar")
|
|
||||||
; ))
|
|
Loading…
Reference in a new issue