Removed UI code (Weka can do that better) and some other unused or broken dependencies.

This commit is contained in:
Joshua Eckroth 2013-08-07 10:32:24 -04:00
parent 65a851341b
commit dcf6534ea4
4 changed files with 2 additions and 222 deletions

View file

@ -5,14 +5,10 @@
:url "http://opensource.org/licenses/MIT"} :url "http://opensource.org/licenses/MIT"}
:url "https://github.com/joshuaeckroth/clj-ml" :url "https://github.com/joshuaeckroth/clj-ml"
:dependencies [[org.clojure/clojure "1.4.0"] :dependencies [[org.clojure/clojure "1.4.0"]
[incanter/incanter-core "1.4.1"]
[incanter/incanter-charts "1.4.1"]
[nz.ac.waikato.cms.weka/weka-stable "3.6.9"] [nz.ac.waikato.cms.weka/weka-stable "3.6.9"]
[tw.edu.ntu.csie/libsvm "3.1"] [tw.edu.ntu.csie/libsvm "3.1"]
[org.jsoup/jsoup "1.7.2"]
[org.clojure/data.xml "0.0.7"] [org.clojure/data.xml "0.0.7"]
[org.apache.lucene/lucene-analyzers-common "4.3.0"] [org.apache.lucene/lucene-analyzers-common "4.3.0"]]
[org.clojars.chapmanb/fast-random-forest "0.98"]]
:profiles {:dev :profiles {:dev
{:plugins [[lein-midje "2.0.0"]] {:plugins [[lein-midje "2.0.0"]]
:dependencies [[midje "1.4.0"]]}} :dependencies [[midje "1.4.0"]]}}

View file

@ -62,7 +62,6 @@
" "
(:use [clj-ml utils data kernel-functions options-utils]) (:use [clj-ml utils data kernel-functions options-utils])
(:import (java.util Date Random) (:import (java.util Date Random)
(hr.irb.fastRandomForest FastRandomForest)
(weka.core Instance Instances) (weka.core Instance Instances)
(weka.classifiers.lazy IBk) (weka.classifiers.lazy IBk)
(weka.classifiers.trees J48 RandomForest M5P) (weka.classifiers.trees J48 RandomForest M5P)
@ -241,16 +240,6 @@
:random-seed "-S" :random-seed "-S"
:depth "-depth"})))) :depth "-depth"}))))
;; Option translation for the :fast-random-forest decision tree.
;; The result of check-options (for the :debug -> "-D" mapping) is handed to
;; check-option-values as its last argument together with the mapping of
;; valued options below.
(defmethod make-classifier-options [:decision-tree :fast-random-forest]
  [kind algorithm m]
  (let [flag-options (check-options m {:debug "-D"})]
    (check-option-values m
                         {:num-trees-in-forest "-I"
                          :num-features-to-consider "-K"
                          :random-seed "-S"
                          :depth "-depth"}
                         flag-options)))
(defmethod make-classifier-options [:decision-tree :rotation-forest] (defmethod make-classifier-options [:decision-tree :rotation-forest]
([kind algorithm m] ([kind algorithm m]
(->> (->>
@ -536,10 +525,6 @@
([kind algorithm & options] ([kind algorithm & options]
(make-classifier-with kind algorithm RandomForest options))) (make-classifier-with kind algorithm RandomForest options)))
;; Creates the :fast-random-forest decision tree by delegating to
;; make-classifier-with, passing the FastRandomForest class and the
;; caller-supplied options through unchanged.
(defmethod make-classifier [:decision-tree :fast-random-forest]
  [kind algorithm & options]
  (make-classifier-with kind algorithm FastRandomForest options))
(defmethod make-classifier [:decision-tree :rotation-forest] (defmethod make-classifier [:decision-tree :rotation-forest]
([kind algorithm & options] ([kind algorithm & options]
(make-classifier-with kind algorithm RotationForest options))) (make-classifier-with kind algorithm RotationForest options)))

View file

@ -16,7 +16,6 @@
(:require [clojure.string :as str]) (:require [clojure.string :as str])
(:require [clojure.set :as set]) (:require [clojure.set :as set])
(:use [clojure.java.io :only [file]]) (:use [clojure.java.io :only [file]])
(:import [org.jsoup Jsoup])
(:import (weka.core Instance Instances FastVector Attribute) (:import (weka.core Instance Instances FastVector Attribute)
(cljml ClojureInstances))) (cljml ClojureInstances)))
@ -535,7 +534,7 @@ split immediately you can use do-split-dataset."
ds (make-dataset ds (make-dataset
:docs [{:class [:no :yes]} {:title nil} {:fulltext nil}] :docs [{:class [:no :yes]} {:title nil} {:fulltext nil}]
(for [doc docs-keep-n] (for [doc docs-keep-n]
(let [orig-fulltext (.text (Jsoup/parse (or (:fulltext doc) (:extracted doc) ""))) (let [orig-fulltext (:fulltext doc "")
fulltext (str/replace orig-fulltext #"\s+" " ") fulltext (str/replace orig-fulltext #"\s+" " ")
fulltext-fixed (str/replace fulltext #"[^ \w\d]" "") fulltext-fixed (str/replace fulltext #"[^ \w\d]" "")
title (str/replace (:title doc "") #"[^ \w\d]" "") title (str/replace (:title doc "") #"[^ \w\d]" "")

View file

@ -1,200 +0,0 @@
;;
;; User interface utilities
;; @author Antonio Garrote
;;
(ns #^{:author "Antonio Garrote <antoniogarrote@gmail.com>"}
clj-ml.ui
"Namespace containing functions for plotting classifiers, clusterers and data sets."
(:use (clj-ml data utils clusterers)
(incanter core stats charts))
(:import (weka.clusterers ClusterEvaluation SimpleKMeans)))
(defn visualize-plot
  "Prepares a plot to be displayed: calls clear-background on it, hands it
  to view, and returns the same plot object so the call can be chained."
  [plot]
  ;; The docstring must precede the argument vector; placed after it, it is
  ;; just a discarded body expression and never reaches the var's metadata.
  (clear-background plot)
  (view plot)
  plot)
(defmulti display-object
  "Displays some kind of clj-ml object.
  Dispatches on the vector [kind chart]; the remaining two arguments
  (data and display options) play no part in dispatch."
  (fn [object-kind chart-type _data _opts]
    (vector object-kind chart-type)))
;; Box plot of selected dataset columns.
;;   dataset-opts : map with :dataset and :cols (column indexes to plot)
;;   display-opts : map with optional :title, :legend (treated as true when
;;                  nil/absent) and :visualize (when truthy the finished plot
;;                  is passed through visualize-plot)
;; The first column creates the chart via box-plot; every further column is
;; added to that chart with add-box-plot. Returns the plot (nil when no
;; columns were given and :visualize is falsey).
(defmethod display-object [:dataset :boxplot]
  [kind chart dataset-opts display-opts]
  (let [dataset        (get dataset-opts :dataset)
        rows           (dataset-seq dataset)
        col-indexes    (get dataset-opts :cols)
        attr-names     (dataset-format dataset)
        ;; series label (string) -> lazy seq of that column's values
        series-by-name (into {}
                             (map (fn [col]
                                    [(name (nth attr-names col))
                                     (map #(nth (instance-to-vector %1) col) rows)])
                                  col-indexes))
        title          (or (get display-opts :title)
                           (str "Dataset '" (dataset-name dataset) "' Box Plot"))
        legend-opt     (get display-opts :legend)
        legend         (if (nil? legend-opt) true legend-opt)
        should-display (get display-opts :visualize)
        plot           (reduce (fn [chart-so-far [label values]]
                                 (if (nil? chart-so-far)
                                   (box-plot values :title title :legend legend :series-label (name label))
                                   (do (add-box-plot chart-so-far values :series-label (name label))
                                       chart-so-far)))
                               nil
                               (seq series-by-name))]
    (if should-display
      (visualize-plot plot)
      plot)))
;; Scatter plot of two dataset columns, optionally split into one series per
;; value of a grouping column.
;;   dataset-opts : map with :dataset, :cols (the first two entries are used as
;;                  the x and y column indexes) and optional :group-by (column
;;                  index whose value selects the series)
;;   display-opts : map with optional :title, :legend (treated as true when
;;                  nil/absent) and :visualize (when truthy the finished plot
;;                  is passed through visualize-plot)
;; Returns the plot.
(defmethod display-object [:dataset :scatter-plot]
([kind chart dataset-opts display-opts]
(let [dataset (get dataset-opts :dataset)
dataseq (dataset-seq dataset)
cols (get dataset-opts :cols)
col-0 (nth cols 0)
col-1 (nth cols 1)
group-by (get dataset-opts :group-by)
cols-names (dataset-format dataset)
;; Without :group-by a single placeholder entry keyed :no-group-by is used.
;; NOTE(review): with :group-by the entries come from dataset-values-at and
;; only (first entry) is used as the series key -- confirm its return shape.
group-vals (if (nil? group-by) {:no-group-by :no-class} (dataset-values-at dataset group-by))
;; Seed accumulator: series key -> {col -> []} for every col in cols.
acum-map (reduce (fn [acum group-val]
(conj acum {(first group-val)
(reduce (fn [acum x] (conj acum {x []}))
{}
cols)}))
{}
group-vals)
;; Fold every instance into its series, appending its x value to the col-0
;; vector and its y value to the col-1 vector.
;; NOTE(review): merge-with only calls the combining fn when the series key
;; already exists in the accumulator; an unseen key would store the raw
;; {col-0 val-0 col-1 val-1} map instead of vectors -- confirm the keys
;; produced above always match (nth inst group-by).
folded-points (reduce (fn [acum instance]
(let [inst (instance-to-vector instance)
val-0 (nth inst col-0)
val-1 (nth inst col-1)
class (if (nil? group-by)
:no-group-by
(nth inst group-by))]
(merge-with
(fn [a b] {col-0 (conj (get a col-0)
(get b col-0))
col-1 (conj (get a col-1)
(get b col-1))})
acum
{class {col-0 val-0 col-1 val-1}})))
acum-map
dataseq)
;; Default title names the dataset and both plotted attributes.
title (or (get display-opts :title) (str "Dataset '" (dataset-name dataset) "' Scatter Plot ("
(name (nth cols-names col-0)) " vs "
(name (nth cols-names col-1)) ")"))
legend (if (nil? (get display-opts :legend)) true (get display-opts :legend))
should-display (get display-opts :visualize)]
;; The first series creates the chart with scatter-plot; each following
;; series is added to that same chart with add-points.
(loop [plot nil
ks (keys folded-points)]
(if (empty? ks)
(if should-display
(visualize-plot plot)
plot)
(let [this-vals (get folded-points (first ks))
this-val-0 (get this-vals col-0)
this-val-1 (get this-vals col-1)
the-plot (if (nil? plot)
(scatter-plot this-val-0 this-val-1
:title title
:x-label (name (nth cols-names col-0))
:y-label (name (nth cols-names col-1))
:series-label (name (first ks))
:legend legend)
(do (add-points plot this-val-0 this-val-1 :series-label (name (first ks)))
plot))]
(recur the-plot (rest ks))))))))
;; visualization of different objects
(defn dataset-display-numeric-attributes
  "Displays the provided attributes into a box plot.

  attributes is a collection of attribute keywords and/or column indexes;
  keywords are resolved to indexes with dataset-index-attr. The first element
  of visualization-options, if any, is used as the display options map;
  :visualize is set to true unless the caller supplied a non-nil value."
  [dataset attributes & visualization-options]
  ;; The docstring has to come before the argument vector to be attached to
  ;; the var; after it, it is only an ignored expression in the body.
  (let [attr        (map #(if (keyword? %1) (dataset-index-attr dataset %1) %1) attributes)
        options-pre (first-or-default visualization-options {})
        options     (if (nil? (:visualize options-pre))
                      (conj options-pre {:visualize true})
                      options-pre)]
    (display-object :dataset :boxplot {:dataset dataset :cols attr} options)))
(defn dataset-display-class-for-attributes
  "Displays how a pair of attributes are distributed for each class.

  attribute-x and attribute-y may be attribute keywords (resolved with
  dataset-index-attr) or column indexes. The scatter plot is grouped by the
  value returned from dataset-get-class. The first element of
  visualization-options, if any, is used as the display options map;
  :visualize is set to true unless the caller supplied a non-nil value."
  [dataset attribute-x attribute-y & visualization-options]
  ;; The docstring has to come before the argument vector to be attached to
  ;; the var; after it, it is only an ignored expression in the body.
  (let [attr-x      (if (keyword? attribute-x) (dataset-index-attr dataset attribute-x) attribute-x)
        attr-y      (if (keyword? attribute-y) (dataset-index-attr dataset attribute-y) attribute-y)
        options-pre (first-or-default visualization-options {})
        opts        (if (nil? (:visualize options-pre))
                      (conj options-pre {:visualize true})
                      options-pre)
        class-index (dataset-get-class dataset)]
    (display-object :dataset :scatter-plot
                    {:dataset dataset :cols [attr-x attr-y] :group-by class-index}
                    opts)))
(defn dataset-display-attributes
  "Displays the distribution of a pair of attributes for a dataset as a
  single, ungrouped scatter plot.

  attribute-x and attribute-y may be attribute keywords (resolved with
  dataset-index-attr) or column indexes. The first element of
  visualization-options, if any, is used as the display options map;
  :visualize is set to true unless the caller supplied a non-nil value."
  [dataset attribute-x attribute-y & visualization-options]
  ;; Fixes relative to the previous version:
  ;;  * `datset-index-attr` was a misspelling of `dataset-index-attr`; the
  ;;    unresolved symbol prevented the namespace from compiling.
  ;;  * the docstring now precedes the argument vector so it is attached to
  ;;    the var.
  ;;  * the unused class-index binding was dropped (no :group-by is passed).
  (let [attr-x      (if (keyword? attribute-x) (dataset-index-attr dataset attribute-x) attribute-x)
        attr-y      (if (keyword? attribute-y) (dataset-index-attr dataset attribute-y) attribute-y)
        options-pre (first-or-default visualization-options {})
        opts        (if (nil? (:visualize options-pre))
                      (conj options-pre {:visualize true})
                      options-pre)]
    (display-object :dataset :scatter-plot {:dataset dataset :cols [attr-x attr-y]} opts)))
;; visualization
(defmulti clusterer-display-for-attributes
  "Plots a clusterer's result over two attributes of a dataset.
  Dispatches on the class of the clusterer (the first argument)."
  ;; The dispatch fn is variadic so that calls carrying the optional trailing
  ;; visualization-options accepted by the methods do not fail with an
  ;; ArityException before a method is even selected.
  (fn [clusterer & _] (class clusterer)))
;; SimpleKMeans: draws the per-class scatter plot of the two attributes and
;; annotates it with one pointer per centroid taken from
;; (:centroids (clusterer-info clusterer)).
;;   attribute-x / attribute-y : attribute keywords or column indexes
;;   visualization-options     : optional; its first element is the options map.
;;                               The plot is shown through visualize-plot
;;                               unless that map has :visualize false.
;; Returns the plot.
(defmethod clusterer-display-for-attributes SimpleKMeans
  [clusterer dataset attribute-x attribute-y & visualization-options]
  (let [attr-x    (if (keyword? attribute-x) (dataset-index-attr dataset attribute-x) attribute-x)
        attr-y    (if (keyword? attribute-y) (dataset-index-attr dataset attribute-y) attribute-y)
        opts      (first-or-default visualization-options {})
        ;; Read :visualize from the options map. The previous code looked it
        ;; up on the rest-args seq, where `get` always yields nil, so a
        ;; caller's {:visualize false} was silently ignored.
        display?  (not (= (get opts :visualize) false))
        ;; Build the base plot without showing it; it is displayed (at most
        ;; once) after the centroid pointers have been added.
        plot      (dataset-display-class-for-attributes
                   dataset attribute-x attribute-y
                   (conj opts {:visualize false}))
        centroids (:centroids (clusterer-info clusterer))]
    (doseq [[k centroid] centroids]
      (let [val-x (instance-value-at centroid attr-x)
            val-y (instance-value-at centroid attr-y)]
        (add-pointer plot val-x val-y
                     :text (str "centroid " k " (" (float val-x) "," (float val-y) ")"))))
    (if display?
      (visualize-plot plot)
      plot)))
;; Things to load to test this from slime
;(defn load-test-from-slime []
; (do
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/joda-time-1.6.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/opencsv-2.0.1.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/classes/")
; (add-classpath "file:///Applications/weka-3-6-2/weka.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/src/")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/incanter-charts-1.0-master-SNAPSHOT.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/incanter-core-1.0-master-SNAPSHOT.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/incanter-io-1.0-master-SNAPSHOT.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/incanter-processing-1.0-master-SNAPSHOT.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/incanter-chrono-1.0-master-SNAPSHOT.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/incanter-full-1.0-master-SNAPSHOT.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/incanter-mongodb-1.0-master-SNAPSHOT.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/jfreechart-1.0.13.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/parallelcolt-0.7.2.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/arpack-combo-0.1.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/gnujaxp-1.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/clojure-json-1.1-20091229.021828-4.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/clojure-db-object-0.1.1-20091229.021828-2.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/jcommon-1.0.16.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/netlib-java-0.9.1.jar")
; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/processing-core-1.jar")
; (add-classpath"file:///Users/antonio.garrote/Development/old/clj-ml/lib/congomongo-0.1.1-20091229.021828-1.jar")
; (add-classpath"file:///Users/antonio.garrote/Development/old/clj-ml/lib/mongo-1.0.jar")
; (add-classpath"file:///Users/antonio.garrote/Development/old/clj-ml/lib/mongo-java-driver-1.1.0-20091229.021828-3.jar")
; ))