From dcf6534ea4b7dd07aecbfc58ec26c39fa0cb56b8 Mon Sep 17 00:00:00 2001 From: Joshua Eckroth Date: Wed, 7 Aug 2013 10:32:24 -0400 Subject: [PATCH] Removed UI code (Weka can do that better) and some other unused or broken dependencies. --- project.clj | 6 +- src/clj_ml/classifiers.clj | 15 --- src/clj_ml/data.clj | 3 +- src/clj_ml/ui.clj | 200 ------------------------------------- 4 files changed, 2 insertions(+), 222 deletions(-) delete mode 100644 src/clj_ml/ui.clj diff --git a/project.clj b/project.clj index 14282a4..ae80687 100644 --- a/project.clj +++ b/project.clj @@ -5,14 +5,10 @@ :url "http://opensource.org/licenses/MIT"} :url "https://github.com/joshuaeckroth/clj-ml" :dependencies [[org.clojure/clojure "1.4.0"] - [incanter/incanter-core "1.4.1"] - [incanter/incanter-charts "1.4.1"] [nz.ac.waikato.cms.weka/weka-stable "3.6.9"] [tw.edu.ntu.csie/libsvm "3.1"] - [org.jsoup/jsoup "1.7.2"] [org.clojure/data.xml "0.0.7"] - [org.apache.lucene/lucene-analyzers-common "4.3.0"] - [org.clojars.chapmanb/fast-random-forest "0.98"]] + [org.apache.lucene/lucene-analyzers-common "4.3.0"]] :profiles {:dev {:plugins [[lein-midje "2.0.0"]] :dependencies [[midje "1.4.0"]]}} diff --git a/src/clj_ml/classifiers.clj b/src/clj_ml/classifiers.clj index ecae7b6..3310f4b 100644 --- a/src/clj_ml/classifiers.clj +++ b/src/clj_ml/classifiers.clj @@ -62,7 +62,6 @@ " (:use [clj-ml utils data kernel-functions options-utils]) (:import (java.util Date Random) - (hr.irb.fastRandomForest FastRandomForest) (weka.core Instance Instances) (weka.classifiers.lazy IBk) (weka.classifiers.trees J48 RandomForest M5P) @@ -241,16 +240,6 @@ :random-seed "-S" :depth "-depth"})))) -(defmethod make-classifier-options [:decision-tree :fast-random-forest] - ([kind algorithm m] - (->> - (check-options m {:debug "-D"}) - (check-option-values m - {:num-trees-in-forest "-I" - :num-features-to-consider "-K" - :random-seed "-S" - :depth "-depth"})))) - (defmethod make-classifier-options [:decision-tree :rotation-forest] ([kind algorithm m] (->> @@ -536,10 +525,6 @@ ([kind algorithm & options] (make-classifier-with kind algorithm RandomForest options))) -(defmethod make-classifier [:decision-tree :fast-random-forest] - ([kind algorithm & options] - (make-classifier-with kind algorithm FastRandomForest options))) - (defmethod make-classifier [:decision-tree :rotation-forest] ([kind algorithm & options] (make-classifier-with kind algorithm RotationForest options))) diff --git a/src/clj_ml/data.clj b/src/clj_ml/data.clj index 68cad88..2ffea90 100644 --- a/src/clj_ml/data.clj +++ b/src/clj_ml/data.clj @@ -16,7 +16,6 @@ (:require [clojure.string :as str]) (:require [clojure.set :as set]) (:use [clojure.java.io :only [file]]) - (:import [org.jsoup Jsoup]) (:import (weka.core Instance Instances FastVector Attribute) (cljml ClojureInstances))) @@ -535,7 +534,7 @@ split immediately you can use do-split-dataset." ds (make-dataset :docs [{:class [:no :yes]} {:title nil} {:fulltext nil}] (for [doc docs-keep-n] - (let [orig-fulltext (.text (Jsoup/parse (or (:fulltext doc) (:extracted doc) ""))) + (let [orig-fulltext (:fulltext doc "") fulltext (str/replace orig-fulltext #"\s+" " ") fulltext-fixed (str/replace fulltext #"[^ \w\d]" "") title (str/replace (:title doc "") #"[^ \w\d]" "") diff --git a/src/clj_ml/ui.clj b/src/clj_ml/ui.clj deleted file mode 100644 index e09a447..0000000 --- a/src/clj_ml/ui.clj +++ /dev/null @@ -1,200 +0,0 @@ -;; -;; User interface utilities -;; @author Antonio Garrote -;; - -(ns #^{:author "Antonio Garrote "} - clj-ml.ui - "Namespace containing functions for plotting classifiers, clusterers and data sets." - (:use (clj-ml data utils clusterers) - (incanter core stats charts)) - (:import (weka.clusterers ClusterEvaluation SimpleKMeans))) - - -(defn visualize-plot [plot] - "Prepare a plot to be displayed" - (do (clear-background plot) - (view plot) - plot)) - -(defmulti display-object - "Displays some kind of clj-ml object" - (fn [kind chart data opts] [kind chart])) - -(defmethod display-object [:dataset :boxplot] - ([kind chart dataset-opts display-opts] - (let [dataset (get dataset-opts :dataset) - dataseq (dataset-seq dataset) - cols (get dataset-opts :cols) - cols-names (dataset-format dataset) - vals-map (reduce (fn [acum col] - (let [name (name (nth cols-names col)) - vals (map #(nth (instance-to-vector %1) col) dataseq)] - (conj acum {name vals}))) - {} - cols) - title (or (get display-opts :title) (str "Dataset '" (dataset-name dataset) "' Box Plot")) - legend (if (nil? (get display-opts :legend)) true (get display-opts :legend)) - should-display (get display-opts :visualize)] - (loop [plot nil - ks (keys vals-map)] - (if (empty? ks) - (if should-display - (visualize-plot plot) - plot) - (let [this-val (get vals-map (first ks)) - the-plot (if (nil? plot) - (box-plot this-val :title title :legend legend :series-label (name (first ks))) - (do (add-box-plot plot this-val :series-label (name (first ks))) - plot))] - (recur the-plot (rest ks)))))))) - - -(defmethod display-object [:dataset :scatter-plot] - ([kind chart dataset-opts display-opts] - (let [dataset (get dataset-opts :dataset) - dataseq (dataset-seq dataset) - cols (get dataset-opts :cols) - col-0 (nth cols 0) - col-1 (nth cols 1) - group-by (get dataset-opts :group-by) - cols-names (dataset-format dataset) - group-vals (if (nil? group-by) {:no-group-by :no-class} (dataset-values-at dataset group-by)) - acum-map (reduce (fn [acum group-val] - (conj acum {(first group-val) - (reduce (fn [acum x] (conj acum {x []})) - {} - cols)})) - {} - group-vals) - folded-points (reduce (fn [acum instance] - (let [inst (instance-to-vector instance) - val-0 (nth inst col-0) - val-1 (nth inst col-1) - class (if (nil? group-by) - :no-group-by - (nth inst group-by))] - (merge-with - (fn [a b] {col-0 (conj (get a col-0) - (get b col-0)) - col-1 (conj (get a col-1) - (get b col-1))}) - acum - {class {col-0 val-0 col-1 val-1}}))) - acum-map - dataseq) - title (or (get display-opts :title) (str "Dataset '" (dataset-name dataset) "' Scatter Plot (" - (name (nth cols-names col-0)) " vs " - (name (nth cols-names col-1)) ")")) - legend (if (nil? (get display-opts :legend)) true (get display-opts :legend)) - should-display (get display-opts :visualize)] - (loop [plot nil - ks (keys folded-points)] - (if (empty? ks) - (if should-display - (visualize-plot plot) - plot) - (let [this-vals (get folded-points (first ks)) - this-val-0 (get this-vals col-0) - this-val-1 (get this-vals col-1) - the-plot (if (nil? plot) - (scatter-plot this-val-0 this-val-1 - :title title - :x-label (name (nth cols-names col-0)) - :y-label (name (nth cols-names col-1)) - :series-label (name (first ks)) - :legend legend) - (do (add-points plot this-val-0 this-val-1 :series-label (name (first ks))) - plot))] - (recur the-plot (rest ks)))))))) - - -;; visualization of different objects - -(defn dataset-display-numeric-attributes [dataset attributes & visualization-options] - "Displays the provided attributes into a box plot" - (let [attr (map #(if (keyword? %1) (dataset-index-attr dataset %1) %1) attributes) - options-pre (first-or-default visualization-options {}) - options (if (nil? (:visualize options-pre)) (conj options-pre {:visualize true}) options-pre)] - (display-object :dataset :boxplot {:dataset dataset :cols attr} options))) - -(defn dataset-display-class-for-attributes [dataset attribute-x attribute-y & visualization-options] - "Displays how a pair of attributes are distributed for each class" - (let [attr-x (if (keyword? attribute-x) (dataset-index-attr dataset attribute-x) attribute-x) - attr-y (if (keyword? attribute-y) (dataset-index-attr dataset attribute-y) attribute-y) - options-pre (first-or-default visualization-options {}) - opts (if (nil? (:visualize options-pre)) (conj options-pre {:visualize true}) options-pre) - class-index (dataset-get-class dataset)] - (display-object :dataset :scatter-plot {:dataset dataset :cols [attr-x attr-y] :group-by class-index} opts))) - -(defn dataset-display-attributes [dataset attribute-x attribute-y & visualization-options] - "Displays the distribution of a set of attributes for a dataset" - (let [attr-x (if (keyword? attribute-x) (datset-index-attr dataset attribute-x) attribute-x) - attr-y (if (keyword? attribute-y) (datset-index-attr dataset attribute-y) attribute-y) - options-pre (first-or-default visualization-options {}) - opts (if (nil? (:visualize options-pre)) (conj options-pre {:visualize true}) options-pre) - class-index (dataset-get-class dataset)] - (display-object :dataset :scatter-plot {:dataset dataset :cols [attr-x attr-y]} opts))) - - -;; visualization - -(defmulti clusterer-display-for-attributes - (fn [clusterer dataset attribute-x attribute-y] (class clusterer))) - -(defmethod clusterer-display-for-attributes SimpleKMeans - ([clusterer dataset attribute-x attribute-y & visualization-options] - (let [attr-x (if (keyword? attribute-x) (dataset-index-attr dataset attribute-x) attribute-x) - attr-y (if (keyword? attribute-y) (dataset-index-attr dataset attribute-y) attribute-y) - opts (first-or-default visualization-options {}) - display? (if (= (get visualization-options :visualize) false) - false - true) - true-opts (conj opts {:visualize false}) - plot (dataset-display-class-for-attributes dataset attribute-x attribute-y true-opts) - info (clusterer-info clusterer) - centroids (:centroids info)] - (do - (loop [ks (keys centroids)] - (if (empty? ks) - (if display? - (visualize-plot plot) - plot) - (let [k (first ks) - centroid (get centroids k) - val-x (instance-value-at centroid attr-x) - val-y (instance-value-at centroid attr-y)] - (add-pointer plot val-x val-y :text (str "centroid " k " (" (float val-x) "," (float val-y) ")")) - (recur (rest ks))))))))) - - - -;; Things to load to test this from slime - - ;(defn load-test-from-slime [] - ; (do - ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/joda-time-1.6.jar") - ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/opencsv-2.0.1.jar") - ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/classes/") - ; (add-classpath "file:///Applications/weka-3-6-2/weka.jar") - ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/src/") - ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/incanter-charts-1.0-master-SNAPSHOT.jar") - ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/incanter-core-1.0-master-SNAPSHOT.jar") - ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/incanter-io-1.0-master-SNAPSHOT.jar") - ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/incanter-processing-1.0-master-SNAPSHOT.jar") - ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/incanter-chrono-1.0-master-SNAPSHOT.jar") - ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/incanter-full-1.0-master-SNAPSHOT.jar") - ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/incanter-mongodb-1.0-master-SNAPSHOT.jar") - ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/jfreechart-1.0.13.jar") - ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/parallelcolt-0.7.2.jar") - ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/arpack-combo-0.1.jar") - ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/gnujaxp-1.jar") - ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/clojure-json-1.1-20091229.021828-4.jar") - ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/clojure-db-object-0.1.1-20091229.021828-2.jar") - ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/jcommon-1.0.16.jar") - ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/netlib-java-0.9.1.jar") - ; (add-classpath "file:///Users/antonio.garrote/Development/old/clj-ml/lib/processing-core-1.jar") - ; (add-classpath"file:///Users/antonio.garrote/Development/old/clj-ml/lib/congomongo-0.1.1-20091229.021828-1.jar") - ; (add-classpath"file:///Users/antonio.garrote/Development/old/clj-ml/lib/mongo-1.0.jar") - ; (add-classpath"file:///Users/antonio.garrote/Development/old/clj-ml/lib/mongo-java-driver-1.1.0-20091229.021828-3.jar") - ; ))