extracts Weka Options helpers into own ns due to circ dep issue

This commit is contained in:
Ben Mabey 2011-08-29 16:50:06 -06:00
parent ed02bdd643
commit 6f5c100d95
8 changed files with 76 additions and 59 deletions

View file

@ -1,4 +1,4 @@
(defproject com.leadtune/clj-ml "0.1.2" (defproject com.leadtune/clj-ml "0.1.3-SNAPSHOT"
:description "Machine Learning library for Clojure built around Weka and friends" :description "Machine Learning library for Clojure built around Weka and friends"
:repositories {"leadtune-repo" "http://c0026236.cdn1.cloudfiles.rackspacecloud.com/repo"} :repositories {"leadtune-repo" "http://c0026236.cdn1.cloudfiles.rackspacecloud.com/repo"}
:java-source-path "src/java" :java-source-path "src/java"
@ -11,7 +11,8 @@
[incanter/incanter-charts "1.2.3"] [incanter/incanter-charts "1.2.3"]
[lt/weka "3.6.3"] [lt/weka "3.6.3"]
[hr.irb/fastRandomForest "0.98"]] [hr.irb/fastRandomForest "0.98"]]
:dev-dependencies [[autodoc "0.7.0" :dev-dependencies [[midje "1.3-alpha1"]
[autodoc "0.7.0"
:exclusions [org.clojure/clojure org.clojure/clojure-contrib :exclusions [org.clojure/clojure org.clojure/clojure-contrib
ant/ant ant/ant-launcher]] ;; older ant breaks newer lein ant/ant ant/ant-launcher]] ;; older ant breaks newer lein
[lein-javac "1.2.1-SNAPSHOT" [lein-javac "1.2.1-SNAPSHOT"

View file

@ -60,7 +60,7 @@
(serialize-to-file *classifier* (serialize-to-file *classifier*
\"/Users/antonio.garrote/Desktop/classifier.bin\") \"/Users/antonio.garrote/Desktop/classifier.bin\")
" "
(:use [clj-ml utils data kernel-functions]) (:use [clj-ml utils data kernel-functions options-utils])
(:import (java.util Date Random) (:import (java.util Date Random)
(hr.irb.fastRandomForest FastRandomForest) (hr.irb.fastRandomForest FastRandomForest)
(weka.core Instance Instances) (weka.core Instance Instances)

View file

@ -13,7 +13,7 @@
having the full data set in main memory. Functions for evaluating the clusterer having the full data set in main memory. Functions for evaluating the clusterer
as well as for clustering new instances are also supported as well as for clustering new instances are also supported
" "
(:use [clj-ml utils data distance-functions]) (:use [clj-ml utils data distance-functions options-utils])
(:import (java.util Date Random) (:import (java.util Date Random)
(weka.clusterers ClusterEvaluation SimpleKMeans Cobweb EM))) (weka.clusterers ClusterEvaluation SimpleKMeans Cobweb EM)))

View file

@ -9,7 +9,7 @@
classifiers and clusterers like K-Means. classifiers and clusterers like K-Means.
Euclidean, Manhattan and Chebyshev distance functions are supported." Euclidean, Manhattan and Chebyshev distance functions are supported."
(:use [clj-ml utils data]) (:use [clj-ml utils data options-utils])
(:import (weka.core EuclideanDistance ManhattanDistance ChebyshevDistance))) (:import (weka.core EuclideanDistance ManhattanDistance ChebyshevDistance)))

View file

@ -36,7 +36,7 @@
The previous sample of code could be rewritten with the make-apply-filter function: The previous sample of code could be rewritten with the make-apply-filter function:
(def filtered-ds (make-apply-filter :remove-attributes {:attributes [:a :c]} ds))" (def filtered-ds (make-apply-filter :remove-attributes {:attributes [:a :c]} ds))"
(:use [clj-ml data utils] (:use [clj-ml data utils options-utils]
[clojure.contrib [def :only [defvar defvar-]]]) [clojure.contrib [def :only [defvar defvar-]]])
(:require [clojure.contrib [string :as str]]) (:require [clojure.contrib [string :as str]])
(:import (weka.filters Filter) (:import (weka.filters Filter)
@ -51,13 +51,6 @@
"Creates the right parameters for a filter. Returns a clojure vector." "Creates the right parameters for a filter. Returns a clojure vector."
(fn [kind map] kind)) (fn [kind map] kind))
(defn- extract-attributes
  "Builds the weka \"-R\" option pair from the :attributes entry of m.
  Each attribute is resolved to its index within (:dataset-format m) via
  dataset-index-attr, incremented by one, and the results are joined with
  commas. Returns a two-element vector: the flag and the joined indices."
  [m]
  (let [weka-indices (map #(inc (dataset-index-attr (:dataset-format m) %))
                          (:attributes m))]
    ["-R" (str/join "," weka-indices)]))
(declare make-apply-filter) (declare make-apply-filter)
;TODO: consider passing in the make-filter-options body here as well in addition to the docstring. ;TODO: consider passing in the make-filter-options body here as well in addition to the docstring.
(defmacro deffilter (defmacro deffilter

View file

@ -8,7 +8,7 @@
"Kernel functions that can be passed as parameters to support vector machines classifiers. "Kernel functions that can be passed as parameters to support vector machines classifiers.
Polynomic, radial basis and string kernels are supported" Polynomic, radial basis and string kernels are supported"
(:use [clj-ml utils data]) (:use [clj-ml utils data options-utils])
(:import (weka.classifiers.functions.supportVector PolyKernel RBFKernel StringKernel))) (:import (weka.classifiers.functions.supportVector PolyKernel RBFKernel StringKernel)))
(defmulti make-kernel-function-options (defmulti make-kernel-function-options

View file

@ -0,0 +1,68 @@
;;
;; Utilities for converting clojure hash maps into Weka string options
;; @author Ben Mabey
;;
(ns #^{:author "Ben Mabey <ben@benmabey.com>"
:skip-wiki true}
clj-ml.options-utils
(:use [clj-ml data])
(:require [clojure.contrib [string :as str]]))
;; Manipulation of array of options
(defn check-option
  "Sets an option for a filter.

  Appends flag to the option vector opts when the key val has a non-nil
  value in map; otherwise returns opts unchanged. Only nil counts as
  absent, so an explicit false value still adds the flag."
  [opts val flag map]
  ;; The docstring must precede the parameter vector; placed after it, the
  ;; string is just a discarded body expression and never becomes :doc.
  (if (nil? (get map val))
    opts
    (conj opts flag)))
(defn check-option-value
  "Sets an option with value for a filter.

  When the key val has a non-nil value in map, appends flag followed by
  that value converted with str to the option vector opts; otherwise
  returns opts unchanged."
  [opts val flag map]
  ;; The docstring must precede the parameter vector; placed after it, the
  ;; string is just a discarded body expression and never becomes :doc.
  (let [val-in-map (get map val)]
    (if (nil? val-in-map)
      opts
      (conj opts flag (str val-in-map)))))
(defn extract-attributes
  "Turns a collection of attributes stored in m into a weka flag/value pair.

  flag defaults to \"-R\" and key-name defaults to :attributes. When
  (key-name m) is truthy, each attribute is resolved to its index within
  (:dataset-format m) via dataset-index-attr, incremented by one
  (presumably because weka ranges are 1-based -- confirm), and the indices
  are joined with commas; the result is [flag joined-indices]. When the key
  is missing or nil, an empty vector is returned."
  ([m]
     (extract-attributes "-R" m))
  ([flag m]
     (extract-attributes flag :attributes m))
  ([flag key-name m]
     (if-let [attrs (key-name m)]
       [flag (->> attrs
                  (map #(inc (dataset-index-attr (:dataset-format m) %)))
                  (str/join ","))]
       [])))
; TODO: Raise a helpful exception when the keys don't match up with the provided flags.
(defn check-options
  "Checks the presence of a set of options for a filter.

  opts-map maps argument keys to weka flags. For every key of opts-map,
  check-option decides whether the corresponding flag is appended, based
  on args-map. tmp is the initial option vector (defaults to [])."
  ([args-map opts-map]
     (check-options args-map opts-map []))
  ([args-map opts-map tmp]
     (reduce (fn [collected arg-key]
               (check-option collected arg-key (get opts-map arg-key) args-map))
             tmp
             (keys opts-map))))
(defn check-option-values
  "Checks the presence of a set of options with value for a filter.

  opts-map maps argument keys to weka flags. For every key of opts-map,
  check-option-value decides whether the flag and its stringified value
  are appended, based on args-map. val is the initial option vector
  (defaults to [])."
  ([args-map opts-map]
     (check-option-values args-map opts-map []))
  ([args-map opts-map val]
     (reduce (fn [collected arg-key]
               (check-option-value collected arg-key (get opts-map arg-key) args-map))
             val
             (keys opts-map))))

View file

@ -67,51 +67,6 @@
(catch NoSuchAlgorithmException e (catch NoSuchAlgorithmException e
(throw (new RuntimeException e)))))) (throw (new RuntimeException e))))))
;; Manipulation of array of options
(defn check-option
  "Sets an option for a filter.

  Appends flag to the option vector opts when the key val has a non-nil
  value in map; otherwise returns opts unchanged. Only nil counts as
  absent, so an explicit false value still adds the flag."
  [opts val flag map]
  ;; The docstring must precede the parameter vector; placed after it, the
  ;; string is just a discarded body expression and never becomes :doc.
  (if (nil? (get map val))
    opts
    (conj opts flag)))
(defn check-option-value
  "Sets an option with value for a filter.

  When the key val has a non-nil value in map, appends flag followed by
  that value converted with str to the option vector opts; otherwise
  returns opts unchanged."
  [opts val flag map]
  ;; The docstring must precede the parameter vector; placed after it, the
  ;; string is just a discarded body expression and never becomes :doc.
  (let [val-in-map (get map val)]
    (if (nil? val-in-map)
      opts
      (conj opts flag (str val-in-map)))))
; TODO: Raise a helpful exception when the keys don't match up with the provided flags.
(defn check-options
  "Checks the presence of a set of options for a filter.

  opts-map maps argument keys to weka flags. For every key of opts-map,
  check-option decides whether the corresponding flag is appended, based
  on args-map. tmp is the initial option vector (defaults to [])."
  ([args-map opts-map]
     (check-options args-map opts-map []))
  ([args-map opts-map tmp]
     (reduce (fn [collected arg-key]
               (check-option collected arg-key (get opts-map arg-key) args-map))
             tmp
             (keys opts-map))))
(defn check-option-values
  "Checks the presence of a set of options with value for a filter.

  opts-map maps argument keys to weka flags. For every key of opts-map,
  check-option-value decides whether the flag and its stringified value
  are appended, based on args-map. val is the initial option vector
  (defaults to [])."
  ([args-map opts-map]
     (check-option-values args-map opts-map []))
  ([args-map opts-map val]
     (reduce (fn [collected arg-key]
               (check-option-value collected arg-key (get opts-map arg-key) args-map))
             val
             (keys opts-map))))
;; Serializing classifiers ;; Serializing classifiers
(defn serialize (defn serialize