diff --git a/src/clj_ml/data.clj b/src/clj_ml/data.clj
index c76c0a4..5bb2d20 100644
--- a/src/clj_ml/data.clj
+++ b/src/clj_ml/data.clj
@@ -12,6 +12,7 @@
    that can be transformed using usual Clojure functions like map, reduce, etc."
   (:use [clj-ml utils]
         [clojure.contrib.seq :only [find-first]])
+  (:require [clj-ml.filters :as filters])
   (:import (weka.core Instance Instances FastVector Attribute)
            (cljml ClojureInstances)))
 
@@ -431,3 +432,18 @@ The intention is for this to be used on data-formats and not on datasets with da
     (doto dataset
       (.deleteAttributeAt (int attr-pos))
       (.insertAttributeAt new-attr (int attr-pos)))))
+
+(defn split-dataset
+  "Splits ds in two by percentage: the first part holds `percentage` percent of the
+instances and the second holds the rest. Each part is a Delay, so it must be dereffed
+to obtain the dataset; use do-split-dataset to get the datasets themselves instead."
+  [ds percentage]
+  (let [opts {:percentage percentage}]
+    [(delay (filters/remove-percentage ds (assoc opts :invert true)))
+     (delay (filters/remove-percentage ds opts))]))
+
+(defn do-split-dataset
+  "Splits the dataset into two parts based on the percentage given. The same as split-dataset but
+both datasets are computed before returning, instead of being Delay objects that need dereffing."
+  [ds percentage]
+  (doall (map deref (split-dataset ds percentage)))) ; map is lazy; doall forces both splits now
diff --git a/src/clj_ml/filters.clj b/src/clj_ml/filters.clj
index 21371b8..70eea9e 100644
--- a/src/clj_ml/filters.clj
+++ b/src/clj_ml/filters.clj
@@ -36,7 +36,7 @@
    The previous sample of code could be rewritten with the make-apply-filter function:
 
      (def filtered-ds (make-apply-filter :remove-attributes {:attributes [:a :c]} ds))"
-  (:use [clj-ml data utils options-utils]
+  (:use [clj-ml utils options-utils]
         [clojure.contrib [def :only [defvar defvar-]]])
   (:require [clojure.contrib [string :as str]])
   (:import (weka.filters Filter)
@@ -135,6 +135,13 @@
 
 (deffilter remove-attributes)
 
+(defmethod make-filter-options :remove-percentage
+  ([kind m]
+     ;; The :percentage (-P) options are built first and passed as check-options' last argument.
+     (check-options m {:invert "-V"} (check-option-values m {:percentage "-P"}))))
+
+(deffilter remove-percentage)
+
 (defmethod make-filter-options :remove-useless-attributes
   ([kind m]
      (check-option-values m {:max-variance "-M"})))
@@ -170,6 +177,7 @@
    :numeric-to-nominal weka.filters.unsupervised.attribute.NumericToNominal
    :add-attribute weka.filters.unsupervised.attribute.Add
    :remove-attributes weka.filters.unsupervised.attribute.Remove
+   :remove-percentage weka.filters.unsupervised.instance.RemovePercentage
    :remove-useless-attributes weka.filters.unsupervised.attribute.RemoveUseless
    :select-append-attributes weka.filters.unsupervised.attribute.Copy
    :project-attributes weka.filters.unsupervised.attribute.Remove}
@@ -188,6 +196,7 @@
      - :numeric-to-nominal
      - :add-attribute
      - :remove-attributes
+     - :remove-percentage
      - :remove-useless-attributes
      - :select-append-attributes
      - :project-attributes
diff --git a/src/clj_ml/options_utils.clj b/src/clj_ml/options_utils.clj
index 3ff43de..bb1e405 100644
--- a/src/clj_ml/options_utils.clj
+++ b/src/clj_ml/options_utils.clj
@@ -6,7 +6,7 @@
 (ns #^{:author "Ben Mabey <ben@benmabey.com>"
        :skip-wiki true}
   clj-ml.options-utils
-  (:use [clj-ml data])
+  (:use     [clojure.contrib.seq :only [find-first]])
   (:require [clojure.contrib [string :as str]]))
 
 ;; Manipulation of array of options
@@ -26,6 +26,18 @@
       (conj  (conj opts flag) (str val-in-map)))))
 
 
+;; attr-name and dataset-index-attr are copied from clj-ml.data because Clojure cannot
+;; handle circular namespace dependencies. :(
+(defn- attr-name [^weka.core.Attribute attribute]
+  (. attribute name)) ; same interop call as (.name attribute), written in dot special form
+
+(defn- dataset-index-attr
+  "Resolves attr to an attribute index in dataset: a number is returned unchanged, anything
+else is compared by name against each attribute; nil when no attribute name matches."
+  [^weka.core.Instances dataset attr]
+  (let [matches? (fn [idx] (= (name attr) (attr-name (.attribute dataset (int idx)))))]
+    (if-not (number? attr) (find-first matches? (range (.numAttributes dataset))) attr)))
+
 (defn extract-attributes
   "Transforms the :attributes value from m into the appropriate weka flag"
   ([m] (extract-attributes "-R" m))
diff --git a/test/clj_ml/data_test.clj b/test/clj_ml/data_test.clj
index 00007bd..d3d4d70 100644
--- a/test/clj_ml/data_test.clj
+++ b/test/clj_ml/data_test.clj
@@ -180,3 +180,23 @@
     (is (= nil (dataset-class-name ds)))
     (dataset-set-class ds :b)
     (is (= :b (dataset-class-name ds)))))
+
+(deftest split-dataset-test
+  ;; With four instances and 25 percent, the first part should hold one instance and the
+  ;; second part the remaining three; both parts are Delays and are dereffed before counting.
+  (let [ds (make-dataset "test" [:a {:b [:foo :bar]}]
+                         [[1 :foo] [2 :bar] [3 :bar] [4 :foo]])
+        [kept remaining] (split-dataset ds 25)
+        size-of (fn [part] (dataset-count (deref part)))]
+    (is (= (size-of kept) 1))
+    (is (= (size-of remaining) 3))))
+
+(deftest do-split-dataset-test
+  ;; do-split-dataset returns the datasets themselves, so they are counted without
+  ;; dereffing: 25 percent of four instances is one, leaving three in the second part.
+  (let [ds (make-dataset "test" [:a {:b [:foo :bar]}]
+                         [[1 :foo] [2 :bar]
+                          [3 :bar] [4 :foo]])
+        [kept remaining] (do-split-dataset ds 25)]
+    (is (= (dataset-count kept) 1))
+    (is (= (dataset-count remaining) 3))))
diff --git a/test/clj_ml/filters_test.clj b/test/clj_ml/filters_test.clj
index f38ae42..09c4bb4 100644
--- a/test/clj_ml/filters_test.clj
+++ b/test/clj_ml/filters_test.clj
@@ -107,6 +107,13 @@
     (is (= (dataset-format res)
            [:b {:c '(:g :m)}]))))
 
+(deftest remove-precentage-test
+  ;; Removing 75 percent of a four-instance dataset should leave exactly one instance.
+  (let [ds (make-dataset :test [:a :b {:c [:g :m]}]
+                         [[1 2 :g] [2 3 :m]
+                          [4 2 :m] [4 5 :g]])
+        opts {:percentage 75}]
+    (is (= (dataset-count (remove-percentage ds opts)) 1))))
 
 (deftest make-apply-filter-numeric-to-nominal
   (let [ds (make-dataset :test [:a :b {:c [:g :m]}]