diff --git a/src/clj_ml/data.clj b/src/clj_ml/data.clj index 99619cd..a028a65 100644 --- a/src/clj_ml/data.clj +++ b/src/clj_ml/data.clj @@ -245,7 +245,7 @@ (defn instance-value-at "Returns the value of an instance attribute" - [instance pos] + [^Instance instance pos] (let [attr (.attribute instance pos)] (if (.isNominal attr) (let [val (.value instance pos) @@ -260,26 +260,16 @@ (defn instance-to-vector "Builds a vector with the values of the instance" - [instance] - (let [max (.numValues instance)] - (loop [c 0 - acum []] - (if (= c max) - acum - (recur (+ c 1) - (conj acum (instance-value-at instance c))))))) + [^Instance instance] + (vec (map (partial instance-value-at instance) (range (.numValues instance))))) (defn instance-to-map "Builds a vector with the values of the instance" - [instance] - (let [max (.numValues instance)] - (loop [c 0 - acum {}] - (if (= c max) - acum - (recur (+ c 1) - (conj acum {(keyword (. (.attribute instance c) name)) - (instance-value-at instance c)} )))))) + [^Instance instance] + (reduce (fn [m i] + (assoc m (keyword (attribute-name-at instance i)) (instance-value-at instance i))) + {} + (range (.numValues instance)))) ;; manipulation of datasets diff --git a/test/clj_ml/data_test.clj b/test/clj_ml/data_test.clj index efe3125..3b8cd01 100644 --- a/test/clj_ml/data_test.clj +++ b/test/clj_ml/data_test.clj @@ -100,14 +100,11 @@ (is (sequential? seq)))) -(deftest working-sequences +(deftest working-sequences-and-helpers (let [ds (make-dataset "test" [:a :b {:c [:d :e]}] [{:a 1 :b 2 :c :d} [4 5 :e]])] (is (= 2 (dataset-count ds))) - (let [dsm (map #(instance-to-map %1) (dataset-seq ds))] - (is (= 2 (count dsm))) - (is (= 1.0 (:a (first dsm)))) - (let [dsb (make-dataset "test" [:a :b {:c [:d :e]}] dsm)] - (is (= 2 (dataset-count dsb))))))) + (is (= [{:a 1 :b 2 :c :d} {:a 4 :b 5 :c :e}] (dataset-as-maps ds))) + (is (= [{:a 1 :b 2 :c :d} {:a 4 :b 5 :c :e}] (map #(instance-to-map %1) (dataset-seq ds)))))) (deftest dataset-instance-predicates (let [ds (make-dataset "test" [:a :b {:c [:d :e]}] [{:a 1 :b 2 :c :d} [4 5 :e]]) diff --git a/test/clj_ml/filters_test.clj b/test/clj_ml/filters_test.clj index 90d3571..7914e73 100644 --- a/test/clj_ml/filters_test.clj +++ b/test/clj_ml/filters_test.clj @@ -187,7 +187,7 @@ res (make-apply-filter :clj-batch {:process add-max-diff-values :determine-dataset-format add-max-diff-attr} ds)] - (is (= (dataset-as-maps res) - [{:a 1 :max-diff 3} + (is (= [{:a 1 :max-diff 3} {:a 2 :max-diff 2} - {:a 4 :max-diff 0}])))) + {:a 4 :max-diff 0}] + (dataset-as-maps res)))))