Stop keywording the instance nominal values by default, for performance reasons

On large datasets, if you use dataset-as-maps, too much time is taken up
in interning the nominal values time and time again.  For the vast
majority of use cases, string values should be fine.  If we were in
Clojure-land 100%, then keywords would be the better option... but we
aren't.
This commit is contained in:
Ben Mabey 2010-12-08 16:10:45 -07:00
parent 5d59785f3b
commit 4d18af461e
3 changed files with 7 additions and 7 deletions

View file

@ -256,12 +256,12 @@ If the class is nominal then the string value (not keyword) is returned."
(.classValue instance)) (.classValue instance))
(defn instance-value-at (defn instance-value-at
"Returns the value of an instance attribute" "Returns the value of an instance attribute. A string, not a keyword is returned."
[^Instance instance pos] [^Instance instance pos]
(let [pos (int pos) (let [pos (int pos)
attr (.attribute instance pos)] attr (.attribute instance pos)]
(if (.isNominal attr) (if (.isNominal attr)
(keyword (.stringValue instance pos)) (.stringValue instance pos)
(.value instance pos)))) (.value instance pos))))
(defn instance-to-list (defn instance-to-list

View file

@ -119,8 +119,8 @@
(deftest working-sequences-and-helpers (deftest working-sequences-and-helpers
(let [ds (make-dataset "test" [:a :b {:c [:d :e]}] [{:a 1 :b 2 :c :d} [4 5 :e]])] (let [ds (make-dataset "test" [:a :b {:c [:d :e]}] [{:a 1 :b 2 :c :d} [4 5 :e]])]
(is (= 2 (dataset-count ds))) (is (= 2 (dataset-count ds)))
(is (= [{:a 1 :b 2 :c :d} {:a 4 :b 5 :c :e}] (dataset-as-maps ds))) (is (= [{:a 1 :b 2 :c "d"} {:a 4 :b 5 :c "e"}] (dataset-as-maps ds)))
(is (= [{:a 1 :b 2 :c :d} {:a 4 :b 5 :c :e}] (map #(instance-to-map %1) (dataset-seq ds)))))) (is (= [{:a 1 :b 2 :c "d"} {:a 4 :b 5 :c "e"}] (map #(instance-to-map %1) (dataset-seq ds))))))
(deftest dataset-instance-predicates (deftest dataset-instance-predicates
(let [ds (make-dataset "test" [:a :b {:c [:d :e]}] [{:a 1 :b 2 :c :d} [4 5 :e]]) (let [ds (make-dataset "test" [:a :b {:c [:d :e]}] [{:a 1 :b 2 :c :d} [4 5 :e]])

View file

@ -174,9 +174,9 @@
{:process inc-nums {:process inc-nums
:determine-dataset-format rename-attributes} ds)] :determine-dataset-format rename-attributes} ds)]
(is (= (map instance-to-map (dataset-seq res)) (is (= (map instance-to-map (dataset-seq res))
[{:foo 2 :bar 3 :c :g} [{:foo 2 :bar 3 :c "g"}
{:foo 3 :bar 5 :c :m} {:foo 3 :bar 5 :c "m"}
{:foo 5 :bar 9 :c :g}])))) {:foo 5 :bar 9 :c "g"}]))))
(deftest make-apply-filter-clj-batch (deftest make-apply-filter-clj-batch