Stop keywording instance nominal values by default, for performance reasons

On large datasets, if you use dataset-as-maps, too much time is spent
interning the nominal values over and over again.  For the vast
majority of use cases, string values should be fine.  If we were 100% in
Clojure-land, keywords would be the better option... but we
aren't.
This commit is contained in:
Ben Mabey 2010-12-08 16:10:45 -07:00
parent 5d59785f3b
commit 4d18af461e
3 changed files with 7 additions and 7 deletions

View file

@ -256,12 +256,12 @@ If the class is nominal then the string value (not keyword) is returned."
(.classValue instance))
(defn instance-value-at
"Returns the value of an instance attribute"
"Returns the value of an instance attribute. A string, not a keyword is returned."
[^Instance instance pos]
(let [pos (int pos)
attr (.attribute instance pos)]
(if (.isNominal attr)
(keyword (.stringValue instance pos))
(.stringValue instance pos)
(.value instance pos))))
(defn instance-to-list

View file

@ -119,8 +119,8 @@
(deftest working-sequences-and-helpers
(let [ds (make-dataset "test" [:a :b {:c [:d :e]}] [{:a 1 :b 2 :c :d} [4 5 :e]])]
(is (= 2 (dataset-count ds)))
(is (= [{:a 1 :b 2 :c :d} {:a 4 :b 5 :c :e}] (dataset-as-maps ds)))
(is (= [{:a 1 :b 2 :c :d} {:a 4 :b 5 :c :e}] (map #(instance-to-map %1) (dataset-seq ds))))))
(is (= [{:a 1 :b 2 :c "d"} {:a 4 :b 5 :c "e"}] (dataset-as-maps ds)))
(is (= [{:a 1 :b 2 :c "d"} {:a 4 :b 5 :c "e"}] (map #(instance-to-map %1) (dataset-seq ds))))))
(deftest dataset-instance-predicates
(let [ds (make-dataset "test" [:a :b {:c [:d :e]}] [{:a 1 :b 2 :c :d} [4 5 :e]])

View file

@ -174,9 +174,9 @@
{:process inc-nums
:determine-dataset-format rename-attributes} ds)]
(is (= (map instance-to-map (dataset-seq res))
[{:foo 2 :bar 3 :c :g}
{:foo 3 :bar 5 :c :m}
{:foo 5 :bar 9 :c :g}]))))
[{:foo 2 :bar 3 :c "g"}
{:foo 3 :bar 5 :c "m"}
{:foo 5 :bar 9 :c "g"}]))))
(deftest make-apply-filter-clj-batch