Don't shuffle docs in doc-to-dataset.
This commit is contained in:
parent
c7c7cdd9f1
commit
e11690dd93
1 changed file with 4 additions and 4 deletions
|
@@ -524,13 +524,13 @@ split immediately you can use do-split-dataset."
|
||||||
[docs vocab term keep-n datadir & opts]
|
[docs vocab term keep-n datadir & opts]
|
||||||
(let [parsed-opts (apply hash-map opts)
|
(let [parsed-opts (apply hash-map opts)
|
||||||
docs-with-term (filter (fn [doc] (some #{term} (get-in doc [:terms vocab]))) docs)
|
docs-with-term (filter (fn [doc] (some #{term} (get-in doc [:terms vocab]))) docs)
|
||||||
docs-without-term (let [dwt (set/difference (set docs) (set docs-with-term))]
|
docs-without-term (let [dwt (filter (fn [doc] (not-any? #{term} (get-in doc [:terms vocab]))) docs)]
|
||||||
(if (:resample parsed-opts)
|
(if (:resample parsed-opts)
|
||||||
(take (count docs-with-term) dwt)
|
(take (count docs-with-term) dwt)
|
||||||
dwt))
|
dwt))
|
||||||
docs-keep-n (shuffle (if keep-n (concat (take (/ keep-n 2) docs-with-term)
|
docs-keep-n (if keep-n (concat (take (/ keep-n 2) docs-with-term)
|
||||||
(take (/ keep-n 2) docs-without-term))
|
(take (/ keep-n 2) docs-without-term))
|
||||||
(concat docs-with-term docs-without-term)))
|
(concat docs-with-term docs-without-term))
|
||||||
ds (make-dataset
|
ds (make-dataset
|
||||||
:docs [{:class [:no :yes]} {:title nil} {:fulltext nil}]
|
:docs [{:class [:no :yes]} {:title nil} {:fulltext nil}]
|
||||||
(for [doc docs-keep-n]
|
(for [doc docs-keep-n]
|
||||||
|
|
Loading…
Reference in a new issue