Integrated improved parser.

This commit is contained in:
Nicolas Buduroi 2011-01-16 20:05:28 -05:00
parent db934b93fb
commit 97e9877d60
3 changed files with 142 additions and 105 deletions

View file

@ -3,7 +3,9 @@
into an easily consumable format." into an easily consumable format."
(:require [clojure.java.io :as io] (:require [clojure.java.io :as io]
[clojure.string :as str]) [clojure.string :as str])
(:use [marginalia.html :only (uberdoc-html)] (:use [marginalia
[html :only (uberdoc-html)]
[parser :only (parse-file)]]
[clojure.contrib [clojure.contrib
[find-namespaces :only (read-file-ns-decl)] [find-namespaces :only (read-file-ns-decl)]
[command-line :only (print-help with-command-line)]]) [command-line :only (print-help with-command-line)]])
@ -13,8 +15,6 @@
(def *test* "./src/cljojo/core.clj") (def *test* "./src/cljojo/core.clj")
(def *docs* "./docs") (def *docs* "./docs")
(def *comment* #"^\s*;;\s?") (def *comment* #"^\s*;;\s?")
(def *divider-text* "\n;;DIVIDER\n")
(def *divider-html* #"\n*<span class=\"c[1]?\">;;DIVIDER</span>\n*")
;; ## File System Utilities ;; ## File System Utilities
@ -89,14 +89,6 @@
;; ## Source File Analysis ;; ## Source File Analysis
;; This line should be replaced
;; and this one too!
(defn parse [src]
(for [line (line-seq src)]
(if (re-find *comment* line)
{:docs-text (str (str/replace line *comment* ""))}
{:code-text (str line)})))
(defn end-of-block? [cur-group groups lines] (defn end-of-block? [cur-group groups lines]
(let [line (first lines) (let [line (first lines)
@ -176,38 +168,6 @@
(= "" (str/trim (str line))))) (= "" (str/trim (str line)))))
(catch Exception e nil)))) (catch Exception e nil))))
(defn parse [src]
(loop [[line & more] (line-seq src) cnum 1 dnum 0 sections []]
(if line
(if (re-find *comment* line)
(recur more
cnum
(inc dnum)
(conj sections {:docs-text (str (str/replace line *comment* "")) :line (+ cnum dnum)}))
(recur more
(inc cnum)
0
(if (docstring-line? (str line) sections)
(conj sections {:docstring-text (str line) :line cnum})
(conj sections {:code-text (str line) :line cnum}))))
sections)))
;; How is this handled?
;; I wonder?
;; No idea ne
(defn gen-doc! [path]
(println "Generating documentation for " path)
(with-open [src (io/reader (io/file path))]
(doseq [section (parse src)]
;; and this?
(println section))))
(defn gen-doc! [path]
(with-open [src (io/reader (io/file path))]
(parse src)))
(re-find *comment* " ;; this is a comment") (re-find *comment* " ;; this is a comment")
(defn path-to-doc [fn] (defn path-to-doc [fn]
@ -215,9 +175,7 @@
(read-file-ns-decl) (read-file-ns-decl)
(second) (second)
(str)) (str))
groups (->> fn groups (parse-file fn)]
(gen-doc!)
(group-lines))]
{:ns ns {:ns ns
:groups groups})) :groups groups}))
@ -234,8 +192,7 @@
- :version - :version
" "
[output-file-name files-to-analyze props] [output-file-name files-to-analyze props]
(let [docs (map path-to-doc files-to-analyze) (let [source (uberdoc-html
source (uberdoc-html
output-file-name output-file-name
props props
(map path-to-doc files-to-analyze))] (map path-to-doc files-to-analyze))]

View file

@ -102,29 +102,24 @@
-> \"<h1>hello world!</h1><br />\"` -> \"<h1>hello world!</h1><br />\"`
" "
[docs] [docs]
(->> docs (-> docs
(map #(if (:docs-text %) str
(prep-docs-text (:docs-text %)) prep-docs-text
(prep-docstring-text (:docstring-text %)))) replace-special-chars
(map replace-special-chars)
(interpose "\n")
(apply str)
(md))) (md)))
(defn codes-to-html [code-block]
(html [:pre {:class "brush: clojure"} code-block]))
(defn codes-to-html [codes] (defn section-to-html [section]
(html [:pre {:class "brush: clojure"} (html [:tr
(->> codes [:td {:class "docs"} (docs-to-html
(map :code-text) (if (= (:type section) :comment)
(map escape-html) (:raw section)
(interpose "\n") (:docstring section)))]
(apply str))])) [:td {:class "codes"}] (if (= (:type section) :code)
(codes-to-html (:raw section))
(defn group-to-html [group] "")]))
(html
[:tr
[:td {:class "docs"} (docs-to-html (:docs group))]
[:td {:class "codes"} (codes-to-html (:codes group))]]))
(defn dependencies-html [deps & header-name] (defn dependencies-html [deps & header-name]
(let [header-name (or header-name "dependencies")] (let [header-name (or header-name "dependencies")]
@ -221,7 +216,7 @@
[:a {:href "#toc" :class "toc-link"} [:a {:href "#toc" :class "toc-link"}
"toc"]]]] "toc"]]]]
[:td {:class "codes"}]] [:td {:class "codes"}]]
(map group-to-html (:groups doc)) (map section-to-html (:groups doc))
[:tr [:tr
[:td {:class "spacer docs"} "&nbsp;"] [:td {:class "spacer docs"} "&nbsp;"]
[:td {:class "codes"}]])) [:td {:class "codes"}]]))

View file

@ -3,53 +3,138 @@
;; Clojure parsing solution. ;; Clojure parsing solution.
(ns marginalia.parser (ns marginalia.parser
"Provides the parsing facilities for Marginalia." "Provides the parsing facilities for Marginalia."
(:require [clojure.java.io :as io] (:refer-clojure :exclude [replace])
[clojure.string :as str]) (:use [clojure.contrib [reflect :only (get-field)]]
(:use clojure.contrib.reflect)) [clojure [string :only (join replace)]]))
(defrecord CommentLine [line comment-str]) (deftype Comment [content])
(defn- read-comment (defmethod print-method Comment [comment ^String out]
[reader semicolon] (.write out (str \" (.content comment) \")))
(defn read-comment [reader semicolon]
(let [sb (StringBuilder.)] (let [sb (StringBuilder.)]
(.append sb semicolon) (.append sb semicolon)
(loop [ch (char (.read reader))] (loop [ch (char (.read reader))]
(if (or (= ch \newline) (if (or (= ch \newline)
(= ch \return) (= ch \return))
(= ch -1)) (Comment. (.toString sb))
(CommentLine. (.getLineNumber reader) (.toString sb))
(do (do
(.append sb (Character/toString ch)) (.append sb (Character/toString ch))
(recur (char (.read reader)))))))) (recur (char (.read reader))))))))
(defn make-parse-fn (defn set-comment-reader [reader]
[src] (aset (get-field clojure.lang.LispReader :macros nil)
(let [rdr (clojure.lang.LineNumberingPushbackReader. (java.io.StringReader. src))] (int \;)
reader))
(defn skip-spaces [rdr]
(loop [c (.read rdr)]
(cond (= c -1) nil
(#{\space \tab \return \newline \,} (char c))
(recur (.read rdr))
:else (.unread rdr c))))
(defn parse* [reader]
(take-while
:form
(repeatedly
(fn [] (fn []
(let [old-cmt-rdr (aget (get-field clojure.lang.LispReader :macros nil) (int \;))] (skip-spaces reader)
(aset (get-field clojure.lang.LispReader :macros nil) (int \;) read-comment) (let [start (.getLineNumber reader)
(let [result (read rdr)] form (. clojure.lang.LispReader
(aset (get-field clojure.lang.LispReader :macros nil) (int \;) old-cmt-rdr) (read reader false nil false))
result))))) end (if (instance? Comment form)
start
(.getLineNumber reader))]
{:form form :start start :end end})))))
(defn parse-file (defn comment? [o]
[filepath] (->> o :form (instance? Comment)))
(let [parser! (make-parse-fn (slurp filepath))]
(loop [lines []]
(if-let [result (try (parser!) (catch Exception _ nil))]
(recur (conj lines result))
lines))))
(comment (defn strip-docstring [docstring raw]
(parse-file "../marginalia/src/marginalia/parser.clj") (-> raw
(replace (str \" docstring \") "")
(replace #"\n\s*\n" "\n")
(replace #"\n\s*\)" ")")))
(aget (get-field clojure.lang.LispReader :macros nil) (int (first "^"))) (defn get-var-docstring [nspace-sym sym]
(try
(-> `(var ~(symbol (str nspace-sym) (str sym))) eval meta :doc)
;; HACK: to handle types
(catch Exception _)))
(def R (make-parse-fn "(def ^{:doc \"this is doc\" :author \"fogus\"} (defn extract-docstring [form raw nspace-sym]
foo (when (re-find #"^(def|ns)" (-> form first name))
^{:gah :goo} (let [sym (-> form second)
[1 2 3])")) _ (when-not nspace-sym (require sym))
nspace (find-ns sym)]
(let [docstring (if nspace
(-> nspace meta :doc)
(get-var-docstring nspace-sym sym))]
[docstring
(strip-docstring docstring raw)
(if nspace sym nspace-sym)]))))
(def e (R)) (defn- ->str [m]
(map meta e) (replace (-> m :form .content) #"^;+\s*" ""))
)
(defn merge-comments
  "Combines two parsed comment entries `f` and `s` into one entry.

  The text of each entry is passed through `->str` and the two results are
  joined with a newline inside a fresh `Comment`. The merged entry starts
  where `f` starts and ends where `s` ends."
  [f s]
  (let [merged-text (str (->str f) "\n" (->str s))
        merged-form (Comment. merged-text)
        first-line  (:start f)
        last-line   (:end s)]
    {:form  merged-form
     :start first-line
     :end   last-line}))
(defn arrange-in-sections
  "Walks the parsed entries in order and groups them into sections.

  `parsed-code` is a sequence of maps with `:form`, `:start` and `:end`
  keys; `raw-code` is a vector of source lines in which line number N is
  stored at index N-1.

  Returns a vector of section maps:

  - comment sections carry `:type :comment` and `:raw` (the comment text
    after `->str`);
  - code sections carry `:type :code`, `:raw` (the source text of the form)
    and `:docstring` (nil when none was extracted).

  Comments whose content starts with a single semicolon followed by
  whitespace are dropped, and comments on directly consecutive lines are
  merged into one section."
  [parsed-code raw-code]
  (loop [sections []
         f (first parsed-code)
         s (second parsed-code)
         nn (nnext parsed-code)
         nspace nil]
    (if f
      (cond
        ;; ignore comments with only one semicolon
        (and (comment? f) (re-find #"^;\s" (-> f :form .content)))
        (recur sections s (first nn) (next nn) nspace)

        ;; merging comments block: `s` starts on the line right after `f` ends
        (and (comment? f) (comment? s)
             (= (-> f :end) (-> s :start dec)))
        (recur sections
               (merge-comments f s)
               (first nn) (next nn)
               nspace)

        ;; adding comment section
        (comment? f)
        (recur (conj sections (assoc f :type :comment :raw (->str f)))
               s
               (first nn) (next nn)
               nspace)

        ;; adding code section
        :else
        (let [raw (join "\n" (subvec raw-code (-> f :start dec) (:end f)))
              ;; `extract-docstring` returns nil for forms it does not
              ;; handle. Destructuring that nil would wipe both the raw
              ;; source of this section and the namespace tracked for the
              ;; following forms, so fall back to the untouched values.
              [docstring stripped nspace]
              (or (extract-docstring (:form f) raw nspace)
                  [nil raw nspace])]
          (recur (conj sections (assoc f
                                  :type :code
                                  :raw stripped
                                  :docstring docstring))
                 s
                 (first nn) (next nn)
                 nspace)))
      sections)))
(defn parse
  "Parses `source-string` and returns the sections built by
  `arrange-in-sections`.

  The string (with a trailing newline appended) is read twice: once as a
  vector of lines, and once through a `LineNumberingPushbackReader` handed
  to `parse*`. While `parse*` runs, the `;` entry of the `LispReader` macro
  table is swapped for `read-comment`.

  Any exception raised while reading or arranging propagates to the caller."
  [source-string]
  (let [make-reader #(java.io.BufferedReader.
                      (java.io.StringReader. (str source-string "\n")))
        lines (vec (line-seq (make-reader)))
        reader (clojure.lang.LineNumberingPushbackReader. (make-reader))
        old-cmt-rdr (aget (get-field clojure.lang.LispReader :macros nil) (int \;))
        ;; The macro table is global state. `finally` puts the previous
        ;; comment reader back on every exit path, including Errors that a
        ;; `catch Exception` would miss. `doall` forces the lazy sequence
        ;; while the custom reader is still installed.
        parsed-code (try
                      (set-comment-reader read-comment)
                      (doall (parse* reader))
                      (finally
                        (set-comment-reader old-cmt-rdr)))]
    ;; As before, the original comment reader is already restored by the
    ;; time the sections are arranged.
    (arrange-in-sections parsed-code lines)))
(defn parse-file
  "Reads the entire contents of `file` with `slurp` and returns the result
  of calling `parse` on that text."
  [file]
  (-> file
      slurp
      parse))