Integrated improved parser.

This commit is contained in:
Nicolas Buduroi 2011-01-16 20:05:28 -05:00
parent db934b93fb
commit 97e9877d60
3 changed files with 142 additions and 105 deletions

View file

@ -3,7 +3,9 @@
into an easily consumable format."
(:require [clojure.java.io :as io]
[clojure.string :as str])
(:use [marginalia.html :only (uberdoc-html)]
(:use [marginalia
[html :only (uberdoc-html)]
[parser :only (parse-file)]]
[clojure.contrib
[find-namespaces :only (read-file-ns-decl)]
[command-line :only (print-help with-command-line)]])
@ -13,8 +15,6 @@
(def *test* "./src/cljojo/core.clj")
(def *docs* "./docs")
(def *comment* #"^\s*;;\s?")
(def *divider-text* "\n;;DIVIDER\n")
(def *divider-html* #"\n*<span class=\"c[1]?\">;;DIVIDER</span>\n*")
;; ## File System Utilities
@ -89,14 +89,6 @@
;; ## Source File Analysis
;; This line should be replaced
;; and this one too!
(defn parse
  "Turns every line read from the reader `src` into a one-entry map.
  A line matching `*comment*` becomes `{:docs-text ...}` with the comment
  prefix removed; any other line becomes `{:code-text ...}`.
  Returns a lazy sequence, so `src` must still be open when it is consumed."
  [src]
  (map (fn [text]
         (if-not (re-find *comment* text)
           {:code-text (str text)}
           {:docs-text (str (str/replace text *comment* ""))}))
       (line-seq src)))
(defn end-of-block? [cur-group groups lines]
(let [line (first lines)
@ -176,38 +168,6 @@
(= "" (str/trim (str line)))))
(catch Exception e nil))))
(defn parse
  "Reads every line from the reader `src` and returns a vector of maps,
  one per line, each carrying a `:line` number plus one of:

  - `:docs-text`      for lines matching `*comment*` (prefix removed),
  - `:docstring-text` for lines that `docstring-line?` accepts,
  - `:code-text`      for everything else.

  Code lines are numbered by a counter starting at 1 that advances only on
  non-comment lines; a comment line is numbered as that counter plus the
  count of comment lines seen since the last non-comment line."
  [src]
  (letfn [(step [[code-num doc-num acc] line]
            (let [text (str line)]
              (cond
                ;; comment line: code counter stays, comment run grows
                (re-find *comment* line)
                [code-num
                 (inc doc-num)
                 (conj acc {:docs-text (str (str/replace line *comment* ""))
                            :line (+ code-num doc-num)})]

                ;; non-comment line: advance code counter, reset comment run
                (docstring-line? text acc)
                [(inc code-num) 0 (conj acc {:docstring-text text :line code-num})]

                :else
                [(inc code-num) 0 (conj acc {:code-text text :line code-num})])))]
    (peek (reduce step [1 0 []] (line-seq src)))))
;; How is this handled?
;; I wonder?
;; No idea ne
(defn gen-doc!
  "Prints a progress message for `path`, then opens the file and prints
  every section that `parse` produces for it, one per line.
  Returns nil; the reader is closed when printing is done."
  [path]
  (println "Generating documentation for " path)
  (with-open [src (-> path io/file io/reader)]
    ;; force the whole sequence while the reader is still open
    (dorun (map println (parse src)))))
(defn gen-doc!
  "Opens the file at `path`, hands the reader to `parse` and returns the
  result. The reader is closed before this function returns."
  [path]
  (with-open [src (-> path io/file io/reader)]
    (parse src)))
(re-find *comment* " ;; this is a comment")
(defn path-to-doc [fn]
@ -215,9 +175,7 @@
(read-file-ns-decl)
(second)
(str))
groups (->> fn
(gen-doc!)
(group-lines))]
groups (parse-file fn)]
{:ns ns
:groups groups}))
@ -234,8 +192,7 @@
- :version
"
[output-file-name files-to-analyze props]
(let [docs (map path-to-doc files-to-analyze)
source (uberdoc-html
(let [source (uberdoc-html
output-file-name
props
(map path-to-doc files-to-analyze))]

View file

@ -102,29 +102,24 @@
-> \"<h1>hello world!</h1><br />\"`
"
[docs]
(->> docs
(map #(if (:docs-text %)
(prep-docs-text (:docs-text %))
(prep-docstring-text (:docstring-text %))))
(map replace-special-chars)
(interpose "\n")
(apply str)
(md)))
(-> docs
str
prep-docs-text
replace-special-chars
(md)))
(defn codes-to-html
  "Wraps `code-block` (the raw source text of one section) in a
  `<pre class=\"brush: clojure\">` element and returns the HTML string.

  The text is passed through `escape-html` first, as the previous
  line-based implementation did, so that characters such as `<`, `>` and
  `&` in the source show up literally instead of being parsed as markup."
  [code-block]
  (html [:pre {:class "brush: clojure"} (escape-html code-block)]))
(defn codes-to-html
  "Renders `codes`, a sequence of maps with a `:code-text` entry, as one
  `<pre class=\"brush: clojure\">` element. Each line is HTML-escaped and
  the lines are separated by newlines."
  [codes]
  (let [escaped-lines (map (comp escape-html :code-text) codes)
        body          (apply str (interpose "\n" escaped-lines))]
    (html [:pre {:class "brush: clojure"} body])))
(defn group-to-html
  "Renders one group as a two-cell table row: the group's `:docs` go in
  the \"docs\" cell and its `:codes` in the \"codes\" cell."
  [group]
  (let [docs-cell  [:td {:class "docs"} (docs-to-html (:docs group))]
        codes-cell [:td {:class "codes"} (codes-to-html (:codes group))]]
    (html [:tr docs-cell codes-cell])))
(defn section-to-html
  "Renders one parsed section as a two-cell table row.

  The \"docs\" cell holds the rendered documentation: the section's `:raw`
  text for a `:comment` section, its `:docstring` otherwise. The \"codes\"
  cell holds the rendered `:raw` code for a `:code` section and is empty
  for any other section type."
  [section]
  (html [:tr
         [:td {:class "docs"} (docs-to-html
                               (if (= (:type section) :comment)
                                 (:raw section)
                                 (:docstring section)))]
         ;; the code must be a child of the cell, not a sibling of it
         [:td {:class "codes"} (if (= (:type section) :code)
                                 (codes-to-html (:raw section))
                                 "")]]))
(defn dependencies-html [deps & header-name]
(let [header-name (or header-name "dependencies")]
@ -221,7 +216,7 @@
[:a {:href "#toc" :class "toc-link"}
"toc"]]]]
[:td {:class "codes"}]]
(map group-to-html (:groups doc))
(map section-to-html (:groups doc))
[:tr
[:td {:class "spacer docs"} "&nbsp;"]
[:td {:class "codes"}]]))

View file

@ -3,53 +3,138 @@
;; Clojure parsing solution.
(ns marginalia.parser
"Provides the parsing facilities for Marginalia."
(:require [clojure.java.io :as io]
[clojure.string :as str])
(:use clojure.contrib.reflect))
(:refer-clojure :exclude [replace])
(:use [clojure.contrib [reflect :only (get-field)]]
[clojure [string :only (join replace)]]))
;; A comment read from source: `line` is the reader's line number at the
;; point the comment was read, `comment-str` is the comment text.
(defrecord CommentLine [line comment-str])
;; Marker type for a comment produced by the custom `;` reader macro.
;; `content` is the comment text, including its leading semicolon(s).
(deftype Comment [content])
(defn- read-comment
[reader semicolon]
;; Print a `Comment` as its content wrapped in double quotes.
;; `out` is hinted as a `java.io.Writer`, the class that provides the
;; `.write` method used here, so the call is resolved without reflection.
(defmethod print-method Comment [comment ^java.io.Writer out]
  (.write out (str \" (.content comment) \")))
(defn read-comment [reader semicolon]
(let [sb (StringBuilder.)]
(.append sb semicolon)
(loop [ch (char (.read reader))]
(if (or (= ch \newline)
(= ch \return)
(= ch -1))
(CommentLine. (.getLineNumber reader) (.toString sb))
(= ch \return))
(Comment. (.toString sb))
(do
(.append sb (Character/toString ch))
(recur (char (.read reader))))))))
(defn make-parse-fn
[src]
(let [rdr (clojure.lang.LineNumberingPushbackReader. (java.io.StringReader. src))]
(defn set-comment-reader
  "Stores `reader` in the `;` slot of the array held in
  `clojure.lang.LispReader`'s `macros` field, replacing whatever function
  handled `;` before. Returns the value stored."
  [reader]
  (let [macro-table (get-field clojure.lang.LispReader :macros nil)
        slot        (int \;)]
    (aset macro-table slot reader)))
(defn skip-spaces
  "Consumes characters from the pushback reader `rdr` for as long as they
  are a space, tab, carriage return, newline or comma. The first other
  character is pushed back so the next read sees it. Stops silently at
  end of input. Always returns nil."
  [rdr]
  (let [blank? #{\space \tab \return \newline \,}]
    (loop []
      (let [c (.read rdr)]
        (when-not (= c -1)
          (if (blank? (char c))
            (recur)
            (.unread rdr c)))))))
(defn parse* [reader]
(take-while
:form
(repeatedly
(fn []
(let [old-cmt-rdr (aget (get-field clojure.lang.LispReader :macros nil) (int \;))]
(aset (get-field clojure.lang.LispReader :macros nil) (int \;) read-comment)
(let [result (read rdr)]
(aset (get-field clojure.lang.LispReader :macros nil) (int \;) old-cmt-rdr)
result)))))
(skip-spaces reader)
(let [start (.getLineNumber reader)
form (. clojure.lang.LispReader
(read reader false nil false))
end (if (instance? Comment form)
start
(.getLineNumber reader))]
{:form form :start start :end end})))))
(defn parse-file
  "Slurps `filepath`, builds a parse function over its contents with
  `make-parse-fn`, and calls that function repeatedly, collecting the
  results into a vector. Collection stops at the first nil or false
  result; an exception thrown by the parse function counts as nil."
  [filepath]
  (let [parser!   (make-parse-fn (slurp filepath))
        next-form (fn [] (try (parser!) (catch Exception _ nil)))]
    (vec (take-while identity (repeatedly next-form)))))
(defn comment?
  "True when the `:form` entry of `o` is a `Comment` instance."
  [o]
  (instance? Comment (:form o)))
(comment
(parse-file "../marginalia/src/marginalia/parser.clj")
(defn strip-docstring
  "Removes `docstring` from the source text `raw` and tidies what is left.

  Every occurrence of the docstring wrapped in double quotes is deleted,
  then runs of blank lines are collapsed to a single newline and a closing
  paren sitting alone on a line is pulled up to the previous line.

  When `docstring` is nil nothing is deleted. Searching for `(str \\\" nil \\\")`
  would otherwise match `\"\"` and remove every empty string literal from
  the code. The whitespace clean-up is applied either way."
  [docstring raw]
  (let [without-doc (if docstring
                      (replace raw (str \" docstring \") "")
                      raw)]
    (-> without-doc
        (replace #"\n\s*\n" "\n")
        (replace #"\n\s*\)" ")"))))
(aget (get-field clojure.lang.LispReader :macros nil) (int (first "^")))
(defn get-var-docstring
  "Returns the `:doc` metadata of the var named `sym` in the namespace
  named `nspace-sym`, found by evaluating a `(var ns/sym)` form.
  Returns nil when that evaluation throws an Exception."
  [nspace-sym sym]
  (try
    (let [qualified (symbol (str nspace-sym) (str sym))
          the-var   (eval (list 'var qualified))]
      (:doc (meta the-var)))
    ;; HACK: to handle types
    (catch Exception _)))
(def R (make-parse-fn "(def ^{:doc \"this is doc\" :author \"fogus\"}
foo
^{:gah :goo}
[1 2 3])"))
(def e (R))
(map meta e)
)
(defn extract-docstring
  "Looks up the docstring for a top-level definition.

  `form` is the form as read, `raw` its source text and `nspace-sym` the
  symbol of the namespace seen so far (nil if none yet).

  Returns nil unless `form` is a list whose head is a symbol whose name
  starts with `def` or `ns`. Otherwise returns a vector of

  1. the docstring, taken from the namespace metadata when the defined
     symbol names a namespace, else from the var's metadata (may be nil);
  2. `raw` with that docstring stripped out;
  3. the namespace symbol to use from here on.

  When no namespace is known yet, the defined symbol is `require`d first.

  Forms that are not lists, or whose head is not a symbol (a bare literal,
  `((fn ...))`, `()`), yield nil instead of throwing from `first`/`name`."
  [form raw nspace-sym]
  (when (and (seq? form)
             (symbol? (first form))
             (re-find #"^(def|ns)" (-> form first name)))
    (let [sym (-> form second)
          _ (when-not nspace-sym (require sym))
          nspace (find-ns sym)
          docstring (if nspace
                      (-> nspace meta :doc)
                      (get-var-docstring nspace-sym sym))]
      [docstring
       (strip-docstring docstring raw)
       (if nspace sym nspace-sym)])))
(defn- ->str
  "Returns the content of the comment held under `:form` in `m`, with the
  leading run of semicolons and the whitespace after it removed."
  [m]
  (let [text (.content (:form m))]
    (replace text #"^;+\s*" "")))
(defn merge-comments
  "Combines two parsed comment entries into one. The new `Comment` holds
  the stripped text of `f`, a newline, then the stripped text of `s`; the
  result spans from the start of `f` to the end of `s`."
  [f s]
  (let [first-text  (->str f)
        second-text (->str s)
        merged      (Comment. (str first-text "\n" second-text))]
    {:form  merged
     :start (:start f)
     :end   (:end s)}))
(defn arrange-in-sections
  "Folds the flat sequence of parsed forms into a vector of section maps.

  `parsed-code` is a sequence of maps with `:form`, `:start` and `:end`
  keys. `raw-code` is a vector of source lines; `:start` (decremented) and
  `:end` are used as `subvec` bounds into it.

  Each returned section is the parsed map plus:

  - `:type :comment` and `:raw` (the stripped comment text), or
  - `:type :code`, `:raw` (the source text of the form) and `:docstring`.

  Comments whose content starts with a single semicolon followed by
  whitespace are dropped. A comment immediately followed, on the next
  line, by another comment is merged with it.

  For forms where `extract-docstring` returns nil, the section keeps its
  original source text and the namespace tracked so far is carried on.
  Destructuring the nil directly would set both to nil."
  [parsed-code raw-code]
  (loop [sections []
         f (first parsed-code)
         s (second parsed-code)
         nn (nnext parsed-code)
         nspace nil]
    (if f
      (cond
        ;; ignore comments with only one semicolon
        (and (comment? f) (re-find #"^;\s" (-> f :form .content)))
        (recur sections s (first nn) (next nn) nspace)

        ;; merging comments block
        (and (comment? f) (comment? s)
             (= (-> f :end) (-> s :start dec)))
        (recur sections (merge-comments f s)
               (first nn) (next nn)
               nspace)

        ;; adding comment section
        (comment? f)
        (recur (conj sections (assoc f :type :comment :raw (->str f)))
               s
               (first nn) (next nn)
               nspace)

        ;; adding code section
        :else
        (let [form-text (join "\n" (subvec raw-code (-> f :start dec) (:end f)))
              extracted (extract-docstring (:form f) form-text nspace)
              [docstring stripped found-ns] extracted]
          (recur (conj sections (assoc f
                                  :type :code
                                  ;; nil result: keep the untouched source text
                                  :raw (if extracted stripped form-text)
                                  :docstring docstring))
                 s
                 (first nn) (next nn)
                 ;; nil result: keep the namespace seen so far
                 (if extracted found-ns nspace))))
      sections)))
(defn parse
  "Parses `source-string` (Clojure source text) into a vector of sections.

  A newline is appended to the text, which is then read twice: once with
  `line-seq` to get a vector of lines, and once through a
  `LineNumberingPushbackReader` handed to `parse*`. The parsed forms and
  the lines are combined by `arrange-in-sections`.

  While the forms are read, the `;` entry of the LispReader macro table is
  replaced by `read-comment`. That table is fetched from a field of
  `clojure.lang.LispReader` itself, so the swap is visible to every read
  in the process. The previous entry is restored in a `finally`, before
  sections are arranged, so it is put back even when reading fails with
  an `Error` rather than an `Exception`. Anything thrown still propagates."
  [source-string]
  (let [make-reader #(java.io.BufferedReader.
                      (java.io.StringReader. (str source-string "\n")))
        lines (vec (line-seq (make-reader)))
        reader (clojure.lang.LineNumberingPushbackReader. (make-reader))
        old-cmt-rdr (aget (get-field clojure.lang.LispReader :macros nil) (int \;))
        parsed-code (try
                      (set-comment-reader read-comment)
                      ;; realize everything while the comment reader is installed
                      (doall (parse* reader))
                      (finally
                        (set-comment-reader old-cmt-rdr)))]
    (arrange-in-sections parsed-code lines)))
(defn parse-file
  "Reads the whole of `file` with `slurp` and returns the result of
  `parse` on its contents."
  [file]
  (-> file slurp parse))