Use updated search index format and location. Lots of TODOs still.

This commit is contained in:
Phil Hagelberg 2012-08-09 00:17:03 -07:00
parent 0c22b5d893
commit ec0187d8e7
3 changed files with 78 additions and 100 deletions

View file

@ -7,9 +7,12 @@
:license {:name "Eclipse Public License"
:url "http://www.eclipse.org/legal/epl-v10.html"}
:dependencies [[leiningen-core "2.0.0-SNAPSHOT"]
[clucy "0.3.0"]
[org.clojure/data.xml "0.0.3"]
[bultitude "0.1.7"]
[org.apache.maven.indexer/indexer-core "4.1.3"
:exclusions [org.apache.maven/maven-model
org.sonatype.aether/aether-api
org.sonatype.aether/aether-util]]
[reply "0.1.0-beta8"]
;; once reply bumps its drawbridge dep we can collapse these
[com.cemerick/drawbridge "0.0.6" :exclusions [ring/ring-core]]

View file

@ -1,116 +1,89 @@
(ns leiningen.search
(:require [clojure.java.io :as io]
[clojure.string :as string]
[leiningen.core.user :as user]
[leiningen.core.project :as project]
[leiningen.core.main :as main]
[clucy.core :as clucy]
[leiningen.core.user :as user]
[clj-http.client :as http])
(:import (java.util.zip ZipFile)
(java.net URL)
(java.io File InputStream OutputStream FileOutputStream)))
(:import (org.apache.maven.index ArtifactInfo IteratorSearchRequest MAVEN
NexusIndexer)
(org.apache.maven.index.context IndexingContext)
(org.apache.maven.index.creator
JarFileContentsIndexCreator MavenPluginArtifactInfoIndexCreator
MinimalArtifactInfoIndexCreator)
(org.apache.maven.index.expr UserInputSearchExpression)
(org.apache.maven.index.updater IndexUpdater IndexUpdateRequest
ResourceFetcher)
(org.codehaus.plexus DefaultPlexusContainer PlexusContainer)))
;;; Fetching Indices
(defonce container (DefaultPlexusContainer.))
(defn- unzip [source target-dir]
(let [zip (ZipFile. source)
entries (enumeration-seq (.entries zip))
target-file #(io/file target-dir (.getName %))]
(doseq [entry entries :when (not (.isDirectory entry))
:let [f (target-file entry)]]
(.mkdirs (.getParentFile f))
(io/copy (.getInputStream zip entry) f))))
(defonce indexer (.lookup container NexusIndexer))
(defn ^:internal index-location [url]
(def ^:private default-indexers [(MinimalArtifactInfoIndexCreator.)
(JarFileContentsIndexCreator.)
(MavenPluginArtifactInfoIndexCreator.)])
(defn index-location [url]
(io/file (user/leiningen-home) "indices" (string/replace url #"[:/]" "_")))
(defn ^:internal remote-index-url [url]
(URL. (format "%s/.index/nexus-maven-repository-index.zip" url)))
(defmacro with-context [[context-local id url] & body]
`(let [~context-local (.addIndexingContextForced
indexer ~id ~url nil (index-location ~url)
~url nil default-indexers)]
(locking ~url
(try ~@body
(finally
(.removeIndexingContext indexer ~context-local false))))))
(defn- download [^URL url ^OutputStream out-stream & {:keys [callback]}]
(let [resp (http/get (str url) {:as :stream
:headers {"User-Agent" (main/user-agent)}})
content-len (try (Long/valueOf
(get-in resp [:headers "content-length"]))
(catch Exception _))
in ^InputStream (:body resp) ;closes itself after completion
buf (byte-array 1024)]
(loop [cnt 0]
(let [size (.read in buf)]
(when (pos? size)
(let [cnt* (+ cnt size)]
(.write out-stream buf 0 size)
(when callback
(callback {:byte-count cnt*
:content-len content-len
:percentage (if content-len
(int (* 100 (/ cnt* content-len))))}))
(recur cnt*)))))))
;; TODO: add progress reporting back in
(defn- http-resource-fetcher []
(let [base-url (promise)]
(proxy [ResourceFetcher] []
;; TODO: handle connect/disconnect properly
(connect [id url]
(deliver base-url url))
(disconnect [])
(^java.io.InputStream retrieve [name]
(println "Downloading" name "from" @base-url)
(:body (http/get (str @base-url "/" name) {:as :stream}))))))
(defn- download-index [[id {url :url}]]
(main/info "Downloading index from" id "-" url)
(main/info "This can take a very, very long time. While you wait you might")
(main/info "be interested in searching via the web interfaces at")
(main/info "http://search.maven.org or http://clojars.org.")
(main/info "0%...")
(flush)
(let [index-url ^URL (remote-index-url url)
tmp (File/createTempFile "lein" "index")
tmp-stream (FileOutputStream. tmp)
progress (atom 0)
callback (fn [{:keys [percentage]}]
(when (and main/*info* (not= percentage @progress))
(reset! progress percentage)
(print (str "\r" percentage "%..."))
(flush)))]
(try (if (= "file" (.getProtocol index-url))
(io/copy (.openStream index-url) tmp-stream)
(download index-url tmp-stream :callback callback))
(unzip tmp (index-location url))
(finally (.delete tmp))))
(main/info))
(defn update-index [context]
(.fetchAndUpdateIndex (.lookup container IndexUpdater)
(IndexUpdateRequest. context (http-resource-fetcher))))
(defn- download-needed? [[id {:keys [url]}]]
(not (.exists (index-location url))))
(defn- parse-result [result]
(let [group-id (.groupId result)
artifact-id (.artifactId result)
version (.version result)
classifier (.classifier result)
packaging (.packaging result)
name (if (= group-id artifact-id)
(symbol artifact-id)
(symbol group-id artifact-id))
classifier-opts (and classifier [:classifier classifier])
packaging-opts (if (not= "jar" packaging) [:packaging packaging])]
[(pr-str (into [name version] (concat classifier-opts packaging-opts)))
(.description result)]))
(defn ^:internal ensure-fresh-index [repository]
(try (when (download-needed? repository)
(download-index repository))
true
(catch java.io.IOException _
false)))
;;; Searching
(def ^:private page-size (:search-page-size (:user (user/profiles)) 25))
(defn search-repository [[id {:keys [url]} :as repo] query page]
(if (ensure-fresh-index repo)
(let [location (.getAbsolutePath (index-location url))
fetch-count (* page page-size)
offset (* (dec page) page-size)
results (clucy/search (clucy/disk-index location)
query fetch-count :default-field :a)]
(with-meta (drop offset results) (meta results)))
(binding [*out* *err*]
(println "Warning: couldn't download index for" url))))
(defn ^:internal parse-result [{:keys [u d]}]
(let [[group artifact version classifier] (.split u "\\|")
group (if (not= group artifact) group)
identifier [(symbol group artifact) (format "\"%s\"" version)]]
(if d
[identifier d]
[identifier])))
(defn- print-results [[id] results page]
(when (seq results)
(defn- print-results [id response page]
(when (seq response)
(println " == Results from" id "-" "Showing page" page "/"
(-> results meta :_total-hits (/ page-size) Math/ceil int) "total")
(doseq [result (map parse-result results)]
(apply println result))
(.getTotalHitsCount response))
(doseq [[dep description] (map parse-result response)]
(println dep description))
(println)))
(defn search-repository [[id {:keys [url]}] query page offline?]
(with-context [context id url]
(when-not offline? (update-index context))
(let [search-expression (UserInputSearchExpression. query)
;; TODO: support querying other fields
artifact-id-query (.constructQuery indexer MAVEN/ARTIFACT_ID
search-expression)
request (IteratorSearchRequest. artifact-id-query context)]
(with-open [response (.searchIterator indexer request)]
(print-results id response page)))))
(defn ^:no-project-needed search
"Search remote maven repositories for matching jars.
@ -124,6 +97,8 @@ matches or do more advanced queries such as this:
Also accepts a second parameter for fetching successive pages."
([project query] (search project query 1))
([project query page]
(doseq [repo (:repositories project (:repositories project/defaults))
;; TODO: still some issues with central
(doseq [repo (reverse (:repositories project (:repositories project/defaults)))
:let [page (Integer. page)]]
(print-results repo (search-repository repo query page) page))))
;; TODO: bring back pagination
(search-repository repo query page (:offline? project)))))

View file

@ -3,7 +3,7 @@
(:use [clojure.test]
[leiningen.search]))
(deftest test-searchy
#_(deftest test-searchy
(with-redefs [remote-index-url (constantly
(io/resource "sample-index.zip"))]
(ensure-fresh-index ["test" {:url "http://example.com/repo"}])