Use updated search index format and location. Lots of TODOs still.

This commit is contained in:
Phil Hagelberg 2012-08-09 00:17:03 -07:00
parent 0c22b5d893
commit ec0187d8e7
3 changed files with 78 additions and 100 deletions

View file

@ -7,9 +7,12 @@
:license {:name "Eclipse Public License" :license {:name "Eclipse Public License"
:url "http://www.eclipse.org/legal/epl-v10.html"} :url "http://www.eclipse.org/legal/epl-v10.html"}
:dependencies [[leiningen-core "2.0.0-SNAPSHOT"] :dependencies [[leiningen-core "2.0.0-SNAPSHOT"]
[clucy "0.3.0"]
[org.clojure/data.xml "0.0.3"] [org.clojure/data.xml "0.0.3"]
[bultitude "0.1.7"] [bultitude "0.1.7"]
[org.apache.maven.indexer/indexer-core "4.1.3"
:exclusions [org.apache.maven/maven-model
org.sonatype.aether/aether-api
org.sonatype.aether/aether-util]]
[reply "0.1.0-beta8"] [reply "0.1.0-beta8"]
;; once reply bumps its drawbridge dep we can collapse these ;; once reply bumps its drawbridge dep we can collapse these
[com.cemerick/drawbridge "0.0.6" :exclusions [ring/ring-core]] [com.cemerick/drawbridge "0.0.6" :exclusions [ring/ring-core]]

View file

@ -1,116 +1,89 @@
(ns leiningen.search (ns leiningen.search
(:require [clojure.java.io :as io] (:require [clojure.java.io :as io]
[clojure.string :as string] [clojure.string :as string]
[leiningen.core.user :as user]
[leiningen.core.project :as project] [leiningen.core.project :as project]
[leiningen.core.main :as main] [leiningen.core.user :as user]
[clucy.core :as clucy]
[clj-http.client :as http]) [clj-http.client :as http])
(:import (java.util.zip ZipFile) (:import (org.apache.maven.index ArtifactInfo IteratorSearchRequest MAVEN
(java.net URL) NexusIndexer)
(java.io File InputStream OutputStream FileOutputStream))) (org.apache.maven.index.context IndexingContext)
(org.apache.maven.index.creator
JarFileContentsIndexCreator MavenPluginArtifactInfoIndexCreator
MinimalArtifactInfoIndexCreator)
(org.apache.maven.index.expr UserInputSearchExpression)
(org.apache.maven.index.updater IndexUpdater IndexUpdateRequest
ResourceFetcher)
(org.codehaus.plexus DefaultPlexusContainer PlexusContainer)))
;;; Fetching Indices (defonce container (DefaultPlexusContainer.))
(defn- unzip [source target-dir] (defonce indexer (.lookup container NexusIndexer))
(let [zip (ZipFile. source)
entries (enumeration-seq (.entries zip))
target-file #(io/file target-dir (.getName %))]
(doseq [entry entries :when (not (.isDirectory entry))
:let [f (target-file entry)]]
(.mkdirs (.getParentFile f))
(io/copy (.getInputStream zip entry) f))))
(defn ^:internal index-location [url] (def ^:private default-indexers [(MinimalArtifactInfoIndexCreator.)
(JarFileContentsIndexCreator.)
(MavenPluginArtifactInfoIndexCreator.)])
(defn index-location [url]
(io/file (user/leiningen-home) "indices" (string/replace url #"[:/]" "_"))) (io/file (user/leiningen-home) "indices" (string/replace url #"[:/]" "_")))
(defn ^:internal remote-index-url [url] (defmacro with-context [[context-local id url] & body]
(URL. (format "%s/.index/nexus-maven-repository-index.zip" url))) `(let [~context-local (.addIndexingContextForced
indexer ~id ~url nil (index-location ~url)
~url nil default-indexers)]
(locking ~url
(try ~@body
(finally
(.removeIndexingContext indexer ~context-local false))))))
(defn- download [^URL url ^OutputStream out-stream & {:keys [callback]}] ;; TODO: add progress reporting back in
(let [resp (http/get (str url) {:as :stream (defn- http-resource-fetcher []
:headers {"User-Agent" (main/user-agent)}}) (let [base-url (promise)]
content-len (try (Long/valueOf (proxy [ResourceFetcher] []
(get-in resp [:headers "content-length"])) ;; TODO: handle connect/disconnect properly
(catch Exception _)) (connect [id url]
in ^InputStream (:body resp) ;closes itself after completion (deliver base-url url))
buf (byte-array 1024)] (disconnect [])
(loop [cnt 0] (^java.io.InputStream retrieve [name]
(let [size (.read in buf)] (println "Downloading" name "from" @base-url)
(when (pos? size) (:body (http/get (str @base-url "/" name) {:as :stream}))))))
(let [cnt* (+ cnt size)]
(.write out-stream buf 0 size)
(when callback
(callback {:byte-count cnt*
:content-len content-len
:percentage (if content-len
(int (* 100 (/ cnt* content-len))))}))
(recur cnt*)))))))
(defn- download-index [[id {url :url}]] (defn update-index [context]
(main/info "Downloading index from" id "-" url) (.fetchAndUpdateIndex (.lookup container IndexUpdater)
(main/info "This can take a very, very long time. While you wait you might") (IndexUpdateRequest. context (http-resource-fetcher))))
(main/info "be interested in searching via the web interfaces at")
(main/info "http://search.maven.org or http://clojars.org.")
(main/info "0%...")
(flush)
(let [index-url ^URL (remote-index-url url)
tmp (File/createTempFile "lein" "index")
tmp-stream (FileOutputStream. tmp)
progress (atom 0)
callback (fn [{:keys [percentage]}]
(when (and main/*info* (not= percentage @progress))
(reset! progress percentage)
(print (str "\r" percentage "%..."))
(flush)))]
(try (if (= "file" (.getProtocol index-url))
(io/copy (.openStream index-url) tmp-stream)
(download index-url tmp-stream :callback callback))
(unzip tmp (index-location url))
(finally (.delete tmp))))
(main/info))
(defn- download-needed? [[id {:keys [url]}]] (defn- parse-result [result]
(not (.exists (index-location url)))) (let [group-id (.groupId result)
artifact-id (.artifactId result)
version (.version result)
classifier (.classifier result)
packaging (.packaging result)
name (if (= group-id artifact-id)
(symbol artifact-id)
(symbol group-id artifact-id))
classifier-opts (and classifier [:classifier classifier])
packaging-opts (if (not= "jar" packaging) [:packaging packaging])]
[(pr-str (into [name version] (concat classifier-opts packaging-opts)))
(.description result)]))
(defn ^:internal ensure-fresh-index [repository] (defn- print-results [id response page]
(try (when (download-needed? repository) (when (seq response)
(download-index repository))
true
(catch java.io.IOException _
false)))
;;; Searching
;; Number of search results shown per page: the :search-page-size entry of
;; the :user profile when present, otherwise 25.
(def ^:private page-size
  (get (:user (user/profiles)) :search-page-size 25))
(defn search-repository
  "Runs query against the on-disk index for the repository at url and
  returns the hits belonging to the requested page, carrying the metadata
  of the underlying search results. When ensure-fresh-index reports that
  the index is unavailable, prints a warning to *err* and returns nil."
  [[id {:keys [url]} :as repo] query page]
  (if-not (ensure-fresh-index repo)
    (binding [*out* *err*]
      (println "Warning: couldn't download index for" url))
    (let [index-path (.getAbsolutePath (index-location url))
          ;; Fetch everything up to the end of the requested page, then
          ;; discard the hits that belong to earlier pages.
          limit (* page page-size)
          skipped (* (dec page) page-size)
          hits (clucy/search (clucy/disk-index index-path)
                             query limit :default-field :a)]
      (with-meta (drop skipped hits) (meta hits)))))
(defn ^:internal parse-result
  "Turns an index hit into a printable row. The :u entry is a
  pipe-delimited string whose first three fields are used as group,
  artifact and version; :d is an optional description. Returns
  [[name quoted-version]] or [[name quoted-version] description], where
  name omits the group when it equals the artifact."
  [{:keys [u d]}]
  (let [[group-id artifact-id version] (.split u "\\|")
        ;; A nil namespace makes `symbol` produce an unqualified name.
        ns-part (when-not (= group-id artifact-id) group-id)
        coordinate [(symbol ns-part artifact-id) (format "\"%s\"" version)]]
    (if-not d
      [coordinate]
      [coordinate d])))
(defn- print-results [[id] results page]
(when (seq results)
(println " == Results from" id "-" "Showing page" page "/" (println " == Results from" id "-" "Showing page" page "/"
(-> results meta :_total-hits (/ page-size) Math/ceil int) "total") (.getTotalHitsCount response))
(doseq [result (map parse-result results)] (doseq [[dep description] (map parse-result response)]
(apply println result)) (println dep description))
(println))) (println)))
(defn search-repository
  "Searches one repository's index for artifacts whose artifact id matches
  query and prints the hits via print-results. Unless offline? is truthy,
  the index is updated first. The indexing context is opened with
  with-context and the search response is closed by with-open."
  [[id {:keys [url]}] query page offline?]
  (with-context [context id url]
    (when-not offline?
      (update-index context))
    ;; TODO: support querying other fields
    (let [expression (UserInputSearchExpression. query)
          by-artifact-id (.constructQuery indexer MAVEN/ARTIFACT_ID expression)]
      (with-open [response (.searchIterator
                            indexer
                            (IteratorSearchRequest. by-artifact-id context))]
        (print-results id response page)))))
(defn ^:no-project-needed search (defn ^:no-project-needed search
"Search remote maven repositories for matching jars. "Search remote maven repositories for matching jars.
@ -124,6 +97,8 @@ matches or do more advanced queries such as this:
Also accepts a second parameter for fetching successive pages." Also accepts a second parameter for fetching successive pages."
([project query] (search project query 1)) ([project query] (search project query 1))
([project query page] ([project query page]
(doseq [repo (:repositories project (:repositories project/defaults)) ;; TODO: still some issues with central
(doseq [repo (reverse (:repositories project (:repositories project/defaults)))
:let [page (Integer. page)]] :let [page (Integer. page)]]
(print-results repo (search-repository repo query page) page)))) ;; TODO: bring back pagination
(search-repository repo query page (:offline? project)))))

View file

@ -3,7 +3,7 @@
(:use [clojure.test] (:use [clojure.test]
[leiningen.search])) [leiningen.search]))
(deftest test-searchy #_(deftest test-searchy
(with-redefs [remote-index-url (constantly (with-redefs [remote-index-url (constantly
(io/resource "sample-index.zip"))] (io/resource "sample-index.zip"))]
(ensure-fresh-index ["test" {:url "http://example.com/repo"}]) (ensure-fresh-index ["test" {:url "http://example.com/repo"}])