diff --git a/bin/downloads.clj b/bin/downloads.clj
deleted file mode 100644
index a760b944..00000000
--- a/bin/downloads.clj
+++ /dev/null
@@ -1,83 +0,0 @@
-(ns leiningen.downloads
-  "Calculate download statistics from logs."
-  (:require [clojure.java.io]
-            [clojure.pprint :refer [pprint]]
-            [clojure.java.shell :refer [sh]]))
-
-;; Before GitHub shut down its download service all uberjars were
-;; hosted there. Here's the latest data we have on it.
-(def github {"leiningen-1.6.1.1-standalone.jar" 15143,
-             "leiningen-1.6.2-standalone.jar" 16640,
-             "leiningen-1.7.1-standalone.jar" 64026,
-             "leiningen-full.jpg" 519,
-             "leiningen-1.5.2-standalone.jar" 24865,
-             "leiningen-1.6.1-standalone.jar" 9405,
-             "leiningen-1.7.0-standalone.jar" 10969,
-             "leiningen-1.4.2-standalone.jar" 31651,
-             "leiningen-1.5.1-standalone.jar" 290,
-             "leiningen-1.6.0-standalone.jar" 1065,
-             "leiningen-1.4.1-standalone.jar" 1606,
-             "leiningen-1.5.0-standalone.jar" 9575,
-             "leiningen-1.3.1-standalone.jar" 7905,
-             "leiningen-1.4.0-standalone.jar" 1589,
-             "leiningen-1.3.0-SNAPSHOT-standalone.jar" 280,
-             "leiningen-1.4.0-SNAPSHOT-standalone.jar" 423,
-             "leiningen-1.3.0-standalone.jar" 2442,
-             "leiningen-banner.png" 399328,
-             "leiningen-1.2.0-standalone.jar" 3617,
-             "leiningen-1.1.0-standalone.jar" 12858,
-             "leiningen-1.7.0-SNAPSHOT-standalone.jar" 434,
-             "leiningen-1.6.2-SNAPSHOT-standalone.jar" 637,
-             "leiningen-1.7.1-SNAPSHOT-standalone.jar" 971,
-             "leiningen-2.0.0-preview10-standalone.jar" 555530, ; huh?
-             "leiningen-1.4.0-RC2-standalone.jar" 188,
-             "leiningen-1.5.0-RC1-standalone.jar" 177,
-             "leiningen-2.0.0-preview10-standalone.jar.asc" 272,
-             "leiningen-1.4.0-RC1-standalone.jar" 200,
-             "leiningen-1.3.0-RC1-standalone.jar" 103,
-             "leiningen-2.0.0-preview9-standalone.jar" 442,
-             "leiningen-2.0.0-preview8-standalone.jar" 2050,
-             "leiningen-2.0.0-preview7-standalone.jar" 8022,
-             "leiningen-2.0.0-preview6-standalone.jar" 2839,
-             "leiningen-2.0.0-preview9-standalone.jar.asc" 41,
-             "leiningen-1.4.0-win32.zip" 70,
-             "leiningen-2.0.0-preview8-standalone.jar.asc" 37,
-             "leiningen-1.5.0-win32.zip" 464,
-             "leiningen-1.4.1-win32.zip" 260,
-             "leiningen-2.0.0-preview5-standalone.jar" 200,
-             "leiningen-1.4.2-win32.zip" 1108,
-             "leiningen-2.0.0-preview4-standalone.jar" 1701,
-             "leiningen-2.0.0-preview3-standalone.jar" 2029,
-             "leiningen-2.0.0-preview2-standalone.jar" 1437,
-             "leiningen-1.5.2-win.zip" 4346,
-             "lein-win32.zip" 1502,
-             "leiningen-2.0.0-preview1-standalone.jar" 282})
-
-;; filter out non-release-jars
-(def github-releases
-  (into {} (remove (comp (partial re-find #"SNAPSHOT|RC|zip|jpg|png|asc") key)
-                   github)))
-
-(def total (apply + (vals github-releases))) ; 788178
-
-(defn file-for-line [line]
-  (let [[_ file] (re-find #"\"GET ([^ ]+) " line)]
-    (if file
-      (last (.split file "/")))))
-
-(defn parse-line [sums line]
-  (if-let [file (file-for-line line)]
-    (update-in sums [file] (fnil inc 0))
-    sums))
-
-(defn parse-file [f]
-  (with-open [rdr (clojure.java.io/reader f)]
-    (reduce parse-line {} (line-seq rdr))))
-
-(defn parse-dir [d]
-  (apply merge-with + (->> (.listFiles (java.io.File. d))
-                           (filter (memfn isFile))
-                           (map parse-file))))
-
-;; TODO: fetch S3 logs?
-(def -main parse-dir)
diff --git a/bin/leiningen/downloads.clj b/bin/leiningen/downloads.clj
new file mode 100644
index 00000000..e9f276cd
--- /dev/null
+++ b/bin/leiningen/downloads.clj
@@ -0,0 +1,148 @@
+(use '[cemerick.pomegranate :only (add-dependencies)])
+
+;; Pull in this script's extra dependencies at load time.
+(add-dependencies :coordinates '[[clj-aws-s3 "0.3.6"]
+                                 [tentacles "0.2.4"]]
+                  :repositories (merge cemerick.pomegranate.aether/maven-central
+                                       {"clojars" "https://clojars.org/repo"}))
+
+(ns leiningen.downloads
+  "Calculate download statistics from logs."
+  (:require [aws.sdk.s3 :as s3]
+            [clojure.java.io :as io]
+            [tentacles.repos :as repo]
+            [clojure.pprint :refer [pprint]])
+  (:import [java.io File]))
+
+;; In order to run, you need to define a map with the appropriate AWS
+;; credentials in ~/.secrets/leiningen_downloads_aws_cred.clj:
+;;
+;; {:access-key "AWS_ACCESS_KEY"
+;;  :secret-key "AWS_SECRET_KEY"}
+;;
+;; The file is read on first deref, so loading this namespace does not
+;; require it to exist, and with *read-eval* off so that the reader
+;; cannot evaluate #=(...) forms found in it.
+(def ^:private aws-cred
+  (delay
+    (binding [*read-eval* false]
+      (read-string
+        (slurp (File. (System/getenv "HOME")
+                      "/.secrets/leiningen_downloads_aws_cred.clj"))))))
+
+(defn- list-all-objects
+  "Returns every object listed for `bucket`, requesting further pages
+  with the :next-marker of each response while it is :truncated?.
+  `objects` and `next-marker` optionally seed the accumulated result and
+  the first marker; the caller in this file passes only `bucket`."
+  [bucket & [objects next-marker]]
+  ;; Accumulate into a vector: nesting one lazy `concat` per page can
+  ;; overflow the stack when a long listing is finally realized.
+  (loop [acc (vec objects)
+         marker next-marker]
+    (let [response (s3/list-objects @aws-cred bucket {:marker marker})
+          acc (into acc (:objects response))]
+      (if (:truncated? response)
+        (recur acc (:next-marker response))
+        acc))))
+
+(defn- fetch-all-objects
+  "Lazily fetches every object in `bucket`, printing each key as it is
+  requested."
+  [bucket]
+  (for [object (list-all-objects bucket)]
+    (do
+      (println (str "Processing: " (:key object)))
+      (s3/get-object @aws-cred bucket (:key object)))))
+
+(defn- file-for-line
+  "Returns the last path segment of the GET request in `line`, or nil
+  when `line` contains no GET request."
+  [line]
+  (let [[_ file] (re-find #"\"GET ([^ ]+) " line)]
+    (when file
+      (last (.split ^String file "/")))))
+
+(defn- ip-for-line
+  "Returns the first dotted-quad address found in `line`, or nil."
+  [line]
+  (re-find #"\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b" line))
+
+(defn- status-for-line
+  "Returns, as a string, the first three digits in `line` that directly
+  follow a double quote and a space (the HTTP status that comes after
+  the quoted request), or nil when there is no such match."
+  [line]
+  (second (re-find #"\" (\d\d\d)" line)))
+
+(defn- parse-files
+  "Reads `content` line by line and returns one map per line holding the
+  :file, :status and :ip extracted from it (each nil when absent). The
+  result is realized with `doall` because the reader is closed before
+  this function returns."
+  [content]
+  (with-open [rdr (io/reader content)]
+    (doall (for [line (line-seq rdr)]
+             {:file (file-for-line line)
+              :status (status-for-line line)
+              :ip (ip-for-line line)}))))
+
+(defn- jar-download?
+  "True when `entry` is an HTTP 200 response for a jar. The file name
+  must end in .jar, optionally followed by a query string, so that
+  .jar.asc signature files are not counted as jar downloads."
+  [{:keys [file status]}]
+  (boolean (and file
+                (re-find #"\.jar(?:\?|$)" file)
+                (= "200" status))))
+
+(defn- s3-downloads
+  "Returns the parsed log entries for successful jar downloads found in
+  the objects of the leiningen-logs bucket."
+  []
+  (for [logfile (map :content (fetch-all-objects "leiningen-logs"))
+        entry (parse-files logfile)
+        :when (jar-download? entry)]
+    entry))
+
+(defn- github-downloads
+  "Returns a seq of single-entry {name download-count} maps, one per
+  .jar returned by `repo/downloads` for technomancy/leiningen, ordered
+  from most to least downloaded."
+  []
+  (->> (repo/downloads "technomancy" "leiningen")
+       (map (fn [download] {(:name download) (:download_count download)}))
+       (filter #(re-find #"\.jar$" (first (keys %))))
+       (sort-by #(first (vals %)))
+       reverse))
+
+(defn print-report
+  "Prints download totals and per-file breakdowns for GitHub and S3."
+  []
+  (let [s3-entries (s3-downloads)
+        s3-count (count s3-entries)
+        github-entries (github-downloads)
+        github-count (reduce + (map #(first (vals %)) github-entries))]
+    (println (str "GitHub Downloads: " github-count))
+    (println (str "S3 Downloads: " s3-count))
+    (println (str "Unique IP Addresses (S3 Downloads Only): "
+                  (count (distinct (map :ip s3-entries)))))
+    (println (str "Total Downloads: "
+                  (+ github-count s3-count)))
+    (print "\n\n")
+    (println "GitHub downloads by file:")
+    (print "\n\n")
+    (pprint github-entries)
+    (print "\n\n")
+    (println "S3 downloads by file:")
+    (print "\n\n")
+    (pprint (frequencies (map :file s3-entries)))
+    ;; need this last println for some reason or else the above doesn't
+    ;; print out using lein run (presumably buffered output), so flush too.
+    (println "")
+    (flush)))
+
+(defn -main
+  "Entry point; any command-line arguments are ignored."
+  [& _]
+  (print-report))
diff --git a/project.clj b/project.clj
index 1a0b9b2a..7774f998 100644
--- a/project.clj
+++ b/project.clj
@@ -34,7 +34,7 @@
   :test-selectors {:default (complement :disabled)
                    :offline (comp (partial not-any? identity)
                                   (juxt :online :disabled))}
-  :source-paths ["leiningen-core/src" "src"]
+  :source-paths ["leiningen-core/src" "src" "bin"]
   ;; work around Clojure bug http://dev.clojure.org/jira/browse/CLJ-1034
   :uberjar-exclusions [#"^data_readers.clj$"]
   :eval-in :leiningen)