Merge pull request #1252 from gphil/leiningen-download-stats

automate collection and reporting of leiningen download statistics
This commit is contained in:
Phil Hagelberg 2013-07-12 07:37:36 -07:00
commit f4b489b8da
3 changed files with 114 additions and 84 deletions

View file

@ -1,83 +0,0 @@
(ns leiningen.downloads
"Calculate download statistics from logs."
(:require [clojure.java.io]
[clojure.pprint :refer [pprint]]
[clojure.java.shell :refer [sh]]))
;; Before GitHub shut down its download service, all uberjars were
;; hosted there. This map is the latest data we have on it: each key is
;; the name of a hosted file and each value is its download count.
;; Besides release jars it also contains SNAPSHOT/RC jars, Windows zips,
;; images and .asc signature files.
(def github {"leiningen-1.6.1.1-standalone.jar" 15143,
"leiningen-1.6.2-standalone.jar" 16640,
"leiningen-1.7.1-standalone.jar" 64026,
"leiningen-full.jpg" 519,
"leiningen-1.5.2-standalone.jar" 24865,
"leiningen-1.6.1-standalone.jar" 9405,
"leiningen-1.7.0-standalone.jar" 10969,
"leiningen-1.4.2-standalone.jar" 31651,
"leiningen-1.5.1-standalone.jar" 290,
"leiningen-1.6.0-standalone.jar" 1065,
"leiningen-1.4.1-standalone.jar" 1606,
"leiningen-1.5.0-standalone.jar" 9575,
"leiningen-1.3.1-standalone.jar" 7905,
"leiningen-1.4.0-standalone.jar" 1589,
"leiningen-1.3.0-SNAPSHOT-standalone.jar" 280,
"leiningen-1.4.0-SNAPSHOT-standalone.jar" 423,
"leiningen-1.3.0-standalone.jar" 2442,
"leiningen-banner.png" 399328,
"leiningen-1.2.0-standalone.jar" 3617,
"leiningen-1.1.0-standalone.jar" 12858,
"leiningen-1.7.0-SNAPSHOT-standalone.jar" 434,
"leiningen-1.6.2-SNAPSHOT-standalone.jar" 637,
"leiningen-1.7.1-SNAPSHOT-standalone.jar" 971,
"leiningen-2.0.0-preview10-standalone.jar" 555530, ; huh? (far above every other jar in this map)
"leiningen-1.4.0-RC2-standalone.jar" 188,
"leiningen-1.5.0-RC1-standalone.jar" 177,
"leiningen-2.0.0-preview10-standalone.jar.asc" 272,
"leiningen-1.4.0-RC1-standalone.jar" 200,
"leiningen-1.3.0-RC1-standalone.jar" 103,
"leiningen-2.0.0-preview9-standalone.jar" 442,
"leiningen-2.0.0-preview8-standalone.jar" 2050,
"leiningen-2.0.0-preview7-standalone.jar" 8022,
"leiningen-2.0.0-preview6-standalone.jar" 2839,
"leiningen-2.0.0-preview9-standalone.jar.asc" 41,
"leiningen-1.4.0-win32.zip" 70,
"leiningen-2.0.0-preview8-standalone.jar.asc" 37,
"leiningen-1.5.0-win32.zip" 464,
"leiningen-1.4.1-win32.zip" 260,
"leiningen-2.0.0-preview5-standalone.jar" 200,
"leiningen-1.4.2-win32.zip" 1108,
"leiningen-2.0.0-preview4-standalone.jar" 1701,
"leiningen-2.0.0-preview3-standalone.jar" 2029,
"leiningen-2.0.0-preview2-standalone.jar" 1437,
"leiningen-1.5.2-win.zip" 4346,
"lein-win32.zip" 1502,
"leiningen-2.0.0-preview1-standalone.jar" 282})
;; Keep only the release jars: any entry whose file name mentions
;; SNAPSHOT, RC, zip, jpg, png or asc is dropped.
(def github-releases
  (let [non-release? (fn [entry]
                       (re-find #"SNAPSHOT|RC|zip|jpg|png|asc" (key entry)))]
    (into {} (remove non-release? github))))
;; Sum of the download counts of every entry in github-releases.
(def total
  (reduce + (vals github-releases))) ; 788178
(defn file-for-line
  "Returns the last /-separated segment of the path requested by the
  GET request found in `line`, or nil when `line` contains no such
  request (or the path has no segments)."
  [line]
  (when-let [[_ path] (re-find #"\"GET ([^ ]+) " line)]
    (last (.split path "/"))))
(defn parse-line
  "Reducing step over log lines: returns `sums` with the count for the
  file requested on `line` incremented (starting from 0), or `sums`
  unchanged when no file can be extracted from `line`."
  [sums line]
  (let [file (file-for-line line)]
    (if (nil? file)
      sums
      (assoc sums file (inc (or (get sums file) 0))))))
(defn parse-file
  "Reads `f` line by line and returns a map from requested file name to
  the number of lines requesting it. The reader is closed on exit."
  [f]
  (with-open [reader (clojure.java.io/reader f)]
    (->> (line-seq reader)
         (reduce parse-line {}))))
(defn parse-dir
  "Runs parse-file on every regular file directly inside directory `d`
  and adds the resulting per-file counts together."
  [d]
  (let [log-files     (filter #(.isFile %) (.listFiles (java.io.File. d)))
        per-file-sums (map parse-file log-files)]
    (apply merge-with + per-file-sums)))
;; TODO: fetch S3 logs?
;; Entry point. -main is bound directly to parse-dir, so it takes the
;; same single argument (the directory to parse) and returns whatever
;; parse-dir returns.
(def -main parse-dir)

113
bin/leiningen/downloads.clj Normal file
View file

@ -0,0 +1,113 @@
;; Resolve the libraries required by the ns form below and add them to
;; the classpath at load time. This must run before that ns form so its
;; :require clauses can find them.
(use '[cemerick.pomegranate :only (add-dependencies)])
(add-dependencies
 :coordinates '[[clj-aws-s3 "0.3.6"]
                [tentacles "0.2.4"]]
 ;; Clojars is reached over HTTPS: the downloaded jars are loaded and
 ;; executed, so they must not travel over an unauthenticated plain-HTTP
 ;; connection where they could be tampered with.
 :repositories (merge cemerick.pomegranate.aether/maven-central
                      {"clojars" "https://clojars.org/repo"}))
(ns leiningen.downloads
"Calculate download statistics from logs."
(:require [aws.sdk.s3 :as s3]
[clojure.java.io :as io]
[tentacles.repos :as repo]
[clojure.pprint :refer [pprint]])
(:import [java.io File]))
;; AWS credentials passed to every S3 call in this namespace. They are
;; read once, when the namespace is loaded.
;;
;; In order to run, you need to define a map with the appropriate AWS
;; credentials in ~/.secrets/leiningen_downloads_aws_cred.clj:
;;   {:access-key "AWS_ACCESS_KEY"
;;    :secret-key "AWS_SECRET_KEY"}
;;
;; NOTE(review): ^:internal is not a metadata key Clojure acts on;
;; ^:private was probably intended. Confirm nothing outside this
;; namespace refers to aws-cred before changing it.
(def ^:internal aws-cred
  ;; The file is plain data, so *read-eval* is bound to false: a #=(...)
  ;; form in it is rejected by the reader instead of being executed.
  (binding [*read-eval* false]
    (read-string
     (slurp (File. (System/getenv "HOME")
                   "/.secrets/leiningen_downloads_aws_cred.clj")))))
(defn- list-all-objects
  "Lists the objects in `bucket`, requesting further pages with the
  marker from the previous response until a response is not truncated.
  The optional `objects` and `next-marker` seed the accumulated results
  and the marker used for the first request."
  [bucket & [objects next-marker]]
  (loop [acc    objects
         marker next-marker]
    (let [page (s3/list-objects aws-cred bucket {:marker marker})
          acc  (concat acc (:objects page))]
      (if (:truncated? page)
        (recur acc (:next-marker page))
        acc))))
(defn- fetch-all-objects
  "Returns a lazy sequence with the result of s3/get-object for every
  object listed in `bucket`. Each object's key is printed just before it
  is fetched, which happens only as the sequence is consumed."
  [bucket]
  (map (fn [object]
         (let [object-key (:key object)]
           (println (str "Processing: " object-key))
           (s3/get-object aws-cred bucket object-key)))
       (list-all-objects bucket)))
(defn- file-for-line
  "Returns the last /-separated segment of the path requested by the
  GET request found in `line`, or nil when `line` contains no such
  request (or the path has no segments)."
  [line]
  (when-let [[_ path] (re-find #"\"GET ([^ ]+) " line)]
    (last (.split path "/"))))
(defn- ip-for-line
  "Returns the first dotted-quad token (four groups of one to three
  digits) found in `line`, or nil when there is none."
  [line]
  (let [dotted-quad #"\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b"]
    (re-find dotted-quad line)))
(defn- status-for-line
  "Returns, as a string, the three digits that follow the first
  double-quote-plus-space in `line` that is followed by three digits
  (the status code after the quoted request), or nil if there is no
  such match."
  [line]
  (when-let [[_ status] (re-find #"\" (\d\d\d)" line)]
    status))
(defn- parse-files
  "Opens a reader on `content` and returns one map per line holding the
  :file, :status and :ip extracted from that line (each nil when not
  found). The sequence is fully realized before the reader is closed."
  [content]
  (with-open [reader (io/reader content)]
    (let [line->entry (fn [line]
                        {:file   (file-for-line line)
                         :status (status-for-line line)
                         :ip     (ip-for-line line)})]
      (doall (map line->entry (line-seq reader))))))
(defn- s3-downloads
  "Returns a sequence of {:file .. :status .. :ip ..} maps, one for each
  log line in the \"leiningen-logs\" bucket that records a jar being
  served with HTTP status 200."
  []
  (let [jar-download?
        (fn [{:keys [file status]}]
          (and file                            ; a file was requested
               ;; The name must end in .jar, optionally followed by a
               ;; query string. The earlier \.jar\b pattern also matched
               ;; names such as x.jar.asc, so signature files were
               ;; counted as jar downloads.
               (re-find #"\.jar(?:$|\?)" file)
               (= "200" status)))]             ; only HTTP 200 responses
    ;; mapcat concatenates the per-logfile results one level deep, which
    ;; is all that is needed here; flatten is not required.
    (mapcat #(filter jar-download? (parse-files %))
            (map :content (fetch-all-objects "leiningen-logs")))))
(defn- github-downloads
  "Returns a sequence of single-entry maps {name download-count}, one
  for each download of technomancy/leiningen whose name ends in .jar,
  ordered from the highest download count to the lowest."
  []
  (let [entries (map (fn [download]
                       {(:name download) (:download_count download)})
                     (repo/downloads "technomancy" "leiningen"))
        jars    (filter #(re-find #"\.jar$" (key (first %))) entries)]
    (reverse (sort-by #(val (first %)) jars))))
(defn print-report
  "Prints the download report: the GitHub, S3 and combined totals plus
  the number of distinct S3 client IPs, followed by the per-file
  breakdown for GitHub and then for S3."
  []
  (let [s3-entries   (s3-downloads)
        s3-total     (count s3-entries)
        github-files (github-downloads)
        github-total (reduce + (map #(first (vals %)) github-files))
        spacer       #(print "\n\n")]
    (println (str "GitHub Downloads: " github-total))
    (println (str "S3 Downloads: " s3-total))
    (println (str "Unique IP Addresses (S3 Downloads Only): "
                  (count (set (map :ip s3-entries)))))
    (println (str "Total Downloads: " (+ github-total s3-total)))
    (spacer)
    (println "GitHub downloads by file:")
    (spacer)
    (pprint github-files)
    (spacer)
    (println "S3 downloads by file:")
    (spacer)
    (pprint (frequencies (map :file s3-entries)))
    ;; need this last println for some reason or else
    ;; the above doesn't print out using lein run...
    (println "")))
(defn -main
  "Entry point: takes no arguments and prints the download report."
  []
  (print-report))

View file

@ -34,7 +34,7 @@
:test-selectors {:default (complement :disabled)
:offline (comp (partial not-any? identity)
(juxt :online :disabled))}
:source-paths ["leiningen-core/src" "src"]
:source-paths ["leiningen-core/src" "src" "bin"]
;; work around Clojure bug http://dev.clojure.org/jira/browse/CLJ-1034
:uberjar-exclusions [#"^data_readers.clj$"]
:eval-in :leiningen)