Merge pull request #1252 from gphil/leiningen-download-stats
automate collection and reporting of leiningen download statistics
This commit is contained in:
commit
f4b489b8da
3 changed files with 114 additions and 84 deletions
|
@ -1,83 +0,0 @@
|
|||
(ns leiningen.downloads
|
||||
"Calculate download statistics from logs."
|
||||
(:require [clojure.java.io]
|
||||
[clojure.pprint :refer [pprint]]
|
||||
[clojure.java.shell :refer [sh]]))
|
||||
|
||||
;; Before GitHub shut down its download service all uberjars were
|
||||
;; hosted there. Here's the latest data we have on it.
|
||||
;; Final per-file download counts recorded from GitHub's download service,
;; keyed by file name. Entries are grouped by artifact kind for readability;
;; the grouping has no effect on the resulting map.
(def github
  {;; release and preview uberjars
   "leiningen-1.1.0-standalone.jar" 12858
   "leiningen-1.2.0-standalone.jar" 3617
   "leiningen-1.3.0-standalone.jar" 2442
   "leiningen-1.3.1-standalone.jar" 7905
   "leiningen-1.4.0-standalone.jar" 1589
   "leiningen-1.4.1-standalone.jar" 1606
   "leiningen-1.4.2-standalone.jar" 31651
   "leiningen-1.5.0-standalone.jar" 9575
   "leiningen-1.5.1-standalone.jar" 290
   "leiningen-1.5.2-standalone.jar" 24865
   "leiningen-1.6.0-standalone.jar" 1065
   "leiningen-1.6.1-standalone.jar" 9405
   "leiningen-1.6.1.1-standalone.jar" 15143
   "leiningen-1.6.2-standalone.jar" 16640
   "leiningen-1.7.0-standalone.jar" 10969
   "leiningen-1.7.1-standalone.jar" 64026
   "leiningen-2.0.0-preview1-standalone.jar" 282
   "leiningen-2.0.0-preview2-standalone.jar" 1437
   "leiningen-2.0.0-preview3-standalone.jar" 2029
   "leiningen-2.0.0-preview4-standalone.jar" 1701
   "leiningen-2.0.0-preview5-standalone.jar" 200
   "leiningen-2.0.0-preview6-standalone.jar" 2839
   "leiningen-2.0.0-preview7-standalone.jar" 8022
   "leiningen-2.0.0-preview8-standalone.jar" 2050
   "leiningen-2.0.0-preview9-standalone.jar" 442
   "leiningen-2.0.0-preview10-standalone.jar" 555530 ; huh?

   ;; snapshot builds
   "leiningen-1.3.0-SNAPSHOT-standalone.jar" 280
   "leiningen-1.4.0-SNAPSHOT-standalone.jar" 423
   "leiningen-1.6.2-SNAPSHOT-standalone.jar" 637
   "leiningen-1.7.0-SNAPSHOT-standalone.jar" 434
   "leiningen-1.7.1-SNAPSHOT-standalone.jar" 971

   ;; release candidates
   "leiningen-1.3.0-RC1-standalone.jar" 103
   "leiningen-1.4.0-RC1-standalone.jar" 200
   "leiningen-1.4.0-RC2-standalone.jar" 188
   "leiningen-1.5.0-RC1-standalone.jar" 177

   ;; detached signatures
   "leiningen-2.0.0-preview8-standalone.jar.asc" 37
   "leiningen-2.0.0-preview9-standalone.jar.asc" 41
   "leiningen-2.0.0-preview10-standalone.jar.asc" 272

   ;; Windows zip bundles
   "leiningen-1.4.0-win32.zip" 70
   "leiningen-1.4.1-win32.zip" 260
   "leiningen-1.4.2-win32.zip" 1108
   "leiningen-1.5.0-win32.zip" 464
   "leiningen-1.5.2-win.zip" 4346
   "lein-win32.zip" 1502

   ;; images
   "leiningen-full.jpg" 519
   "leiningen-banner.png" 399328})
|
||||
|
||||
;; filter out non-release-jars
|
||||
;; Subset of `github` whose file names do not contain any of the markers
;; SNAPSHOT, RC, zip, jpg, png or asc.
(def github-releases
  (->> github
       (remove (fn [entry]
                 (re-find #"SNAPSHOT|RC|zip|jpg|png|asc" (key entry))))
       (into {})))
|
||||
|
||||
;; Sum of the download counts of every entry in `github-releases`.
(def total (reduce + (vals github-releases))) ; 788178
|
||||
|
||||
(defn file-for-line
  "Returns the last `/`-separated segment of the path in the first
  `\"GET <path> ` request found in `line`, or nil when the line contains
  no such request (or the path has no non-empty segment)."
  [line]
  (when-let [[_ path] (re-find #"\"GET ([^ ]+) " line)]
    (last (.split ^String path "/"))))
|
||||
|
||||
(defn parse-line
  "Reducing step: increments the count stored in `sums` under the file name
  extracted from `line` (starting from 0 when absent). Returns `sums`
  unchanged when no file name can be extracted."
  [sums line]
  (let [file (file-for-line line)]
    (if-not file
      sums
      (update-in sums [file] (fnil inc 0)))))
|
||||
|
||||
(defn parse-file
  "Reads `f` line by line and returns a map of file name -> number of
  matching request lines. The reader is closed before returning."
  [f]
  (with-open [in (clojure.java.io/reader f)]
    (->> (line-seq in)
         (reduce parse-line {}))))
|
||||
|
||||
(defn parse-dir
  "Parses every regular file directly inside directory `d` and merges the
  per-file count maps by adding the counts together."
  [d]
  (let [entries (.listFiles (java.io.File. d))
        files   (filter (fn [f] (.isFile f)) entries)]
    (apply merge-with + (map parse-file files))))
|
||||
|
||||
;; TODO: fetch S3 logs?
|
||||
;; Entry point: -main is bound directly to parse-dir, so it takes the same
;; single directory argument and returns the same merged count map.
(def -main parse-dir)
|
113
bin/leiningen/downloads.clj
Normal file
113
bin/leiningen/downloads.clj
Normal file
|
@ -0,0 +1,113 @@
|
|||
(use '[cemerick.pomegranate :only (add-dependencies)])
|
||||
|
||||
;; Resolve the libraries this script requires before the ns form below loads
;; them. Clojars is addressed over HTTPS so the artifacts are not fetched
;; over an unauthenticated plain-HTTP connection.
(add-dependencies :coordinates '[[clj-aws-s3 "0.3.6"]
                                 [tentacles "0.2.4"]]
                  :repositories (merge cemerick.pomegranate.aether/maven-central
                                       {"clojars" "https://clojars.org/repo"}))
|
||||
|
||||
(ns leiningen.downloads
|
||||
"Calculate download statistics from logs."
|
||||
(:require [aws.sdk.s3 :as s3]
|
||||
[clojure.java.io :as io]
|
||||
[tentacles.repos :as repo]
|
||||
[clojure.pprint :refer [pprint]])
|
||||
(:import [java.io File]))
|
||||
|
||||
;; In order to run, you need to define a map with the appropriate AWS
;; credentials in ~/.secrets/leiningen_downloads_aws_cred.clj:
;;
;;   {:access-key "AWS_ACCESS_KEY"
;;    :secret-key "AWS_SECRET_KEY"}
;;
;; The file is read when this namespace is loaded. The var is marked
;; ^:private (the metadata key Clojure actually honours) so the credentials
;; are not part of the namespace's public surface.
(def ^:private aws-cred
  (read-string
   (slurp (File. (System/getenv "HOME")
                 "/.secrets/leiningen_downloads_aws_cred.clj"))))
|
||||
|
||||
(defn- list-all-objects
  "Lists the objects in `bucket`, requesting further pages for as long as
  the response is flagged `:truncated?`. `objects` and `next-marker` are
  optional: objects already collected and the marker to start from."
  [bucket & [objects next-marker]]
  (loop [collected objects
         marker    next-marker]
    (let [page      (s3/list-objects aws-cred bucket {:marker marker})
          collected (concat collected (:objects page))]
      (if (:truncated? page)
        (recur collected (:next-marker page))
        collected))))
|
||||
|
||||
(defn- fetch-all-objects
  "Returns a lazy sequence with the result of `s3/get-object` for every
  object listed in `bucket`, printing a progress line as each is fetched."
  [bucket]
  (map (fn [object]
         (println (str "Processing: " (:key object)))
         (s3/get-object aws-cred bucket (:key object)))
       (list-all-objects bucket)))
|
||||
|
||||
(defn- file-for-line
  "Returns the last `/`-separated segment of the path in the first
  `\"GET <path> ` request found in `line`, or nil when the line contains
  no such request (or the path has no non-empty segment)."
  [line]
  (when-let [[_ path] (re-find #"\"GET ([^ ]+) " line)]
    (last (.split ^String path "/"))))
|
||||
|
||||
(defn- ip-for-line
  "Returns the first dotted-quad (IPv4-looking) token found in `line`,
  or nil when there is none."
  [line]
  (let [dotted-quad #"\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b"]
    (re-find dotted-quad line)))
|
||||
|
||||
(defn- status-for-line
  "Returns, as a string, the first three-digit number in `line` that
  directly follows a double quote and a space, or nil when there is none."
  [line]
  (let [[_ code] (re-find #"\" (\d\d\d)" line)]
    code))
|
||||
|
||||
(defn- parse-files
  "Reads `content` line by line and returns a fully realized sequence of
  maps with keys :file, :status and :ip, one per line. The reader is closed
  before returning."
  [content]
  (let [line->entry (fn [line]
                      {:file   (file-for-line line)
                       :status (status-for-line line)
                       :ip     (ip-for-line line)})]
    (with-open [rdr (io/reader content)]
      (doall (map line->entry (line-seq rdr))))))
|
||||
|
||||
(defn- s3-downloads
  "Returns one parsed log entry (see `parse-files`) for every successful
  jar download found in the objects of the \"leiningen-logs\" bucket.

  An entry is kept only when it has a file name, the file name ends in
  `.jar` (optionally followed by a query string), and the status is 200."
  []
  (let [jar-download?
        (fn [entry]
          (and (get entry :file)                       ;; file is present
               ;; File is a jar. The name must END in .jar (or be followed
               ;; only by a query string): a bare `\.jar\b` would also
               ;; match `*.jar.asc` signature files and count them as jars.
               (re-find #"\.jar(?:\?|$)" (get entry :file))
               (= "200" (get entry :status))))]        ;; only HTTP 200 responses
    (flatten
     (for [logfile (map :content (fetch-all-objects "leiningen-logs"))]
       (filter jar-download? (parse-files logfile))))))
|
||||
|
||||
(defn- github-downloads
  "Returns a sequence of single-entry maps {file-name download-count} for
  the downloads of technomancy/leiningen whose name ends in `.jar`, ordered
  from the highest count to the lowest."
  []
  (let [entries (for [download (repo/downloads "technomancy" "leiningen")]
                  {(:name download) (:download_count download)})
        jars    (filter #(re-find #"\.jar$" (first (keys %))) entries)]
    (reverse (sort-by #(first (vals %)) jars))))
|
||||
|
||||
(defn print-report
  "Prints the download report to *out*: GitHub, S3 and combined totals, the
  number of distinct IPs seen in S3 downloads, then the per-file breakdown
  for GitHub and for S3."
  []
  (let [s3-entries     (s3-downloads)
        s3-total       (count s3-entries)
        github-entries (github-downloads)
        github-total   (reduce + (map #(first (vals %)) github-entries))
        ;; Both per-file sections share the same layout: blank lines, a
        ;; title, blank lines, then the pretty-printed data.
        section        (fn [title data]
                         (print "\n\n")
                         (println title)
                         (print "\n\n")
                         (pprint data))]
    (println (str "GitHub Downloads: " github-total))
    (println (str "S3 Downloads: " s3-total))
    (println (str "Unique IP Addresses (S3 Downloads Only): "
                  (count (distinct (map :ip s3-entries)))))
    (println (str "Total Downloads: "
                  (+ github-total s3-total)))
    (section "GitHub downloads by file:" github-entries)
    (section "S3 downloads by file:" (frequencies (map :file s3-entries)))
    ;; need this last println for some reason or else
    ;; the above doesn't print out using lein run...
    (println "")))
|
||||
|
||||
(defn -main
  "Command-line entry point: prints the download report."
  [] (print-report))
|
|
@ -34,7 +34,7 @@
|
|||
:test-selectors {:default (complement :disabled)
|
||||
:offline (comp (partial not-any? identity)
|
||||
(juxt :online :disabled))}
|
||||
:source-paths ["leiningen-core/src" "src"]
|
||||
:source-paths ["leiningen-core/src" "src" "bin"]
|
||||
;; work around Clojure bug http://dev.clojure.org/jira/browse/CLJ-1034
|
||||
:uberjar-exclusions [#"^data_readers.clj$"]
|
||||
:eval-in :leiningen)
|
||||
|
|
Loading…
Reference in a new issue