added html generated for gh-pages

This commit is contained in:
Yann Esposito (Yogsototh) 2016-03-31 20:22:46 +02:00
parent 0dbd092244
commit e6ef99a6a9
9 changed files with 1317 additions and 2 deletions

2
.gitignore vendored
View file

@@ -3,5 +3,3 @@ build/
compile
*.hi
*.o
*.html
*.pdf

BIN
README.beamer.pdf Normal file

Binary file not shown.

44
README.html Normal file
View file

@@ -0,0 +1,44 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="generator" content="pandoc">
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
<title></title>
<style type="text/css">code{white-space: pre;}</style>
<!--[if lt IE 9]>
<script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
<![endif]-->
<link rel="stylesheet" href="styling.css">
</head>
<body>
<nav id="TOC">
<ul>
<li><a href="#no-brainer-markdown-to-html-pdf">No Brainer Markdown to HTML &amp; PDF</a><ul>
<li><a href="#dependencies">Dependencies</a></li>
</ul></li>
</ul>
</nav>
<h1 id="no-brainer-markdown-to-html-pdf">No Brainer Markdown to HTML &amp; PDF</h1>
<p>For each markdown files it will generate:</p>
<ul>
<li>an HTML Document</li>
<li>an HTML Presentation (using reveal.js)</li>
<li>a PDF Document (using XeLaTeX)</li>
<li>a PDF Presentation (using Beamer)</li>
</ul>
<pre><code>./compile.sh</code></pre>
<p>If you want to be the 1337, install <a href="http://haskellstack.org"><code>stack</code></a> and</p>
<pre><code>./build.sh
./compile</code></pre>
<h2 id="dependencies">Dependencies</h2>
<ul>
<li><a href="http://pandoc.org">pandoc</a> Tested with pandoc 1.15.0.6</li>
<li><a href="http://xelatex.org">XeLaTeX</a> Tested with XeTeX 3.14159265-2.6-0.99992 (TeX Live 2015)</li>
<li><a href="https://github.com/matze/mtheme">metropolis</a> Beamer theme (working forked version here: <a href="https://github.com/yogsototh/mtheme"><code>https://github.com/yogsototh/mtheme</code></a>)</li>
</ul>
<div id="footer">
<a href="http://yannesposito.com">Y</a>
</div>
</body>
</html>

BIN
README.pdf Normal file

Binary file not shown.

96
README.reveal.html Normal file
View file

@@ -0,0 +1,96 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title></title>
<meta name="description" content="">
<meta name="apple-mobile-web-app-capable" content="yes" />
<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent" />
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">
<link rel="stylesheet" href=".reveal.js-3.2.0/css/reveal.css">
<link rel="stylesheet" href=".reveal.js-3.2.0/css/theme/metropolis.css" id="theme">
<!-- For syntax highlighting -->
<link rel="stylesheet" href=".reveal.js-3.2.0/lib/css/zenburn.css">
<!-- If the query includes 'print-pdf', use the PDF print sheet -->
<script>
document.write( '<link rel="stylesheet" href=".reveal.js-3.2.0/css/print/' +
( window.location.search.match( /print-pdf/gi ) ? 'pdf' : 'paper' ) +
'.css" type="text/css" media="print">' );
</script>
<!--[if lt IE 9]>
<script src=".reveal.js-3.2.0/lib/js/html5shiv.js"></script>
<![endif]-->
</head>
<body>
<div class="reveal">
<!-- Any section element inside of this container is displayed as a slide -->
<div class="slides">
<section>
<h1></h1>
<p>
<h4></h4>
</p>
</section>
<section id="no-brainer-markdown-to-html-pdf" class="level1">
<h1>No Brainer Markdown to HTML &amp; PDF</h1>
<p>For each markdown files it will generate:</p>
<ul>
<li>an HTML Document</li>
<li>an HTML Presentation (using reveal.js)</li>
<li>a PDF Document (using XeLaTeX)</li>
<li>a PDF Presentation (using Beamer)</li>
</ul>
<pre><code>./compile.sh</code></pre>
<p>If you want to be the 1337, install <a href="http://haskellstack.org"><code>stack</code></a> and</p>
<pre><code>./build.sh
./compile</code></pre>
<section id="dependencies" class="level2">
<h2>Dependencies</h2>
<ul>
<li><a href="http://pandoc.org">pandoc</a> -- Tested with pandoc 1.15.0.6</li>
<li><a href="http://xelatex.org">XeLaTeX</a> -- Tested with XeTeX 3.14159265-2.6-0.99992 (TeX Live 2015)</li>
<li><a href="https://github.com/matze/mtheme">metropolis</a> Beamer theme (working forked version here: <a href="https://github.com/yogsototh/mtheme"><code>https://github.com/yogsototh/mtheme</code></a>)</li>
</ul>
</section>
</section>
</div>
<script src=".reveal.js-3.2.0/lib/js/head.min.js"></script>
<script src=".reveal.js-3.2.0/js/reveal.js"></script>
<script>
// Full list of configuration options available here:
// https://github.com/hakimel/reveal.js#configuration
Reveal.initialize({
controls: true,
progress: true,
history: true,
center: false,
// available themes are in /css/theme
theme: Reveal.getQueryHash().theme || 'metropolis',
// default/cube/page/concave/zoom/linear/fade/none
transition: Reveal.getQueryHash().transition || 'linear',
// Optional libraries used to extend on reveal.js
dependencies: [
{ src: '/.reveal.js-3.2.0/lib/js/classList.js', condition: function() { return !document.body.classList; } },
{ src: '/.reveal.js-3.2.0/plugin/markdown/showdown.js', condition: function() { return !!document.querySelector( '[data-markdown]' ); } },
{ src: '/.reveal.js-3.2.0/plugin/markdown/markdown.js', condition: function() { return !!document.querySelector( '[data-markdown]' ); } },
{ src: '/.reveal.js-3.2.0/plugin/highlight/highlight.js', async: true, callback: function() { hljs.initHighlightingOnLoad(); } },
{ src: '/.reveal.js-3.2.0/plugin/zoom-js/zoom.js', async: true, condition: function() { return !!document.body.classList; } },
{ src: '/.reveal.js-3.2.0/plugin/notes/notes.js', async: true, condition: function() { return !!document.body.classList; } }
]
});
</script>
</body>
</html>

BIN
druid/druid.beamer.pdf Normal file

Binary file not shown.

534
druid/druid.html Normal file
View file

@@ -0,0 +1,534 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="generator" content="pandoc">
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
<meta name="author" content="Yann Esposito">
<title>Druid for real-time analysis</title>
<style type="text/css">code{white-space: pre;}</style>
<!--[if lt IE 9]>
<script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
<![endif]-->
<link rel="stylesheet" href="../styling.css">
</head>
<body>
<header>
<h1 class="title">Druid for real-time analysis</h1>
<h2 class="author">Yann Esposito</h2>
<h3 class="date">7 Avril 2016</h3>
</header>
<nav id="TOC">
<ul>
<li><a href="#druid-the-sales-pitch">Druid the Sales Pitch</a></li>
<li><a href="#intro">Intro</a><ul>
<li><a href="#experience">Experience</a></li>
<li><a href="#real-time">Real Time?</a></li>
<li><a href="#demand">Demand</a></li>
<li><a href="#reality">Reality</a></li>
</ul></li>
<li><a href="#origin-php">Origin (PHP)</a></li>
<li><a href="#st-refactoring-node.js">1st Refactoring (Node.js)</a></li>
<li><a href="#return-of-experience">Return of Experience</a></li>
<li><a href="#return-of-experience-1">Return of Experience</a></li>
<li><a href="#nd-refactoring">2nd Refactoring</a></li>
<li><a href="#nd-refactoring-ftw">2nd Refactoring (FTW!)</a></li>
<li><a href="#nd-refactoring-return-of-experience">2nd Refactoring return of experience</a></li>
<li><a href="#demo">Demo</a></li>
<li><a href="#pre-considerations">Pre Considerations</a><ul>
<li><a href="#discovered-vs-invented">Discovered vs Invented</a></li>
<li><a href="#in-the-end">In the End</a></li>
</ul></li>
<li><a href="#druid">Druid</a><ul>
<li><a href="#who">Who?</a></li>
<li><a href="#goal">Goal</a></li>
<li><a href="#concepts">Concepts</a></li>
<li><a href="#key-features">Key Features</a></li>
<li><a href="#right-for-me">Right for me?</a></li>
</ul></li>
<li><a href="#high-level-architecture">High Level Architecture</a><ul>
<li><a href="#inspiration">Inspiration</a></li>
<li><a href="#index-immutability">Index / Immutability</a></li>
<li><a href="#storage">Storage</a></li>
<li><a href="#specialized-nodes">Specialized Nodes</a></li>
</ul></li>
<li><a href="#druid-vs-x">Druid vs X</a><ul>
<li><a href="#elasticsearch">Elasticsearch</a></li>
<li><a href="#keyvalue-stores-hbasecassandraopentsdb">Key/Value Stores (HBase/Cassandra/OpenTSDB)</a></li>
<li><a href="#spark">Spark</a></li>
<li><a href="#sql-on-hadoop-impaladrillspark-sqlpresto">SQL-on-Hadoop (Impala/Drill/Spark SQL/Presto)</a></li>
</ul></li>
<li><a href="#data">Data</a><ul>
<li><a href="#concepts-1">Concepts</a></li>
</ul></li>
<li><a href="#roll-up">Roll-up</a><ul>
<li><a href="#example">Example</a></li>
<li><a href="#as-sql">as SQL</a></li>
</ul></li>
<li><a href="#sharding">Sharding</a><ul>
<li><a href="#segments">Segments</a></li>
<li><a href="#core-data-structure">Core Data Structure</a></li>
<li><a href="#dictionary">Dictionary</a></li>
<li><a href="#columnn-data">Column Data</a></li>
<li><a href="#bitmaps">Bitmaps</a></li>
</ul></li>
<li><a href="#data-1">Data</a><ul>
<li><a href="#indexing-data">Indexing Data</a></li>
<li><a href="#loading-data">Loading data</a></li>
<li><a href="#querying-the-data">Querying the data</a></li>
<li><a href="#columnar-storage">Columnar Storage</a></li>
<li><a href="#index">Index</a></li>
<li><a href="#data-segments">Data Segments</a></li>
<li><a href="#real-time-ingestion">Real-time ingestion</a></li>
<li><a href="#batch-ingestion">Batch Ingestion</a></li>
<li><a href="#real-time-ingestion-1">Real-time Ingestion</a></li>
</ul></li>
<li><a href="#querying">Querying</a><ul>
<li><a href="#query-types">Query types</a></li>
<li><a href="#tip">Tip</a></li>
<li><a href="#query-spec">Query Spec</a></li>
<li><a href="#examples">Example(s)</a></li>
<li><a href="#caching">Caching</a></li>
<li><a href="#load-rules">Load Rules</a></li>
</ul></li>
<li><a href="#components">Components</a><ul>
<li><a href="#druid-components">Druid Components</a></li>
<li><a href="#coordinator">Coordinator</a></li>
<li><a href="#real-time-nodes">Real-time Nodes</a></li>
<li><a href="#historical-nodes">Historical Nodes</a></li>
<li><a href="#overlord">Overlord</a></li>
<li><a href="#middle-manager">Middle Manager</a></li>
<li><a href="#broker-nodes">Broker Nodes</a></li>
<li><a href="#deep-storage">Deep Storage</a></li>
</ul></li>
<li><a href="#considerations-tools">Considerations &amp; Tools</a><ul>
<li><a href="#when-not-to-choose-druid">When <em>not</em> to choose Druid</a></li>
<li><a href="#graphite-metrics">Graphite (metrics)</a></li>
<li><a href="#pivot-exploring-data">Pivot (exploring data)</a></li>
<li><a href="#caravel-exploring-data">Caravel (exploring data)</a></li>
</ul></li>
</ul>
</nav>
<h1 id="druid-the-sales-pitch">Druid the Sales Pitch</h1>
<ul>
<li>Sub-Second Queries</li>
<li>Real-time Streams</li>
<li>Scalable to Petabytes</li>
<li>Deploy Anywhere</li>
<li>Vibrant Community (Open Source)</li>
</ul>
<aside class="notes">
<ul>
<li>Ideal for powering user-facing analytic applications</li>
<li>Deploy anywhere: cloud, on-premise, integrate with Haddop, Spark, Kafka, Storm, Samza</li>
</ul>
</aside>
<h1 id="intro">Intro</h1>
<h2 id="experience">Experience</h2>
<ul>
<li>Real Time Social Media Analytics</li>
</ul>
<h2 id="real-time">Real Time?</h2>
<ul>
<li>Ingestion Latency: seconds</li>
<li>Query Latency: seconds</li>
</ul>
<h2 id="demand">Demand</h2>
<ul>
<li>Twitter: <code>20k msg/s</code>, <code>1msg = 10ko</code> during 24h</li>
<li>Facebook public: 1000 to 2000 msg/s continuously</li>
<li>Low Latency</li>
</ul>
<h2 id="reality">Reality</h2>
<ul>
<li>Twitter: 400 msg/s continuously, burst to 1500</li>
<li>Facebook: 1000 to 2000 msg/s</li>
</ul>
<h1 id="origin-php">Origin (PHP)</h1>
<p><img src="img/bad_php.jpg" alt="OH NOES PHP!" /> </p>
<h1 id="st-refactoring-node.js">1st Refactoring (Node.js)</h1>
<ul>
<li>Ingestion still in PHP</li>
<li>Node.js, Perl, Java &amp; R for sentiment analysis</li>
<li>MongoDB</li>
<li>Manually made time series (Incremental Map/Reduce)</li>
<li>Manually coded HyperLogLog in js</li>
</ul>
<h1 id="return-of-experience">Return of Experience</h1>
<p><img src="img/mongoDB.png" alt="MongoDB the destroyer" /> </p>
<h1 id="return-of-experience-1">Return of Experience</h1>
<ul>
<li>Ingestion still in PHP (600 msg/s max)</li>
<li>Node.js, Perl, Java (10 msg/s max)</li>
</ul>
<figure>
<img src="img/bored.gif" alt="Too Slow, Bored" /><figcaption>Too Slow, Bored</figcaption>
</figure>
<h1 id="nd-refactoring">2nd Refactoring</h1>
<ul>
<li>Haskell</li>
<li>Clojure / Clojurescript</li>
<li>Kafka / Zookeeper</li>
<li>Mesos / Marathon</li>
<li>Elasticsearch</li>
<li><strong>Druid</strong></li>
</ul>
<h1 id="nd-refactoring-ftw">2nd Refactoring (FTW!)</h1>
<p><img src="img/talking.jpg" alt="Now we're talking" /> </p>
<h1 id="nd-refactoring-return-of-experience">2nd Refactoring return of experience</h1>
<ul>
<li>No limit, everything is scalable</li>
<li>High availability</li>
<li>Low latency: Ingestion &amp; User faced querying</li>
<li>Cheap if done correctly</li>
</ul>
<p><strong>Thanks Druid!</strong></p>
<h1 id="demo">Demo</h1>
<ul>
<li>Low Latency High Volume of Data Analysis</li>
<li>Typically <code>pulse</code></li>
</ul>
<p><a href="http://pulse.vigiglo.be/#/vigiglobe/Earthquake/dashboard" target="_blank"> DEMO Time </a></p>
<h1 id="pre-considerations">Pre Considerations</h1>
<h2 id="discovered-vs-invented">Discovered vs Invented</h2>
<p>Try to conceptualize a s.t.</p>
<ul>
<li>Ingest Events</li>
<li>Real-Time Queries</li>
<li>Scalable</li>
<li>Highly Available</li>
</ul>
<p>Analytics: timeseries, alerting system, top N, etc…</p>
<h2 id="in-the-end">In the End</h2>
<p>Druid concepts are always emerging naturally</p>
<h1 id="druid">Druid</h1>
<h2 id="who">Who?</h2>
<p>Metamarkets</p>
<p><a href="http://druid.io/druid-powered.html"
target="_blank">Powered by Druid</a></p>
<ul>
<li>Alibaba, Cisco, Criteo, eBay, Hulu, Netflix, Paypal…</li>
</ul>
<h2 id="goal">Goal</h2>
<blockquote>
<p>Druid is an open source store designed for real-time exploratory analytics on large data sets.</p>
</blockquote>
<blockquote>
<p>hosted dashboard that would allow users to arbitrarily explore and visualize event streams.</p>
</blockquote>
<h2 id="concepts">Concepts</h2>
<ul>
<li>Column-oriented storage layout</li>
<li>distributed, shared-nothing architecture</li>
<li>advanced indexing structure</li>
</ul>
<h2 id="key-features">Key Features</h2>
<ul>
<li>Sub-second OLAP Queries</li>
<li>Real-time Streaming Ingestion</li>
<li>Power Analytic Applications</li>
<li>Cost Effective</li>
<li>High Available</li>
<li>Scalable</li>
</ul>
<h2 id="right-for-me">Right for me?</h2>
<ul>
<li>require fast aggregations</li>
<li>exploratory analytics</li>
<li>analysis in real-time</li>
<li>lots of data (trillions of events, petabytes of data)</li>
<li>no single point of failure</li>
</ul>
<h1 id="high-level-architecture">High Level Architecture</h1>
<h2 id="inspiration">Inspiration</h2>
<ul>
<li>Google's <a href="http://static.googleusercontent.com/media/research.google.com/en/us/pubs/archive/36632.pdf">BigQuery/Dremel</a></li>
<li>Google's <a href="http://vldb.org/pvldb/vol5/p1436_alexanderhall_vldb2012.pdf">PowerDrill</a></li>
</ul>
<h2 id="index-immutability">Index / Immutability</h2>
<p>Druid indexes data to create mostly immutable views.</p>
<h2 id="storage">Storage</h2>
<p>Store data in custom column format highly optimized for aggregation &amp; filter.</p>
<h2 id="specialized-nodes">Specialized Nodes</h2>
<ul>
<li>A Druid cluster is composed of various type of nodes</li>
<li>Each designed to do a small set of things very well</li>
<li>Nodes don't need to be deployed on individual hardware</li>
<li>Many node types can be colocated in production</li>
</ul>
<h1 id="druid-vs-x">Druid vs X</h1>
<h2 id="elasticsearch">Elasticsearch</h2>
<ul>
<li>resource requirement much higher for ingestion &amp; aggregation</li>
<li>No data summarization (100x in real world data)</li>
</ul>
<h2 id="keyvalue-stores-hbasecassandraopentsdb">Key/Value Stores (HBase/Cassandra/OpenTSDB)</h2>
<ul>
<li>Must Pre-compute Result
<ul>
<li>Exponential storage</li>
<li>Hours of pre-processing time</li>
</ul></li>
<li>Use the dimensions as key (like in OpenTSDB)
<ul>
<li>No filter index other than range</li>
<li>Hard for complex predicates</li>
</ul></li>
</ul>
<h2 id="spark">Spark</h2>
<ul>
<li>Druid can be used to accelerate OLAP queries in Spark</li>
<li>Druid focuses on the latencies to ingest and serve queries</li>
<li>Too long for end user to arbitrarily explore data</li>
</ul>
<h2 id="sql-on-hadoop-impaladrillspark-sqlpresto">SQL-on-Hadoop (Impala/Drill/Spark SQL/Presto)</h2>
<ul>
<li>Queries: more data transfer between nodes</li>
<li>Data Ingestion: bottleneck by backing store</li>
<li>Query Flexibility: more flexible (full joins)</li>
</ul>
<h1 id="data">Data</h1>
<h2 id="concepts-1">Concepts</h2>
<ul>
<li><strong>Timestamp column</strong>: query centered on time axis</li>
<li><strong>Dimension columns</strong>: strings (used to filter or to group)</li>
<li><strong>Metric columns</strong>: used for aggregations (count, sum, mean, etc…)</li>
</ul>
<h1 id="roll-up">Roll-up</h1>
<h2 id="example">Example</h2>
<pre><code>timestamp page ... added deleted
2011-01-01T00:01:35Z Justin Bieber 10 65
2011-01-01T00:03:63Z Justin Bieber 15 62
2011-01-01T01:04:51Z Justin Bieber 32 45
2011-01-01T01:01:00Z Ke$ha 17 87
2011-01-01T01:02:00Z Ke$ha 43 99
2011-01-01T02:03:00Z Ke$ha 12 53</code></pre>
<pre><code>timestamp page ... nb added deleted
2011-01-01T00:00:00Z Justin Bieber 2 25 127
2011-01-01T01:00:00Z Justin Bieber 1 32 45
2011-01-01T01:00:00Z Ke$ha 2 60 186
2011-01-01T02:00:00Z Ke$ha 1 12 53</code></pre>
<h2 id="as-sql">as SQL</h2>
<pre><code>GROUP BY timestamp, page, nb, added, deleted
:: nb = COUNT(1)
, added = SUM(added)
, deleted = SUM(deleted)</code></pre>
<p>In practice can dramatically reduce the size (up to x100)</p>
<h1 id="sharding">Sharding</h1>
<h2 id="segments">Segments</h2>
<p><small><code>sampleData_2011-01-01T01:00:00:00Z_2011-01-01T02:00:00:00Z_v1_0</code></small></p>
<pre><code>2011-01-01T01:00:00Z Justin Bieber 1 20 45
2011-01-01T01:00:00Z Ke$ha 1 30 106</code></pre>
<p><small><code>sampleData_2011-01-01T01:00:00:00Z_2011-01-01T02:00:00:00Z_v1_0</code></small></p>
<pre><code>2011-01-01T01:00:00Z Justin Bieber 1 12 45
2011-01-01T01:00:00Z Ke$ha 2 30 80</code></pre>
<h2 id="core-data-structure">Core Data Structure</h2>
<p><img src="img/druid-column-types.png" alt="Segment" /> </p>
<ul>
<li>dictionary</li>
<li>a bitmap for each value</li>
<li>a list of the columns values encoded using the dictionary</li>
</ul>
<h2 id="dictionary">Dictionary</h2>
<pre><code>{ &quot;Justin Bieber&quot;: 0
, &quot;Ke$ha&quot;: 1
}</code></pre>
<h2 id="columnn-data">Column Data</h2>
<pre><code>[ 0
, 0
, 1
, 1
]</code></pre>
<h2 id="bitmaps">Bitmaps</h2>
<p>one for each value of the column</p>
<pre><code>value=&quot;Justin Bieber&quot;: [1,1,0,0]
value=&quot;Ke$ha&quot;: [0,0,1,1]</code></pre>
<h1 id="data-1">Data</h1>
<h2 id="indexing-data">Indexing Data</h2>
<ul>
<li>Immutable snapshots of data</li>
<li>data structure highly optimized for analytic queries</li>
<li>Each column is stored separately</li>
<li>Indexes data on a per shard (segment) level</li>
</ul>
<h2 id="loading-data">Loading data</h2>
<ul>
<li>Real-Time</li>
<li>Batch</li>
</ul>
<h2 id="querying-the-data">Querying the data</h2>
<ul>
<li>JSON over HTTP</li>
<li>Single Table Operations, no joins.</li>
</ul>
<h2 id="columnar-storage">Columnar Storage</h2>
<h2 id="index">Index</h2>
<ul>
<li>Values are dictionary encoded</li>
</ul>
<p><code>{&quot;USA&quot; 1, &quot;Canada&quot; 2, &quot;Mexico&quot; 3, ...}</code></p>
<ul>
<li>Bitmap for every dimension value (used by filters)</li>
</ul>
<p><code>&quot;USA&quot; -&gt; [0 1 0 0 1 1 0 0 0]</code></p>
<ul>
<li>Column values (used by aggregation queries)</li>
</ul>
<p><code>[2,1,3,15,1,1,2,8,7]</code></p>
<h2 id="data-segments">Data Segments</h2>
<ul>
<li>Per time interval
<ul>
<li>skip segments when querying</li>
</ul></li>
<li>Immutable
<ul>
<li>Cache friendly</li>
<li>No locking</li>
</ul></li>
<li>Versioned
<ul>
<li>No locking</li>
<li>Read-write concurrency</li>
</ul></li>
</ul>
<h2 id="real-time-ingestion">Real-time ingestion</h2>
<ul>
<li>Via Real-Time Node and Firehose
<ul>
<li>No redundancy or HA, thus not recommended</li>
</ul></li>
<li>Via Indexing Service and Tranquility API
<ul>
<li>Core API</li>
<li>Integration with Streaming Frameworks</li>
<li>HTTP Server</li>
<li><strong>Kafka Consumer</strong></li>
</ul></li>
</ul>
<h2 id="batch-ingestion">Batch Ingestion</h2>
<ul>
<li>File based (HDFS, S3, …)</li>
</ul>
<h2 id="real-time-ingestion-1">Real-time Ingestion</h2>
<pre><code>Task 1: [ Interval ][ Window ]
Task 2: [ ]
---------------------------------------&gt;
time</code></pre>
<p>Minimum indexing slots =<br />
Data Sources × Partitions × Replicas × 2</p>
<h1 id="querying">Querying</h1>
<h2 id="query-types">Query types</h2>
<ul>
<li>Group by: group by multiple dimensions</li>
<li>Top N: like grouping by a single dimension</li>
<li>Timeseries: without grouping over dimensions</li>
<li>Search: Dimensions lookup</li>
<li>Time Boundary: Find available data timeframe</li>
<li>Metadata queries</li>
</ul>
<h2 id="tip">Tip</h2>
<ul>
<li>Prefer <code>topN</code> over <code>groupBy</code></li>
<li>Prefer <code>timeseries</code> over <code>topN</code></li>
<li>Use limits (and priorities)</li>
</ul>
<h2 id="query-spec">Query Spec</h2>
<ul>
<li>Data source</li>
<li>Dimensions</li>
<li>Interval</li>
<li>Filters</li>
<li>Aggregations</li>
<li>Post Aggregations</li>
<li>Granularity</li>
<li>Context (query configuration)</li>
<li>Limit</li>
</ul>
<h2 id="examples">Example(s)</h2>
<p>TODO</p>
<h2 id="caching">Caching</h2>
<ul>
<li>Historical node level
<ul>
<li>By segment</li>
</ul></li>
<li>Broker Level
<ul>
<li>By segment and query</li>
<li><code>groupBy</code> is disabled on purpose!</li>
</ul></li>
<li>By default - local caching</li>
</ul>
<h2 id="load-rules">Load Rules</h2>
<ul>
<li>Can be defined</li>
<li>What can be set</li>
</ul>
<h1 id="components">Components</h1>
<h2 id="druid-components">Druid Components</h2>
<ul>
<li>Real-time Nodes</li>
<li>Historical Nodes</li>
<li>Broker Nodes</li>
<li>Coordinator</li>
<li>For indexing:
<ul>
<li>Overlord</li>
<li>Middle Manager</li>
</ul></li>
<li>Deep Storage</li>
<li><p>Metadata Storage</p></li>
<li>Load Balancer</li>
<li><p>Cache</p></li>
</ul>
<h2 id="coordinator">Coordinator</h2>
<p>Manage Segments</p>
<h2 id="real-time-nodes">Real-time Nodes</h2>
<ul>
<li>Pulling data in real-time</li>
<li>Indexing it</li>
</ul>
<h2 id="historical-nodes">Historical Nodes</h2>
<ul>
<li>Keep historical segments</li>
</ul>
<h2 id="overlord">Overlord</h2>
<ul>
<li>Accepts tasks and distributes them to middle manager</li>
</ul>
<h2 id="middle-manager">Middle Manager</h2>
<ul>
<li>Execute submitted tasks via Peons</li>
</ul>
<h2 id="broker-nodes">Broker Nodes</h2>
<ul>
<li>Route query to Real-time and Historical nodes</li>
<li>Merge results</li>
</ul>
<h2 id="deep-storage">Deep Storage</h2>
<ul>
<li>Segments backup (HDFS, S3, …)</li>
</ul>
<h1 id="considerations-tools">Considerations &amp; Tools</h1>
<h2 id="when-not-to-choose-druid">When <em>not</em> to choose Druid</h2>
<ul>
<li>Data is not time-series</li>
<li>Cardinality is <em>very</em> high</li>
<li>Number of dimensions is high</li>
<li>Setup cost must be avoided</li>
</ul>
<h2 id="graphite-metrics">Graphite (metrics)</h2>
<p><img src="img/graphite.png" alt="Graphite" /> </p>
<p><a href="http://graphite.wikidot.com">Graphite</a></p>
<h2 id="pivot-exploring-data">Pivot (exploring data)</h2>
<p><img src="img/pivot.gif" alt="Pivot" /> </p>
<p><a href="https://github.com/implydata/pivot">Pivot</a></p>
<h2 id="caravel-exploring-data">Caravel (exploring data)</h2>
<p><img src="img/caravel.png" alt="caravel" /> </p>
<p><a href="https://github.com/airbnb/caravel">Caravel</a></p>
<div id="footer">
<a href="http://yannesposito.com">Y</a>
</div>
</body>
</html>

BIN
druid/druid.pdf Normal file

Binary file not shown.

643
druid/druid.reveal.html Normal file
View file

@@ -0,0 +1,643 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Druid for real-time analysis</title>
<meta name="description" content="Druid for real-time analysis">
<meta name="author" content="Yann Esposito" />
<meta name="apple-mobile-web-app-capable" content="yes" />
<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent" />
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">
<link rel="stylesheet" href="../.reveal.js-3.2.0/css/reveal.css">
<link rel="stylesheet" href="../.reveal.js-3.2.0/css/theme/solarized-dark.css" id="theme">
<!-- For syntax highlighting -->
<link rel="stylesheet" href="../.reveal.js-3.2.0/lib/css/solarized-dark.css">
<!-- If the query includes 'print-pdf', use the PDF print sheet -->
<script>
document.write( '<link rel="stylesheet" href="../.reveal.js-3.2.0/css/print/' +
( window.location.search.match( /print-pdf/gi ) ? 'pdf' : 'paper' ) +
'.css" type="text/css" media="print">' );
</script>
<!--[if lt IE 9]>
<script src="../.reveal.js-3.2.0/lib/js/html5shiv.js"></script>
<![endif]-->
</head>
<body>
<div class="reveal">
<!-- Any section element inside of this container is displayed as a slide -->
<div class="slides">
<section>
<h1>Druid for real-time analysis</h1>
<h3>Yann Esposito</h3>
<p>
<h4>7 Avril 2016</h4>
</p>
</section>
<section id="druid-the-sales-pitch" class="level1">
<h1>Druid the Sales Pitch</h1>
<ul>
<li>Sub-Second Queries</li>
<li>Real-time Streams</li>
<li>Scalable to Petabytes</li>
<li>Deploy Anywhere</li>
<li>Vibrant Community (Open Source)</li>
</ul>
<aside class="notes">
<ul>
<li>Ideal for powering user-facing analytic applications</li>
<li>Deploy anywhere: cloud, on-premise, integrate with Haddop, Spark, Kafka, Storm, Samza</li>
</ul>
</aside>
</section>
<section id="intro" class="level1">
<h1>Intro</h1>
<section id="experience" class="level2">
<h2>Experience</h2>
<ul>
<li>Real Time Social Media Analytics</li>
</ul>
</section>
<section id="real-time" class="level2">
<h2>Real Time?</h2>
<ul>
<li>Ingestion Latency: seconds</li>
<li>Query Latency: seconds</li>
</ul>
</section>
<section id="demand" class="level2">
<h2>Demand</h2>
<ul>
<li>Twitter: <code>20k msg/s</code>, <code>1msg = 10ko</code> during 24h</li>
<li>Facebook public: 1000 to 2000 msg/s continuously</li>
<li>Low Latency</li>
</ul>
</section>
<section id="reality" class="level2">
<h2>Reality</h2>
<ul>
<li>Twitter: 400 msg/s continuously, burst to 1500</li>
<li>Facebook: 1000 to 2000 msg/s</li>
</ul>
</section>
</section>
<section id="origin-php" class="level1">
<h1>Origin (PHP)</h1>
<p><img src="img/bad_php.jpg" alt="OH NOES PHP!" /> </p>
</section>
<section id="st-refactoring-node.js" class="level1">
<h1>1st Refactoring (Node.js)</h1>
<ul>
<li>Ingestion still in PHP</li>
<li>Node.js, Perl, Java &amp; R for sentiment analysis</li>
<li>MongoDB</li>
<li>Manually made time series (Incremental Map/Reduce)</li>
<li>Manually coded HyperLogLog in js</li>
</ul>
</section>
<section id="return-of-experience" class="level1">
<h1>Return of Experience</h1>
<p><img src="img/mongoDB.png" alt="MongoDB the destroyer" /> </p>
</section>
<section id="return-of-experience-1" class="level1">
<h1>Return of Experience</h1>
<ul>
<li>Ingestion still in PHP (600 msg/s max)</li>
<li>Node.js, Perl, Java (10 msg/s max)</li>
</ul>
<figure>
<img src="img/bored.gif" alt="Too Slow, Bored" /><figcaption>Too Slow, Bored</figcaption>
</figure>
</section>
<section id="nd-refactoring" class="level1">
<h1>2nd Refactoring</h1>
<ul>
<li>Haskell</li>
<li>Clojure / Clojurescript</li>
<li>Kafka / Zookeeper</li>
<li>Mesos / Marathon</li>
<li>Elasticsearch</li>
<li><strong>Druid</strong></li>
</ul>
</section>
<section id="nd-refactoring-ftw" class="level1">
<h1>2nd Refactoring (FTW!)</h1>
<p><img src="img/talking.jpg" alt="Now we&#39;re talking" /> </p>
</section>
<section id="nd-refactoring-return-of-experience" class="level1">
<h1>2nd Refactoring return of experience</h1>
<ul>
<li>No limit, everything is scalable</li>
<li>High availability</li>
<li>Low latency: Ingestion &amp; User faced querying</li>
<li>Cheap if done correctly</li>
</ul>
<p><strong>Thanks Druid!</strong></p>
</section>
<section id="demo" class="level1">
<h1>Demo</h1>
<ul>
<li>Low Latency High Volume of Data Analysis</li>
<li>Typically <code>pulse</code></li>
</ul>
<p><a href="http://pulse.vigiglo.be/#/vigiglobe/Earthquake/dashboard" target="_blank"> DEMO Time </a></p>
</section>
<section id="pre-considerations" class="level1">
<h1>Pre Considerations</h1>
<section id="discovered-vs-invented" class="level2">
<h2>Discovered vs Invented</h2>
<p>Try to conceptualize a s.t.</p>
<ul>
<li>Ingest Events</li>
<li>Real-Time Queries</li>
<li>Scalable</li>
<li>Highly Available</li>
</ul>
<p>Analytics: timeseries, alerting system, top N, etc...</p>
</section>
<section id="in-the-end" class="level2">
<h2>In the End</h2>
<p>Druid concepts are always emerging naturally</p>
</section>
</section>
<section id="druid" class="level1">
<h1>Druid</h1>
<section id="who" class="level2">
<h2>Who?</h2>
<p>Metamarkets</p>
<p><a href="http://druid.io/druid-powered.html"
target="_blank">Powered by Druid</a></p>
<ul>
<li>Alibaba, Cisco, Criteo, eBay, Hulu, Netflix, Paypal...</li>
</ul>
</section>
<section id="goal" class="level2">
<h2>Goal</h2>
<blockquote>
<p>Druid is an open source store designed for real-time exploratory analytics on large data sets.</p>
</blockquote>
<blockquote>
<p>hosted dashboard that would allow users to arbitrarily explore and visualize event streams.</p>
</blockquote>
</section>
<section id="concepts" class="level2">
<h2>Concepts</h2>
<ul>
<li>Column-oriented storage layout</li>
<li>distributed, shared-nothing architecture</li>
<li>advanced indexing structure</li>
</ul>
</section>
<section id="key-features" class="level2">
<h2>Key Features</h2>
<ul>
<li>Sub-second OLAP Queries</li>
<li>Real-time Streaming Ingestion</li>
<li>Power Analytic Applications</li>
<li>Cost Effective</li>
<li>High Available</li>
<li>Scalable</li>
</ul>
</section>
<section id="right-for-me" class="level2">
<h2>Right for me?</h2>
<ul>
<li>require fast aggregations</li>
<li>exploratory analytics</li>
<li>analysis in real-time</li>
<li>lots of data (trillions of events, petabytes of data)</li>
<li>no single point of failure</li>
</ul>
</section>
</section>
<section id="high-level-architecture" class="level1">
<h1>High Level Architecture</h1>
<section id="inspiration" class="level2">
<h2>Inspiration</h2>
<ul>
<li>Google's <a href="http://static.googleusercontent.com/media/research.google.com/en/us/pubs/archive/36632.pdf">BigQuery/Dremel</a></li>
<li>Google's <a href="http://vldb.org/pvldb/vol5/p1436_alexanderhall_vldb2012.pdf">PowerDrill</a></li>
</ul>
</section>
<section id="index-immutability" class="level2">
<h2>Index / Immutability</h2>
<p>Druid indexes data to create mostly immutable views.</p>
</section>
<section id="storage" class="level2">
<h2>Storage</h2>
<p>Store data in custom column format highly optimized for aggregation &amp; filter.</p>
</section>
<section id="specialized-nodes" class="level2">
<h2>Specialized Nodes</h2>
<ul>
<li>A Druid cluster is composed of various type of nodes</li>
<li>Each designed to do a small set of things very well</li>
<li>Nodes don't need to be deployed on individual hardware</li>
<li>Many node types can be colocated in production</li>
</ul>
</section>
</section>
<section id="druid-vs-x" class="level1">
<h1>Druid vs X</h1>
<section id="elasticsearch" class="level2">
<h2>Elasticsearch</h2>
<ul>
<li>resource requirement much higher for ingestion &amp; aggregation</li>
<li>No data summarization (100x in real world data)</li>
</ul>
</section>
<section id="keyvalue-stores-hbasecassandraopentsdb" class="level2">
<h2>Key/Value Stores (HBase/Cassandra/OpenTSDB)</h2>
<ul>
<li>Must Pre-compute Result
<ul>
<li>Exponential storage</li>
<li>Hours of pre-processing time</li>
</ul></li>
<li>Use the dimensions as key (like in OpenTSDB)
<ul>
<li>No filter index other than range</li>
<li>Hard for complex predicates</li>
</ul></li>
</ul>
</section>
<section id="spark" class="level2">
<h2>Spark</h2>
<ul>
<li>Druid can be used to accelerate OLAP queries in Spark</li>
<li>Druid focuses on the latencies to ingest and serve queries</li>
<li>Too long for end user to arbitrarily explore data</li>
</ul>
</section>
<section id="sql-on-hadoop-impaladrillspark-sqlpresto" class="level2">
<h2>SQL-on-Hadoop (Impala/Drill/Spark SQL/Presto)</h2>
<ul>
<li>Queries: more data transfer between nodes</li>
<li>Data Ingestion: bottleneck by backing store</li>
<li>Query Flexibility: more flexible (full joins)</li>
</ul>
</section>
</section>
<section id="data" class="level1">
<h1>Data</h1>
<section id="concepts-1" class="level2">
<h2>Concepts</h2>
<ul>
<li><strong>Timestamp column</strong>: query centered on time axis</li>
<li><strong>Dimension columns</strong>: strings (used to filter or to group)</li>
<li><strong>Metric columns</strong>: used for aggregations (count, sum, mean, etc...)</li>
</ul>
</section>
</section>
<section id="roll-up" class="level1">
<h1>Roll-up</h1>
<section id="example" class="level2">
<h2>Example</h2>
<pre><code>timestamp page ... added deleted
2011-01-01T00:01:35Z Justin Bieber 10 65
2011-01-01T00:03:45Z  Justin Bieber  15     62
2011-01-01T01:04:51Z Justin Bieber 32 45
2011-01-01T01:01:00Z Ke$ha 17 87
2011-01-01T01:02:00Z Ke$ha 43 99
2011-01-01T02:03:00Z Ke$ha 12 53</code></pre>
<pre><code>timestamp page ... nb added deleted
2011-01-01T00:00:00Z Justin Bieber 2 25 127
2011-01-01T01:00:00Z Justin Bieber 1 32 45
2011-01-01T01:00:00Z Ke$ha 2 60 186
2011-01-01T02:00:00Z Ke$ha 1 12 53</code></pre>
</section>
<section id="as-sql" class="level2">
<h2>as SQL</h2>
<pre><code>GROUP BY timestamp, page
:: nb = COUNT(1)
, added = SUM(added)
, deleted = SUM(deleted)</code></pre>
<p>In practice can dramatically reduce the size (up to x100)</p>
</section>
</section>
<section id="sharding" class="level1">
<h1>Sharding</h1>
<section id="segments" class="level2">
<h2>Segments</h2>
<p><small><code>sampleData_2011-01-01T01:00:00Z_2011-01-01T02:00:00Z_v1_0</code></small></p>
<pre><code>2011-01-01T01:00:00Z Justin Bieber 1 20 45
2011-01-01T01:00:00Z Ke$ha 1 30 106</code></pre>
<p><small><code>sampleData_2011-01-01T01:00:00Z_2011-01-01T02:00:00Z_v1_1</code></small></p>
<pre><code>2011-01-01T01:00:00Z Justin Bieber 1 12 45
2011-01-01T01:00:00Z Ke$ha 2 30 80</code></pre>
</section>
<section id="core-data-structure" class="level2">
<h2>Core Data Structure</h2>
<p><img src="img/druid-column-types.png" alt="Segment" /> </p>
<ul>
<li>dictionary</li>
<li>a bitmap for each value</li>
<li>a list of the columns values encoded using the dictionary</li>
</ul>
</section>
<section id="dictionary" class="level2">
<h2>Dictionary</h2>
<pre><code>{ &quot;Justin Bieber&quot;: 0
, &quot;Ke$ha&quot;: 1
}</code></pre>
</section>
<section id="columnn-data" class="level2">
<h2>Column Data</h2>
<pre><code>[ 0
, 0
, 1
, 1
]</code></pre>
</section>
<section id="bitmaps" class="level2">
<h2>Bitmaps</h2>
<p>one for each value of the column</p>
<pre><code>value=&quot;Justin Bieber&quot;: [1,1,0,0]
value=&quot;Ke$ha&quot;: [0,0,1,1]</code></pre>
</section>
</section>
<section id="data-1" class="level1">
<h1>Data</h1>
<section id="indexing-data" class="level2">
<h2>Indexing Data</h2>
<ul>
<li>Immutable snapshots of data</li>
<li>data structure highly optimized for analytic queries</li>
<li>Each column is stored separately</li>
<li>Indexes data on a per shard (segment) level</li>
</ul>
</section>
<section id="loading-data" class="level2">
<h2>Loading data</h2>
<ul>
<li>Real-Time</li>
<li>Batch</li>
</ul>
</section>
<section id="querying-the-data" class="level2">
<h2>Querying the data</h2>
<ul>
<li>JSON over HTTP</li>
<li>Single Table Operations, no joins.</li>
</ul>
</section>
<section id="columnar-storage" class="level2">
<h2>Columnar Storage</h2>
</section>
<section id="index" class="level2">
<h2>Index</h2>
<ul>
<li>Values are dictionary encoded</li>
</ul>
<p><code>{&quot;USA&quot; 1, &quot;Canada&quot; 2, &quot;Mexico&quot; 3, ...}</code></p>
<ul>
<li>Bitmap for every dimension value (used by filters)</li>
</ul>
<p><code>&quot;USA&quot; -&gt; [0 1 0 0 1 1 0 0 0]</code></p>
<ul>
<li>Column values (used by aggregation queries)</li>
</ul>
<p><code>[2,1,3,15,1,1,2,8,7]</code></p>
</section>
<section id="data-segments" class="level2">
<h2>Data Segments</h2>
<ul>
<li>Per time interval
<ul>
<li>skip segments when querying</li>
</ul></li>
<li>Immutable
<ul>
<li>Cache friendly</li>
<li>No locking</li>
</ul></li>
<li>Versioned
<ul>
<li>No locking</li>
<li>Read-write concurrency</li>
</ul></li>
</ul>
</section>
<section id="real-time-ingestion" class="level2">
<h2>Real-time ingestion</h2>
<ul>
<li>Via Real-Time Node and Firehose
<ul>
<li>No redundancy or HA, thus not recommended</li>
</ul></li>
<li>Via Indexing Service and Tranquility API
<ul>
<li>Core API</li>
<li>Integration with Streaming Frameworks</li>
<li>HTTP Server</li>
<li><strong>Kafka Consumer</strong></li>
</ul></li>
</ul>
</section>
<section id="batch-ingestion" class="level2">
<h2>Batch Ingestion</h2>
<ul>
<li>File based (HDFS, S3, ...)</li>
</ul>
</section>
<section id="real-time-ingestion-1" class="level2">
<h2>Real-time Ingestion</h2>
<pre><code>Task 1: [ Interval ][ Window ]
Task 2: [ ]
---------------------------------------&gt;
time</code></pre>
<p>Minimum indexing slots =<br />
Data Sources × Partitions × Replicas × 2</p>
</section>
</section>
<section id="querying" class="level1">
<h1>Querying</h1>
<section id="query-types" class="level2">
<h2>Query types</h2>
<ul>
<li>Group by: group by multiple dimensions</li>
<li>Top N: like grouping by a single dimension</li>
<li>Timeseries: without grouping over dimensions</li>
<li>Search: Dimensions lookup</li>
<li>Time Boundary: Find available data timeframe</li>
<li>Metadata queries</li>
</ul>
</section>
<section id="tip" class="level2">
<h2>Tip</h2>
<ul>
<li>Prefer <code>topN</code> over <code>groupBy</code></li>
<li>Prefer <code>timeseries</code> over <code>topN</code></li>
<li>Use limits (and priorities)</li>
</ul>
</section>
<section id="query-spec" class="level2">
<h2>Query Spec</h2>
<ul>
<li>Data source</li>
<li>Dimensions</li>
<li>Interval</li>
<li>Filters</li>
<li>Aggregations</li>
<li>Post Aggregations</li>
<li>Granularity</li>
<li>Context (query configuration)</li>
<li>Limit</li>
</ul>
</section>
<section id="examples" class="level2">
<h2>Example(s)</h2>
<p>TODO</p>
</section>
<section id="caching" class="level2">
<h2>Caching</h2>
<ul>
<li>Historical node level
<ul>
<li>By segment</li>
</ul></li>
<li>Broker Level
<ul>
<li>By segment and query</li>
<li><code>groupBy</code> is disabled on purpose!</li>
</ul></li>
<li>By default - local caching</li>
</ul>
</section>
<section id="load-rules" class="level2">
<h2>Load Rules</h2>
<ul>
<li>Can be defined</li>
<li>What can be set</li>
</ul>
</section>
</section>
<section id="components" class="level1">
<h1>Components</h1>
<section id="druid-components" class="level2">
<h2>Druid Components</h2>
<ul>
<li>Real-time Nodes</li>
<li>Historical Nodes</li>
<li>Broker Nodes</li>
<li>Coordinator</li>
<li>For indexing:
<ul>
<li>Overlord</li>
<li>Middle Manager</li>
</ul></li>
<li>Deep Storage</li>
<li><p>Metadata Storage</p></li>
<li>Load Balancer</li>
<li><p>Cache</p></li>
</ul>
</section>
<section id="coordinator" class="level2">
<h2>Coordinator</h2>
<p>Manage Segments</p>
</section>
<section id="real-time-nodes" class="level2">
<h2>Real-time Nodes</h2>
<ul>
<li>Pulling data in real-time</li>
<li>Indexing it</li>
</ul>
</section>
<section id="historical-nodes" class="level2">
<h2>Historical Nodes</h2>
<ul>
<li>Keep historical segments</li>
</ul>
</section>
<section id="overlord" class="level2">
<h2>Overlord</h2>
<ul>
<li>Accepts tasks and distributes them to middle manager</li>
</ul>
</section>
<section id="middle-manager" class="level2">
<h2>Middle Manager</h2>
<ul>
<li>Execute submitted tasks via Peons</li>
</ul>
</section>
<section id="broker-nodes" class="level2">
<h2>Broker Nodes</h2>
<ul>
<li>Route query to Real-time and Historical nodes</li>
<li>Merge results</li>
</ul>
</section>
<section id="deep-storage" class="level2">
<h2>Deep Storage</h2>
<ul>
<li>Segments backup (HDFS, S3, ...)</li>
</ul>
</section>
</section>
<section id="considerations-tools" class="level1">
<h1>Considerations &amp; Tools</h1>
<section id="when-not-to-choose-druid" class="level2">
<h2>When <em>not</em> to choose Druid</h2>
<ul>
<li>Data is not time-series</li>
<li>Cardinality is <em>very</em> high</li>
<li>Number of dimensions is high</li>
<li>Setup cost must be avoided</li>
</ul>
</section>
<section id="graphite-metrics" class="level2">
<h2>Graphite (metrics)</h2>
<p><img src="img/graphite.png" alt="Graphite" />__</p>
<p><a href="http://graphite.wikidot.com">Graphite</a></p>
</section>
<section id="pivot-exploring-data" class="level2">
<h2>Pivot (exploring data)</h2>
<p><img src="img/pivot.gif" alt="Pivot" /> </p>
<p><a href="https://github.com/implydata/pivot">Pivot</a></p>
</section>
<section id="caravel-exploring-data" class="level2">
<h2>Caravel (exploring data)</h2>
<p><img src="img/caravel.png" alt="caravel" /> </p>
<p><a href="https://github.com/airbnb/caravel">Caravel</a></p>
</section>
</section>
</div>
<script src="../.reveal.js-3.2.0/lib/js/head.min.js"></script>
<script src="../.reveal.js-3.2.0/js/reveal.js"></script>
<script>
// Full list of configuration options available here:
// https://github.com/hakimel/reveal.js#configuration
// Configure the reveal.js presentation framework.
// Full list of configuration options available here:
// https://github.com/hakimel/reveal.js#configuration
Reveal.initialize({
    controls: true,   // show navigation arrows
    progress: true,   // show progress bar at the bottom
    history: true,    // push each slide change to the browser history
    center: false,    // top-align slides instead of vertical centering

    // available themes are in /css/theme
    theme: Reveal.getQueryHash().theme || 'solarized-dark',
    // default/cube/page/concave/zoom/linear/fade/none
    transition: Reveal.getQueryHash().transition || 'linear',

    // Optional libraries used to extend on reveal.js.
    // NOTE: paths normalized to '../' to match the <script src> tags above
    // (the previous '..//' double slash was harmless but inconsistent).
    dependencies: [
        { src: '../.reveal.js-3.2.0/lib/js/classList.js', condition: function() { return !document.body.classList; } },
        { src: '../.reveal.js-3.2.0/plugin/markdown/showdown.js', condition: function() { return !!document.querySelector( '[data-markdown]' ); } },
        { src: '../.reveal.js-3.2.0/plugin/markdown/markdown.js', condition: function() { return !!document.querySelector( '[data-markdown]' ); } },
        { src: '../.reveal.js-3.2.0/plugin/highlight/highlight.js', async: true, callback: function() { hljs.initHighlightingOnLoad(); } },
        { src: '../.reveal.js-3.2.0/plugin/zoom-js/zoom.js', async: true, condition: function() { return !!document.body.classList; } },
        { src: '../.reveal.js-3.2.0/plugin/notes/notes.js', async: true, condition: function() { return !!document.body.classList; } }
    ]
});
</script>
</body>
</html>