Yann Esposito (Yogsototh) 2016-04-06 00:45:18 +02:00
parent 3602f53cfc
commit 585d84325c
Signed by untrusted user who does not match committer: yogsototh
GPG key ID: 7B19A4C650D59646
17 changed files with 584 additions and 506 deletions

.gitignore (1 line added)

@@ -3,3 +3,4 @@ build/
compile
*.hi
*.o
.DS_Store


@@ -84,7 +84,8 @@ body {
.reveal strong,
.reveal b {
font-weight: bold; }
font-weight: bold;
color: #b58900; }
.reveal em {
font-style: italic; }



@@ -60,52 +60,43 @@
</ul></li>
<li><a href="#data">Data</a><ul>
<li><a href="#concepts-1">Concepts</a></li>
<li><a href="#indexing">Indexing</a></li>
<li><a href="#loading">Loading</a></li>
<li><a href="#querying">Querying</a></li>
<li><a href="#segments">Segments</a></li>
</ul></li>
<li><a href="#roll-up">Roll-up</a><ul>
<li><a href="#example">Example</a></li>
<li><a href="#as-sql">as SQL</a></li>
</ul></li>
<li><a href="#sharding">Sharding</a><ul>
<li><a href="#segments">Segments</a></li>
<li><a href="#segments-1">Segments</a><ul>
<li><a href="#sharding">Sharding</a></li>
<li><a href="#core-data-structure">Core Data Structure</a></li>
<li><a href="#dictionary">Dictionary</a></li>
<li><a href="#columnn-data">Columnn Data</a></li>
<li><a href="#bitmaps">Bitmaps</a></li>
</ul></li>
<li><a href="#data-1">Data</a><ul>
<li><a href="#indexing-data">Indexing Data</a></li>
<li><a href="#loading-data">Loading data</a></li>
<li><a href="#querying-the-data">Querying the data</a></li>
<li><a href="#columnar-storage">Columnar Storage</a></li>
<li><a href="#index">Index</a></li>
<li><a href="#data-segments">Data Segments</a></li>
<li><a href="#example-1">Example</a></li>
<li><a href="#example-multiple-matches">Example (multiple matches)</a></li>
<li><a href="#real-time-ingestion">Real-time ingestion</a></li>
<li><a href="#batch-ingestion">Batch Ingestion</a></li>
<li><a href="#real-time-ingestion-1">Real-time Ingestion</a></li>
</ul></li>
<li><a href="#querying">Querying</a><ul>
<li><a href="#querying-1">Querying</a><ul>
<li><a href="#query-types">Query types</a></li>
<li><a href="#tip">Tip</a></li>
<li><a href="#query-spec">Query Spec</a></li>
<li><a href="#examples">Example(s)</a></li>
<li><a href="#result">Result</a></li>
<li><a href="#caching">Caching</a></li>
<li><a href="#load-rules">Load Rules</a></li>
</ul></li>
<li><a href="#components">Components</a><ul>
<li><a href="#druid-components">Druid Components</a></li>
<li><a href="#druid-components">Druid Components</a><ul>
<li><a href="#druid-1">Druid</a></li>
<li><a href="#also">Also</a></li>
<li><a href="#coordinator">Coordinator</a></li>
<li><a href="#real-time-nodes">Real-time Nodes</a></li>
<li><a href="#historical-nodes">Historical Nodes</a></li>
<li><a href="#overlord">Overlord</a></li>
<li><a href="#middle-manager">Middle Manager</a></li>
<li><a href="#broker-nodes">Broker Nodes</a></li>
<li><a href="#deep-storage">Deep Storage</a></li>
</ul></li>
<li><a href="#considerations-tools">Considerations &amp; Tools</a><ul>
<li><a href="#when-not-to-choose-druid">When <em>not</em> to choose Druid</a></li>
<li><a href="#graphite-metrics">Graphite (metrics)</a></li>
<li><a href="#pivot-exploring-data">Pivot (exploring data)</a></li>
<li><a href="#caravel-exploring-data">Caravel (exploring data)</a></li>
<li><a href="#caravel">Caravel</a></li>
<li><a href="#conclusions">Conclusions</a><ul>
<li><a href="#precompute-your-time-series">Precompute your time series?</a></li>
<li><a href="#dont-reinvent-it">Dont reinvent it</a></li>
<li><a href="#druid-way-is-the-right-way">Druid way is the right way!</a></li>
</ul></li>
</ul>
</nav>
@@ -155,7 +146,7 @@
<li>Manually coded HyperLogLog in js</li>
</ul>
<h1 id="return-of-experience">Return of Experience</h1>
<p><img src="img/mongoDB.png" alt="MongoDB the destroyer" /> </p>
<p><img src="img/MongoDB.png" alt="MongoDB the destroyer" /> </p>
<h1 id="return-of-experience-1">Return of Experience</h1>
<ul>
<li>Ingestion still in PHP (600 msg/s max)</li>
@@ -294,88 +285,24 @@
<li><strong>Dimension columns</strong>: strings (used to filter or to group)</li>
<li><strong>Metric columns</strong>: used for aggregations (count, sum, mean, etc…)</li>
</ul>
<h1 id="roll-up">Roll-up</h1>
<h2 id="example">Example</h2>
<pre><code>timestamp page ... added deleted
2011-01-01T00:01:35Z Justin Bieber 10 65
2011-01-01T00:03:53Z Justin Bieber 15 62
2011-01-01T01:04:51Z Justin Bieber 32 45
2011-01-01T01:01:00Z Ke$ha 17 87
2011-01-01T01:02:00Z Ke$ha 43 99
2011-01-01T02:03:00Z Ke$ha 12 53</code></pre>
<pre><code>timestamp page ... nb added deleted
2011-01-01T00:00:00Z Justin Bieber 2 25 127
2011-01-01T01:00:00Z Justin Bieber 1 32 45
2011-01-01T01:00:00Z Ke$ha 2 60 186
2011-01-01T02:00:00Z Ke$ha 1 12 53</code></pre>
<h2 id="as-sql">as SQL</h2>
<pre><code>GROUP BY timestamp, page
:: nb = COUNT(1)
, added = SUM(added)
, deleted = SUM(deleted)</code></pre>
<p>In practice this can dramatically reduce the size (up to 100x)</p>
<h1 id="sharding">Sharding</h1>
<h2 id="segments">Segments</h2>
<p><small><code>sampleData_2011-01-01T01:00:00:00Z_2011-01-01T02:00:00:00Z_v1_0</code></small></p>
<pre><code>2011-01-01T01:00:00Z Justin Bieber 1 20 45
2011-01-01T01:00:00Z Ke$ha 1 30 106</code></pre>
<p><small><code>sampleData_2011-01-01T01:00:00:00Z_2011-01-01T02:00:00:00Z_v1_1</code></small></p>
<pre><code>2011-01-01T01:00:00Z Justin Bieber 1 12 45
2011-01-01T01:00:00Z Ke$ha 2 30 80</code></pre>
<h2 id="core-data-structure">Core Data Structure</h2>
<p><img src="img/druid-column-types.png" alt="Segment" /> </p>
<ul>
<li>dictionary</li>
<li>a bitmap for each value</li>
<li>a list of the column values encoded using the dictionary</li>
</ul>
<h2 id="dictionary">Dictionary</h2>
<pre><code>{ &quot;Justin Bieber&quot;: 0
, &quot;Ke$ha&quot;: 1
}</code></pre>
<h2 id="columnn-data">Columnn Data</h2>
<pre><code>[ 0
, 0
, 1
, 1
]</code></pre>
<h2 id="bitmaps">Bitmaps</h2>
<p>one for each value of the column</p>
<pre><code>value=&quot;Justin Bieber&quot;: [1,1,0,0]
value=&quot;Ke$ha&quot;: [0,0,1,1]</code></pre>
<h1 id="data-1">Data</h1>
<h2 id="indexing-data">Indexing Data</h2>
<h2 id="indexing">Indexing</h2>
<ul>
<li>Immutable snapshots of data</li>
<li>data structure highly optimized for analytic queries</li>
<li>Each column is stored separately</li>
<li>Indexes data on a per shard (segment) level</li>
</ul>
<h2 id="loading-data">Loading data</h2>
<h2 id="loading">Loading</h2>
<ul>
<li>Real-Time</li>
<li>Batch</li>
</ul>
<h2 id="querying-the-data">Querying the data</h2>
<h2 id="querying">Querying</h2>
<ul>
<li>JSON over HTTP</li>
<li>Single Table Operations, no joins.</li>
</ul>
<h2 id="columnar-storage">Columnar Storage</h2>
<h2 id="index">Index</h2>
<ul>
<li>Values are dictionary encoded</li>
</ul>
<p><code>{&quot;USA&quot; 1, &quot;Canada&quot; 2, &quot;Mexico&quot; 3, ...}</code></p>
<ul>
<li>Bitmap for every dimension value (used by filters)</li>
</ul>
<p><code>&quot;USA&quot; -&gt; [0 1 0 0 1 1 0 0 0]</code></p>
<ul>
<li>Column values (used by aggregation queries)</li>
</ul>
<p><code>[2,1,3,15,1,1,2,8,7]</code></p>
<h2 id="data-segments">Data Segments</h2>
<h2 id="segments">Segments</h2>
<ul>
<li>Per time interval
<ul>
@@ -392,6 +319,61 @@ value=&quot;Ke$ha&quot;: [0,0,1,1]</code></pre>
<li>Read-write concurrency</li>
</ul></li>
</ul>
<h1 id="roll-up">Roll-up</h1>
<h2 id="example">Example</h2>
<pre><code>timestamp page ... added deleted
2011-01-01T00:01:35Z Cthulhu 10 65
2011-01-01T00:03:53Z Cthulhu 15 62
2011-01-01T01:04:51Z Cthulhu 32 45
2011-01-01T01:01:00Z Azatoth 17 87
2011-01-01T01:02:00Z Azatoth 43 99
2011-01-01T02:03:00Z Azatoth 12 53</code></pre>
<pre><code>timestamp page ... nb added deleted
2011-01-01T00:00:00Z Cthulhu 2 25 127
2011-01-01T01:00:00Z Cthulhu 1 32 45
2011-01-01T01:00:00Z Azatoth 2 60 186
2011-01-01T02:00:00Z Azatoth 1 12 53</code></pre>
<h2 id="as-sql">as SQL</h2>
<pre><code>GROUP BY timestamp, page
:: nb = COUNT(1)
, added = SUM(added)
, deleted = SUM(deleted)</code></pre>
<p>In practice this can dramatically reduce the size (up to 100x)</p>
<h1 id="segments-1">Segments</h1>
<h2 id="sharding">Sharding</h2>
<p><small><code>sampleData_2011-01-01T01:00:00:00Z_2011-01-01T02:00:00:00Z_v1_0</code></small></p>
<pre><code>timestamp page ... nb added deleted
2011-01-01T01:00:00Z Cthulhu 1 20 45
2011-01-01T01:00:00Z Azatoth 1 30 106</code></pre>
<p><small><code>sampleData_2011-01-01T01:00:00:00Z_2011-01-01T02:00:00:00Z_v1_1</code></small></p>
<pre><code>timestamp page ... nb added deleted
2011-01-01T01:00:00Z Cthulhu 1 12 45
2011-01-01T01:00:00Z Azatoth 2 30 80</code></pre>
<h2 id="core-data-structure">Core Data Structure</h2>
<p><img src="img/druid-column-types.png" alt="Segment" /> </p>
<ul>
<li>dictionary</li>
<li>a bitmap for each value</li>
<li>a list of the column values encoded using the dictionary</li>
</ul>
<h2 id="example-1">Example</h2>
<pre><code>dictionary: { &quot;Cthulhu&quot;: 0
, &quot;Azatoth&quot;: 1 }
column data: [0, 0, 1, 1]
bitmaps (one for each value of the column):
value=&quot;Cthulhu&quot;: [1,1,0,0]
value=&quot;Azatoth&quot;: [0,0,1,1]</code></pre>
<h2 id="example-multiple-matches">Example (multiple matches)</h2>
<pre><code>dictionary: { &quot;Cthulhu&quot;: 0
, &quot;Azatoth&quot;: 1 }
column data: [0, [0,1], 1, 1]
bitmaps (one for each value of the column):
value=&quot;Cthulhu&quot;: [1,1,0,0]
value=&quot;Azatoth&quot;: [0,1,1,1]</code></pre>
<h2 id="real-time-ingestion">Real-time ingestion</h2>
<ul>
<li>Via Real-Time Node and Firehose
@@ -411,13 +393,11 @@ value=&quot;Ke$ha&quot;: [0,0,1,1]</code></pre>
<li>File based (HDFS, S3, …)</li>
</ul>
<h2 id="real-time-ingestion-1">Real-time Ingestion</h2>
<pre><code>Task 1: [ Interval ][ Window ]
Task 2: [ ]
---------------------------------------&gt;
time</code></pre>
<p>Minimum indexing slots =<br />
Data Sources × Partitions × Replicas × 2</p>
<h1 id="querying">Querying</h1>
<pre><code>Task 1: [ Interval ][ Window ]
Task 2: [ ]
-----------------------------------------------------&gt;
time</code></pre>
<h1 id="querying-1">Querying</h1>
<h2 id="query-types">Query types</h2>
<ul>
<li>Group by: group by multiple dimensions</li>
@@ -427,26 +407,27 @@ Task 2: [ ]
<li>Time Boundary: Find available data timeframe</li>
<li>Metadata queries</li>
</ul>
<h2 id="tip">Tip</h2>
<ul>
<li>Prefer <code>topN</code> over <code>groupBy</code></li>
<li>Prefer <code>timeseries</code> over <code>topN</code></li>
<li>Use limits (and priorities)</li>
</ul>
<h2 id="query-spec">Query Spec</h2>
<ul>
<li>Data source</li>
<li>Dimensions</li>
<li>Interval</li>
<li>Filters</li>
<li>Aggregations</li>
<li>Post Aggregations</li>
<li>Granularity</li>
<li>Context (query configuration)</li>
<li>Limit</li>
</ul>
<h2 id="examples">Example(s)</h2>
<p>TODO</p>
<pre><code>{&quot;queryType&quot;: &quot;groupBy&quot;,
&quot;dataSource&quot;: &quot;druidtest&quot;,
&quot;granularity&quot;: &quot;all&quot;,
&quot;dimensions&quot;: [],
&quot;aggregations&quot;: [
{&quot;type&quot;: &quot;count&quot;, &quot;name&quot;: &quot;rows&quot;},
{&quot;type&quot;: &quot;longSum&quot;, &quot;name&quot;: &quot;imps&quot;, &quot;fieldName&quot;: &quot;impressions&quot;},
{&quot;type&quot;: &quot;doubleSum&quot;, &quot;name&quot;: &quot;wp&quot;, &quot;fieldName&quot;: &quot;wp&quot;}
],
&quot;intervals&quot;: [&quot;2010-01-01T00:00/2020-01-01T00&quot;]}</code></pre>
<h2 id="result">Result</h2>
<pre><code>[ {
&quot;version&quot; : &quot;v1&quot;,
&quot;timestamp&quot; : &quot;2010-01-01T00:00:00.000Z&quot;,
&quot;event&quot; : {
&quot;imps&quot; : 5,
&quot;wp&quot; : 15000.0,
&quot;rows&quot; : 5
}
} ]</code></pre>
<h2 id="caching">Caching</h2>
<ul>
<li>Historical node level
@@ -458,15 +439,10 @@ Task 2: [ ]
<li>By segment and query</li>
<li><code>groupBy</code> is disabled on purpose!</li>
</ul></li>
<li>By default - local caching</li>
<li>By default: local caching</li>
</ul>
<h2 id="load-rules">Load Rules</h2>
<ul>
<li>Can be defined</li>
<li>What can be set</li>
</ul>
<h1 id="components">Components</h1>
<h2 id="druid-components">Druid Components</h2>
<h1 id="druid-components">Druid Components</h1>
<h2 id="druid-1">Druid</h2>
<ul>
<li>Real-time Nodes</li>
<li>Historical Nodes</li>
@@ -477,56 +453,60 @@ Task 2: [ ]
<li>Overlord</li>
<li>Middle Manager</li>
</ul></li>
<li>Deep Storage</li>
<li><p>Metadata Storage</p></li>
</ul>
<h2 id="also">Also</h2>
<ul>
<li>Deep Storage (S3, HDFS, …)</li>
<li>Metadata Storage (SQL)</li>
<li>Load Balancer</li>
<li><p>Cache</p></li>
<li>Cache</li>
</ul>
<h2 id="coordinator">Coordinator</h2>
<p>Manage Segments</p>
<h2 id="real-time-nodes">Real-time Nodes</h2>
<ul>
<li>Pulling data in real-time</li>
<li>Indexing it</li>
</ul>
<h2 id="historical-nodes">Historical Nodes</h2>
<li>Real-time Nodes (pull data, index it)</li>
<li>Historical Nodes (keep old segments)</li>
<li>Broker Nodes (route queries to RT &amp; Hist. nodes, merge)</li>
<li>Coordinator (manage segments)</li>
<li>For indexing:
<ul>
<li>Keep historical segments</li>
<li>Overlord (distribute tasks to the middle managers)</li>
<li>Middle Manager (execute tasks via Peons)</li>
</ul></li>
</ul>
<h2 id="overlord">Overlord</h2>
<ul>
<li>Accepts tasks and distributes them to the middle managers</li>
</ul>
<h2 id="middle-manager">Middle Manager</h2>
<ul>
<li>Execute submitted tasks via Peons</li>
</ul>
<h2 id="broker-nodes">Broker Nodes</h2>
<ul>
<li>Route query to Real-time and Historical nodes</li>
<li>Merge results</li>
</ul>
<h2 id="deep-storage">Deep Storage</h2>
<ul>
<li>Segments backup (HDFS, S3, …)</li>
</ul>
<h1 id="considerations-tools">Considerations &amp; Tools</h1>
<h2 id="when-not-to-choose-druid">When <em>not</em> to choose Druid</h2>
<h1 id="when-not-to-choose-druid">When <em>not</em> to choose Druid</h1>
<ul>
<li>Data is not time-series</li>
<li>Cardinality is <em>very</em> high</li>
<li>Number of dimensions is high</li>
<li>Setup cost must be avoided</li>
</ul>
<h2 id="graphite-metrics">Graphite (metrics)</h2>
<h1 id="graphite-metrics">Graphite (metrics)</h1>
<p><img src="img/graphite.png" alt="Graphite" />__</p>
<p><a href="http://graphite.wikidot.com">Graphite</a></p>
<h2 id="pivot-exploring-data">Pivot (exploring data)</h2>
<h1 id="pivot-exploring-data">Pivot (exploring data)</h1>
<p><img src="img/pivot.gif" alt="Pivot" /> </p>
<p><a href="https://github.com/implydata/pivot">Pivot</a></p>
<h2 id="caravel-exploring-data">Caravel (exploring data)</h2>
<h1 id="caravel">Caravel</h1>
<p><img src="img/caravel.png" alt="caravel" /> </p>
<p><a href="https://github.com/airbnb/caravel">Caravel</a></p>
<h1 id="conclusions">Conclusions</h1>
<h2 id="precompute-your-time-series">Precompute your time series?</h2>
<p><img src="img/wrong.jpg" alt="Youre doing it wrong" /> </p>
<h2 id="dont-reinvent-it">Dont reinvent it</h2>
<ul>
<li>need a user-facing API</li>
<li>need time series on many dimensions</li>
<li>need real-time</li>
<li>big volume of data</li>
</ul>
<h2 id="druid-way-is-the-right-way">Druid way is the right way!</h2>
<ol type="1">
<li>Push into Kafka</li>
<li>Add the right dimensions</li>
<li>Push into Druid</li>
<li>???</li>
<li>Profit!</li>
</ol>
<div id="footer">
<a href="yannesposito.com">Y</a>
</div>


@@ -57,7 +57,7 @@ date: 7 April 2016
# Experience Report
![MongoDB the destroyer](img/mongoDB.png)\
![MongoDB the destroyer](img/MongoDB.png)\
# Experience Report
@@ -214,26 +214,54 @@ Store data in custom column format highly optimized for aggregation & filter.
- **Dimension columns**: strings (used to filter or to group)
- **Metric columns**: used for aggregations (count, sum, mean, etc...)
## Indexing
- Immutable snapshots of data
- data structure highly optimized for analytic queries
- Each column is stored separately
- Indexes data on a per shard (segment) level
## Loading
- Real-Time
- Batch
## Querying
- JSON over HTTP
- Single Table Operations, no joins.
## Segments
- Per time interval
- skip segments when querying
- Immutable
- Cache friendly
- No locking
- Versioned
- No locking
- Read-write concurrency
# Roll-up
## Example
~~~
timestamp page ... added deleted
2011-01-01T00:01:35Z Justin Bieber 10 65
2011-01-01T00:03:53Z Justin Bieber 15 62
2011-01-01T01:04:51Z Justin Bieber 32 45
2011-01-01T01:01:00Z Ke$ha 17 87
2011-01-01T01:02:00Z Ke$ha 43 99
2011-01-01T02:03:00Z Ke$ha 12 53
timestamp page ... added deleted
2011-01-01T00:01:35Z Cthulhu 10 65
2011-01-01T00:03:53Z Cthulhu 15 62
2011-01-01T01:04:51Z Cthulhu 32 45
2011-01-01T01:01:00Z Azatoth 17 87
2011-01-01T01:02:00Z Azatoth 43 99
2011-01-01T02:03:00Z Azatoth 12 53
~~~
~~~
timestamp page ... nb added deleted
2011-01-01T00:00:00Z Justin Bieber 2 25 127
2011-01-01T01:00:00Z Justin Bieber 1 32 45
2011-01-01T01:00:00Z Ke$ha 2 60 186
2011-01-01T02:00:00Z Ke$ha 1 12 53
timestamp page ... nb added deleted
2011-01-01T00:00:00Z Cthulhu 2 25 127
2011-01-01T01:00:00Z Cthulhu 1 32 45
2011-01-01T01:00:00Z Azatoth 2 60 186
2011-01-01T02:00:00Z Azatoth 1 12 53
~~~
## as SQL
@@ -247,22 +275,25 @@ GROUP BY timestamp, page
In practice this can dramatically reduce the size (up to 100x)
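As a minimal sketch of the same roll-up outside Druid (assuming Python with pandas; data and column names match the example above):

~~~
import pandas as pd

# Raw events, same shape as the roll-up example above.
raw = pd.DataFrame(
    [("2011-01-01T00:01:35Z", "Cthulhu", 10, 65),
     ("2011-01-01T00:03:53Z", "Cthulhu", 15, 62),
     ("2011-01-01T01:04:51Z", "Cthulhu", 32, 45),
     ("2011-01-01T01:01:00Z", "Azatoth", 17, 87)],
    columns=["timestamp", "page", "added", "deleted"])

# Roll-up = truncate timestamps to the chosen granularity, then aggregate.
raw["timestamp"] = pd.to_datetime(raw["timestamp"]).dt.floor("h")
rolled = (raw.groupby(["timestamp", "page"], as_index=False)
             .agg(nb=("page", "size"),
                  added=("added", "sum"),
                  deleted=("deleted", "sum")))
print(rolled)  # one row per (hour, page)
~~~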
# Sharding
## Segments
# Segments
## Sharding
<small>`sampleData_2011-01-01T01:00:00:00Z_2011-01-01T02:00:00:00Z_v1_0`</small>
~~~
2011-01-01T01:00:00Z Justin Bieber 1 20 45
2011-01-01T01:00:00Z Ke$ha 1 30 106
timestamp page ... nb added deleted
2011-01-01T01:00:00Z Cthulhu 1 20 45
2011-01-01T01:00:00Z Azatoth 1 30 106
~~~
<small>`sampleData_2011-01-01T01:00:00:00Z_2011-01-01T02:00:00:00Z_v1_1`</small>
~~~
2011-01-01T01:00:00Z Justin Bieber 1 12 45
2011-01-01T01:00:00Z Ke$ha 2 30 80
timestamp page ... nb added deleted
2011-01-01T01:00:00Z Cthulhu 1 12 45
2011-01-01T01:00:00Z Azatoth 2 30 80
~~~
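The identifier itself encodes the datasource, the time interval, a version, and the shard (partition) number. A toy parser in Python (illustrative only; it assumes the datasource name contains no extra underscores):

~~~
def parse_segment_id(segment_id):
    # Layout: dataSource_intervalStart_intervalEnd_version_partitionNum
    datasource, start, end, version, partition = segment_id.rsplit("_", 4)
    return {"dataSource": datasource,
            "interval": (start, end),
            "version": version,
            "partition": int(partition)}

print(parse_segment_id(
    "sampleData_2011-01-01T01:00:00:00Z_2011-01-01T02:00:00:00Z_v1_0"))
~~~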
## Core Data Structure
@@ -273,78 +304,32 @@ In practice this can dramatically reduce the size (up to 100x)
- a bitmap for each value
- a list of the column values encoded using the dictionary
## Dictionary
## Example
~~~
{ "Justin Bieber": 0
, "Ke$ha": 1
}
dictionary: { "Cthulhu": 0
, "Azatoth": 1 }
column data: [0, 0, 1, 1]
bitmaps (one for each value of the column):
value="Cthulhu": [1,1,0,0]
value="Azatoth": [0,0,1,1]
~~~
## Column Data
## Example (multiple matches)
~~~
[ 0
, 0
, 1
, 1
]
dictionary: { "Cthulhu": 0
, "Azatoth": 1 }
column data: [0, [0,1], 1, 1]
bitmaps (one for each value of the column):
value="Cthulhu": [1,1,0,0]
value="Azatoth": [0,1,1,1]
~~~
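A small sketch of how the dictionary, the encoded column, and the bitmaps can be derived from a column (plain Python, single-valued rows only):

~~~
def index_column(values):
    # Dictionary-encode one string column and build per-value bitmaps.
    dictionary, encoded = {}, []
    for v in values:
        encoded.append(dictionary.setdefault(v, len(dictionary)))
    bitmaps = {v: [1 if code == i else 0 for code in encoded]
               for v, i in dictionary.items()}
    return dictionary, encoded, bitmaps

dictionary, column_data, bitmaps = index_column(
    ["Cthulhu", "Cthulhu", "Azatoth", "Azatoth"])
# dictionary  == {"Cthulhu": 0, "Azatoth": 1}
# column_data == [0, 0, 1, 1]
# bitmaps     == {"Cthulhu": [1, 1, 0, 0], "Azatoth": [0, 0, 1, 1]}
~~~

A filter such as `page = "Cthulhu" OR page = "Azatoth"` then reduces to a bitwise OR of the two bitmaps.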
## Bitmaps
one for each value of the column
~~~
value="Justin Bieber": [1,1,0,0]
value="Ke$ha": [0,0,1,1]
~~~
# Data
## Indexing Data
- Immutable snapshots of data
- data structure highly optimized for analytic queries
- Each column is stored separately
- Indexes data on a per shard (segment) level
## Loading data
- Real-Time
- Batch
## Querying the data
- JSON over HTTP
- Single Table Operations, no joins.
## Columnar Storage
## Index
- Values are dictionary encoded
`{"USA" 1, "Canada" 2, "Mexico" 3, ...}`
- Bitmap for every dimension value (used by filters)
`"USA" -> [0 1 0 0 1 1 0 0 0]`
- Column values (used by aggregation queries)
`[2,1,3,15,1,1,2,8,7]`
## Data Segments
- Per time interval
- skip segments when querying
- Immutable
- Cache friendly
- No locking
- Versioned
- No locking
- Read-write concurrency
## Real-time ingestion
@@ -363,15 +348,12 @@ value="Ke$ha": [0,0,1,1]
## Real-time Ingestion
~~~
Task 1: [ Interval ][ Window ]
Task 2: [ ]
--------------------------------------->
time
Task 1: [ Interval ][ Window ]
Task 2: [ ]
----------------------------------------------------->
time
~~~
Minimum indexing slots =
Data Sources × Partitions × Replicas × 2
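A worked example of that formula (the numbers are illustrative):

~~~
def min_indexing_slots(data_sources, partitions, replicas):
    # x2: tasks for consecutive intervals overlap during the window period
    return data_sources * partitions * replicas * 2

print(min_indexing_slots(2, 2, 2))  # => 16 slots
~~~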
# Querying
## Query types
@@ -383,27 +365,34 @@ Minimum indexing slots =
- Time Boundary: Find available data timeframe
- Metadata queries
## Tip
- Prefer `topN` over `groupBy`
- Prefer `timeseries` over `topN`
- Use limits (and priorities)
## Query Spec
- Data source
- Dimensions
- Interval
- Filters
- Aggregations
- Post Aggregations
- Granularity
- Context (query configuration)
- Limit
## Example(s)
TODO
~~~
{"queryType": "groupBy",
"dataSource": "druidtest",
"granularity": "all",
"dimensions": [],
"aggregations": [
{"type": "count", "name": "rows"},
{"type": "longSum", "name": "imps", "fieldName": "impressions"},
{"type": "doubleSum", "name": "wp", "fieldName": "wp"}
],
"intervals": ["2010-01-01T00:00/2020-01-01T00"]}
~~~
## Result
~~~
[ {
"version" : "v1",
"timestamp" : "2010-01-01T00:00:00.000Z",
"event" : {
"imps" : 5,
"wp" : 15000.0,
"rows" : 5
}
} ]
~~~
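Since querying is JSON over HTTP, running the example above is a single POST. A minimal sketch (assuming the Python `requests` library and a broker listening on `localhost:8082`, the default broker port):

~~~
import requests

query = {"queryType": "groupBy",
         "dataSource": "druidtest",
         "granularity": "all",
         "dimensions": [],
         "aggregations": [
             {"type": "count", "name": "rows"},
             {"type": "longSum", "name": "imps", "fieldName": "impressions"},
             {"type": "doubleSum", "name": "wp", "fieldName": "wp"}],
         "intervals": ["2010-01-01T00:00/2020-01-01T00"]}

resp = requests.post("http://localhost:8082/druid/v2/?pretty", json=query)
resp.raise_for_status()
print(resp.json())  # should print the result shown above
~~~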
## Caching
@@ -412,16 +401,11 @@ TODO
- Broker Level
- By segment and query
- `groupBy` is disabled on purpose!
- By default - local caching
- By default: local caching
## Load Rules
# Druid Components
- Can be defined
- What can be set
# Components
## Druid Components
## Druid
- Real-time Nodes
- Historical Nodes
@@ -431,65 +415,65 @@ TODO
- Overlord
- Middle Manager
+ Deep Storage
+ Metadata Storage
## Also
+ Load Balancer
+ Cache
- Deep Storage (S3, HDFS, ...)
- Metadata Storage (SQL)
- Load Balancer
- Cache
## Coordinator
Manage Segments
- Real-time Nodes (pull data, index it)
- Historical Nodes (keep old segments)
- Broker Nodes (route queries to RT & Hist. nodes, merge)
- Coordinator (manage segments)
- For indexing:
- Overlord (distribute tasks to the middle managers)
- Middle Manager (execute tasks via Peons)
## Real-time Nodes
- Pulling data in real-time
- Indexing it
## Historical Nodes
- Keep historical segments
## Overlord
- Accepts tasks and distributes them to the middle managers
## Middle Manager
- Execute submitted tasks via Peons
## Broker Nodes
- Route query to Real-time and Historical nodes
- Merge results
## Deep Storage
- Segments backup (HDFS, S3, ...)
# Considerations & Tools
## When *not* to choose Druid
# When *not* to choose Druid
- Data is not time-series
- Cardinality is _very_ high
- Number of dimensions is high
- Setup cost must be avoided
## Graphite (metrics)
# Graphite (metrics)
![Graphite](img/graphite.png)\
[Graphite](http://graphite.wikidot.com)
## Pivot (exploring data)
# Pivot (exploring data)
![Pivot](img/pivot.gif)\
[Pivot](https://github.com/implydata/pivot)
## Caravel (exploring data)
# Caravel
![caravel](img/caravel.png)\
[Caravel](https://github.com/airbnb/caravel)
# Conclusions
## Precompute your time series?
![You're doing it wrong](img/wrong.jpg)\
## Don't reinvent it
- need a user-facing API
- need time series on many dimensions
- need real-time
- big volume of data
## The Druid way is the right way!
1. Push into Kafka
2. Add the right dimensions
3. Push into Druid
4. ???
5. Profit!



@@ -101,7 +101,7 @@
</section>
<section id="return-of-experience" class="level1">
<h1>Experience Report</h1>
<p><img src="img/mongoDB.png" alt="MongoDB the destroyer" /> </p>
<p><img src="img/MongoDB.png" alt="MongoDB the destroyer" /> </p>
</section>
<section id="return-of-experience-1" class="level1">
<h1>Experience Report</h1>
@@ -292,78 +292,8 @@
<li><strong>Metric columns</strong>: used for aggregations (count, sum, mean, etc...)</li>
</ul>
</section>
</section>
<section id="roll-up" class="level1">
<h1>Roll-up</h1>
<section id="example" class="level2">
<h2>Example</h2>
<pre><code>timestamp page ... added deleted
2011-01-01T00:01:35Z Justin Bieber 10 65
2011-01-01T00:03:53Z Justin Bieber 15 62
2011-01-01T01:04:51Z Justin Bieber 32 45
2011-01-01T01:01:00Z Ke$ha 17 87
2011-01-01T01:02:00Z Ke$ha 43 99
2011-01-01T02:03:00Z Ke$ha 12 53</code></pre>
<pre><code>timestamp page ... nb added deleted
2011-01-01T00:00:00Z Justin Bieber 2 25 127
2011-01-01T01:00:00Z Justin Bieber 1 32 45
2011-01-01T01:00:00Z Ke$ha 2 60 186
2011-01-01T02:00:00Z Ke$ha 1 12 53</code></pre>
</section>
<section id="as-sql" class="level2">
<h2>as SQL</h2>
<pre><code>GROUP BY timestamp, page
:: nb = COUNT(1)
, added = SUM(added)
, deleted = SUM(deleted)</code></pre>
<p>In practice this can dramatically reduce the size (up to 100x)</p>
</section>
</section>
<section id="sharding" class="level1">
<h1>Sharding</h1>
<section id="segments" class="level2">
<h2>Segments</h2>
<p><small><code>sampleData_2011-01-01T01:00:00:00Z_2011-01-01T02:00:00:00Z_v1_0</code></small></p>
<pre><code>2011-01-01T01:00:00Z Justin Bieber 1 20 45
2011-01-01T01:00:00Z Ke$ha 1 30 106</code></pre>
<p><small><code>sampleData_2011-01-01T01:00:00:00Z_2011-01-01T02:00:00:00Z_v1_1</code></small></p>
<pre><code>2011-01-01T01:00:00Z Justin Bieber 1 12 45
2011-01-01T01:00:00Z Ke$ha 2 30 80</code></pre>
</section>
<section id="core-data-structure" class="level2">
<h2>Core Data Structure</h2>
<p><img src="img/druid-column-types.png" alt="Segment" /> </p>
<ul>
<li>dictionary</li>
<li>a bitmap for each value</li>
<li>a list of the column values encoded using the dictionary</li>
</ul>
</section>
<section id="dictionary" class="level2">
<h2>Dictionary</h2>
<pre><code>{ &quot;Justin Bieber&quot;: 0
, &quot;Ke$ha&quot;: 1
}</code></pre>
</section>
<section id="columnn-data" class="level2">
<h2>Column Data</h2>
<pre><code>[ 0
, 0
, 1
, 1
]</code></pre>
</section>
<section id="bitmaps" class="level2">
<h2>Bitmaps</h2>
<p>one for each value of the column</p>
<pre><code>value=&quot;Justin Bieber&quot;: [1,1,0,0]
value=&quot;Ke$ha&quot;: [0,0,1,1]</code></pre>
</section>
</section>
<section id="data-1" class="level1">
<h1>Data</h1>
<section id="indexing-data" class="level2">
<h2>Indexing Data</h2>
<section id="indexing" class="level2">
<h2>Indexing</h2>
<ul>
<li>Immutable snapshots of data</li>
<li>data structure highly optimized for analytic queries</li>
@@ -371,40 +301,22 @@ value=&quot;Ke$ha&quot;: [0,0,1,1]</code></pre>
<li>Indexes data on a per shard (segment) level</li>
</ul>
</section>
<section id="loading-data" class="level2">
<h2>Loading data</h2>
<section id="loading" class="level2">
<h2>Loading</h2>
<ul>
<li>Real-Time</li>
<li>Batch</li>
</ul>
</section>
<section id="querying-the-data" class="level2">
<h2>Querying the data</h2>
<section id="querying" class="level2">
<h2>Querying</h2>
<ul>
<li>JSON over HTTP</li>
<li>Single Table Operations, no joins.</li>
</ul>
</section>
<section id="columnar-storage" class="level2">
<h2>Columnar Storage</h2>
</section>
<section id="index" class="level2">
<h2>Index</h2>
<ul>
<li>Values are dictionary encoded</li>
</ul>
<p><code>{&quot;USA&quot; 1, &quot;Canada&quot; 2, &quot;Mexico&quot; 3, ...}</code></p>
<ul>
<li>Bitmap for every dimension value (used by filters)</li>
</ul>
<p><code>&quot;USA&quot; -&gt; [0 1 0 0 1 1 0 0 0]</code></p>
<ul>
<li>Column values (used by aggregation queries)</li>
</ul>
<p><code>[2,1,3,15,1,1,2,8,7]</code></p>
</section>
<section id="data-segments" class="level2">
<h2>Data Segments</h2>
<section id="segments" class="level2">
<h2>Segments</h2>
<ul>
<li>Per time interval
<ul>
@@ -422,6 +334,77 @@ value=&quot;Ke$ha&quot;: [0,0,1,1]</code></pre>
</ul></li>
</ul>
</section>
</section>
<section id="roll-up" class="level1">
<h1>Roll-up</h1>
<section id="example" class="level2">
<h2>Example</h2>
<pre><code>timestamp page ... added deleted
2011-01-01T00:01:35Z Cthulhu 10 65
2011-01-01T00:03:53Z Cthulhu 15 62
2011-01-01T01:04:51Z Cthulhu 32 45
2011-01-01T01:01:00Z Azatoth 17 87
2011-01-01T01:02:00Z Azatoth 43 99
2011-01-01T02:03:00Z Azatoth 12 53</code></pre>
<pre><code>timestamp page ... nb added deleted
2011-01-01T00:00:00Z Cthulhu 2 25 127
2011-01-01T01:00:00Z Cthulhu 1 32 45
2011-01-01T01:00:00Z Azatoth 2 60 186
2011-01-01T02:00:00Z Azatoth 1 12 53</code></pre>
</section>
<section id="as-sql" class="level2">
<h2>as SQL</h2>
<pre><code>GROUP BY timestamp, page
:: nb = COUNT(1)
, added = SUM(added)
, deleted = SUM(deleted)</code></pre>
<p>In practice this can dramatically reduce the size (up to 100x)</p>
</section>
</section>
<section id="segments-1" class="level1">
<h1>Segments</h1>
<section id="sharding" class="level2">
<h2>Sharding</h2>
<p><small><code>sampleData_2011-01-01T01:00:00:00Z_2011-01-01T02:00:00:00Z_v1_0</code></small></p>
<pre><code>timestamp page ... nb added deleted
2011-01-01T01:00:00Z Cthulhu 1 20 45
2011-01-01T01:00:00Z Azatoth 1 30 106</code></pre>
<p><small><code>sampleData_2011-01-01T01:00:00:00Z_2011-01-01T02:00:00:00Z_v1_1</code></small></p>
<pre><code>timestamp page ... nb added deleted
2011-01-01T01:00:00Z Cthulhu 1 12 45
2011-01-01T01:00:00Z Azatoth 2 30 80</code></pre>
</section>
<section id="core-data-structure" class="level2">
<h2>Core Data Structure</h2>
<p><img src="img/druid-column-types.png" alt="Segment" /> </p>
<ul>
<li>dictionary</li>
<li>a bitmap for each value</li>
<li>a list of the column values encoded using the dictionary</li>
</ul>
</section>
<section id="example-1" class="level2">
<h2>Example</h2>
<pre><code>dictionary: { &quot;Cthulhu&quot;: 0
, &quot;Azatoth&quot;: 1 }
column data: [0, 0, 1, 1]
bitmaps (one for each value of the column):
value=&quot;Cthulhu&quot;: [1,1,0,0]
value=&quot;Azatoth&quot;: [0,0,1,1]</code></pre>
</section>
<section id="example-multiple-matches" class="level2">
<h2>Example (multiple matches)</h2>
<pre><code>dictionary: { &quot;Cthulhu&quot;: 0
, &quot;Azatoth&quot;: 1 }
column data: [0, [0,1], 1, 1]
bitmaps (one for each value of the column):
value=&quot;Cthulhu&quot;: [1,1,0,0]
value=&quot;Azatoth&quot;: [0,1,1,1]</code></pre>
</section>
<section id="real-time-ingestion" class="level2">
<h2>Real-time ingestion</h2>
<ul>
@@ -446,15 +429,13 @@ value=&quot;Ke$ha&quot;: [0,0,1,1]</code></pre>
</section>
<section id="real-time-ingestion-1" class="level2">
<h2>Real-time Ingestion</h2>
<pre><code>Task 1: [ Interval ][ Window ]
Task 2: [ ]
---------------------------------------&gt;
time</code></pre>
<p>Minimum indexing slots =<br />
Data Sources × Partitions × Replicas × 2</p>
<pre><code>Task 1: [ Interval ][ Window ]
Task 2: [ ]
-----------------------------------------------------&gt;
time</code></pre>
</section>
</section>
<section id="querying" class="level1">
<section id="querying-1" class="level1">
<h1>Querying</h1>
<section id="query-types" class="level2">
<h2>Query types</h2>
@@ -467,31 +448,30 @@ Task 2: [ ]
<li>Metadata queries</li>
</ul>
</section>
<section id="tip" class="level2">
<h2>Tip</h2>
<ul>
<li>Prefer <code>topN</code> over <code>groupBy</code></li>
<li>Prefer <code>timeseries</code> over <code>topN</code></li>
<li>Use limits (and priorities)</li>
</ul>
</section>
<section id="query-spec" class="level2">
<h2>Query Spec</h2>
<ul>
<li>Data source</li>
<li>Dimensions</li>
<li>Interval</li>
<li>Filters</li>
<li>Aggregations</li>
<li>Post Aggregations</li>
<li>Granularity</li>
<li>Context (query configuration)</li>
<li>Limit</li>
</ul>
</section>
<section id="examples" class="level2">
<h2>Example(s)</h2>
<p>TODO</p>
<pre><code>{&quot;queryType&quot;: &quot;groupBy&quot;,
&quot;dataSource&quot;: &quot;druidtest&quot;,
&quot;granularity&quot;: &quot;all&quot;,
&quot;dimensions&quot;: [],
&quot;aggregations&quot;: [
{&quot;type&quot;: &quot;count&quot;, &quot;name&quot;: &quot;rows&quot;},
{&quot;type&quot;: &quot;longSum&quot;, &quot;name&quot;: &quot;imps&quot;, &quot;fieldName&quot;: &quot;impressions&quot;},
{&quot;type&quot;: &quot;doubleSum&quot;, &quot;name&quot;: &quot;wp&quot;, &quot;fieldName&quot;: &quot;wp&quot;}
],
&quot;intervals&quot;: [&quot;2010-01-01T00:00/2020-01-01T00&quot;]}</code></pre>
</section>
<section id="result" class="level2">
<h2>Result</h2>
<pre><code>[ {
&quot;version&quot; : &quot;v1&quot;,
&quot;timestamp&quot; : &quot;2010-01-01T00:00:00.000Z&quot;,
&quot;event&quot; : {
&quot;imps&quot; : 5,
&quot;wp&quot; : 15000.0,
&quot;rows&quot; : 5
}
} ]</code></pre>
</section>
<section id="caching" class="level2">
<h2>Caching</h2>
@@ -505,21 +485,14 @@ Task 2: [ ]
<li>By segment and query</li>
<li><code>groupBy</code> is disabled on purpose!</li>
</ul></li>
<li>By default - local caching</li>
</ul>
</section>
<section id="load-rules" class="level2">
<h2>Load Rules</h2>
<ul>
<li>Can be defined</li>
<li>What can be set</li>
<li>By default: local caching</li>
</ul>
</section>
</section>
<section id="components" class="level1">
<h1>Components</h1>
<section id="druid-components" class="level2">
<h2>Druid Components</h2>
<section id="druid-components" class="level1">
<h1>Druid Components</h1>
<section id="druid-1" class="level2">
<h2>Druid</h2>
<ul>
<li>Real-time Nodes</li>
<li>Historical Nodes</li>
@@ -530,59 +503,34 @@ Task 2: [ ]
<li>Overlord</li>
<li>Middle Manager</li>
</ul></li>
<li>Deep Storage</li>
<li><p>Metadata Storage</p></li>
</ul>
</section>
<section id="also" class="level2">
<h2>Also</h2>
<ul>
<li>Deep Storage (S3, HDFS, ...)</li>
<li>Metadata Storage (SQL)</li>
<li>Load Balancer</li>
<li><p>Cache</p></li>
<li>Cache</li>
</ul>
</section>
<section id="coordinator" class="level2">
<h2>Coordinator</h2>
<p>Manage Segments</p>
</section>
<section id="real-time-nodes" class="level2">
<h2>Real-time Nodes</h2>
<ul>
<li>Pulling data in real-time</li>
<li>Indexing it</li>
</ul>
</section>
<section id="historical-nodes" class="level2">
<h2>Historical Nodes</h2>
<li>Real-time Nodes (pull data, index it)</li>
<li>Historical Nodes (keep old segments)</li>
<li>Broker Nodes (route queries to RT &amp; Hist. nodes, merge)</li>
<li>Coordinator (manage segments)</li>
<li>For indexing:
<ul>
<li>Keep historical segments</li>
</ul>
</section>
<section id="overlord" class="level2">
<h2>Overlord</h2>
<ul>
<li>Accepts tasks and distributes them to the middle managers</li>
</ul>
</section>
<section id="middle-manager" class="level2">
<h2>Middle Manager</h2>
<ul>
<li>Execute submitted tasks via Peons</li>
</ul>
</section>
<section id="broker-nodes" class="level2">
<h2>Broker Nodes</h2>
<ul>
<li>Route query to Real-time and Historical nodes</li>
<li>Merge results</li>
</ul>
</section>
<section id="deep-storage" class="level2">
<h2>Deep Storage</h2>
<ul>
<li>Segments backup (HDFS, S3, ...)</li>
<li>Overlord (distribute tasks to the middle managers)</li>
<li>Middle Manager (execute tasks via Peons)</li>
</ul></li>
</ul>
</section>
</section>
<section id="considerations-tools" class="level1">
<h1>Considerations &amp; Tools</h1>
<section id="when-not-to-choose-druid" class="level2">
<h2>When <em>not</em> to choose Druid</h2>
<section id="when-not-to-choose-druid" class="level1">
<h1>When <em>not</em> to choose Druid</h1>
<ul>
<li>Data is not time-series</li>
<li>Cardinality is <em>very</em> high</li>
@@ -590,21 +538,46 @@ Task 2: [ ]
<li>Setup cost must be avoided</li>
</ul>
</section>
<section id="graphite-metrics" class="level2">
<h2>Graphite (metrics)</h2>
<section id="graphite-metrics" class="level1">
<h1>Graphite (metrics)</h1>
<p><img src="img/graphite.png" alt="Graphite" />__</p>
<p><a href="http://graphite.wikidot.com">Graphite</a></p>
</section>
<section id="pivot-exploring-data" class="level2">
<h2>Pivot (exploring data)</h2>
<section id="pivot-exploring-data" class="level1">
<h1>Pivot (exploring data)</h1>
<p><img src="img/pivot.gif" alt="Pivot" /> </p>
<p><a href="https://github.com/implydata/pivot">Pivot</a></p>
</section>
<section id="caravel-exploring-data" class="level2">
<h2>Caravel (exploring data)</h2>
<section id="caravel" class="level1">
<h1>Caravel</h1>
<p><img src="img/caravel.png" alt="caravel" /> </p>
<p><a href="https://github.com/airbnb/caravel">Caravel</a></p>
</section>
<section id="conclusions" class="level1">
<h1>Conclusions</h1>
<section id="precompute-your-time-series" class="level2">
<h2>Precompute your time series?</h2>
<p><img src="img/wrong.jpg" alt="You&#39;re doing it wrong" /> </p>
</section>
<section id="dont-reinvent-it" class="level2">
<h2>Don't reinvent it</h2>
<ul>
<li>need a user-facing API</li>
<li>need time series on many dimensions</li>
<li>need real-time</li>
<li>big volume of data</li>
</ul>
</section>
<section id="druid-way-is-the-right-way" class="level2">
<h2>The Druid way is the right way!</h2>
<ol type="1">
<li>Push into Kafka</li>
<li>Add the right dimensions</li>
<li>Push into Druid</li>
<li>???</li>
<li>Profit!</li>
</ol>
</section>
</section>
</div>

Binary image file changed (300 KiB → 516 KiB).

druid/img/wrong.jpg (new binary file, 70 KiB)


@@ -1,3 +1,3 @@
<div id="footer">
<a href="yannesposito.com">Y</a>
<a href="http://yannesposito.com">Y</a>
</div>

index.beamer.pdf (new binary file)

index.html (new file, 36 lines)

@@ -0,0 +1,36 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="generator" content="pandoc">
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
<title></title>
<style type="text/css">code{white-space: pre;}</style>
<!--[if lt IE 9]>
<script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
<![endif]-->
<link rel="stylesheet" href="styling.css">
</head>
<body>
<p>Generated documents:</p>
<ul>
<li>Druid:
<ul>
<li><a href="druid/druid.html">druid doc</a></li>
<li><a href="druid/druid.pdf">druid pdf</a></li>
<li><a href="druid/druid.reveal.html">druid html pres</a></li>
<li><a href="druid/druid.beamer.pdf">druid pdf pres</a></li>
</ul></li>
<li>README:
<ul>
<li><a href="README.html">README doc</a></li>
<li><a href="README.pdf">README pdf</a></li>
<li><a href="README.reveal.html">README html pres</a></li>
<li><a href="README.beamer.pdf">README pdf pres</a></li>
</ul></li>
</ul>
<div id="footer">
<a href="http://yannesposito.com">Y</a>
</div>
</body>
</html>

index.md (new file, 12 lines)

@@ -0,0 +1,12 @@
Generated documents:
- Druid:
- [druid doc](druid/druid.html)
- [druid pdf](druid/druid.pdf)
- [druid html pres](druid/druid.reveal.html)
- [druid pdf pres](druid/druid.beamer.pdf)
- README:
- [README doc](README.html)
- [README pdf](README.pdf)
- [README html pres](README.reveal.html)
- [README pdf pres](README.beamer.pdf)

index.pdf (new binary file)

index.reveal.html (new file, 91 lines)

@@ -0,0 +1,91 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title></title>
<meta name="description" content="">
<meta name="apple-mobile-web-app-capable" content="yes" />
<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent" />
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">
<link rel="stylesheet" href=".reveal.js-3.2.0/css/reveal.css">
<link rel="stylesheet" href=".reveal.js-3.2.0/css/theme/default.css" id="theme">
<!-- For syntax highlighting -->
<link rel="stylesheet" href=".reveal.js-3.2.0/lib/css/zenburn.css">
<!-- If the query includes 'print-pdf', use the PDF print sheet -->
<script>
document.write( '<link rel="stylesheet" href=".reveal.js-3.2.0/css/print/' +
( window.location.search.match( /print-pdf/gi ) ? 'pdf' : 'paper' ) +
'.css" type="text/css" media="print">' );
</script>
<!--[if lt IE 9]>
<script src=".reveal.js-3.2.0/lib/js/html5shiv.js"></script>
<![endif]-->
</head>
<body>
<div class="reveal">
<!-- Any section element inside of this container is displayed as a slide -->
<div class="slides">
<section>
<h1></h1>
<p>
<h4></h4>
</p>
</section>
<p>Generated documents:</p>
<ul>
<li>Druid:
<ul>
<li><a href="druid/druid.html">druid doc</a></li>
<li><a href="druid/druid.pdf">druid pdf</a></li>
<li><a href="druid/druid.reveal.html">druid html pres</a></li>
<li><a href="druid/druid.beamer.pdf">druid pdf pres</a></li>
</ul></li>
<li>README:
<ul>
<li><a href="README.html">README doc</a></li>
<li><a href="README.pdf">README pdf</a></li>
<li><a href="README.reveal.html">README html pres</a></li>
<li><a href="README.beamer.pdf">README pdf pres</a></li>
</ul></li>
</ul>
</div>
<script src=".reveal.js-3.2.0/lib/js/head.min.js"></script>
<script src=".reveal.js-3.2.0/js/reveal.js"></script>
<script>
// Full list of configuration options available here:
// https://github.com/hakimel/reveal.js#configuration
Reveal.initialize({
controls: true,
progress: true,
history: true,
center: false,
// available themes are in /css/theme
theme: Reveal.getQueryHash().theme || 'default',
// default/cube/page/concave/zoom/linear/fade/none
transition: Reveal.getQueryHash().transition || 'linear',
// Optional libraries used to extend on reveal.js
dependencies: [
{ src: '/.reveal.js-3.2.0/lib/js/classList.js', condition: function() { return !document.body.classList; } },
{ src: '/.reveal.js-3.2.0/plugin/markdown/showdown.js', condition: function() { return !!document.querySelector( '[data-markdown]' ); } },
{ src: '/.reveal.js-3.2.0/plugin/markdown/markdown.js', condition: function() { return !!document.querySelector( '[data-markdown]' ); } },
{ src: '/.reveal.js-3.2.0/plugin/highlight/highlight.js', async: true, callback: function() { hljs.initHighlightingOnLoad(); } },
{ src: '/.reveal.js-3.2.0/plugin/zoom-js/zoom.js', async: true, condition: function() { return !!document.body.classList; } },
{ src: '/.reveal.js-3.2.0/plugin/notes/notes.js', async: true, condition: function() { return !!document.body.classList; } }
]
});
</script>
</body>
</html>