diff --git a/.gitignore b/.gitignore index d775b26..d5f5270 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ build/ compile *.hi *.o +.DS_Store diff --git a/.reveal.js-3.2.0/css/theme/solarized-dark.css b/.reveal.js-3.2.0/css/theme/solarized-dark.css index bd0af83..e45d10d 100644 --- a/.reveal.js-3.2.0/css/theme/solarized-dark.css +++ b/.reveal.js-3.2.0/css/theme/solarized-dark.css @@ -84,7 +84,8 @@ body { .reveal strong, .reveal b { - font-weight: bold; } + font-weight: bold; + color: #b58900; } .reveal em { font-style: italic; } diff --git a/README.beamer.pdf b/README.beamer.pdf index 736c9a5..c098826 100644 Binary files a/README.beamer.pdf and b/README.beamer.pdf differ diff --git a/README.pdf b/README.pdf index bdadc41..e4b740c 100644 Binary files a/README.pdf and b/README.pdf differ diff --git a/druid/druid.beamer.pdf b/druid/druid.beamer.pdf index 7045f65..d218971 100644 Binary files a/druid/druid.beamer.pdf and b/druid/druid.beamer.pdf differ diff --git a/druid/druid.html b/druid/druid.html index 65ad7bb..0ead864 100644 --- a/druid/druid.html +++ b/druid/druid.html @@ -60,52 +60,43 @@
+
timestamp page ... added deleted
-2011-01-01T00:01:35Z Justin Bieber 10 65
-2011-01-01T00:03:63Z Justin Bieber 15 62
-2011-01-01T01:04:51Z Justin Bieber 32 45
-2011-01-01T01:01:00Z Ke$ha 17 87
-2011-01-01T01:02:00Z Ke$ha 43 99
-2011-01-01T02:03:00Z Ke$ha 12 53
-timestamp page ... nb added deleted
-2011-01-01T00:00:00Z Justin Bieber 2 25 127
-2011-01-01T01:00:00Z Justin Bieber 1 32 45
-2011-01-01T01:00:00Z Ke$ha 2 60 186
-2011-01-01T02:00:00Z Ke$ha 1 12 53
-GROUP BY timestamp, page, nb, added, deleted
- :: nb = COUNT(1)
- , added = SUM(added)
- , deleted = SUM(deleted)
-In practice can dramatically reduce the size (up to x100)
-sampleData_2011-01-01T01:00:00:00Z_2011-01-01T02:00:00:00Z_v1_0
2011-01-01T01:00:00Z Justin Bieber 1 20 45
-2011-01-01T01:00:00Z Ke$ha 1 30 106
-sampleData_2011-01-01T01:00:00:00Z_2011-01-01T02:00:00:00Z_v1_0
2011-01-01T01:00:00Z Justin Bieber 1 12 45
-2011-01-01T01:00:00Z Ke$ha 2 30 80
--
{ "Justin Bieber": 0
-, "Ke$ha": 1
-}
-[ 0
-, 0
-, 1
-, 1
-]
-one for each value of the column
-value="Justin Bieber": [1,1,0,0]
-value="Ke$ha": [0,0,1,1]
-{"USA" 1, "Canada" 2, "Mexico" 3, ...}
"USA" -> [0 1 0 0 1 1 0 0 0]
[2,1,3,15,1,1,2,8,7]
timestamp page ... added deleted
+2011-01-01T00:01:35Z Cthulhu 10 65
+2011-01-01T00:03:53Z Cthulhu 15 62
+2011-01-01T01:04:51Z Cthulhu 32 45
+2011-01-01T01:01:00Z Azatoth 17 87
+2011-01-01T01:02:00Z Azatoth 43 99
+2011-01-01T02:03:00Z Azatoth 12 53
+timestamp page ... nb added deleted
+2011-01-01T00:00:00Z Cthulhu 2 25 127
+2011-01-01T01:00:00Z Cthulhu 1 32 45
+2011-01-01T01:00:00Z Azatoth 2 60 186
+2011-01-01T02:00:00Z Azatoth 1 12 53
+GROUP BY timestamp, page
+ :: nb = COUNT(1)
+ , added = SUM(added)
+ , deleted = SUM(deleted)
+In practice, this can dramatically reduce the data size (by up to 100×)
+sampleData_2011-01-01T01:00:00:00Z_2011-01-01T02:00:00:00Z_v1_0
timestamp page ... nb added deleted
+2011-01-01T01:00:00Z Cthulhu 1 20 45
+2011-01-01T01:00:00Z Azatoth 1 30 106
+sampleData_2011-01-01T01:00:00:00Z_2011-01-01T02:00:00:00Z_v1_0
timestamp page ... nb added deleted
+2011-01-01T01:00:00Z Cthulhu 1 12 45
+2011-01-01T01:00:00Z Azatoth 2 30 80
++
dictionary: { "Cthulhu": 0
+ , "Azatoth": 1 }
+
+column data: [0, 0, 1, 1]
+
+bitmaps (one for each value of the column):
+value="Cthulhu": [1,1,0,0]
+value="Azatoth": [0,0,1,1]
+dictionary: { "Cthulhu": 0
+ , "Azatoth": 1 }
+
+column data: [0, [0,1], 1, 1]
+
+bitmaps (one for each value of the column):
+value="Cthulhu": [1,1,0,0]
+value="Azatoth": [0,1,1,1]
Task 1: [ Interval ][ Window ]
-Task 2: [ ]
---------------------------------------->
- time
-Minimum indexing slots =
- Data Sources × Partitions × Replicas × 2
Task 1: [ Interval ][ Window ]
+Task 2: [ ]
+----------------------------------------------------->
+ time
+topN
over groupBy
timeseries
over topN
TODO
+{"queryType": "groupBy",
+ "dataSource": "druidtest",
+ "granularity": "all",
+ "dimensions": [],
+ "aggregations": [
+ {"type": "count", "name": "rows"},
+ {"type": "longSum", "name": "imps", "fieldName": "impressions"},
+ {"type": "doubleSum", "name": "wp", "fieldName": "wp"}
+ ],
+ "intervals": ["2010-01-01T00:00/2020-01-01T00"]}
+[ {
+ "version" : "v1",
+ "timestamp" : "2010-01-01T00:00:00.000Z",
+ "event" : {
+ "imps" : 5,
+ "wp" : 15000.0,
+ "rows" : 5
+ }
+} ]
groupBy
is disabled on purpose!Metadata Storage
Cache
Manage Segments
-__
--
+
+
+
timestamp page ... added deleted
-2011-01-01T00:01:35Z Justin Bieber 10 65
-2011-01-01T00:03:63Z Justin Bieber 15 62
-2011-01-01T01:04:51Z Justin Bieber 32 45
-2011-01-01T01:01:00Z Ke$ha 17 87
-2011-01-01T01:02:00Z Ke$ha 43 99
-2011-01-01T02:03:00Z Ke$ha 12 53
-timestamp page ... nb added deleted
-2011-01-01T00:00:00Z Justin Bieber 2 25 127
-2011-01-01T01:00:00Z Justin Bieber 1 32 45
-2011-01-01T01:00:00Z Ke$ha 2 60 186
-2011-01-01T02:00:00Z Ke$ha 1 12 53
-GROUP BY timestamp, page, nb, added, deleted
- :: nb = COUNT(1)
- , added = SUM(added)
- , deleted = SUM(deleted)
-In practice can dramatically reduce the size (up to x100)
-{"USA" 1, "Canada" 2, "Mexico" 3, ...}
"USA" -> [0 1 0 0 1 1 0 0 0]
[2,1,3,15,1,1,2,8,7]
timestamp page ... added deleted
+2011-01-01T00:01:35Z Cthulhu 10 65
+2011-01-01T00:03:53Z Cthulhu 15 62
+2011-01-01T01:04:51Z Cthulhu 32 45
+2011-01-01T01:01:00Z Azatoth 17 87
+2011-01-01T01:02:00Z Azatoth 43 99
+2011-01-01T02:03:00Z Azatoth 12 53
+timestamp page ... nb added deleted
+2011-01-01T00:00:00Z Cthulhu 2 25 127
+2011-01-01T01:00:00Z Cthulhu 1 32 45
+2011-01-01T01:00:00Z Azatoth 2 60 186
+2011-01-01T02:00:00Z Azatoth 1 12 53
+GROUP BY timestamp, page
+ :: nb = COUNT(1)
+ , added = SUM(added)
+ , deleted = SUM(deleted)
+In practice, this can dramatically reduce the data size (by up to 100×)
++
dictionary: { "Cthulhu": 0
+ , "Azatoth": 1 }
+
+column data: [0, 0, 1, 1]
+
+bitmaps (one for each value of the column):
+value="Cthulhu": [1,1,0,0]
+value="Azatoth": [0,0,1,1]
+dictionary: { "Cthulhu": 0
+ , "Azatoth": 1 }
+
+column data: [0, [0,1], 1, 1]
+
+bitmaps (one for each value of the column):
+value="Cthulhu": [1,1,0,0]
+value="Azatoth": [0,1,1,1]
+Task 1: [ Interval ][ Window ]
-Task 2: [ ]
---------------------------------------->
- time
-Minimum indexing slots =
- Data Sources × Partitions × Replicas × 2
Task 1: [ Interval ][ Window ]
+Task 2: [ ]
+----------------------------------------------------->
+ time
topN
over groupBy
timeseries
over topN
TODO
+{"queryType": "groupBy",
+ "dataSource": "druidtest",
+ "granularity": "all",
+ "dimensions": [],
+ "aggregations": [
+ {"type": "count", "name": "rows"},
+ {"type": "longSum", "name": "imps", "fieldName": "impressions"},
+ {"type": "doubleSum", "name": "wp", "fieldName": "wp"}
+ ],
+ "intervals": ["2010-01-01T00:00/2020-01-01T00"]}
+[ {
+ "version" : "v1",
+ "timestamp" : "2010-01-01T00:00:00.000Z",
+ "event" : {
+ "imps" : 5,
+ "wp" : 15000.0,
+ "rows" : 5
+ }
+} ]
groupBy
is disabled on purpose!Metadata Storage
Cache
Manage Segments
-__
+
Generated documents:
+