MongoDB With Style
-
Upload
gabriele-lana -
Category
Technology
-
view
6.151 -
download
0
description
Transcript of MongoDB With Style
Style
query
& design
scale
mongo console
$ ~/Work/opt/mongodb-1.6.5/bin/mongod \
    --dbpath=~/Work/src/nosqlday/db/mongodb.01 \
    --logpath=~/Work/src/nosqlday/log/mongodb.01 \
    --fork --port 30001
$ ~/Work/opt/mongodb-1.6.5/bin/mongo localhost:30001
MongoDB shell version: 1.6.5
connecting to: localhost:30001/test
> use nosqlday
switched to db nosqlday
> db.getCollectionNames()
[ "system.indexes", "users" ]
> db.users.find({ "name": "Gabriele" })
{ "_id" : ObjectId("4d8706767bb037a8a8f98db2"), "name" : "Gabriele", "surname" : "Lana", "job" : "softwarecraftsman" }
> exit
bye
ruby driver
require "mongo"
db = Mongo::Connection.new("localhost", 30001).db("nosqlday")
puts "Collections:"
db.collections.each do |collection|
  puts "\t#{collection.name}"
end

puts "Gabriele:"
db["users"].find(:name => "Gabriele").each do |user|
  puts "\t#{user["_id"]}"
end
db.connection.close
require "mongo"
db = Mongo::Connection.new("localhost", 30001).db("nosqlday")
puts "Collections:"db.collections.each do |collection| puts "\t#{collection.name}"end
puts "Gabriele:"db["users"].find(:name => "Gabriele").each do |user| puts "\t#{user["_id"]}"end
db.connection.close
ruby driver
$ ruby src/connect.rb
Collections:
	users
	system.indexes
Gabriele:
	4d8706767bb037a8a8f98db2
Style
query
& design
scale
Style
know your driver
document object mapper
smart driver
mongo
puts "Gabriele:"
db["users"].find(:name => "Gabriele").each do |user|
  puts "\t#{user["_id"]}"
end

puts "Gabriele:"
db["users"].select{|user| user["name"] == "Gabriele"}.each do |user|
  puts "\t#{user["_id"]}"
end
smart driver
mongo
puts "Gabriele:"db["users"].find(:name => "Gabriele").each do |user| puts "\t#{user["_id"]}"end
puts "Gabriele:"db["users"].select{|user| user["name"] == "Gabriele"}.each do |user| puts "\t#{user["_id"]}"end
smart driver
mongo
$ ruby src/find_vs_select.rb
Gabriele:
	4d8706767bb037a8a8f98db2
Gabriele:
	4d8706767bb037a8a8f98db2
puts "Gabriele:"db["users"].find(:name => "Gabriele").each do |user| puts "\t#{user["_id"]}"end
puts "Gabriele:"db["users"].select{|user| user["name"] == "Gabriele"}.each do |user| puts "\t#{user["_id"]}"end
smart driver
mongo
Style
incremental design
based on application behavior
the best design is the one where needed data can be easily extracted
the way you need to query your data should influence your design
Style
incremental design
based on application monitoring
monitoring and adapting is better than doing it right the first time
...actually the first time is the worst time :-)
monitoring & adapting
> db.setProfilingLevel(1, 5)
{ "was" : 1, "slowms" : 100, "ok" : 1 }
// after product usage find problematic queries
> db.system.profile.find().sort({millis: -1})
{
  "ts": "Mon Mar 21 2011 14:30:56 GMT+0100 (CET)",
  "info": "query pomodorist.pomodori reslen:202 nscanned:26950 query: { $query: { task_id: ObjectId('4d6f1d3931f2386e9c089796') }} nreturned:1",
  "millis": 17
}
monitoring & adapting
> db.pomodori.find({ $query: { task_id: ObjectId('4d6f1d3931f2386e9c089796') }, $explain: true}) { "cursor": "BasicCursor", "nscanned": 26950, "nscannedObjects": 26950, "n": 1, "millis": 17, "indexBounds": { }, "allPlans": [ { "cursor" : "BasicCursor", "indexBounds" : { } } ]}
monitoring & adapting
> db.pomodori.ensureIndex({"task_id": 1}) > db.pomodori.find({ $query: { task_id: ObjectId('4d6f1d3931f2386e9c089796') }, $explain: true})
{ "cursor": "BtreeCursor task_id_1", "nscanned": 1, "nscannedObjects": 1, "n": 1, "millis": 0, "indexBounds": { "task_id": [ [ ObjectId("4d6f1d3931f2386e9c089796"), ObjectId("4d6f1d3931f2386e9c089796") ] ]}, "allPlans": [...]}
Style
query
& design
scale
use $in operator for batch query
query
& design
retrieve all objects with $in
users = [
  {:name => "Gabriele", :surname => "Lana", :job => "softwarecraftsman"},
  {:name => "Federico", :surname => "Galassi", :job => "softwarecraftsman"},
  {:name => "Giordano", :surname => "Scalzo", :job => "softwarecraftsman"}
]
ids = users.map{|user| db["users"].insert(user)}
puts ids.map{|id| db["users"].find_one(:_id => id)}
retrieve all objects with $in
users = [{:name => "Gabriele", :surname => "Lana", :job => "softwarecraftsman"},{:name => "Federico", :surname => "Galassi", :job => "softwarecraftsman"},{:name => "Giordano", :surname => "Scalzo", :job => "softwarecraftsman"}]
ids = users.map{|user| db["users"].insert(user)}
puts ids.map{|id| db["users"].find_one(:_id => id)}
$ ruby src/find_by_all_ids.rb
{"_id"=>BSON::ObjectId('4d87605731f23824a0000001'), ...}
{"_id"=>BSON::ObjectId('4d87605731f23824a0000002'), ...}
{"_id"=>BSON::ObjectId('4d87605731f23824a0000003'), ...}
retrieve all objects with $in
users = [{:name => "Gabriele", :surname => "Lana", :job => "softwarecraftsman"},{:name => "Federico", :surname => "Galassi", :job => "softwarecraftsman"},{:name => "Giordano", :surname => "Scalzo", :job => "softwarecraftsman"}]
ids = users.map{|user| db["users"].insert(user)}
puts ids.map{|id| db["users"].find_one(:_id => id)}
retrieve all objects with $in
users = [{:name => "Gabriele", :surname => "Lana", :job => "softwarecraftsman"},{:name => "Federico", :surname => "Galassi", :job => "softwarecraftsman"},{:name => "Giordano", :surname => "Scalzo", :job => "softwarecraftsman"}]
ids = users.map{|user| db["users"].insert(user)}
ids = db["users"].insert(users)

puts ids.map{|id| db["users"].find_one(:_id => id)}
puts db["users"].find(:_id => {:$in => ids}).all
retrieve all objects with $in
users = [{:name => "Gabriele", :surname => "Lana", :job => "softwarecraftsman"},{:name => "Federico", :surname => "Galassi", :job => "softwarecraftsman"},{:name => "Giordano", :surname => "Scalzo", :job => "softwarecraftsman"}]
ids = users.map{|user| db["users"].insert(user)}ids = db["users"].insert(users)
puts ids.map{|id| db["users"].find_one(:_id => id)}puts db["users"].find(:_id => {:$in => ids}).all
$ ruby src/find_by_all_ids.rb {"_id"=>BSON::ObjectId('4d87605731f23824a0000001'), ...}{"_id"=>BSON::ObjectId('4d87605731f23824a0000002'), ...}{"_id"=>BSON::ObjectId('4d87605731f23824a0000003'), ...}
use conventions to
build smart object
identifiers
query
& design
conventions are fun to play with
> db.user_scores.find({}, {"_id": 1})
{ "_id" : "4d873ce631f238241d00000d-day-20091106" }
{ "_id" : "4d873ce631f238241d00000d-week-200944" }
{ "_id" : "4d873ce631f238241d00000d-month-200911" }
{ "_id" : "4d873ce631f238241d00000d-year-2009" }
{ "_id" : "4d873ce631f238241d00000d-user" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-advertising" }
{ "_id" : "4d873ce631f238241d00000d-week-200944-advertising" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-art" }
{ "_id" : "4d873ce631f238241d00000d-week-200944-art" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-artist" }
{ "_id" : "4d873ce631f238241d00000d-week-200944-artist" }
{ "_id" : "4d873ce631f238241d00000d-day-20091106-information" }
conventions are fun to play with
> db.user_scores.findOne(
  {"_id": "4d873ce631f238241d00000d-day-20091106"}
)
{
  "_id" : "4d873ce631f238241d00000d-day-20091106",
  "pomodori" : 15,
  "pomodori_squashed" : 3,
  "breaks" : 7,
  "tasks_created" : 8,
  "tasks_done" : 6,
  "estimation_accuracy" : 0,
  "seconds_of_focused_time" : 22500,
  "seconds_of_wasted_time" : 1999,
  "seconds_of_breaks" : 8820
}
conventions are fun to play with(user scores in day per tag)
> db.user_scores.find( {"_id": /^4d873ce631f238241d00000d-‐day-‐20091106-‐/}, {"_id": 1} ) { "_id" : "4d873ce631f238241d00000d-‐day-‐20091106-‐advertising" }{ "_id" : "4d873ce631f238241d00000d-‐day-‐20091106-‐art" }{ "_id" : "4d873ce631f238241d00000d-‐day-‐20091106-‐artist" }{ "_id" : "4d873ce631f238241d00000d-‐day-‐20091106-‐blogging" }{ "_id" : "4d873ce631f238241d00000d-‐day-‐20091106-‐culture" }{ "_id" : "4d873ce631f238241d00000d-‐day-‐20091106-‐html" }{ "_id" : "4d873ce631f238241d00000d-‐day-‐20091106-‐illustration" }{ "_id" : "4d873ce631f238241d00000d-‐day-‐20091106-‐information" }{ "_id" : "4d873ce631f238241d00000d-‐day-‐20091106-‐inspiration" }{ "_id" : "4d873ce631f238241d00000d-‐day-‐20091106-‐marketing" }{ "_id" : "4d873ce631f238241d00000d-‐day-‐20091106-‐movies" }{ "_id" : "4d873ce631f238241d00000d-‐day-‐20091106-‐resources" }{ "_id" : "4d873ce631f238241d00000d-‐day-‐20091106-‐technology" }{ "_id" : "4d873ce631f238241d00000d-‐day-‐20091106-‐tool" }{ "_id" : "4d873ce631f238241d00000d-‐day-‐20091106-‐tutorials" }
conventions are fun to play with(list of tags per day)
> db.user_scores.find(
  {"_id": /^4d873ce631f238241d00000d-day-20091106-/},
  {"_id": 1}
).map(function(document) {
  return document._id.replace(
    "4d873ce631f238241d00000d-day-20091106-", ""
  )
})
[ "advertising", "art", "artist", "blogging", "culture", "html", "illustration", "information", ...]
conventions are fun to play with(anchored regexp uses indexes)
> db.user_scores.find( {"_id": /^4d873ce631f238241d00000d-‐day-‐20091106-‐/}, {"_id": 1} ).explain() { "cursor" : "BtreeCursor _id_ multi", "nscanned" : 15, "nscannedObjects" : 15, "n" : 15, "millis" : 0, "indexBounds" : { "_id" : [ [ "4d873ce631f238241d00000d-‐day-‐20091106-‐", "4d873ce631f238241d00000d-‐day-‐20091106." ], [ /^4d873ce631f238241d00000d-‐day-‐20091106-‐/, /^4d873ce631f238241d00000d-‐day-‐20091106-‐/ ] ]
conventions are fun to play with(anchored regexp uses indexes)
> db.user_scores.find( {"_id": /4d873ce631f238241d00000d-‐day-‐20091106-‐/}, {"_id": 1} ).explain()
{ "cursor" : "BtreeCursor _id_ multi", "nscanned" : 109349, "nscannedObjects" : 15, "n" : 15, "millis" : 217, "indexBounds" : { "_id" : [ ... ] }}
use “group” method to do small
computations without fetching related
documents
query
& design
group to compute data in mongo(inject client side)
days = [ 20091110, 20091111, 20091112 ]
scores_id = %r{^4d87d00931f2380c7700000d-day-(#{days.join("|")})$}
scores = db["user_scores"].find(:_id => scores_id)
pomodori = scores.inject(0) do |pomodori, scores|
  pomodori + scores["pomodori"]
end
puts "Pomodori in days #{days.join(",")}: #{pomodori}"
group to compute data in mongo(inject client side)
days = [ 20091110, 20091111, 20091112 ]scores_id = %r{^4d87d00931f2380c7700000d-day-(#{days.join("|")})$}
scores = db["user_scores"].find(:_id => scores_id)
pomodori = scores.inject(0) do |pomodori, scores| pomodori + scores["pomodori"]end
puts "Pomodori in days #{days.join(",")}: #{pomodori}"
$ ruby src/inject_for_reduce.rb Pomodori in days 20091110,20091111,20091112: 36
group to compute data in mongo(group server side)
days = [ 20091110, 20091111, 20091112 ]scores_id = %r{^4d87d00931f2380c7700000d-day-(#{days.join("|")})$}
result = db["user_scores"].group( :cond => { :_id => scores_id }, :initial => { :pomodori => 0 }, :reduce => <<-EOF function(document, result) { result.pomodori += document.pomodori } EOF)
puts "Pomodori in days #{days.join(",")}: #{result.first["pomodori"]}"
group to compute data in mongo(group server side)
days = [ 20091110, 20091111, 20091112 ]scores_id = %r{^4d87d00931f2380c7700000d-day-(#{days.join("|")})$}
result = db["user_scores"].group( :cond => { :_id => scores_id }, :initial => { :pomodori => 0 }, :reduce => <<-EOF function(document, result) { result.pomodori += document.pomodori } EOF)
puts "Pomodori in days #{days.join(",")}: #{result.first["pomodori"]}"
$ ruby src/group_for_reduce.rb Pomodori in days 20091110,20091111,20091112: 36
group to compute data in mongo(ex. sum pomodori by tag “ruby”)
result = db["user_scores"].group( :cond => { :_id => /^4d87d00931f2380c7700000d-day-\d{8}-ruby$/ }, :initial => { :pomodori => 0, :days => 0 }, :reduce => <<-EOF function(document, result) { result.days += 1 result.pomodori += document.pomodori } EOF).first
puts "In #{result["days"]} days, #{result["pomodori"]} done for ruby"
group to compute data in mongo(ex. sum pomodori by tag “ruby”)
result = db["user_scores"].group( :cond => { :_id => /^4d87d00931f2380c7700000d-day-\d{8}-ruby$/ }, :initial => { :pomodori => 0, :days => 0 }, :reduce => <<-EOF function(document, result) { result.days += 1 result.pomodori += document.pomodori } EOF).first
puts "In #{result["days"]} days, #{result["pomodori"]} pomodori"
$ ruby src/group_for_ruby_tag.rb In 43 days, 45 pomodori
group to compute data in mongo(ex. sum pomodori by tag “ruby”)
> db.user_scores.find({
  "_id": /^4d87d00931f2380c7700000d-day-\d{8}-ruby$/
}).explain()
{ "cursor" : "BtreeCursor _id_ multi", "nscanned" : 43, "nscannedObjects" : 43, "n" : 43, "millis" : 3, "indexBounds" : { "_id" : [...] }}
create indexes on arrays to create local
reverse indexes in documents
query
& design
reverse index in place(an array could be indexed)
> db.tasks.find({ "tags": { $in: [ "nosqlday" ] } }) { "_id" : ObjectId("4d7de446175ca8243d000004"), "tags" : [ "nosqlday" ], "description" : "#nosqlday keynote", "is_recurrent" : false, "estimated" : 0, "worked_in" : [ "Mon Mar 14 2011 00:00:00 GMT+0100 (CET)", "Tue Mar 15 2011 00:00:00 GMT+0100 (CET)" ], "done_at" : "Tue Mar 15 2011 13:05:03 GMT+0100 (CET)", "todo_at" : null, "created_at" : "Mon Mar 14 2011 10:47:50 GMT+0100 (CET)", "updated_at" : "Tue Mar 15 2011 13:05:03 GMT+0100 (CET)", "keywords": [ "nosqldai", "keynot" ], "user_id": ObjectId("4d53996c137ce423ff000001"), "annotations" : [ ]}
reverse index in place(an array could be indexed)
> db.tasks.getIndexes()[ { "name" : "_id_", "ns" : "app435386.tasks", "key" : { "_id" : 1 } }, { "name" : "tags_1", "ns" : "app435386.tasks", "key" : { "tags" : 1 }, "unique" : false }, ...]
reverse index in place(container for deduced data, array)
db["orders"].insert({ :placed_at => [ now.strftime("%Y"), # year: "2011" now.strftime("%Y%m"), # month: "201103" now.strftime("%Yw%U"), # week: "2011w11" now.strftime("%Y%m%d") # day: "20110316" ], :user_id => user, :items => items_in_order.map{|item| item[:id]}, :total => items_in_order.inject(0){|total,item| total += item[:price]}})
# ...
db["orders"].ensure_index([["placed_at", Mongo::DESCENDING]])
> db.orders.findOne()
{ "_id" : ObjectId("4d88bf1f31f23812de0003fd"), "placed_at" : [ "2011", "201103", "2011w11", "20110316" ], "user_id" : ObjectId("4d88bf1f31f23812de0003e9"), "items" : [ ObjectId("4d88bf1f31f23812de0003da"), ObjectId("4d88bf1f31f23812de000047"), ObjectId("4d88bf1f31f23812de000078"), ObjectId("4d88bf1f31f23812de000068"), ObjectId("4d88bf1f31f23812de000288") ], "total" : 3502}
reverse index in place(container for deduced data, array)
> db.orders.find({ "placed_at": "20110310" }).count()77
> db.orders.find({ "placed_at": "20110310" }).explain()
{ "cursor" : "BtreeCursor placed_at_-1", "nscanned" : 77, "nscannedObjects" : 77, "n" : 77, "millis" : 0, "indexBounds" : { "placed_at" : [ [ "20110310", "20110310" ] ] }}
reverse index in place(container for deduced data, array)
reverse index in place(container for deduced data, hash)
db["orders"].insert({ :placed_at => [ { :year => now.strftime("%Y") }, { :month => now.strftime("%Y%m") }, { :week => now.strftime("%Y%U") }, { :day => now.strftime("%Y%m%d") } ], :user_id => user, :items => items_in_order.map{|item| item[:id]}, :total => items_in_order.inject(0){|total,item| total += item[:price]} })
# ...
db["orders"].ensure_index([["placed_at", Mongo::DESCENDING]])
> db.orders.findOne() { "_id" : ObjectId("4d88c31531f23812fe0003ea"), "placed_at" : [ { "year" : "2009" }, { "month" : "200911" }, { "week" : "200945" }, { "day" : "20091109" } ], "user_id" : ObjectId("4d88c31531f23812fe0003e9"), "items" : [ ObjectId("4d88c31531f23812fe00013f"), ObjectId("4d88c31531f23812fe000176"), ObjectId("4d88c31531f23812fe0003e2"), ObjectId("4d88c31531f23812fe0003d1"), ObjectId("4d88c31531f23812fe0001c1"), ObjectId("4d88c31531f23812fe000118"), ObjectId("4d88c31531f23812fe00031d") ], "total" : 10149}
reverse index in place(container for deduced data, hash)
> db.orders.find({ "placed_at.week": "201101" }).count() 331
> db.orders.find({ "placed_at.week": "201101" }).explain() { "cursor" : "BasicCursor", "nscanned" : 22374, "nscannedObjects" : 22374, "n" : 331, "millis" : 23, "indexBounds" : { }}
reverse index in place(container for deduced data, hash)
> db.orders.find({ "placed_at": { "week": "201101" }}).count() 331
> db.orders.find({ "placed_at": { "week": "201101" }}).explain(){ "cursor" : "BtreeCursor placed_at_-‐1", "nscanned" : 331, "nscannedObjects" : 331, "n" : 331, "millis" : 0, "indexBounds" : { "placed_at" : [ [ { "week" : "2011w01" }, { "week" : "2011w01" } ] ] }}
reverse index in place(container for deduced data, hash)
use dates but be aware of
some pitfalls
query
& design
db["orders"].insert({ :placed_at => now, :user_id => user, :items => items_in_order.map{|item| item[:id]}, :total => items_in_order.inject(0){|total,item| total += item[:price]} })
# ...
db["orders"].ensure_index([["placed_at", Mongo::DESCENDING]])
plain dates are good too
> db.orders.findOne() { "_id" : ObjectId("4d88d1f931f23813a10003ea"), "placed_at" : "Mon Nov 09 2009 08:00:00 GMT+0100 (CET)", "user_id" : ObjectId("4d88d1f931f23813a10003e9"), "items" : [ ObjectId("4d88d1f931f23813a100016d"), ObjectId("4d88d1f931f23813a1000346"), ObjectId("4d88d1f931f23813a10001e7"), ObjectId("4d88d1f931f23813a10000db"), ObjectId("4d88d1f931f23813a1000091"), ObjectId("4d88d1f931f23813a10001c1"), ObjectId("4d88d1f931f23813a10001d3"), ObjectId("4d88d1f931f23813a100031b"), ObjectId("4d88d1f931f23813a1000130") ], "total" : 5871}
plain dates are good too
> db.orders.find({ "placed_at": { $gte: new Date(2011,2,10), $lt: new Date(2011,2,11) } }).explain()
{ "cursor" : "BtreeCursor placed_at_-1", "nscanned" : 53, "nscannedObjects" : 53, "n" : 53, "millis" : 0, "indexBounds" : { "placed_at" : [ [ "Fri Mar 11 2011 00:00:00 GMT+0100 (CET)", "Thu Mar 10 2011 00:00:00 GMT+0100 (CET)" ] ] }
plain dates are good too
# find all mondays of the year
now = Time.now.beginning_of_year
now += 1.day until now.monday?
mondays = [ now ]
mondays << now += 7.days while now.year == Time.now.year

# find all orders placed on mondays
query = {
  :$or => mondays.map do |day|
    { :placed_at => {
        :$gte => day.beginning_of_day,
        :$lte => day.end_of_day
    } }
  end
}
puts query
plain dates are good too, but...(total sold on this year’s mondays)
# find all mondays of the yearnow = Time.now.beginning_of_year
now += 1.day until now.monday?mondays = [ now ]mondays << now += 7.days while now.year == Time.now.year
# find all orders placed on mondaysquery = { :$or => mondays.map do |day| { :placed_at => { :$gte => day.beginning_of_day, :$lte => day.end_of_day } } end}
puts query
$ ruby src/orders_on_mondays.rb
{:$or=>[
  {:placed_at=>{
    :$gte=>2011-01-03 00:00:00 +0100,
    :$lte=>2011-01-03 23:59:59 +0100
  }},
  {:placed_at=>{
    :$gte=>2011-01-10 00:00:00 +0100,
    :$lte=>2011-01-10 23:59:59 +0100
  }},
  {:placed_at=>{
    :$gte=>2011-01-17 00:00:00 +0100,
    :$lte=>2011-01-17 23:59:59 +0100
  }},
  ...
]}
plain dates are good too, but...(total sold on this year’s mondays)
db["orders"].find({ :$or => mondays.map do |day| { :placed_at => { :$gte => day.beginning_of_day, :$lte => day.end_of_day } } end})
plain dates are good too, but...(it works but it’s too slooow)
> db.orders.find({ $or: [ "placed_at":{ $gte: new Date(2011,2,3), $lt: new Date(2011,2,4) }, "placed_at":{ $gte: new Date(2011,2,10), $lt: new Date(2011,2,11) } ] }).explain()
{ "clauses" : [{ "cursor" : "BtreeCursor placed_at_-‐1", "indexBounds" : { "placed_at" : [[ "Tue Mar 3 2011 00:00:00 GMT+0100 (CET)", "Wed Mar 4 2011 00:00:00 GMT+0100 (CET)" ]]} }, { "cursor" : "BtreeCursor placed_at_-‐1", "indexBounds" : { "placed_at" : [[ "Tue Mar 10 2011 00:00:00 GMT+0100 (CET)", "Wed Mar 11 2011 00:00:00 GMT+0100 (CET)"
plain dates are good too, but...(why it’s too slow)
> db.orders.findOne()
{ "_id" : ObjectId("4d88bf1f31f23812de0003fd"), "placed_at" : [ "2011", "201103", "2011w11", "20110316" ], "user_id" : ObjectId("4d88bf1f31f23812de0003e9"), "items" : [ ObjectId("4d88bf1f31f23812de0003da"), ObjectId("4d88bf1f31f23812de000047"), ObjectId("4d88bf1f31f23812de000078"), ObjectId("4d88bf1f31f23812de000068"), ObjectId("4d88bf1f31f23812de000288") ], "total" : 3502}
with destructured dates(total sold on mondays this year)
now = Time.now.beginning_of_year
now += 1.day until now.monday?mondays = [ now ]mondays << now += 7.days while now.year == Time.now.year
orders = db["orders"].find({ :placed_at => { :$in => mondays.map {|day| day.strftime("%Y%m%d")} }})
puts orders.explain
with destructured dates(total sold on mondays this year)
with destructured dates(total sold on mondays this year)
now = Time.now.beginning_of_year
now += 1.day until now.monday?mondays = [ now ]mondays << now += 7.days while now.year == Time.now.year
orders = db["orders"].find({ :placed_at => { :$in => mondays.map {|day| day.strftime("%Y%m%d")} }})
puts orders.explain
$ ruby src/orders_on_mondays.rb
{ "cursor"=>"BtreeCursor placed_at_-1 multi", "nscanned"=>744, "nscannedObjects"=>744, "n"=>744, "millis"=>1, "indexBounds"=>{ "placed_at"=>[ ["20120102", "20120102"], ["20111226", "20111226"], ["20111219", "20111219"], ["20111212", "20111212"], ["20111205", "20111205"], ["20111128", "20111128"], ["20111121", "20111121"], ... ] }}
full query power with
$where operator
query
& design
pomodori(find who is ticking)
> db.pomodori.findOne(){ "_id" : ObjectId("4d8916ed31f2381480000021"), "duration" : 1500, "interruptions" : 0, "after_break_of" : 0, "started_at" : "Mon Mar 14 2011 08:05:00 GMT+0100 (CET)", "squashed_at" : "Mon Mar 14 2011 08:07:31 GMT+0100 (CET)", "in_day" : { "position" : 1, "is_last" : false }, "task_id" : ObjectId("4d8916ec31f2381480000014"), "user_id" : ObjectId("4d8916ec31f2381480000010"), "annotations" : [ ]}
now = Time.now.yesterday.beginning_of_day + 10.hours
timestamp_of_now = now.to_i
ticking = db["pomodori"].find( :$where => <<-EOF var startedAt = this.started_at.getTime()/1000 return ((startedAt + this.duration) > #{timestamp_of_now}) && (startedAt < #{timestamp_of_now}) EOF)
puts ticking.map{|pomodoro| pomodoro["_id"]}
pomodori(find who is ticking)
pomodori(find who is ticking)
now = Time.now.yesterday.beginning_of_day + 10.hourstimestamp_of_now = now.to_i
ticking = db["pomodori"].find( :$where => <<-EOF var startedAt = this.started_at.getTime()/1000 return ((startedAt + this.duration) > #{timestamp_of_now}) && (startedAt < #{timestamp_of_now}) EOF)
puts ticking.map{|pomodoro| pomodoro["_id"]}
$ ruby src/find_who_is_ticking.rb 4d8916ef31f238148000011d4d8916f231f23814800002714d8916f931f23814800004dd4d8916f931f23814800004e0
now = Time.now.yesterday.beginning_of_day + 10.hours
timestamp_of_now = now.to_i
user_id = BSON::ObjectId.from_string("4d8916ec31f2381480000010")
ticking = db["pomodori"].find( :user_id => user_id, :$where => <<-EOF var startedAt = this.started_at.getTime()/1000 return ((startedAt + this.duration) > #{timestamp_of_now}) && (startedAt < #{timestamp_of_now}) EOF)
puts ticking.map{|pomodoro| pomodoro["_id"]}
pomodori(find who is ticking for an user)
now = Time.now.yesterday.beginning_of_day + 10.hourstimestamp_of_now = now.to_iuser_id = BSON::ObjectId.from_string("4d8916ec31f2381480000010")
ticking = db["pomodori"].find( :user_id => user_id, :$where => <<-EOF var startedAt = this.started_at.getTime()/1000 return ((startedAt + this.duration) > #{timestamp_of_now}) && (startedAt < #{timestamp_of_now}) EOF)
puts ticking.map{|pomodoro| pomodoro["_id"]}
pomodori(find who is ticking for an user)
$ ruby src/find_who_is_ticking_for_an_user.rb 4d8916ef31f238148000011d
related_to_maps = db["pomodori"].find( :$where => <<-EOF db.tasks.findOne({ "_id": this.task_id }).tags.indexOf("maps") >= 0 EOF)
puts related_to_maps.map{|pomodoro| pomodoro["_id"]}
pomodori(related to tasks tagged with “maps”)
related_to_maps = db["pomodori"].find( :$where => <<-EOF db.tasks.findOne({ "_id": this.task_id }).tags.indexOf("maps") >= 0 EOF)
puts related_to_maps.map{|pomodoro| pomodoro["_id"]}
pomodori(related to tasks tagged with “maps”)
$ ruby src/related_to_maps.rb 4d8916fa31f23814800005794d8916fa31f238148000057b4d8916fa31f238148000057d4d8916fa31f2381480000580
related_to_maps = db["pomodori"].find( :$where => <<-EOF db.tasks.findOne({ "_id": this.task_id }).tags.indexOf("maps") >= 0 EOF)
puts related_to_maps.explain
pomodori(don’t be carried away :-))
$ ruby src/related_to_maps.rb { "cursor"=>"BasicCursor", "nscanned"=>461, "nscannedObjects"=>461, "n"=>4, "millis"=>52, "indexBounds"=>{}, "allPlans"=>[...]}
related_to_maps = db["pomodori"].find(:task_id => { :$in => db["tasks"].find( {:tags => "maps"}, :fields => {:_id => 1} ).map{|task| task["_id"]}}) puts related_to_maps.map{|pomodoro| pomodoro["_id"]}
pomodori(related to... a better solution)
$ ruby src/related_to_maps.rb 4d8916fa31f23814800005794d8916fa31f238148000057b4d8916fa31f238148000057d4d8916fa31f2381480000580
related_to_maps = db["pomodori"].find(:task_id => { :$in => db["tasks"].find( {:tags => "maps"}, :fields => {:_id => 1} ).map{|task| task["_id"]}}) puts related_to_maps.map{|pomodoro| pomodoro["_id"]}
$ ruby src/related_to_maps.rb { "cursor"=>"BtreeCursor tags_1", "nscanned"=>3, "nscannedObjects"=>3, "n"=>3, "millis"=>0, ...}
{ "cursor"=>"BtreeCursor task_id_1 multi", "nscanned"=>4, "nscannedObjects"=>4, "n"=>4, "millis"=>0, ...}
pomodori(related to... a better solution)
real time analytics with increments
query
& design
result = db["visits"].update( { :_id => Digest::MD5.hexdigest(url) }, { :$inc => { :hits => 1 } }, :upsert => true, :safe => true)
puts "Update: #{result.inspect}"
puts db["visits"].find_one(:_id => Digest::MD5.hexdigest(url))
keep track of url’s visits(upsert with custom id)
keep track of url’s visits(upsert with custom id)
result = db["visits"].update( { :_id => Digest::MD5.hexdigest(url) }, { :$inc => { :hits => 1 } }, :upsert => true, :safe => true)
puts "Update: #{result.inspect}"
puts db["visits"].find_one(:_id => Digest::MD5.hexdigest(url))
$ ruby src/realtime_analytics.rb
Update: { "err"=>nil, "updatedExisting"=>false, "n"=>1, "ok"=>1.0}
{"_id"=>"2d86a774beffe90e715a8028c7bd177b", "hits"=>1}

$ ruby src/realtime_analytics.rb
Update: { "err"=>nil, "updatedExisting"=>true, "n"=>1, "ok"=>1.0}
{"_id"=>"2d86a774beffe90e715a8028c7bd177b", "hits"=>2}
url_digest = Digest::MD5.hexdigest(url)
ids = [
  [ url_digest, Time.now.strftime("%Y%m%d") ].join("-"),
  [ url_digest, Time.now.strftime("%Y%m") ].join("-"),
  [ url_digest, Time.now.strftime("%Y") ].join("-"),
  [ url_digest, user_id ].join("-")
]
puts "Expect to upsert: \n#{ids}"

result = db["visits"].update(
  { :_id => { :$in => ids } },
  { :$inc => { :hits => 1 } },
  :multi => true, :upsert => true, :safe => true
)
puts result.inspect
puts db["visits"].all
url’s visits aggregated by time(upsert with multiple documents)
url_digest = Digest::MD5.hexdigest(url)ids = [ [ url_digest, Time.now.strftime("%Y%m%d") ].join("-"), [ url_digest, Time.now.strftime("%Y%m") ].join("-"), [ url_digest, Time.now.strftime("%Y") ].join("-"), [ url_digest, user_id ].join("-")]puts "Expect to upsert: \n#{ids}"
result = db["visits"].update( { :_id => { :$in => ids } }, { :$inc => { :hits => 1 } }, :multi => true, :upsert => true, :safe => true)puts result.inspectputs db["visits"].all
$ ruby src/realtime_analytics_with_aggregation.rb
Expect to upsert:
[
  "2d86a774beffe90e715a8028c7bd177b-20110323",
  "2d86a774beffe90e715a8028c7bd177b-201103",
  "2d86a774beffe90e715a8028c7bd177b-2011",
  "2d86a774beffe90e715a8028c7bd177b-4d899fab31f238165c000001"
]
{ "err"=>nil, "updatedExisting"=>false, "upserted"=>BSON::ObjectId('4d899fabe23bd37e768ae76d'), "n"=>1, "ok"=>1.0}
{"_id"=>BSON::ObjectId('4d899fabe23bd37e768ae76d'), "hits"=>1}
url’s visits aggregated by time(upsert with multiple documents)
url_digest = Digest::MD5.hexdigest(url)ids = [ [ url_digest, Time.now.strftime("%Y%m%d") ].join("-"), [ url_digest, Time.now.strftime("%Y%m") ].join("-"), [ url_digest, Time.now.strftime("%Y") ].join("-"), [ url_digest, user_id ].join("-")]puts "Expect to upsert: \n#{ids}"
result = db["visits"].update( { :_id => { :$in => ids } }, { :$inc => { :hits => 1 } }, :multi => true, :upsert => true, :safe => true)puts result.inspectputs db["visits"].all
$ ruby src/realtime_analytics_with_aggregation.rb Expect to upsert:[ "2d86a774beffe90e715a8028c7bd177b-‐20110323", "2d86a774beffe90e715a8028c7bd177b-‐201103", "2d86a774beffe90e715a8028c7bd177b-‐2011", "2d86a774beffe90e715a8028c7bd177b-‐4d899fab31f238165c000001"]
{ "err"=>nil, "updatedExisting"=>false, "upserted"=>BSON::ObjectId('4d899fabe23bd37e768ae76e'), "n"=>1, "ok"=>1.0}
{"_id"=>BSON::ObjectId('4d899fabe23bd37e768ae76d'), "hits"=>1}{"_id"=>BSON::ObjectId('4d899fabe23bd37e768ae76e'), "hits"=>1}
url’s visits aggregated by time(upsert with multiple documents)
result = db["visits"].update( { :_id => { :$in => ids } }, { :$inc => { :hits => 1 } }, :multi => true, :upsert => true, :safe => true)
if result["n"] != ids.size updated_ids = db["visits"].find( { :_id => { :$in => ids } }, :fields => { :_id => true } ).map{|document| document["_id"]}
db["visits"].insert((ids - updated_ids).map do |id| { :_id => id, :hits => 1 } end)
db["visits"].remove(:_id => result["upserted"]) if result["upserted"]end
url’s visits aggregated by time(look before you leap)
result = db["visits"].update( { :_id => { :$in => ids } }, { :$inc => { :hits => 1 } }, :multi => true, :upsert => true, :safe => true)
if result["n"] != ids.size updated_ids = db["visits"].find( { :_id => { :$in => ids } }, :fields => { :_id => true } ).map{|document| document["_id"]}
db["visits"].insert((ids - updated_ids).map do |id| { :_id => id, :hits => 1 } end)
db["visits"].remove(:_id => result["upserted"]) if result["upserted"]end
$ ruby src/realtime_analytics_with_aggregation.rb{ "err"=>nil, "updatedExisting"=>false, "upserted"=>BSON::ObjectId('4d89a5ebe23bd37e768ae76f'), "n"=>1, "ok"=>1.0
}
{"_id"=>"<url_digest>-20110323", "hits"=>1}
{"_id"=>"<url_digest>-201103", "hits"=>1}
{"_id"=>"<url_digest>-2011", "hits"=>1}
{"_id"=>"<url_digest>-4d89a43b31f238167a000001", "hits"=>1}
url’s visits aggregated by time(look before you leap)
result = db["visits"].update( { :_id => { :$in => ids } }, { :$inc => { :hits => 1 } }, :multi => true, :upsert => true, :safe => true)
if result["n"] != ids.size updated_ids = db["visits"].find( { :_id => { :$in => ids } }, :fields => { :_id => true } ).map{|document| document["_id"]}
db["visits"].insert((ids - updated_ids).map do |id| { :_id => id, :hits => 1 } end)
db["visits"].remove(:_id => result["upserted"]) if result["upserted"]end
$ ruby src/realtime_analytics_with_aggregation.rb{ "err"=>nil, "updatedExisting"=>true, "n"=>3, "ok"=>1.0}
{"_id"=>"<url_digest>-‐20110323", "hits"=>2}{"_id"=>"<url_digest>-‐201103", "hits"=>2}{"_id"=>"<url_digest>-‐2011", "hits"=>2}{"_id"=>"<url_digest>-‐4d89a43b31f238167a000001", "hits"=>1}{"_id"=>"<url_digest>-‐4d89a44231f238167e000001", "hits"=>1}
url’s visits aggregated by time(look before you leap)
incremental map/reduce
query
& design
map/reduce hits per day(we have raw events)
> db.visit_events.findOne(){ "_id" : ObjectId("4d89fc6531f2381d2c00000b"), "url" : "8aa8b68e0b849f70df6dbb3031c6182b", "user_id" : ObjectId("4d89fc6531f2381d2c000005"), "at" : "Thu Jan 13 2011 08:00:06 GMT+0100 (CET)"}
# Insert `visits` fake visit events into "visit_events", each one a random
# number of seconds (sampled from BETWEEN_VISITS) after the previous one.
# NOTE(review): URLS, USERS, BETWEEN_VISITS and Integer#seconds are defined
# elsewhere — presumably sample data plus ActiveSupport; verify against caller.
def generate_events(visits, db, now)
  visits.times do
    now += BETWEEN_VISITS.sample.seconds
    db["visit_events"].insert(
      :url => Digest::MD5.hexdigest(URLS.sample),
      :user_id => USERS.sample[:id],
      :at => now
    )
  end
end

generate_events(10_000, db, now)
map/reduce hits per day(generate data WITH something like)
# Map: one { hits: 1 } emission per event, keyed by "<url>-<yyyymmdd>".
MAP = <<-EOF
  function() {
    emit([ this.url, this.at.format("Ymd") ].join("-"), { "hits": 1 })
  }
EOF

# Reduce: sum all hit counters emitted for the same key.
REDUCE = <<-EOF
  function(key, values) {
    var hits = 0
    for(var index in values) hits += values[index]["hits"]
    return { "hits": hits }
  }
EOF

result = db["visit_events"].map_reduce(
  MAP, REDUCE,
  :out => "visits", :raw => true, :verbose => true
)

puts result.inspect
map/reduce hits per day(simple map/reduce)
# Map phase: key is url plus day ("Ymd"), value is a unit hit counter.
MAP = <<-EOF
  function() {
    emit([ this.url, this.at.format("Ymd") ].join("-"), { "hits": 1 })
  }
EOF

# Reduce phase: fold the unit counters into a per-key total.
REDUCE = <<-EOF
  function(key, values) {
    var hits = 0
    for(var index in values) hits += values[index]["hits"]
    return { "hits": hits }
  }
EOF

# :raw => true returns the server's raw result document (timings, counts).
result = db["visit_events"].map_reduce(
  MAP, REDUCE,
  :out => "visits", :raw => true, :verbose => true
)

puts result.inspect
map/reduce hits per day (Date.prototype.format doesn’t exist)
# Server-side JS has no Date#format, so the map function defines it
# itself (body elided on the slide).
MAP = <<-EOF
  function() {
    Date.prototype.format = function(format) { ... }
    emit([ this.url, this.at.format("Ymd") ].join("-"), { "hits": 1 })
  }
EOF

# Reduce: sum the per-key hit counters.
REDUCE = <<-EOF
  function(key, values) {
    var hits = 0
    for(var index in values) hits += values[index]["hits"]
    return { "hits": hits }
  }
EOF
map/reduce hits per day(implement format in place)
# Same as before, but Date#format is only defined when missing, so only
# the first map call in each JavaScript context pays for the definition.
MAP = <<-EOF
  function() {
    if (!Date.prototype.format) {
      Date.prototype.format = function(format) { ... }
    }
    emit([ this.url, this.at.format("Ymd") ].join("-"), { "hits": 1 })
  }
EOF

# Reduce: sum the per-key hit counters.
REDUCE = <<-EOF
  function(key, values) {
    var hits = 0
    for(var index in values) hits += values[index]["hits"]
    return { "hits": hits }
  }
EOF
map/reduce hits per day(implement format only if needed)
# Store a helper in MongoDB's system.js collection: server-side JavaScript
# saved there is callable from any map/reduce job without inlining it.
db[Mongo::DB::SYSTEM_JS_COLLECTION].save(
  :_id => "formatDate",
  :value => BSON::Code.new(
    <<-EOF
      function(date, format) {
        if (!Date.prototype.format) {
          Date.prototype.format = function(format) { ... }
        }
        return date.format(format)
      }
    EOF
  )
)

# The stored formatDate helper is visible inside the map function.
MAP = <<-EOF
  function() {
    emit([ this.url, formatDate(this.at, "Ymd") ].join("-"), {"hits":1})
  }
EOF
map/reduce hits per day(implement format once and for all)
# A poor man's module loader stored in system.js: load("date") installs
# Date#format once per JavaScript context, on demand, and returns true.
db[Mongo::DB::SYSTEM_JS_COLLECTION].save(
  :_id => "load",
  :value => BSON::Code.new(
    <<-EOF
      function(module) {
        if ((module === "date") && !Date.prototype.format) {
          Date.prototype.format = function(format) { ... }
        }
        return true
      }
    EOF
  )
)

# load("date") always returns true, so && always reaches the emit call.
MAP = <<-EOF
  function() {
    load("date") && emit(
      [ this.url, this.at.format("Ymd") ].join("-"),
      { "hits": 1 }
    )
  }
EOF
map/reduce hits per day(implement format once and for all)
# Emit a unit counter per event under the "<url>-<yyyymmdd>" key.
MAP = <<-EOF
  function() {
    emit([ this.url, this.at.format("Ymd") ].join("-"), { "hits": 1 })
  }
EOF

# Accumulate the emitted counters into a single total per key.
REDUCE = <<-EOF
  function(key, values) {
    var hits = 0
    for(var index in values) hits += values[index]["hits"]
    return { "hits": hits }
  }
EOF

result = db["visit_events"].map_reduce(
  MAP, REDUCE,
  :out => "visits", :raw => true, :verbose => true
)

puts result.inspect
map/reduce hits per day(ok, but could be taking too long)
$ ruby src/incremental_mr.rb { "result"=>"visits", "timeMillis"=>4197, "timing"=> { "mapTime"=>3932, "emitLoop"=>4170, "total"=>4197 }, "counts"=> { "input"=>10000, "emit"=>10000, "output"=>200 }, "ok"=>1.0}
> db.visits.find()
{ "_id" : "019640ff7952425b1b8695605459d223-20110316", "value" : { "hits" : 47 } }
{ "_id" : "019640ff7952425b1b8695605459d223-20110317", "value" : { "hits" : 49 } }
{ "_id" : "019640ff7952425b1b8695605459d223-20110318", "value" : { "hits" : 59 } }
{ "_id" : "019640ff7952425b1b8695605459d223-20110319", "value" : { "hits" : 37 } }
map/reduce hits per day(ok, every time we need to start over)
map/reduce hits per day(incremental with savepoints)
temporary collection
visit-elements collection
visit collection
map/reduce on last changed documents
upsert
map/reduce hits per day(incremental with savepoints)
db.create_collection("visit_events", :capped => true, :max => 50_000, :size => 5_000_000)
temporary collection
visit-elements collection
map/reduce on last changed documents
FINALIZE = <<-EOF function(key, value) { db.visits.update( { "_id": key }, { $inc: { "hits": value.hits } }, true ) }EOF
map/reduce hits per day(incremental with savepoints)
temporary collection
visit collection
upsert
generate_events(number_of_events, db, now)

# Only the events inserted since the last savepoint are processed.
from = from_last_updated(db)
to = to_last_inserted(db)

result = db["visit_events"].map_reduce(
  MAP, REDUCE,
  :finalize => FINALIZE,
  :query => { :_id => { :$gt => from, :$lte => to } },
  :raw => true, :verbose => true
)

# Persist how far we got so the next run can resume from here.
db["visits"].save(:_id => "savepoint", :at => to)
map/reduce hits per day(incremental with savepoints)
generate_events(number_of_events, db, now)

# Window of unprocessed events: (last savepoint, newest event].
lower_bound = from_last_updated(db)
upper_bound = to_last_inserted(db)

result = db["visit_events"].map_reduce(
  MAP, REDUCE,
  :finalize => FINALIZE,
  :query => { :_id => { :$gt => lower_bound, :$lte => upper_bound } },
  :raw => true, :verbose => true
)

# Record the new savepoint for the next incremental run.
db["visits"].save(:_id => "savepoint", :at => upper_bound)
map/reduce hits per day(incremental with savepoints)
$ ruby src/incremental_mr.rb -e 10000 { "result"=>"tmp.mr.mapreduce_1300892393_60", "timeMillis"=>4333, "timing"=>{...}, "counts"=>{ "input"=>10000, "emit"=>10000, "output"=>196 }, "ok"=>1.0}
{ "_id"=>"05241f07d0e3ab6a227e67b33ea0b509-20110113", "hits"=>26}
generate_events(number_of_events, db, now)

# Resume from the last savepoint up to the newest inserted event.
from = from_last_updated(db)
to = to_last_inserted(db)

# ObjectIds grow monotonically, so the _id range selects "new" events.
result = db["visit_events"].map_reduce(
  MAP, REDUCE,
  :finalize => FINALIZE,
  :query => { :_id => { :$gt => from, :$lte => to } },
  :raw => true, :verbose => true
)

db["visits"].save(:_id => "savepoint", :at => to)
map/reduce hits per day(incremental with savepoints)
$ ruby src/incremental_mr.rb -e 4999 { "result"=>"tmp.mr.mapreduce_1300892399_61", "timeMillis"=>2159, "timing"=>{...}, "counts"=>{ "input"=>4999, "emit"=>4999, "output"=>146 }, "ok"=>1.0}
{ "_id"=>"05241f07d0e3ab6a227e67b33ea0b509-20110113", "hits"=>64}
# Returns the persisted savepoint document, or a synthetic one far enough
# in the past that the first incremental run processes every event.
def savepoint(db)
  db["visits"].find_one(:_id => "savepoint") ||
    { "at" => BSON::ObjectId.from_time(10.years.ago) }
end

# Lower bound (exclusive) of the next incremental map/reduce run.
def from_last_updated(db)
  # BUG FIX: was `savepoint["at"]` — `savepoint` requires the db argument,
  # so the original raised ArgumentError at runtime.
  savepoint(db)["at"]
end

# Upper bound (inclusive): the _id of the most recently inserted raw event
# (ObjectIds are monotonically increasing, so max _id == newest document).
def to_last_inserted(db)
  db["visit_events"].find.sort([:_id, Mongo::DESCENDING]).first["_id"]
end
map/reduce hits per day(incremental with savepoints)
external map/reduce
query & design
master slave
replicate data
use an external mongod process to execute map/reduce jobs
master slave
map/reduceon last
replicateddata
use an external mongod process to execute map/reduce jobs
master slave
push back results
use an external mongod process to execute map/reduce jobs
look at the shell source — it is more powerful than you think
documents: embedded or linked?
query & design
life cycle: when the root document is deleted, can it stand on its own?
if yesembedded
if nolinked
if yesembedded
if nolinked
are they always fetched together?
if yesembedded
if nolinked
are its attributes used to find the root document?
if yesembedded
if nolinked
is it small?
if yesembedded
if nolinked
is it unique, or are there fewer than a few hundred?
Style
query & design
scale
distributed reads with replica sets
scale
master
slave
slave
read/write
read
read
replicate
replicate
+ durability + fault tolerance
(seems stupid but...)
pump your hardware
scale
(seems stupid but...)
call 10gen, sure they can help :-)
scale