MongoDB to assist with recommendations -
I have three collections, with the schemas shown below.
The user collection holds information about each user's friends and their listening count (weight) per artist:
{ user_id : 1, friends : [3,5,6], artists : [ {artist_id: 10 , weight : 345}, {artist_id: 17 , weight : 378} ] }
The artist collection holds the name of each artist and the tags given to them by various users:
{ artistid : 56, name : "ed sheeran", user_tag : [ {user_id : 2, tag_id : 6}, {user_id : 2, tag_id : 5}, {user_id : 3, tag_id : 7} ] }
The tags collection holds information about the various tags:
{tag_id : 3, tag_value : "hiphop"}
I want to provide a user with artist recommendations using the rules below:
Rule 1: Find the artists listened to by the user's friends but not by the user, and order them by the sum of the friends' listening counts.
Rule 2: Select any tag used by the user, find all artists with that tag who are not in the user's listening list, and order them by the number of unique listeners.
Can anybody help me write a query to perform the above?
You need to do a couple of things here to get your end result, but the first stages are relatively simple. Take the user object you provide:
// Sample user document: the ids of the user's friends plus the
// listening count ("weight") recorded for each artist they listen to.
var user = {
  user_id: 1,
  friends: [3, 5, 6],
  artists: [
    { artist_id: 10, weight: 345 },
    { artist_id: 17, weight: 378 }
  ]
};
Now, presuming you already have that data retrieved, this comes down to finding the same structures for each "friend" and filtering out the array content of "artists" into a single distinct list. Presumably each "weight" will also be considered in total here.
This is a simple aggregation operation that will first filter out the artists that are already in the list for the given user:
// Ids of the artists the user already listens to; these must never be recommended.
var artists = user.artists.map(function(artist) {
  return artist.artist_id;
});

// NOTE(review): `User` is assumed to be the Mongoose model for the user
// collection (`user` is the plain document above) - confirm the model name.
User.aggregate(
  [
    // Only restrict to the user's friends here. Adding
    // "artists.artist_id": { "$nin": artists } would drop every friend who
    // shares even one artist with the user, losing that friend's other
    // artists; the $project stage below removes the shared artists instead.
    { "$match": {
      "user_id": { "$in": user.friends }
    }},

    // Pre-filter the artists already in the user's list: map each matching
    // element to `false`, then strip the `false` values with $setDifference.
    // Operator names are case-sensitive ($setDifference, $anyElementTrue).
    { "$project": {
      "artists": {
        "$setDifference": [
          { "$map": {
            "input": "$artists",
            "as": "el",
            "in": {
              "$cond": [
                { "$anyElementTrue": [
                  { "$map": {
                    "input": artists,
                    "as": "artist",
                    "in": { "$eq": [ "$$artist", "$$el.artist_id" ] }
                  }}
                ]},
                false,
                "$$el"
              ]
            }
          }},
          [false]
        ]
      }
    }},

    // Unwind the reduced array
    { "$unwind": "$artists" },

    // Group back on each artist and sum the friends' weights
    { "$group": {
      "_id": "$artists.artist_id",
      "weight": { "$sum": "$artists.weight" }
    }},

    // Sort the results by weight, highest first
    { "$sort": { "weight": -1 } }
  ],
  function(err, results) {
    // more to come here
  }
);
The "pre-filter" is the only really tricky part here. You could just $unwind the array and $match again to filter out the entries you don't want. Even though we want to $unwind the results later in order to combine them, it works out more efficient to remove them from the array "first", so there is less to expand.
So here the $map operator allows inspection of each element of the friend's "artists" array and comparison against the filtered "user" artists list, in order to return just the wanted details. $setDifference is then used to actually "filter" out any results that were not returned as the array content, but rather returned as false.
After that there is just the $unwind to de-normalize the content in the array and the $group to bring together a total per artist. For fun we are using $sort to show that the list is returned in the desired order, but that will not be necessary at a later stage.
That is at least part of the way along here, as the resulting list should only contain other artists not already in the user's own list, sorted by the summed "weight" of any artists that could possibly appear on multiple friends.
The next part is going to need data from the "artists" collection in order to take the number of listeners into account. Whilst mongoose has a .populate() method, you really don't want that here, as you are looking for the "distinct user" counts. This implies another aggregation implementation in order to get those distinct counts for each artist.
Following on from the result list of the previous aggregation operation, you would use the _id values like this:
// First get the array of artist ids from the previous aggregation results
var artists = results.map(function(artist) {
  return artist._id;
});

// NOTE(review): `Artist` is assumed to be the Mongoose model for the artist
// collection - confirm the model name.
Artist.aggregate(
  [
    // Match only the candidate artists
    { "$match": {
      "artistid": { "$in": artists }
    }},

    // Project a weight from the number of distinct users who tagged the artist.
    // $setUnion (case-sensitive) with an empty array collapses the mapped
    // user ids into a unique set, and $size counts them.
    { "$project": {
      "_id": "$artistid",
      "weight": {
        "$multiply": [
          { "$size": {
            "$setUnion": [
              { "$map": {
                "input": "$user_tag",
                "as": "tag",
                "in": "$$tag.user_id"
              }},
              []
            ]
          }},
          // Scale the distinct-user count so it is meaningful next to the
          // listening-count weights from the first aggregation.
          10
        ]
      }
    }}
  ],
  function(err, results) {
    // more later
  }
);
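Here the trick is done in the aggregate with $map, which does a similar transform of the values that are then fed to $setUnion to make them a unique list. Then the $size operator is applied to find out how big that list is. The additional math is there to give that number some meaning when applied against the already recorded weights from the previous results.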
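Of course you need to bring all of this together somehow, as right now there are just two distinct sets of results. The basic process is a "hash table", where the unique "artist" id values are used as the key and the "weight" values are combined.
You can do this in a number of ways, but since there is a desire to "sort" the combined results, my preference would be something "MongoDBish", since it follows the basic methods you should already be used to.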
A handy way to implement this is using nedb, which provides an "in memory" store that uses much of the same type of methods as are used to read and write to MongoDB collections.
This also scales well if you needed to use an actual collection for large results, as all the principles remain the same.
1. The first aggregation operation inserts new data into the store.
2. The second aggregation "updates" that data and increments the "weight" field.
As a complete function listing, and with some other help from the async library, it would look like this:
/**
 * Builds the pipeline that totals, per artist, the listening weight of the
 * given friends, excluding artists the user already listens to.
 *
 * @param {Array} friendIds - user_id values of the user's friends.
 * @param {Array} ownArtistIds - artist_id values already in the user's list.
 * @returns {Array} Aggregation pipeline for the user collection.
 */
function buildFriendArtistPipeline(friendIds, ownArtistIds) {
  return [
    // Only restrict to the user's friends. A `$nin` test on
    // "artists.artist_id" here would discard every friend sharing even one
    // artist with the user; shared artists are removed in $project instead.
    { "$match": {
      "user_id": { "$in": friendIds }
    }},

    // Map each already-known artist to `false`, then strip the `false`
    // values with $setDifference so there is less to $unwind.
    { "$project": {
      "artists": {
        "$setDifference": [
          { "$map": {
            "input": "$artists",
            "as": "el",
            "in": {
              "$cond": [
                { "$anyElementTrue": [
                  { "$map": {
                    "input": ownArtistIds,
                    "as": "artist",
                    "in": { "$eq": [ "$$artist", "$$el.artist_id" ] }
                  }}
                ]},
                false,
                "$$el"
              ]
            }
          }},
          [false]
        ]
      }
    }},

    // Unwind the reduced array
    { "$unwind": "$artists" },

    // Group back on each artist and sum the friends' weights
    { "$group": {
      "_id": "$artists.artist_id",
      "weight": { "$sum": "$artists.weight" }
    }},

    // Sort the results by weight, highest first
    { "$sort": { "weight": -1 } }
  ];
}

/**
 * Builds the pipeline that scores each given artist by the number of
 * distinct users who tagged it, multiplied by 10.
 *
 * @param {Array} artistIds - artistid values to score.
 * @returns {Array} Aggregation pipeline for the artist collection.
 */
function buildListenerScorePipeline(artistIds) {
  return [
    // Match only the candidate artists
    { "$match": {
      "artistid": { "$in": artistIds }
    }},

    // $setUnion with an empty array makes the mapped user ids unique,
    // $size counts them, and $multiply scales the count so it carries
    // meaning next to the listening-count weights.
    { "$project": {
      "_id": "$artistid",
      "weight": {
        "$multiply": [
          { "$size": {
            "$setUnion": [
              { "$map": {
                "input": "$user_tag",
                "as": "tag",
                "in": "$$tag.user_id"
              }},
              []
            ]
          }},
          10
        ]
      }
    }}
  ];
}

/**
 * Produces artist recommendations for a user: artists listened to by the
 * user's friends but not by the user, ordered by the friends' summed
 * listening weight plus a score derived from each artist's distinct taggers.
 *
 * NOTE(review): `User` and `Artist` are assumed to be the Mongoose models for
 * the user and artist collections - confirm the names against your models.
 *
 * @param {number} userid - user_id of the user to build recommendations for.
 * @param {function(Error, Array)} callback - Receives an error, or the
 *   combined documents ({ artist_id, weight }) sorted by weight descending.
 */
function getuserrecommendations(userid, callback) {
  var async = require('async'),
      DataStore = require('nedb');

  // A NeDB datastore created without a filename lives in memory only;
  // it is the working space used to combine the two result sets.
  var store = new DataStore();

  User.findOne({ "user_id": userid }, function(err, user) {
    // Always return after reporting, so the callback fires exactly once.
    if (err) return callback(err);
    if (!user) {
      return callback(new Error("No user found for user_id " + userid));
    }

    var friends = user.friends || [];
    var artists = (user.artists || []).map(function(artist) {
      return artist.artist_id;
    });

    async.waterfall(
      [
        // Stage 1: total the friends' weights per artist and insert them.
        function(callback) {
          var pipeline = buildFriendArtistPipeline(friends, artists);

          User.aggregate(pipeline, function(err, results) {
            if (err) return callback(err);

            async.each(
              results,
              function(result, callback) {
                // Rename _id, since the datastore generates its own _id values.
                result.artist_id = result._id;
                delete result._id;
                store.insert(result, callback);
              },
              function(err) {
                callback(err, results);
              }
            );
          });
        },

        // Stage 2: add the distinct-listener score to each stored artist.
        function(results, callback) {
          var artistIds = results.map(function(artist) {
            return artist.artist_id; // note that this was renamed in stage 1
          });

          var pipeline = buildListenerScorePipeline(artistIds);

          Artist.aggregate(pipeline, function(err, results) {
            if (err) return callback(err);

            async.each(
              results,
              function(result, callback) {
                result.artist_id = result._id;
                delete result._id;
                store.update(
                  { "artist_id": result.artist_id },
                  { "$inc": { "weight": result.weight } },
                  callback
                );
              },
              function(err) {
                callback(err);
              }
            );
          });
        }
      ],
      function(err) {
        if (err) return callback(err); // callback with any errors

        // Otherwise fetch the combined results, sorted, and hand them back.
        store.find({}).sort({ "weight": -1 }).exec(callback);
      }
    );
  });
}
So after matching the initial source user object, the values are passed into the first aggregate function, which is executing in series and using async.waterfall to pass on its result.
Before that happens, though, the aggregation results are added to the datastore with regular .insert() statements, taking care to rename the _id fields, as nedb does not like anything other than its own self-generated _id values. Each result is inserted with the artist_id and weight properties from the aggregation result.
That list is then passed to the second aggregation operation, which is going to return each specified "artist" with a calculated "weight" based on the distinct user size. These are then "updated" with the same .update() statement on the datastore for each artist, incrementing the "weight" field.
All going well, the final operation is to .find() those results and .sort() them by the combined "weight", and simply return the result to the callback that was passed in to the function.
so use this:
// Request recommendations for the user with user_id 1.
getuserrecommendations(1, function(err, results) {
  if (err) {
    // Report the failure instead of silently continuing with no results.
    console.error(err);
    return;
  }
  // results is the sorted list
});
And it is going to return all of the artists not presently in that user's list but in their friends' lists, ordered by the combined weights of the friends' listening counts plus the score from the number of distinct users of that artist.
This is how you deal with data from two different collections that you need to combine into a single result with various aggregated details. It's multiple queries and a working space, but it is also part of the MongoDB philosophy that such operations are better performed this way than by throwing them at the database to "join" results.
Comments
Post a Comment