mongodb进阶
1.首先推荐一个好用的工具
下载链接:Robo 3T。(接受打赏)
2.进入正题。聚合查询再回顾:
/* Find the units that are duplicated within a book and print every duplicate group. */
// Documents that share all of these fields are counted as one group.
var duplicateGrouping = {
    $group: {
        _id: {
            book: "$book",
            version: "$version",
            code: "$code",
            name: "$name",
            startPageCode: "$startPageCode"
        },
        total: { $sum: 1 }
    }
};
// Only groups containing more than one document are duplicates.
var moreThanOne = { $match: { total: { $gt: 1 } } };
// To inspect a single book only, put this stage first in the pipeline:
//   { $match: { book: "09060101-001" } }
var position = 0;
db.units.aggregate([duplicateGrouping, moreThanOne]).forEach(function (group) {
    // Print a running index header before each group, then the group itself.
    position += 1;
    print("/* " + position + " */");
    printjson(group);
});
3.查询并删除
/**
 * Remove duplicated units so that one document of every duplicate group remains.
 * Only needs to be executed once. It can be run from the console:
 *   mongo host:port/books removeRedundantData.js
 */
// Find the duplicated units; the aggregation result is materialized as an array.
var c = db.units.aggregate([
    // Optional: restrict the clean-up to the units of one book.
    // {
    //     $match: { book: "0626020101-1335" }
    // },
    // Group by book code, unit name, unit code, version and start page code,
    // counting the documents in each group.
    {
        $group: {
            _id: {
                book: "$book",
                name: "$name",
                code: "$code",
                version: "$version",
                startPageCode: "$startPageCode"
            },
            total: { $sum: 1 }
        }
    },
    // Keep only the groups that hold more than one document.
    { $match: { total: { $gt: 1 } } }
]).toArray();
// For every duplicate group, delete all but one of its documents.
for (var i = 0; i < c.length; i++) {
    // Use the complete group key as the delete filter. The filter has to cover
    // every field the $group stage grouped on (startPageCode included):
    // a narrower filter can also match a unit that differs only in
    // startPageCode and is not duplicated at all, and delete it by mistake.
    var q = c[i]._id;
    // (total - 1) deletions leave one document of the group behind.
    for (var j = 0; j < c[i].total - 1; j++) {
        // The second argument (justOne) limits each call to a single document.
        db.units.remove(q, true);
    }
}
/**
* 删除指定书本的重复的unit
* mongo removeRedundantData.js
*/
/*
// 查询重复的章节信息,返回数组
var c = db.units.aggregate( [
// 查询指定书本的章节
{
$match:{book: "0626020101-1335"}
},
// 按书本code、章节name、章节code、版本号分组,统计文档数量
{
$group: {
_id: {
book: "$book",
name:"$name",
code:"$code",
version:"$version"
},
total: { $sum :1 }
}
},
// 筛选数量大于1的分组
{ $match: { total: { $gt: 1 } } }
] ).map(
// 返回聚合查询结果中的分组信息
function(el) { return el._id }
)
// 对每个章节,执行一次删除操作,如果不止重复2次,多次执行即可。
for (var i = 0;i <c.length ; i++) {
var q = {
"book" : c [i]._id.book,
"version" : c [i]._id.version,
"code" : c [i]._id.code,
"name" : c [i]._id.name
};
// var q = c [i]._id; // 当查询条件与分组条件一致时,直接赋值即可
// 按条件删除一条记录
db.units.remove(q, 1);
}
*/
3.$unwind关键字:将特定数组字段拆分成多个文档。
先查询 name 为“目录”的文档,
结果只有两个文档:
/* 1 */
{
"_id" : ObjectId("5c7c885210fa17a67908a1c2"),
"sort" : 1,
"excludes" : [],
"endPageCode" : "A3",
"code" : "01",
"name" : "目录",
"level" : 1,
"pages" : [
{
"code" : "A2",
"name" : ""
},
{
"code" : "A3",
"name" : ""
}
],
"book" : "09060101-001",
"version" : [
1
],
"children" : [],
"types" : [],
"startPageCode" : "A2"
}
/* 2 */
{
"_id" : ObjectId("5c7c99fa10fa17a67908a1dd"),
"sort" : 1,
"excludes" : [],
"endPageCode" : "A3",
"code" : "01",
"name" : "目录",
"level" : 1,
"pages" : [
{
"code" : "A2",
"name" : ""
},
{
"code" : "A3",
"name" : ""
}
],
"book" : "09060101-001",
"version" : [
1,
2
],
"children" : [],
"types" : [],
"startPageCode" : "A2"
}
经过$unwind 拆分pages之后:
生成了四个文档,其结构如下
/* 1 */
{
"_id" : ObjectId("5c7c885210fa17a67908a1c2"),
"sort" : 1,
"excludes" : [],
"endPageCode" : "A3",
"code" : "01",
"name" : "目录",
"level" : 1,
"pages" : {
"code" : "A2",
"name" : ""
},
"book" : "09060101-001",
"version" : [
1
],
"children" : [],
"types" : [],
"startPageCode" : "A2"
}
/* 2 */
{
"_id" : ObjectId("5c7c885210fa17a67908a1c2"),
"sort" : 1,
"excludes" : [],
"endPageCode" : "A3",
"code" : "01",
"name" : "目录",
"level" : 1,
"pages" : {
"code" : "A3",
"name" : ""
},
"book" : "09060101-001",
"version" : [
1
],
"children" : [],
"types" : [],
"startPageCode" : "A2"
}
/* 3 */
{
"_id" : ObjectId("5c7c99fa10fa17a67908a1dd"),
"sort" : 1,
"excludes" : [],
"endPageCode" : "A3",
"code" : "01",
"name" : "目录",
"level" : 1,
"pages" : {
"code" : "A2",
"name" : ""
},
"book" : "09060101-001",
"version" : [
1,
2
],
"children" : [],
"types" : [],
"startPageCode" : "A2"
}
/* 4 */
{
"_id" : ObjectId("5c7c99fa10fa17a67908a1dd"),
"sort" : 1,
"excludes" : [],
"endPageCode" : "A3",
"code" : "01",
"name" : "目录",
"level" : 1,
"pages" : {
"code" : "A3",
"name" : ""
},
"book" : "09060101-001",
"version" : [
1,
2
],
"children" : [],
"types" : [],
"startPageCode" : "A2"
}
可以发现,生成的新文档只是把原来的 pages 数组按元素拆开(每个新文档的 pages 字段是原数组中的一个元素),其他数据都没变。
4.统计每个单元总页数:
// Count the pages of each unit: first emit one document per entry of "pages",
// then count the emitted documents per unit name.
var onePerPage = { $unwind: "$pages" };
var countByName = {
    $group: {
        _id: { name: "$name" },
        totalPage: { $sum: 1 }
    }
};
db.getCollection('units').aggregate([onePerPage, countByName]);
/* 1 */
{
"_id" : {
"name" : "课题1 物质的变化和性质"
},
"totalPage" : 20.0
}
/* 2 */
{
"_id" : {
"name" : "第二单元 辽宋夏金元时期:民族关系发展和社会变化"
},
"totalPage" : 76.0
}
/* 3 */
{
"_id" : {
"name" : "第一单元 隋唐时期:繁荣与开放的时代"
},
"totalPage" : 52.0
}
/* 4 */
{
"_id" : {
"name" : "目录"
},
"totalPage" : 4.0
}