转自:
http://blog.fens.me/nosql-r-mongodb/
2. rmongodb函数库
文字说明部分:
rmongodb提供了一大堆函数,对应mongo的各种操作。比起别的Nosql来说,真是工程浩大啊。但我总觉得封装粒度不够,写起代码来比较复杂。
下面列出了所有rmongodb函数库,我只挑选几个常用的介绍。
建立mongo连接
mongo<-mongo.create()
查看连接是否正常
mongo.is.connected(mongo)
创建一个BSON对象缓存
buf <- mongo.bson.buffer.create()
给对象buf增加element
mongo.bson.buffer.append(buf,"name","Echo")
增加对象类型的element
score <- c(5,3.5,4) names(score) <- c("Mike","Jimmy","Ann") mongo.bson.buffer.append(buf,"score",score)
增加数组类型的element
mongo.bson.buffer.start.array(buf,"comments") mongo.bson.buffer.append(buf,"0","a1") mongo.bson.buffer.append(buf,"1","a2") mongo.bson.buffer.append(buf,"2","a3")
关闭数组类型的element
mongo.bson.buffer.finish.object(buf)
取出缓存数据
b <- mongo.bson.from.buffer(buf)
数据库.数据集
ns="db.blog"
插入一条记录
mongo.insert(mongo,ns,b) #mongo shell:(Not Run) db.blog.insert(b)
创建查询对象query
buf <- mongo.bson.buffer.create() mongo.bson.buffer.append(buf,"Echo") query <- mongo.bson.from.buffer(buf)
创建查询返回值对象
执行单条记录查询mongo.find.one(mongo,query,fields) #mongo shell:(Not Run) db.blog.findOne({query},{fields})执行列表记录查询
mongo.find(mongo,fields) #mongo shell:(Not Run) db.blog.find({query},{fields}) 创建修改器对象objNew buf <- mongo.bson.buffer.create() mongo.bson.buffer.start.object(buf,"$inc") mongo.bson.buffer.append(buf,"age",1L) mongo.bson.buffer.finish.object(buf) objNew <- mongo.bson.from.buffer(buf)执行修改操作
mongo.update(mongo,objNew) #mongo shell:(Not Run) db.blog.update({query},{objNew})删除所选对象mongo.remove(mongo,query) #mongo shell:(Not Run) db.blog.remove({query}) 销毁mongo连接mongo.destroy(mongo)代码部分:
共有153个函数
mongo.add.user mongo.authenticate mongo.binary.binary mongo.binary.function mongo.binary.md5 mongo.binary.old mongo.binary.user mongo.binary.uuid mongo.bson.array mongo.bson.binary mongo.bson.bool mongo.bson.buffer.append mongo.bson.buffer.append.bool mongo.bson.buffer.append.bson mongo.bson.buffer.append.code mongo.bson.buffer.append.code.w.scope mongo.bson.buffer.append.complex mongo.bson.buffer.append.double mongo.bson.buffer.append.element mongo.bson.buffer.append.int mongo.bson.buffer.append.list mongo.bson.buffer.append.long mongo.bson.buffer.append.null mongo.bson.buffer.append.object mongo.bson.buffer.append.oid mongo.bson.buffer.append.raw mongo.bson.buffer.append.regex mongo.bson.buffer.append.string mongo.bson.buffer.append.symbol mongo.bson.buffer.append.time mongo.bson.buffer.append.timestamp mongo.bson.buffer.append.undefined mongo.bson.buffer.create mongo.bson.buffer.finish.object mongo.bson.buffer.size mongo.bson.buffer.start.array mongo.bson.buffer.start.object mongo.bson.code mongo.bson.code.w.scope mongo.bson.date mongo.bson.dbref mongo.bson.destroy mongo.bson.double mongo.bson.empty mongo.bson.eoo mongo.bson.find mongo.bson.from.buffer mongo.bson.from.list mongo.bson.int mongo.bson.iterator.create mongo.bson.iterator.key mongo.bson.iterator.next mongo.bson.iterator.type mongo.bson.iterator.value mongo.bson.long mongo.bson.null mongo.bson.object mongo.bson.oid mongo.bson.print mongo.bson.regex mongo.bson.size mongo.bson.string mongo.bson.symbol mongo.bson.timestamp mongo.bson.to.list mongo.bson.undefined mongo.bson.value mongo.code.create mongo.code.w.scope.create mongo.command mongo.count mongo.create mongo.cursor.destroy mongo.cursor.next mongo.cursor.value mongo.destroy mongo.disconnect mongo.distinct mongo.drop mongo.drop.database mongo.find mongo.find.await.data mongo.find.cursor.tailable mongo.find.exhaust mongo.find.no.cursor.timeout mongo.find.one mongo.find.oplog.replay mongo.find.partial.results mongo.find.slave.ok 
mongo.get.database.collections mongo.get.databases mongo.get.err mongo.get.hosts mongo.get.last.err mongo.get.prev.err mongo.get.primary mongo.get.server.err mongo.get.server.err.string mongo.get.socket mongo.get.timeout mongo.gridfile.destroy mongo.gridfile.get.chunk mongo.gridfile.get.chunk.count mongo.gridfile.get.chunks mongo.gridfile.get.chunk.size mongo.gridfile.get.content.type mongo.gridfile.get.descriptor mongo.gridfile.get.filename mongo.gridfile.get.length mongo.gridfile.get.md5 mongo.gridfile.get.Metadata mongo.gridfile.get.upload.date mongo.gridfile.pipe mongo.gridfile.read mongo.gridfile.seek mongo.gridfile.writer.create mongo.gridfile.writer.finish mongo.gridfile.writer.write mongo.gridfs.create mongo.gridfs.destroy mongo.gridfs.find mongo.gridfs.remove.file mongo.gridfs.store mongo.gridfs.store.file mongo.index.background mongo.index.create mongo.index.drop.dups mongo.index.sparse mongo.index.unique mongo.insert mongo.insert.batch mongo.is.connected mongo.is.master mongo.oid.create mongo.oid.from.string mongo.oid.print mongo.oid.time mongo.oid.to.string mongo.reconnect mongo.regex.create mongo.remove mongo.rename mongo.reset.err mongo.set.timeout mongo.shorthand mongo.simple.command mongo.symbol.create mongo.timestamp.create mongo.undefined.create mongo.update mongo.update.basic mongo.update.multi mongo.update.upsert3. rmongodb基本使用操作
首先,要安装rmongodb类库,加载类库。
然后,通过mongo.create()函数,建立与MongoDB Server的连接。如果是本地连接,mongo.create()不需要参数;下面例子使用远程连接,增加host参数配置IP地址。mongo<-mongo.create(host="192.168.1.11")
检查是否连接正常,mongo.is.connected()。这条语句在开发时会经常使用到。在用R语言建模时,如果对象或者函数使用错误,连接会被自动断开。由于MongoDB的异常机制,断开时不会有提示。大家要手动使用这条命令测试连接是否正常。
接下来,定义两个变量,db和ns。db是我们需要使用的数据库,ns是数据库+数据集。
下面我们创建一个Mongo对象。
{ "_id" : ObjectId("51663e14da2c51b1e8bc62eb"),"name" : "Echo","age" : 22,"gender" : "Male","score" : { "Mike" : 5,"Jimmy" : 3.5,"Ann" : 4 },"comments" : [ "a1","a2","a3" ] }然后,分别使用修改器$inc,$set,$push进行操作。
最后删除对象,并断开连接。
代码部分:
安装rmongodb
install.packages(rmongodb)加载类库
library(rmongodb)远程连接mongodb server
mongo<-mongo.create(host="192.168.1.11")查看是否连接正常
print(mongo.is.connected(mongo))定义db
db<-"foobar"定义db.collection
ns<-"foobar.blog"组织bson类型数据
score <- c(5,score) #数组类型 mongo.bson.buffer.start.array(buf,"a3") mongo.bson.buffer.finish.object(buf) b <- mongo.bson.from.buffer(buf)插入mongodb
单条显示插入的数据使用$set修改器,修改age=1使用$push修改器,给comments数组追加”Orange”数据使用简化修改语句,给对象重新赋值删除对象mongo.destroy(mongo)4. rmongodb测试案例
批量插入数据,使用修改器批量修改数据
3种修改器速度比较,$push最慢
$push > $set > $inc由于push是对数组操作,set是对任意值操作,inc是对数字操作,所以下面测试可能不太公平。测试结果仅供参考。
批量插入数据函数
batch_insert<-function(arr=1:10,ns){ library(stringr) mongo_insert<-function(x){ buf <- mongo.bson.buffer.create() mongo.bson.buffer.append(buf,str_c("Dave",x)) mongo.bson.buffer.append(buf,x) mongo.bson.buffer.start.array(buf,"comments") mongo.bson.buffer.append(buf,"a1") mongo.bson.buffer.append(buf,"a2") mongo.bson.buffer.append(buf,"a3") mongo.bson.buffer.finish.object(buf) return(mongo.bson.from.buffer(buf)) } mongo.insert.batch(mongo,lapply(arr,mongo_insert)) }batch_inc<-function(data,ns){ for(i in data){ buf <- mongo.bson.buffer.create() mongo.bson.buffer.append(buf,i)) criteria <- mongo.bson.from.buffer(buf) buf <- mongo.bson.buffer.create() mongo.bson.buffer.start.object(buf,"$inc") mongo.bson.buffer.append(buf,1L) mongo.bson.buffer.finish.object(buf) objNew <- mongo.bson.from.buffer(buf) mongo.update(mongo,criteria,objNew) } }batch_set<-function(data,"$set") mongo.bson.buffer.append(buf,79); font-size:13px; line-height:20px"> 批量修改,$push修改器函数batch_push<-function(data,"$push") mongo.bson.buffer.append(buf,"Orange") mongo.bson.buffer.finish.object(buf) objNew <- mongo.bson.from.buffer(buf) mongo.update(mongo,79); font-size:13px; line-height:20px"> 执行程序,3种修改速度比较,$push最慢ns="foobar.blog" data=1:1000 mongo.remove(mongo,ns) ## [1] TRUE system.time(batch_insert(data,ns)) ## user system elapsed ## 0.25 0.00 0.28 system.time(batch_inc(data,ns)) ## user system elapsed ## 0.47 0.27 2.50 system.time(batch_set(data,ns)) ## user system elapsed ## 0.77 0.48 3.17 system.time(batch_push(data,ns)) ## user system elapsed ## 0.81 0.41 4.23