11、删除日志文件(Deleting The Rollback Journal)
一旦更改写入设备,日志文件将会被删除,这是事务真正提交的时刻。如果在这之前系统发生崩溃,就会进行恢复处理,使得数据库和没发生改变一样;如果在这之后系统发生崩溃,表明所有的更改都已经写入磁盘。sqlite就是根据日志存在情况决定是否对数据库进行恢复处理。
删除文件本质上并不是一个原子操作,但从用户进程的角度来看它是原子的:进程向操作系统询问"这个文件是否存在"时,得到的回答只可能是"存在"或"不存在"。因此,以删除日志文件作为提交点的事务看起来也是一个原子操作。
在许多系统中,删除文件也是一个高代价的操作。作为优化,sqlite可以配置成把日志文件的长度截为0或者把日志文件头清零。
12、释放锁(Releasing The Lock)
作为原子提交的最后一步,释放排斥锁使得其它进程可以开始访问数据库了。
下图中,我们假定锁被释放时用户空间中保存的信息已经被清空。对于老版本的sqlite确实如此;但较新版本的sqlite会保留用户空间的缓存而不清空——万一下一个事务开始的时候,这些数据刚好可以用上呢。重新利用这些内存中的数据,要比再次从操作系统磁盘缓存或者硬盘中读取快得多。不过在再次使用这些数据之前,必须先取得一个共享锁,并且检查在我们没有持有锁的这段时间里,是否有其他进程修改过数据库文件。数据库文件的第一页中有一个计数器,数据库文件每被修改一次,这个计数器就会递增一次,因此通过检查这个计数器就可以得知是否有其他进程修改过数据库文件。如果数据库文件已经被修改过,用户空间的缓存就必须清空并重新读入;但大多数情况下并不会发生修改,用户空间的缓存仍然有效,这对性能的提升是显著的。
以上两步是在btree.c中的sqlite3BtreeCommit()函数里实现的。
代码如下:
/*
** Commit the transaction currently open on Btree handle p.  This is the
** point at which a transaction is finished.  Two things happen:
**
**   1. If p holds a write-transaction, the pager is asked to commit it
**      (sqlite3pager_commit()).
**   2. The table locks held by p are released and the shared btree's
**      transaction count is decremented.
**
** Returns SQLITE_OK on success.  If the pager-level commit fails, its
** error code is returned immediately and the transaction state of the
** handle is left untouched.
*/
int sqlite3BtreeCommit(Btree *p){
  BtShared *pBt = p->pBt;

  btreeIntegrity(p);

  /* If the handle has a write-transaction open, commit the shared-btrees
  ** transaction and set the shared state to TRANS_READ.
  */
  if( p->inTrans==TRANS_WRITE ){
    int rc;
    assert( pBt->inTransaction==TRANS_WRITE );
    assert( pBt->nTransaction>0 );
    /* Hand the commit over to the pager layer. */
    rc = sqlite3pager_commit(pBt->pPager);
    if( rc!=SQLITE_OK ){
      return rc;
    }
    pBt->inTransaction = TRANS_READ;
    pBt->inStmt = 0;
  }
  unlockAllTables(p);

  /* If the handle has any kind of transaction open, decrement the transaction
  ** count of the shared btree. If the transaction count reaches 0, set
  ** the shared state to TRANS_NONE. The unlockBtreeIfUnused() call below
  ** will unlock the pager.
  */
  if( p->inTrans!=TRANS_NONE ){
    pBt->nTransaction--;
    if( 0==pBt->nTransaction ){
      pBt->inTransaction = TRANS_NONE;
    }
  }

  /* Set the handle's current transaction state to TRANS_NONE and unlock
  ** the pager if this call closed the only read or write transaction.
  ** Without this the function would fall off its end with no return value
  ** and the handle would still appear to be inside a transaction.
  */
  p->inTrans = TRANS_NONE;
  unlockBtreeIfUnused(pBt);

  btreeIntegrity(p);
  return SQLITE_OK;
}
/*
** Commit all changes made through pPager.  For a file-backed database the
** real work is done by pager_unwritelock().
**
** Return value:
**   - pPager->errCode if the pager already carries an error code;
**   - SQLITE_ERROR if the pager state is below PAGER_RESERVED;
**   - otherwise the result of sqlite3pager_sync() / pager_unwritelock(),
**     or SQLITE_OK on the MEMDB path.
*/
int sqlite3pager_commit(Pager *pPager){
  int rc;
  PgHdr *pPg;

  if( pPager->errCode ){
    return pPager->errCode;
  }
  if( pPager->state<PAGER_RESERVED ){
    return SQLITE_ERROR;
  }
  TRACE2("COMMIT %d\n", PAGERID(pPager));

  /* MEMDB path: no journal file or file lock is touched here.  Only the
  ** per-page bookkeeping is cleared and the pager drops to PAGER_SHARED.
  */
  if( MEMDB ){
    pPg = pager_get_all_dirty_pages(pPager);
    while( pPg ){
      clearHistory(PGHDR_TO_HIST(pPg, pPager));
      pPg->dirty = 0;
      pPg->inJournal = 0;
      pPg->inStmt = 0;
      pPg->needSync = 0;
      pPg->pPrevStmt = pPg->pNextStmt = 0;
      pPg = pPg->pDirty;
    }
    pPager->pDirty = 0;
#ifndef NDEBUG
    /* Debug builds verify that no page still carries history. */
    for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
      PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
      assert( !pPg->alwaysRollback );
      assert( !pHist->pOrig );
      assert( !pHist->pStmt );
    }
#endif
    pPager->pStmt = 0;
    pPager->state = PAGER_SHARED;
    return SQLITE_OK;
  }

  if( pPager->dirtyCache==0 ){
    /* Exit early (without doing the time-consuming sqlite3OsSync() calls)
    ** if there have been no changes to the database file. */
    assert( pPager->needSync==0 );
    rc = pager_unwritelock(pPager);
    pPager->dbSize = -1;
    return rc;
  }

  assert( pPager->journalOpen );
  rc = sqlite3pager_sync(pPager, 0, 0);

  /* Only once the sync has succeeded is the journal deleted and the write
  ** lock released. */
  if( rc==SQLITE_OK ){
    rc = pager_unwritelock(pPager);
    pPager->dbSize = -1;
  }
  return rc;
}
/*
** Finish a write transaction on pPager: close and delete the rollback
** journal (if one is open), clear the per-page journal/dirty flags, and
** downgrade the lock on the database file to SHARED_LOCK.
**
** Must not be called when MEMDB is true.  If the pager state is below
** PAGER_RESERVED there is nothing to release and SQLITE_OK is returned
** at once.  Otherwise the return value is the result of
** sqlite3OsUnlock().
*/
static int pager_unwritelock(Pager *pPager){
  PgHdr *pPg;
  int rc;

  assert( !MEMDB );
  if( pPager->state<PAGER_RESERVED ){
    return SQLITE_OK;
  }

  /* Finish any open statement transaction and close its file. */
  sqlite3pager_stmt_commit(pPager);
  if( pPager->stmtOpen ){
    sqlite3OsClose(&pPager->stfd);
    pPager->stmtOpen = 0;
  }

  if( pPager->journalOpen ){
    /* Close the journal file... */
    sqlite3OsClose(&pPager->jfd);
    pPager->journalOpen = 0;
    /* ...and delete it. */
    sqlite3OsDelete(pPager->zJournal);
    sqliteFree(pPager->aInJournal);
    pPager->aInJournal = 0;

    /* Clear the journal/dirty/sync flags on every page the pager holds. */
    for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
      pPg->inJournal = 0;
      pPg->dirty = 0;
      pPg->needSync = 0;
#ifdef SQLITE_CHECK_PAGES
      pPg->pageHash = pager_pagehash(pPg);
#endif
    }
    pPager->pDirty = 0;
    pPager->dirtyCache = 0;
    pPager->nRec = 0;
  }else{
    assert( pPager->aInJournal==0 );
    assert( pPager->dirtyCache==0 || pPager->useJournal==0 );
  }

  /* Release the write lock, keeping a shared (read) lock. */
  rc = sqlite3OsUnlock(pPager->fd, SHARED_LOCK);
  pPager->state = PAGER_SHARED;
  pPager->origDbSize = 0;
  pPager->setMaster = 0;
  pPager->needSync = 0;
  pPager->pFirstSynced = pPager->pFirst;
  return rc;
}
下图可进一步描述该过程:
最后来看看sqlite3BtreeSync()和sqlite3BtreeCommit()是如何被调用的。
一般来说,如果事务处于自动提交模式,则由虚拟机的OP_Halt指令完成事务的提交,相关代码如下:
//虚拟机停机指令
caseOP_Halt:{/*no-push*/
p->pTos=pTos;
p->rc=pOp->p1;
p->pc=pc;
p->errorAction=pOp->p2;
if(pOp->p3){
sqlite3SetString(&p->zErrMsg,pOp->p3,(char*)0);
}
//设置虚拟机状态sqlITE_MAGIC_RUN为sqlITE_MAGIC_HALT,
//并提交事务
rc=sqlite3VdbeHalt(p);
assert(rc==sqlITE_BUSY||rc==sqlITE_OK);
if(rc==sqlITE_BUSY){
p->rc=sqlITE_BUSY;
returnsqlITE_BUSY;
}
returnp->rc?sqlITE_ERROR:sqlITE_DONE;
}
/*
** This routine is called when a virtual machine is about to halt.  If the
** VDBE has changed the database and the connection is in auto-commit
** mode, those changes are committed here.
**
** Returns SQLITE_OK in every case except one: if the commit attempt
** reports SQLITE_BUSY, SQLITE_BUSY is returned and p->magic is NOT
** changed to VDBE_MAGIC_HALT.  Any other outcome is recorded in p->rc
** rather than in the return value.
*/
int sqlite3VdbeHalt(Vdbe *p){
  sqlite3 *db = p->db;
  int i;
  int (*xFunc)(Btree *pBt) = 0;  /* Function to call on each btree backend */
  int isSpecialError;            /* Set to true if SQLITE_NOMEM or IOERR */

  /* This function contains the logic that determines if a statement or
  ** transaction will be committed or rolled back as a result of the
  ** execution of this virtual machine.
  **
  ** Special errors:
  **
  **     If an SQLITE_NOMEM error has occurred in a statement that writes to
  **     the database, then either a statement or transaction must be rolled
  **     back to ensure the tree-structures are in a consistent state. A
  **     statement transaction is rolled back if one is open, otherwise the
  **     entire transaction must be rolled back.
  **
  **     If an SQLITE_IOERR error has occurred in a statement that writes to
  **     the database, then the entire transaction must be rolled back. The
  **     I/O error may have caused garbage to be written to the journal
  **     file. Were the transaction to continue and eventually be rolled
  **     back that garbage might end up in the database file.
  **
  **     In both of the above cases, the Vdbe.errorAction variable is
  **     ignored. If the sqlite3.autoCommit flag is false and a transaction
  **     is rolled back, it will be set to true.
  **
  ** Other errors:
  **
  ** No error:
  **
  */

  if( sqlite3MallocFailed() ){
    p->rc = SQLITE_NOMEM;
  }

  if( p->magic!=VDBE_MAGIC_RUN ){
    /* Already halted.  Nothing to do. */
    assert( p->magic==VDBE_MAGIC_HALT );
    return SQLITE_OK;
  }

  /* Release every cursor held by the virtual machine. */
  closeAllCursors(p);
  checkActiveVdbeCnt(db);

  /* No commit or rollback needed if the program never started */
  if( p->pc>=0 ){

    /* Check for one of the special errors - SQLITE_NOMEM or SQLITE_IOERR */
    isSpecialError = ((p->rc==SQLITE_NOMEM || p->rc==SQLITE_IOERR)?1:0);
    if( isSpecialError ){
      /* This loop does static analysis of the query to see which of the
      ** following three categories it falls into:
      **
      **     Read-only
      **     Query with statement journal
      **     Query without statement journal
      **
      ** We could do something more elegant than this static analysis (i.e.
      ** store the type of query as part of the compilation phase), but
      ** handling malloc() or IO failure is a fairly obscure edge case so
      ** this is probably easier. Todo: Might be an opportunity to reduce
      ** code size a very small amount though...
      */
      int isReadOnly = 1;
      int isStatement = 0;
      assert( p->aOp || p->nOp==0 );
      for(i=0; i<p->nOp; i++){
        switch( p->aOp[i].opcode ){
          case OP_Transaction:
            isReadOnly = 0;
            break;
          case OP_Statement:
            isStatement = 1;
            break;
          default:
            break;
        }
      }

      /* If the query was read-only, we need do no rollback at all. Otherwise,
      ** proceed with the special handling.
      */
      if( !isReadOnly ){
        if( p->rc==SQLITE_NOMEM && isStatement ){
          xFunc = sqlite3BtreeRollbackStmt;
        }else{
          /* We are forced to roll back the active transaction. Before doing
          ** so, abort any other statements this handle currently has active.
          */
          sqlite3AbortOtherActiveVdbes(db, p);
          sqlite3RollbackAll(db);
          db->autoCommit = 1;
        }
      }
    }

    /* If the auto-commit flag is set and this is the only active vdbe, then
    ** we do either a commit or rollback of the current transaction.
    **
    ** Note: This block also runs if one of the special errors handled
    ** above has occurred.
    */
    if( db->autoCommit && db->activeVdbeCnt==1 ){
      if( p->rc==SQLITE_OK || (p->errorAction==OE_Fail && !isSpecialError) ){
        /* The auto-commit flag is true, and the vdbe program was
        ** successful or hit an 'OR FAIL' constraint. This means a commit
        ** is required.
        */
        int rc = vdbeCommit(db);
        if( rc==SQLITE_BUSY ){
          return SQLITE_BUSY;
        }else if( rc!=SQLITE_OK ){
          p->rc = rc;
          sqlite3RollbackAll(db);
        }else{
          sqlite3CommitInternalChanges(db);
        }
      }else{
        sqlite3RollbackAll(db);
      }
    }else if( !xFunc ){
      if( p->rc==SQLITE_OK || p->errorAction==OE_Fail ){
        xFunc = sqlite3BtreeCommitStmt;
      }else if( p->errorAction==OE_Abort ){
        xFunc = sqlite3BtreeRollbackStmt;
      }else{
        sqlite3AbortOtherActiveVdbes(db, p);
        sqlite3RollbackAll(db);
        db->autoCommit = 1;
      }
    }

    /* If xFunc is not NULL, then it is one of sqlite3BtreeRollbackStmt or
    ** sqlite3BtreeCommitStmt. Call it once on each backend. If an error occurs
    ** and the return code is still SQLITE_OK, set the return code to the new
    ** error value.
    */
    assert(!xFunc ||
      xFunc==sqlite3BtreeCommitStmt ||
      xFunc==sqlite3BtreeRollbackStmt
    );
    for(i=0; xFunc && i<db->nDb; i++){
      int rc;
      Btree *pBt = db->aDb[i].pBt;
      if( pBt ){
        rc = xFunc(pBt);
        if( rc && (p->rc==SQLITE_OK || p->rc==SQLITE_CONSTRAINT) ){
          p->rc = rc;
          /* sqlite3SetString() takes a variable argument list ended by a
          ** null pointer; pass a real (char*)0 rather than an int 0. */
          sqlite3SetString(&p->zErrMsg, (char*)0);
        }
      }
    }

    /* If this was an INSERT, UPDATE or DELETE and the statement was committed,
    ** set the change counter.
    */
    if( p->changeCntOn && p->pc>=0 ){
      if( !xFunc || xFunc==sqlite3BtreeCommitStmt ){
        sqlite3VdbeSetChanges(db, p->nChange);
      }else{
        sqlite3VdbeSetChanges(db, 0);
      }
      p->nChange = 0;
    }

    /* Rollback or commit any schema changes that occurred. */
    if( p->rc!=SQLITE_OK && db->flags&SQLITE_InternChanges ){
      sqlite3ResetInternalSchema(db, 0);
      db->flags = (db->flags | SQLITE_InternChanges);
    }
  }

  /* We have successfully halted and closed the VM.  Record this fact. */
  if( p->pc>=0 ){
    db->activeVdbeCnt--;
  }
  p->magic = VDBE_MAGIC_HALT;
  checkActiveVdbeCnt(db);

  return SQLITE_OK;
}
/*
** Commit the transaction open on database connection db.  The work is
** done mainly by two calls on each attached btree:
**
**   sqlite3BtreeSync()    -- sync the btree
**   sqlite3BtreeCommit()  -- commit the transaction
**
** When more than one database file (not counting TEMP, index 1) has a
** write-transaction open, a master journal is used to tie them together.
**
** Returns SQLITE_OK on success, SQLITE_CONSTRAINT if the commit hook
** returned non-zero, SQLITE_NOMEM if the master journal name could not
** be allocated, or the error code of the I/O or sync step that failed.
*/
static int vdbeCommit(sqlite3 *db){
  int i;
  int nTrans = 0;  /* Number of databases with an active write-transaction */
  int rc = SQLITE_OK;
  int needXcommit = 0;

  for(i=0; i<db->nDb; i++){
    Btree *pBt = db->aDb[i].pBt;
    if( pBt && sqlite3BtreeIsInTrans(pBt) ){
      needXcommit = 1;
      if( i!=1 ){
        nTrans++;
      }
    }
  }

  /* If there are any write-transactions at all, invoke the commit hook */
  if( needXcommit && db->xCommitCallback ){
    sqlite3SafetyOff(db);
    rc = db->xCommitCallback(db->pCommitArg);
    sqlite3SafetyOn(db);
    if( rc ){
      return SQLITE_CONSTRAINT;
    }
  }

  /* The simple case - no more than one database file (not counting the
  ** TEMP database) has a transaction active.   There is no need for the
  ** master-journal.
  **
  ** If the return value of sqlite3BtreeGetFilename() is a zero length
  ** string, it means the main database is :memory:.  In that case we do
  ** not support atomic multi-file commits, so use the simple case then
  ** too.
  */
  if( 0==strlen(sqlite3BtreeGetFilename(db->aDb[0].pBt)) || nTrans<=1 ){
    for(i=0; rc==SQLITE_OK && i<db->nDb; i++){
      Btree *pBt = db->aDb[i].pBt;
      if( pBt ){
        /* Sync the btree. */
        rc = sqlite3BtreeSync(pBt, 0);
      }
    }

    /* Do the commit only if all databases successfully synced */
    if( rc==SQLITE_OK ){
      for(i=0; i<db->nDb; i++){
        Btree *pBt = db->aDb[i].pBt;
        if( pBt ){
          sqlite3BtreeCommit(pBt);
        }
      }
    }
  }

  /* The complex case - There is a multi-file write-transaction active.
  ** This requires a master journal file to ensure the transaction is
  ** committed atomically.
  */
#ifndef SQLITE_OMIT_DISKIO
  else{
    int needSync = 0;
    char *zMaster = 0;   /* File-name for the master journal */
    char const *zMainFile = sqlite3BtreeGetFilename(db->aDb[0].pBt);
    OsFile *master = 0;

    /* Select a master journal file name */
    do {
      u32 random;
      sqliteFree(zMaster);
      sqlite3Randomness(sizeof(random), &random);
      zMaster = sqlite3MPrintf("%s-mj%08X", zMainFile, random&0x7fffffff);
      if( !zMaster ){
        return SQLITE_NOMEM;
      }
    }while( sqlite3OsFileExists(zMaster) );

    /* Open the master journal. */
    rc = sqlite3OsOpenExclusive(zMaster, &master, 0);
    if( rc!=SQLITE_OK ){
      sqliteFree(zMaster);
      return rc;
    }

    /* Write the name of each database file in the transaction into the new
    ** master journal file. If an error occurs at this point close
    ** and delete the master journal file. All the individual journal files
    ** still have 'null' as the master journal pointer, so they will roll
    ** back independently if a failure occurs.
    */
    for(i=0; i<db->nDb; i++){
      Btree *pBt = db->aDb[i].pBt;
      if( i==1 ) continue;   /* Ignore the TEMP database */
      if( pBt && sqlite3BtreeIsInTrans(pBt) ){
        char const *zFile = sqlite3BtreeGetJournalname(pBt);
        if( zFile[0]==0 ) continue;  /* Ignore :memory: databases */
        if( !needSync && !sqlite3BtreeSyncDisabled(pBt) ){
          needSync = 1;
        }
        rc = sqlite3OsWrite(master, zFile, strlen(zFile)+1);
        if( rc!=SQLITE_OK ){
          sqlite3OsClose(&master);
          sqlite3OsDelete(zMaster);
          sqliteFree(zMaster);
          return rc;
        }
      }
    }

    /* Sync the master journal file. Before doing this, open the directory
    ** the master journal file is stored in so that it gets synced too.
    */
    zMainFile = sqlite3BtreeGetDirname(db->aDb[0].pBt);
    rc = sqlite3OsOpenDirectory(master, zMainFile);
    if( rc!=SQLITE_OK ||
          (needSync && (rc=sqlite3OsSync(master,0))!=SQLITE_OK) ){
      sqlite3OsClose(&master);
      sqlite3OsDelete(zMaster);
      sqliteFree(zMaster);
      return rc;
    }

    /* Sync all the db files involved in the transaction. The same call
    ** sets the master journal pointer in each individual journal. If
    ** an error occurs here, do not delete the master journal file.
    **
    ** If the error occurs during the first call to sqlite3BtreeSync(),
    ** then there is a chance that the master journal file will be
    ** orphaned. But we cannot delete it, in case the master journal
    ** file name was written into the journal file before the failure
    ** occurred.
    */
    for(i=0; i<db->nDb; i++){
      Btree *pBt = db->aDb[i].pBt;
      if( pBt && sqlite3BtreeIsInTrans(pBt) ){
        rc = sqlite3BtreeSync(pBt, zMaster);
        if( rc!=SQLITE_OK ){
          sqlite3OsClose(&master);
          sqliteFree(zMaster);
          return rc;
        }
      }
    }
    sqlite3OsClose(&master);

    /* Delete the master journal file. This commits the transaction. After
    ** doing this the directory is synced again before any individual
    ** transaction files are deleted.
    **
    ** If the delete fails the transaction has NOT been committed, so the
    ** error is reported to the caller instead of being checked only by an
    ** assert() (which vanishes under NDEBUG) and then overwritten.
    */
    rc = sqlite3OsDelete(zMaster);
    sqliteFree(zMaster);
    zMaster = 0;
    if( rc!=SQLITE_OK ){
      return rc;
    }
    rc = sqlite3OsSyncDirectory(zMainFile);
    if( rc!=SQLITE_OK ){
      /* This is not good. The master journal file has been deleted, but
      ** the directory sync failed. There is no completely safe course of
      ** action from here. The individual journals contain the name of the
      ** master journal file, but there is no way of knowing if that
      ** master journal exists now or if it will exist after the operating
      ** system crash that may follow the fsync() failure.
      */
      return rc;
    }

    /* All files and directories have already been synced, so the following
    ** calls to sqlite3BtreeCommit() are only closing files and deleting
    ** journals. If something goes wrong while this is happening we don't
    ** really care. The integrity of the transaction is already guaranteed,
    ** but some stray 'cold' journals may be lying around. Returning an
    ** error code won't help matters.
    */
    for(i=0; i<db->nDb; i++){
      Btree *pBt = db->aDb[i].pBt;
      if( pBt ){
        sqlite3BtreeCommit(pBt);
      }
    }
  }
#endif

  return rc;
}