写在前面:由于内容较多,所以在此继续没有写完的内容。
11、删除日志文件(Deleting The Rollback Journal)
一旦更改写入设备,日志文件将会被删除,这是事务真正提交的时刻。如果在这之前系统发生崩溃,就会进行恢复处理,使得数据库和没发生改变一样;如果在这之后系统发生崩溃,表明所有的更改都已经写入磁盘。SQLite就是根据日志文件是否存在来决定是否对数据库进行恢复处理。
删除文件本质上不是一个原子操作,但是从用户进程的角度来看是一个原子操作,所以一个事务看起来是一个原子操作。
在许多系统中,删除文件也是一个高代价的操作。作为优化,SQLite可以配置成把日志文件的长度截为0,或者把日志文件头清零。

12、释放锁(Releasing The Lock)
作为原子提交的最后一步,释放排斥锁使得其它进程可以开始访问数据库了。
下图中,我们指明了当锁被释放的时候用户空间所拥有的信息已经被清空了。对于老版本的SQLite,你可以这么认为。但较新版本的SQLite会保留用户空间的缓存而不清空——万一下一个事务开始的时候,这些数据刚好可以用上呢。重新利用这些内存要比再次从操作系统磁盘缓存或者硬盘中读取来得轻松、快捷得多,何乐而不为呢?在再次使用这些数据之前,我们必须先取得一个共享锁,同时还必须检查一下,确认在我们取得共享锁之前没有其他进程对数据库文件进行过修改。数据库文件的第一页中有一个计数器,数据库文件每做一次修改,这个计数器就会加一。我们通过检查这个计数器就可以得知是否有其他进程修改过数据库文件。如果数据库文件已经被修改过了,那么用户空间的缓存就必须清空,并重新读入。大多数情况下,这种情况不大会发生,因此用户空间的缓存仍然有效,这对性能的提升是显著的。
以上两步是在 btree.c 中的 sqlite3BtreeCommit() 函数里实现的。

//提交事务,至此一个事务完成.主要做两件事:
删除日志文件,释放数据库文件的写锁
intsqlite3BtreeCommit(Btree*p){
BtShared*pBt=p->pBt;
btreeIntegrity(p);
/*Ifthehandlehasawrite-transactionopen,committheshared-btrees
**transactionandsetthesharedstatetoTRANS_READ.
*/
if(p->inTrans==TRANS_WRITE){
intrc;
assert(pBt->inTransaction==TRANS_WRITE);
assert(pBt->nTransaction>0);
调用pager,提交事务
rc=sqlite3pager_commit(pBt->pPager);
if(rc!=sqlITE_OK){
returnrc;
}
pBt->inTransaction=TRANS_READ;
pBt->inStmt=0;
}
unlockAllTables(p);
Ifthehandlehasanykindoftransactionopen,decrementthetransaction
**countofthesharedbtree.Ifthetransactioncountreaches0,set
**thesharedstatetoTRANS_NONE.TheunlockBtreeIfUnused()callbelow
**willunlockthepager.
if(p->inTrans!=TRANS_NONE){
pBt->nTransaction--;
if(0==pBt->nTransaction){
pBt->inTransaction=TRANS_NONE;
}
}
}
提交事务,主要调用pager_unwritelock()函数intsqlite3pager_commit(Pager*pPager){
intrc;
PgHdr*pPg;
if(pPager->errCode){
returnpPager->errCode;
}
if(pPager->state<PAGER_RESERVED){
returnsqlITE_ERROR;
}
TRACE2("COMMIT%d\n",PAGERID(pPager));
if(MEMDB){
pPg=pager_get_all_dirty_pages(pPager);
while(pPg){
clearHistory(PGHDR_TO_HIST(pPg,pPager));
pPg->dirty=0;
pPg->inJournal=0;
pPg->inStmt=0;
pPg->needSync=0;
pPg->pPrevStmt=pPg->pNextStmt=0;
pPg=pPg->pDirty;
}
pPager->pDirty=0;
#ifndefNDEBUG
for(pPg=pPager->pAll;pPg;pPg=pPg->pNextAll){
PgHistory*pHist=PGHDR_TO_HIST(pPg,pPager);
assert(!pPg->alwaysRollback);
assert(!pHist->pOrig);
assert(!pHist->pStmt);
}
#endif
pPager->pStmt=0;
pPager->state=PAGER_SHARED;
returnsqlITE_OK;
}
if(pPager->dirtyCache==0){
Exitearly(withoutdoingthetime-consumingsqlite3OsSync()calls)
**iftherehavebeennochangestothedatabasefile.*/
assert(pPager->needSync==0);
rc=pager_unwritelock(pPager);
pPager->dbSize=-1;
returnrc;
}
assert(pPager->journalOpen);
rc=sqlite3pager_sync(pPager,0,0)">删除文件,释放写锁
if(rc==sqlITE_OK){
rc=pager_unwritelock(pPager);
pPager->dbSize=-1;
}
returnrc;
}
/*
** Downgrade the lock on the database file to a shared (read) lock and
** delete the rollback journal.
**
** Any open statement journal is committed and closed first.  If a rollback
** journal is open it is closed and deleted, the in-journal bitmap is
** freed, and the per-page journal/dirty flags are cleared.  Finally the
** file lock is dropped to SHARED_LOCK and the pager state is reset to
** PAGER_SHARED.
**
** Must not be called for an in-memory database (asserted).
** Returns the result of sqlite3OsUnlock(), or SQLITE_OK immediately when
** the pager holds less than a RESERVED lock.
*/
static int pager_unwritelock(Pager *pPager){
  PgHdr *pPg;
  int rc;
  assert( !MEMDB );
  /* NOTE(review): the listing showed only an orphaned "return SQLITE_OK; }"
  ** here; the guarding condition was lost in extraction and is restored
  ** from the upstream pager.c -- confirm against the original source. */
  if( pPager->state<PAGER_RESERVED ){
    return SQLITE_OK;
  }
  sqlite3pager_stmt_commit(pPager);
  if( pPager->stmtOpen ){
    sqlite3OsClose(&pPager->stfd);
    pPager->stmtOpen = 0;
  }
  if( pPager->journalOpen ){
    /* Close the journal file ... */
    sqlite3OsClose(&pPager->jfd);
    pPager->journalOpen = 0;
    /* ... and delete it. */
    sqlite3OsDelete(pPager->zJournal);
    sqliteFree( pPager->aInJournal );
    pPager->aInJournal = 0;
    for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
      pPg->inJournal = 0;
      pPg->dirty = 0;
#ifdef SQLITE_CHECK_PAGES
      pPg->pageHash = pager_pagehash(pPg);
#endif
    }
    pPager->pDirty = 0;
    pPager->dirtyCache = 0;
    pPager->nRec = 0;
  }else{
    assert( pPager->aInJournal==0 );
    assert( pPager->dirtyCache==0 || pPager->useJournal==0 );
  }
  /* Release the write lock, keeping a shared (read) lock. */
  rc = sqlite3OsUnlock(pPager->fd, SHARED_LOCK);
  pPager->state = PAGER_SHARED;
  pPager->origDbSize = 0;
  pPager->setMaster = 0;
  pPager->needSync = 0;
  pPager->pFirstSynced = pPager->pFirst;
  return rc;
}
下图可进一步描述该过程:
最后来看看sqlite3BtreeSync()和sqlite3BtreeCommit()是如何被调用的。
一般来说,如果事务提交方式为自动提交,则由虚拟机中的OP_Halt指令来提交事务,相关代码如下:
虚拟机停机指令caseOP_Halt:{no-push*/
p->pTos=pTos;
p->rc=pOp->p1;
p->pc=pc;
p->errorAction=pOp->p2;
if(pOp->p3){
sqlite3SetString(&p->zErrMsg,pOp->p3,(char*)0);
}
设置虚拟机状态sqlITE_MAGIC_RUN为sqlITE_MAGIC_HALT,
并提交事务
rc=sqlite3VdbeHalt(p);
assert(rc==sqlITE_BUSY||rc==sqlITE_OK);
if(rc==sqlITE_BUSY){
p->rc=sqlITE_BUSY;
returnsqlITE_BUSY;
}
returnp->rc?sqlITE_ERROR:sqlITE_DONE;
}
当虚拟机要停机时,调用该函数,如果VDBE改变了数据库且为自动
提交模式,则提交这些改变intsqlite3VdbeHalt(Vdbe*p){
sqlite3*db=p->db;
inti;
int(*xFunc)(Btree*pBt)=0;FunctiontocalloneachbtreebackendintisSpecialError;SettotrueifsqlITE_NOMEMorIOERR*/
Thisfunctioncontainsthelogicthatdeterminesifastatementor
**transactionwillbecommittedorrolledbackasaresultofthe
**executionofthisvirtualmachine.
**
**Specialerrors:
**
**IfansqlITE_NOMEMerrorhasoccuredinastatementthatwritesto
**thedatabase,theneitherastatementortransactionmustberolled
**backtoensurethetree-structuresareinaconsistentstate.A
**statementtransactionisrolledbackifoneisopen,otherwisethe
**entiretransactionmustberolledback.
**
**IfansqlITE_IOERRerrorhasoccuredinastatementthatwritesto
**thedatabase,thentheentiretransactionmustberolledback.The
**I/Oerrormayhavecausedgarbagetobewrittentothejournal
**file.Werethetransactiontocontinueandeventuallyberolled
**backthatgarbagemightendupinthedatabasefile.
**
**Inbothoftheabovecases,theVdbe.errorActionvariableis
**ignored.Ifthesqlite3.autoCommitflagisfalseandatransaction
**isrolledback,itwillbesettotrue.
**
**Othererrors:
**
**Noerror:
**
if(sqlite3MallocFailed()){
p->rc=sqlITE_NOMEM;
}
if(p->magic!=VDBE_MAGIC_RUN){
Alreadyhalted.Nothingtodo.*/
assert(p->magic==VDBE_MAGIC_HALT);
释放虚拟机中所有的游标
closeAllCursors(p);
checkActiveVdbeCnt(db);
Nocommitorrollbackneedediftheprogramneverstartedif(p->pc>=0){
Checkforoneofthespecialerrors-sqlITE_NOMEMorsqlITE_IOERR*/
isSpecialError=((p->rc==sqlITE_NOMEM||p->rc==sqlITE_IOERR)?1:0);
if(isSpecialError){
Thisloopdoesstaticanalysisofthequerytoseewhichofthe
**followingthreecategoriesitfallsinto:
**
**Read-only
**Querywithstatementjournal
**Querywithoutstatementjournal
**
**Wecoulddosomethingmoreelegantthanthisstaticanalysis(i.e.
**storethetypeofqueryaspartofthecompliationphase),but
**handlingmalloc()orIOfailureisafairlyobscureedgecaseso
**thisisprobablyeasier.Todo:Mightbeanopportunitytoreduce
**codesizeaverysmallamountthough
intisReadOnly=intisStatement=0;
assert(p->aOp||p->nOp==for(i=0;i<p->nOp;i++){
switch(p->aOp[i].opcode){
caseOP_Transaction:
isReadOnly=break;
caseOP_Statement:
isStatement=break;
}
}
Ifthequerywasread-only,weneeddonorollbackatall.Otherwise,
**proceedwiththespecialhandling.
if(!isReadOnly){
if(p->rc==sqlITE_NOMEM&&isStatement){
xFunc=sqlite3BtreeRollbackStmt;
}else{
Weareforcedtorollbacktheactivetransaction.Beforedoing
**so,abortanyotherstatementsthishandlecurrentlyhasactive.
*/
sqlite3AbortOtherActiveVdbes(db,p);
sqlite3RollbackAll(db);
db->autoCommit=1;
}
}
}
Iftheauto-commitflagissetandthisistheonlyactivevdbe,then
**wedoeitheracommitorrollbackofthecurrenttransaction.
**
**Note:Thisblockalsorunsifoneofthespecialerrorshandled
**abovehasoccured.
如果自动提交事务,则提交事务if(db->autoCommit&&db->activeVdbeCnt==1){
if(p->rc==sqlITE_OK||(p->errorAction==OE_Fail&&!isSpecialError)){
Theauto-commitflagistrue,andthevdbeprogramwas
**successfulorhitan'ORFAIL'constraint.Thismeansacommit
**isrequired.
提交事务intrc=vdbeCommit(db);
if(rc==sqlITE_BUSY){
returnsqlITE_BUSY;
}elseif(rc!=sqlITE_OK){
p->rc=rc;
sqlite3RollbackAll(db);
}else{
sqlite3CommitInternalChanges(db);
}
}else{
sqlite3RollbackAll(db);
}
}if(!xFunc){
if(p->rc==sqlITE_OK||p->errorAction==OE_Fail){
xFunc=sqlite3BtreeCommitStmt;
}if(p->errorAction==OE_Abort){
xFunc=sqlite3BtreeRollbackStmt;
}else{
sqlite3AbortOtherActiveVdbes(db,128)">1;
}
}
IfxFuncisnotNULL,thenitisoneofsqlite3BtreeRollbackStmtor
**sqlite3BtreeCommitStmt.Callitonceoneachbackend.Ifanerroroccurs
**andthereturncodeisstillsqlITE_OK,setthereturncodetothenew
**errorvalue.
*/
assert(!xFunc||
xFunc==sqlite3BtreeCommitStmt||
xFunc==sqlite3BtreeRollbackStmt
);
0;xFunc&&i<db->nDb;i++){
intrc;
Btree*pBt=db->aDb[i].pBt;
if(pBt){
rc=xFunc(pBt);
if(rc&&(p->rc==sqlITE_OK||p->rc==sqlITE_CONSTRAINT)){
p->rc=rc;
sqlite3SetString(&p->zErrMsg,128)">0);
}
}
}
IfthiswasanINSERT,UPDATEorDELETEandthestatementwascommitted,
**setthechangecounter.
if(p->changeCntOn&&p->pc>=if(!xFunc||xFunc==sqlite3BtreeCommitStmt){
sqlite3VdbeSetChanges(db,p->nChange);
}else{
sqlite3VdbeSetChanges(db,128)">0);
}
p->nChange=0;
}
Rollbackorcommitanyschemachangesthatoccurred.if(p->rc!=sqlITE_OK&&db->flags&sqlITE_InternChanges){
sqlite3ResetInternalSchema(db,128)">0);
db->flags=(db->flags|sqlITE_InternChanges);
}
}
WehavesuccessfullyhaltedandclosedtheVM.Recordthisfact.0){
db->activeVdbeCnt--;
}
p->magic=VDBE_MAGIC_HALT;
checkActiveVdbeCnt(db);
returnsqlITE_OK;
}
调用:
sqlite3BtreeSync()---同步btree,sqlite3BtreeCommit()---提交事务intvdbeCommit(sqlite3*db){
intnTrans=Numberofdatabaseswithanactivewrite-transactionintrc=sqlITE_OK;
intneedXcommit=0;
0;i<db->nDb;i++){
Btree*pBt=db->aDb[i].pBt;
if(pBt&&sqlite3BtreeIsInTrans(pBt)){
needXcommit=if(i!=1)nTrans++;
}
}
Ifthereareanywrite-transactionsatall,invokethecommithookif(needXcommit&&db->xCommitCallback){
sqlite3SafetyOff(db);
rc=db->xCommitCallback(db->pCommitArg);
sqlite3SafetyOn(db);
if(rc){
returnsqlITE_CONSTRAINT;
}
}
Thesimplecase-nomorethanonedatabasefile(notcountingthe
**TEMPdatabase)hasatransactionactive.Thereisnoneedforthe
**master-journal.
**
**Ifthereturnvalueofsqlite3BtreeGetFilename()isazerolength
**string,itmeansthemaindatabaseis:memory:.Inthatcasewedo
**notsupportatomicmulti-filecommits,sousethesimplecasethen
**too.
简单的情况,只有一个数据库文件,不需要master-journal0==strlen(sqlite3BtreeGetFilename(db->aDb[0].pBt))||nTrans<=0;rc==sqlITE_OK&&i<db->nDb;i++){
Btree*pBt=db->aDb[i].pBt;
if(pBt){
同步btree
rc=sqlite3BtreeSync(pBt,128)">0);
}
}
Dothecommitonlyifalldatabasessuccessfullysyncedcommite事务if(rc==sqlITE_OK){
if(pBt){
sqlite3BtreeCommit(pBt);
}
}
}
}
Thecomplexcase-Thereisamulti-filewrite-transactionactive.
**Thisrequiresamasterjournalfiletoensurethetransactionis
**committedatomicly.
*/
#ifndefsqlITE_OMIT_DISKIO
intneedSync=char*zMaster=File-nameforthemasterjournalcharconst*zMainFile=sqlite3BtreeGetFilename(db->aDb[0].pBt);
OsFile*master=Selectamasterjournalfilenamedo{
u32random;
sqliteFree(zMaster);
sqlite3Randomness(sizeof(random),&random);
zMaster=sqlite3MPrintf(%s-mj%08X&0x7fffffff);
if(!zMaster){
returnsqlITE_NOMEM;
}
}while(sqlite3OsFileExists(zMaster));
Openthemasterjournal.*/
rc=sqlite3OsOpenExclusive(zMaster,&master,255)">if(rc!=sqlITE_OK){
sqliteFree(zMaster);
Writethenameofeachdatabasefileinthetransactionintothenew
**masterjournalfile.Ifanerroroccursatthispointclose
**anddeletethemasterjournalfile.Alltheindividualjournalfiles
**stillhave'null'asthemasterjournalpointer,sotheywillroll
**backindependentlyifafailureoccurs.
if(i==1)continue;IgnoretheTEMPdatabaseif(pBt&&sqlite3BtreeIsInTrans(pBt)){
const*zFile=sqlite3BtreeGetJournalname(pBt);
if(zFile[0]==0)Ignore:memory:databasesif(!needSync&&!sqlite3BtreeSyncDisabled(pBt)){
needSync=1;
}
rc=sqlite3OsWrite(master,zFile,strlen(zFile)+1);
if(rc!=sqlITE_OK){
sqlite3OsClose(&master);
sqlite3OsDelete(zMaster);
sqliteFree(zMaster);
returnrc;
}
}
}
Syncthemasterjournalfile.Beforedoingthis,openthedirectory
**themasterjournalfileisstoreinsothatitgetssyncedtoo.
*/
zMainFile=sqlite3BtreeGetDirname(db->aDb[0].pBt);
rc=sqlite3OsOpenDirectory(master,zMainFile);
if(rc!=sqlITE_OK||
(needSync&&(rc=sqlite3OsSync(master,128)">0))!=sqlITE_OK)){
sqlite3OsClose(&master);
sqlite3OsDelete(zMaster);
sqliteFree(zMaster);
Syncallthedbfilesinvolvedinthetransaction.Thesamecall
**setsthemasterjournalpointerineachindividualjournal.If
**anerroroccurshere,donotdeletethemasterjournalfile.
**
**Iftheerroroccursduringthefirstcalltosqlite3BtreeSync(),
**thenthereisachancethatthemasterjournalfilewillbe
**orphaned.Butwecannotdeleteit,incasethemasterjournal
**filenamewaswrittenintothejournalfilebeforethefailure
**occured.
if(pBt&&sqlite3BtreeIsInTrans(pBt)){
rc=sqlite3BtreeSync(pBt,zMaster);
if(rc!=sqlITE_OK){
sqlite3OsClose(&master);
sqliteFree(zMaster);
returnrc;
}
}
}
sqlite3OsClose(&master);
Deletethemasterjournalfile.Thiscommitsthetransaction.After
**doingthisthedirectoryissyncedagainbeforeanyindividual
**transactionfilesaredeleted.
*/
rc=sqlite3OsDelete(zMaster);
assert(rc==sqlITE_OK);
sqliteFree(zMaster);
zMaster=0;
rc=sqlite3OsSyncDirectory(zMainFile);
Thisisnotgood.Themasterjournalfilehasbeendeleted,but
**thedirectorysyncFailed.ThereisnocompletelysafecourSEOf
**actionfromhere.Theindividualjournalscontainthenameofthe
**masterjournalfile,butthereisnowayofknowingifthat
**masterjournalexistsnoworifitwillexistaftertheoperating
**systemcrashthatmayfollowthefsync()failure.
Allfilesanddirectorieshavealreadybeensynced,sothefollowing
**callstosqlite3BtreeCommit()areonlyclosingfilesanddeleting
**journals.Ifsomethinggoeswrongwhilethisishappeningwedon't
**reallycare.Theintegrityofthetransactionisalreadyguaranteed,
**butsomestray'cold'journalsmaybelyingaround.Returningan
**errorcodewon'thelpmatters.
if(pBt){
sqlite3BtreeCommit(pBt);
}
}
}
#endif
returnrc; }