PostgreSQL源码简单分析

Postgresql源码简单分析(by linux_prog@loveopensource.com) Postgresql是一个非常强大的开源数据库，既然是开源，当然，我们可以去修改他的代码做任何事情。最近，忙着设计一个分布式数据库系统，所以，理所当然，就想到了在postgresql的基础上直接改。因此，分析其源代码就必不可少了。简单讲一下分析内容。源码目录： $ cd postgresql-8.2.4/src/backend/ $ ls access catalog executor libpq Makefile nodes parser port postmaster rewrite tcop bootstrap commands lib main nls.mk optimizer po postgres regex storage utils 其中：main/main.c是程序启动主文件。主文件没有作什么重要的事情，主要是作成为daemon等等一些我们并不关心的事情。 tcop/postgres.c是backend执行入口文件。请看第3414行： case 'Q': /* simple query */ { const char *query_string; /* Set statement_timestamp() */ SetCurrentStatementStartTimestamp(); query_string = pq_getmsgstring(&input_message); //拿到通过libpq传过来的sql语句 pq_getmsgend(&input_message); exec_simple_query(query_string); //执行这个sql，并把结果通过libpq返回 send_ready_for_query = true; } break; 再看看postgres.c的第745行：static void exec_simple_query(const char *query_string) { CommandDest dest = whereToSendOutput; MemoryContext oldcontext; List *parsetree_list; ListCell *parsetree_item; bool save_log_statement_stats = log_statement_stats; bool was_logged = false; char msec_str[32]; /* Report query to various monitoring facilities. */ debug_query_string = query_string; pgstat_report_activity(query_string); /* We use save_log_statement_stats so ShowUsage doesn't report incorrect results because ResetUsage wasn't called. */ if (save_log_statement_stats) ResetUsage(); /* Start up a transaction command. All queries generated by the query_string will be in this same command block, *unless* we find a BEGIN/COMMIT/ABORT statement; we have to force a new xact command after one of those, else bad things will happen in xact.c. (Note that this will normally change current memory context.) */ start_xact_command(); /* Zap any pre-existing unnamed statement. 
(While not strictly necessary, it seems best to define simple-Query mode as if it used the unnamed statement and portal; this ensures we recover any storage used by prior unnamed operations.) */ unnamed_stmt_pstmt = NULL; if (unnamed_stmt_context) { DropDependentPortals(unnamed_stmt_context); MemoryContextDelete(unnamed_stmt_context); } unnamed_stmt_context = NULL; /* Switch to appropriate context for constructing parsetrees. */ oldcontext = MemoryContextSwitchTo(MessageContext); QueryContext = CurrentMemoryContext; /* Do basic parsing of the query or queries (this should be safe even if we are in aborted transaction state!) */ // 解析这个sql语句到一个语法树结构中 parsetree_list = pg_parse_query(query_string); 我想做的事情如下：在postgresql的基础上作一个分布式数据库，但sql parse和backend/frontend的通信都不想自己写，也就是说要使用postgresql的libpq。因此做如下实验：任何sql语句进来后，我会在exec_simple_query里面截获，如果是一个select语句，我会返回一行记录：列名—name 列值– lijianghua 继续分析文件： src/backend/access/common/printtup.c //以下函数是通过libpq发送返回的列的column 描述信息的 void SendRowDescriptionMessage(TupleDesc typeinfo,List *targetlist,int16 *formats) { Form_pg_attribute *attrs = typeinfo->attrs; int natts = typeinfo->natts; int proto = PG_PROTOCOL_MAJOR(FrontendProtocol); int i; StringInfoData buf; ListCell *tlist_item = list_head(targetlist); pq_beginmessage(&buf,'T'); /* tuple descriptor message type */ pq_sendint(&buf,natts,2); /* # of attrs in tuples */ for (i = 0; i < natts; ++i) { Oid atttypid = attrs[i]->atttypid; int32 atttypmod = attrs[i]->atttypmod; pq_sendstring(&buf,NameStr(attrs[i]->attname)); /* column ID info appears in protocol 3.0 and up */ if (proto >= 3) { /* Do we have a non-resjunk tlist item? 
*/ while (tlist_item && ((TargetEntry *) lfirst(tlist_item))->resjunk) tlist_item = lnext(tlist_item); if (tlist_item) { TargetEntry *tle = (TargetEntry *) lfirst(tlist_item); pq_sendint(&buf,tle->resorigtbl,4); pq_sendint(&buf,tle->resorigcol,2); tlist_item = lnext(tlist_item); } else { /* No info available,so send zeroes */ pq_sendint(&buf,0,4); pq_sendint(&buf,0,2); } } /* If column is a domain,send the base type and typmod instead */ atttypid = getBaseTypeAndTypmod(atttypid,&atttypmod); pq_sendint(&buf,(int) atttypid,sizeof(atttypid)); pq_sendint(&buf,attrs[i]->attlen,sizeof(attrs[i]->attlen)); /* typmod appears in protocol 2.0 and up */ if (proto >= 2) pq_sendint(&buf,atttypmod,sizeof(atttypmod)); /* format info appears in protocol 3.0 and up */ if (proto >= 3) { if (formats) pq_sendint(&buf,formats[i],2); else pq_sendint(&buf,0,2); } } pq_endmessage(&buf); } //下面这个函数发送select返回的数据的值，每一行数据都会调用一下这个函数 static void printtup(TupleTableSlot *slot,DestReceiver *self) { TupleDesc typeinfo = slot->tts_tupleDescriptor; DR_printtup *myState = (DR_printtup *) self; StringInfoData buf; int natts = typeinfo->natts; int i; /* Set or update my derived attribute info,if needed */ if (myState->attrinfo != typeinfo || myState->nattrs != natts) printtup_prepare_info(myState,typeinfo,natts); /* Make sure the tuple is fully deconstructed */ slot_getallattrs(slot); /* Prepare a DataRow message */ pq_beginmessage(&buf,'D'); pq_sendint(&buf,natts,2); /* send the attributes of this tuple */ for (i = 0; i < natts; ++i) { PrinttupAttrInfo *thisState = myState->myinfo + i; Datum origattr = slot->tts_values[i],attr; if (slot->tts_isnull[i]) { pq_sendint(&buf,-1,4); continue; } /* If we have a toasted datum,forcibly detoast it here to avoid memory leakage inside the type's output routine. 
*/ if (thisState->typisvarlena) attr = PointerGetDatum(PG_DETOAST_DATUM(origattr)); else attr = origattr; if (thisState->format == 0) { /* Text output */ char *outputstr; outputstr = OutputFunctionCall(&thisState->finfo,attr); pq_sendcountedtext(&buf,outputstr,strlen(outputstr),false); pfree(outputstr); } else { /* Binary output */ bytea *outputbytes; outputbytes = SendFunctionCall(&thisState->finfo,attr); pq_sendint(&buf,VARSIZE(outputbytes) - VARHDRSZ,4); pq_sendbytes(&buf,VARDATA(outputbytes),VARSIZE(outputbytes) - VARHDRSZ); pfree(outputbytes); } /* Clean up detoasted copy,if any */ if (attr != origattr) pfree(DatumGetPointer(attr)); } pq_endmessage(&buf); } 根据以上分析，我来修改exec_simple_query: 在833行加入如下内容： //此范例只处理select语句 if(parsetree->type == T_SelectStmt) { StringInfoData buf; pq_beginmessage(&buf,'T'); /* tuple descriptor message type */ pq_sendint(&buf,1,2); /* number of columns in tuples */ pq_sendstring(&buf,"name"); // column名称 /* 注：原文此处的参数在转载时有缺失，以下按3.0协议RowDescription的字段顺序补全 */ pq_sendint(&buf,0,4); /* table OID */ pq_sendint(&buf,0,2); /* column number */ pq_sendint(&buf,0,4); /* type OID */ pq_sendint(&buf,2,2); /* typlen */ pq_sendint(&buf,0,4); /* typmod */ pq_sendint(&buf,0,2); /* format: text */ pq_endmessage(&buf); pq_beginmessage(&buf,'D'); pq_sendint(&buf,1,2); /* number of columns in this row */ pq_sendcountedtext(&buf,"lijianghua",10,false); pq_endmessage(&buf); //此行必须加上,告诉libpq返回结果结束(C代表completed) pq_puttextmessage('C',"select return 1 rows"); return; } 修改结束，按照正常流程编译Postgresql,并启动。测试结果： [mypg@webtrends ~]$ psql Type: \copyright for distribution terms \h for help with SQL commands \? for help with psql commands \g or terminate with semicolon to execute query \q to quit mypg=# \d List of relations name ———— lijianghua (1 row) mypg=# select * from test2; name ———— lijianghua (1 row) mypg=# select * from test3; name ———— lijianghua (1 row) mypg=# select * from test5; name ———— lijianghua (1 row) 可以看到任何select语句都只返回我们预定义的结果，说明我们当初的想法是可行的(\d其实也是一个select语句)。下一目标是基于postgresql自己开发一个分布式数据库，有得忙了。

PostgreSQL源码简单分析

猜你在找的PostgreSQL相关文章