pgpool就是一个架在数据库与应用系统之间的中间层,用于实现cluster或分布式数据库,实现数据库的大规模集成应用,类似于oracle的Tuxedo;不过这个是开源的,功能上也有一些限制,具体应用可以到它的官网上查看,下面主要分析它的实现原理。

对于replication,它和mysql一样是通过传递SQL实现的,对于分布式存储,它是把SQL经过parse,rewrite之后,生成经过优化后的SQL分别在结点上执行,再把结果汇总。

对postgresql的通讯是通过libpq和socket实现的,对于自身多结点的通讯是通过socket实现的,对于一台机器的多个进程间是通过pipe和unix domain socket通信的

pgpool也和postgresql一样是多进程的,主要分为main进程、child进程和pcp进程:main进程主要负责health check和消息中转;child进程负责与postgresql通讯;pcp进程负责与其它的pgpool进程通讯以及用户名密码的检查。pcp之间的通讯全是用一个字母代表语义,这个通讯协议是否采用了postgresql中的通讯协议,还需要进一步确认。在pcp目录下面,还保存了一些供用户管理pgpool的工具。

pgpool的数据库结点分为两类:第一类是node db,就是普通的数据库结点;第二类是system db,它在分布式存储中主要用来存储分布的规则,在query cache中用来保存cache,一个pgpool的缓冲池中只有一个system db。

 

在结构体中,第一个最重要的是POOL_CONFIG:

typedef struct {
char *listen_addresses; /* hostnames/IP addresses to listen on */
    int port; /* port # to bind */
int pcp_port;     /* PCP port # to bind */
char *socket_dir;   /* pgpool socket directory */
char *pcp_socket_dir;   /* PCP socket directory */
int pcp_timeout;    /* PCP timeout for an idle client */
    int num_init_children; /* # of children initially pre-forked */
    int child_life_time; /* if idle for this seconds, child exits */
    int connection_life_time; /* if idle for this seconds, connection closes */
    int child_max_connections; /* if max_connections received, child exits */
int client_idle_limit;   /* If client_idle_limit is n (n > 0), the client is forced to be
           disconnected after n seconds idle */
int authentication_timeout; /* maximum time in seconds to complete client authentication */
    int max_pool; /* max # of connection pool per child */
    char *logdir;   /* logging directory */
    char *pid_file_name;   /* pid file name */
    char *backend_socket_dir; /* Unix domain socket directory for the PostgreSQL server */
int replication_mode;   /* replication mode */

int log_connections;   /* 0:false, 1:true - logs incoming connections */
int log_hostname;   /* 0:false, 1:true - resolve hostname */
int enable_pool_hba;   /* 0:false, 1:true - enables pool_hba.conf file authentication */

int load_balance_mode;   /* load balance mode */

int replication_stop_on_mismatch;   /* if there's a data mismatch between master and secondary
            * start degenration to stop replication mode
            */
int replicate_select; /* if non 0, replicate SELECT statement when load balancing is disabled. */
char **reset_query_list;   /* comma separated list of quries to be issued at the end of session */

int print_timestamp;   /* if non 0, print time stamp to each log line */
int master_slave_mode;   /* if non 0, operate in master/slave mode */
int connection_cache;   /* if non 0, cache connection pool */
int health_check_timeout; /* health check timeout */
int health_check_period; /* health check period */
char *health_check_user;   /* PostgreSQL user name for health check */
char *failover_command;     /* execute command when failover happens */
char *failback_command;     /* execute command when failback happens */

/*
* If true, trigger fail over when writing to the backend
* communication socket fails. This is the same behavior of
* pgpool-II 2.2.x or earlier. If set to false, pgpool will report
* an error and disconnect the session.
*/
int fail_over_on_backend_error;

char *recovery_user;   /* PostgreSQL user name for online recovery */
char *recovery_password;   /* PostgreSQL user password for online recovery */
char *recovery_1st_stage_command;   /* Online recovery command in 1st stage */
char *recovery_2nd_stage_command;   /* Online recovery command in 2nd stage */
int recovery_timeout;     /* maximum time in seconds to wait for remote start-up */
int client_idle_limit_in_recovery;   /* If > 0, the client is forced to be
            * disconnected after n seconds idle
            * This parameter is only valid while in recovery 2nd statge */
int insert_lock; /* if non 0, automatically lock table with INSERT to keep SERIAL
         data consistency */
int ignore_leading_white_space;   /* ignore leading white spaces of each query */
   int log_statement; /* 0:false, 1: true - logs all SQL statements */
   int log_per_node_statement; /* 0:false, 1: true - logs per node detailed SQL statements */

int parallel_mode; /* if non 0, run in parallel query mode */

int enable_query_cache;   /* if non 0, use query cache. 0 by default */

char *pgpool2_hostname;   /* pgpool2 hostname */
char *system_db_hostname; /* system DB hostname */
int system_db_port;    /* system DB port number */
char *system_db_dbname;   /* system DB name */
char *system_db_schema;   /* system DB schema name */
char *system_db_user;   /* user name to access system DB */
char *system_db_password; /* password to access system DB */

char *lobj_lock_table;   /* table name to lock for rewriting lo_creat */

BackendDesc *backend_desc; /* PostgreSQL Server description. Placed on shared memory */

LOAD_BALANCE_STATUS load_balance_status[MAX_NUM_BACKENDS]; /* to remember which DB node is selected for load balancing */

/* followings do not exist in the configuration file */
    int current_slot; /* current backend slot # */
int replication_enabled;   /* replication mode enabled */
int master_slave_enabled;   /* master/slave mode enabled */
int num_reset_queries;   /* number of queries in reset_query_list */

/* ssl configuration */
int ssl; /* if non 0, activate ssl support (frontend+backend) */
char *ssl_cert; /* path to ssl certificate (frontend only) */
char *ssl_key; /* path to ssl key (frontend only) */
char *ssl_ca_cert; /* path to root (CA) certificate */
char *ssl_ca_cert_dir; /* path to directory containing CA certificates */
} POOL_CONFIG;

POOL_CONFIG里面保存了pgpool的配置信息,是从配置文件中读入的,这里面最重要的是BackendDesc *backend_desc,它里面保存了pgpool和数据库的连接信息,是放在共享内存中,供所有pgpool的child进程共享的。其余的字段看注释就可以了

每一个BackendInfo描述一个后端数据库结点(主机名、端口、状态、负载均衡权重、数据目录),也就是一个postgresql服务器实例

/*
 * Descriptor of a single PostgreSQL backend.
 * Lives in the shared memory area.
 */
typedef struct {
    /* backend host name */
    char backend_hostname[MAX_DB_HOST_NAMELEN];
    /* backend port number */
    int backend_port;
    /* backend status */
    BACKEND_STATUS backend_status;
    /* normalized load balance ratio of this backend */
    double backend_weight;
    /*
     * Original comment: "descripted parameter".
     * NOTE(review): presumably the weight as given before normalization;
     * confirm against the code that fills it in.
     */
    double unnormalized_weight;
    /* NOTE(review): undocumented in the original; by its name, the backend's data directory path */
    char backend_data_directory[MAX_PATH_LENGTH];
} BackendInfo;

/*
 * Fixed-size table of backend descriptors together with the count of
 * entries in use.
 */
typedef struct {
    /* number of PostgreSQL backends in use */
    int num_backends;
    /* one descriptor per backend, up to MAX_NUM_BACKENDS */
    BackendInfo backend_info[MAX_NUM_BACKENDS];
} BackendDesc;

Logo

旨在为数千万中国开发者提供一个无缝且高效的云端环境,以支持学习、使用和贡献开源项目。

更多推荐