/*--------------------------------------------------------------------
 * FILE:
 *     recovery.c
 *
 * NOTE:
 *     This file contains the functions that pgreplicate calls to
 *     drive the recovery of a cluster DB server from the master.
 *
 * Portions Copyright (c) 2003, Atsushi Mitani
 *--------------------------------------------------------------------
 */
#ifdef USE_REPLICATION

#include "postgres.h"

#include <stdio.h>
#include <unistd.h>
#include <signal.h>
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/msg.h>
#include <netdb.h>
#include <netinet/in.h>
#include <errno.h>
#include <fcntl.h>
#include <time.h>
#include <arpa/inet.h>
#include <sys/param.h>
#include <sys/file.h>

#ifdef HAVE_SYS_SELECT_H
#include <sys/select.h>
#endif

#ifdef HAVE_CRYPT_H
#include <crypt.h>
#endif

#include "miscadmin.h"
#include "nodes/nodes.h"
#include "libpq-fe.h"
#include "libpq/libpq-fs.h"
#include "libpq-int.h"
#include "fe-auth.h"
#include "access/xact.h"
#include "replicate_com.h"
#include "pgreplicate.h"


#ifdef WIN32
#include "win32.h"
#else
#ifdef HAVE_NETINET_TCP_H
#include <netinet/tcp.h>
#endif
#include <arpa/inet.h>
#endif

#ifdef HAVE_CRYPT_H
#include <crypt.h>
#endif

#ifdef MULTIBYTE
#include "mb/pg_wchar.h"
#endif


/*--------------------------------------
 * GLOBAL VARIABLE DECLARATION
 *--------------------------------------
 */
/*
 * Copy of the RECOVERY_PGDATA_ANS packet received from the master.  It is
 * saved by first_setup_recovery() and re-sent to the load balancers when the
 * recovery finishes or is aborted, then cleared.
 */
RecoveryPacket MasterPacketData;
/* Address/socket information of the master server used as recovery source. */
RecoveryTbl Master;
/* Address/socket information of the server that is being recovered. */
RecoveryTbl Target;


/*--------------------------------------
 * PROTOTYPE DECLARATION
 *--------------------------------------
 */
/* recovery packet I/O */
static int read_packet(int sock,RecoveryPacket * packet);
static int read_packet_from_master( RecoveryTbl * host, RecoveryPacket * packet );
static int send_recovery_packet(int  sock, RecoveryPacket * packet);
static int send_packet(RecoveryTbl * host, RecoveryPacket * packet );
/* recovery status transitions (thin wrappers around PGRset_recovery_status) */
static void start_recovery_prepare(void);
static void reset_recovery_prepare(void);
static void start_recovery(void);
static void finish_recovery(void);
/* handlers for the individual recovery stages */
static bool first_setup_recovery(int * sock, RecoveryPacket * packet);
static bool second_setup_recovery (RecoveryPacket * packet);
static void pgrecovery_loop(int fd);
static void show_recovery_packet(RecoveryPacket * packet);
/* replay of the queries queued during recovery */
static int PGRsend_queue(RecoveryTbl * master, RecoveryTbl * target);
static int send_vacuum(HostTbl *host, char * userName, int stage);
static int send_recovery_queue( HostTbl * master_ptr, HostTbl * target_ptr, ReplicateHeader *header, char * query);
static void clear_recovery_queue_file(void);

/* functions with external linkage defined in this file */
int PGRsend_load_balance_packet(RecoveryPacket * packet);
void PGRrecovery_main(void);
FILE * PGRget_recovery_queue_file_for_write(void);
FILE * PGRget_recovery_queue_file_for_read(int next);

/*-----------------------------------------------------------
 * SYMBOL
 *    read_packet()
 * NOTES
 *    Read exactly one recovery packet (sizeof(RecoveryPacket) bytes)
 *    from the socket.  Short reads are accumulated until the whole
 *    packet has arrived.  EINTR/EAGAIN are retried without limit after
 *    a short sleep; any other recv() error is retried at most
 *    PGR_RECV_RETRY_CNT times.
 * ARGS
 *    int sock : socket
 *    RecoveryPacket * packet : read packet buffer
 * RETURN
 *    -1 : error, NULL buffer, or the peer closed the connection
 *         before a complete packet was received
 *    >0 : read size (always sizeof(RecoveryPacket))
 *-----------------------------------------------------------
 */
static int
read_packet(int sock,RecoveryPacket * packet)
{
	char * read_ptr = (char *)packet;
	int packet_size = sizeof(RecoveryPacket);
	int read_size = 0;
	int cnt = 0;
	int r = 0;

	if (packet == NULL)
	{
		return -1;
	}
	while (read_size < packet_size)
	{
		r = recv(sock, read_ptr + read_size, packet_size - read_size, MSG_WAITALL);
		if (r > 0)
		{
			/* partial data: keep reading until the packet is complete */
			read_size += r;
			continue;
		}
		if (r == 0)
		{
			/* orderly shutdown by the peer: the packet can never complete */
			return -1;
		}
		if (errno == EINTR)
		{
			usleep(PGR_RECV_WAIT_MSEC);
			continue;
		}
#ifdef EAGAIN
		if (errno == EAGAIN)
		{
			usleep(PGR_RECV_WAIT_MSEC);
			continue;
		}
#endif /* EAGAIN */
		if (cnt >= PGR_RECV_RETRY_CNT)
		{
			return -1;
		}
		cnt ++;
		usleep(PGR_RECV_WAIT_MSEC);
	}
	show_recovery_packet(packet);
	return read_size;
}

/*
 * read_packet_from_master()
 *
 * Block until the recovery socket of 'host' becomes readable and then read
 * one recovery packet from it into 'packet'.
 *
 * select() may modify both the descriptor set and the timeout, so both are
 * re-armed before every call; otherwise the first timeout would leave an
 * empty set behind and the loop could never see the socket again.
 *
 * Returns the result of read_packet(), or -1 when the host has no open
 * recovery socket or select() fails with an error other than EINTR.
 */
static int
read_packet_from_master( RecoveryTbl * host, RecoveryPacket * packet )
{
	int rtn;
	fd_set	  rmask;
	struct timeval timeout;

	if (host == NULL || host->recovery_sock < 0)
	{
		return -1;
	}
	for(;;)
	{
		FD_ZERO(&rmask);
		FD_SET(host->recovery_sock,&rmask);
		timeout.tv_sec = PGR_RECV_TIMEOUT;
		timeout.tv_usec = 0;

		rtn = select(host->recovery_sock+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
		if (rtn > 0 && FD_ISSET(host->recovery_sock, &rmask))
		{
			return read_packet(host->recovery_sock, packet);
		}
		if (rtn < 0 && errno != EINTR)
		{
			/* the descriptor set is undefined after an error */
			return -1;
		}
		/* timeout or interrupted: wait again */
	}
}

/*
 * send_recovery_packet()
 *
 * Write one complete RecoveryPacket to 'sock', looping over short writes and
 * restarting after EINTR.
 *
 * Returns STATUS_OK once all sizeof(RecoveryPacket) bytes have been handed
 * to send(), STATUS_ERROR on an invalid socket/buffer, on a send() error, or
 * when send() makes no progress.
 */
static int
send_recovery_packet(int  sock, RecoveryPacket * packet)
{
	char * func = "send_recovery_packet()";
	char * send_ptr = (char *)packet;
	int buf_size = sizeof(RecoveryPacket);
	int send_size = 0;
	int s;

	if (sock < 0 || packet == NULL)
	{
		return STATUS_ERROR;
	}

	/* stop as soon as everything is sent instead of issuing an empty send() */
	while (send_size < buf_size)
	{
		errno = 0;
		s = send(sock, send_ptr + send_size, buf_size - send_size, 0);
		if (s < 0)
		{
			if (errno == EINTR)
			{
				continue;
			}
			show_error("%s:send() failed. (%s)",func,strerror(errno));
			return STATUS_ERROR;
		}
		if (s == 0)
		{
			/* nothing was accepted although data is pending: give up */
			show_error("%s:send() sent no data",func);
			return STATUS_ERROR;
		}
		send_size += s;
	}
	return STATUS_OK;
}

/*
 * send_packet()
 *
 * Send a recovery packet to 'host' over its recovery socket.  When the
 * socket is not open yet (-1) a connection to host->hostName /
 * host->recoveryPort is created first.  When sending fails the socket is
 * closed, a reconnect is attempted and the send is tried again.
 *
 * Returns STATUS_OK when the packet was sent, STATUS_ERROR when the retry
 * limit for connecting or for sending is exceeded.
 */
static int
send_packet(RecoveryTbl * host, RecoveryPacket * packet )
{
	char * func = "send_packet()";
	int tries;

	if (host->recovery_sock == -1)
	{
		/* no connection yet: keep trying to connect */
		for (tries = 0;
			 PGR_Create_Socket_Connect(&(host->recovery_sock), host->hostName , host->recoveryPort) != STATUS_OK;
			 tries ++)
		{
			if (tries > MAX_RETRY_TIMES )
			{
				show_error("%s:PGR_Create_Socket_Connect failed",func);
				return STATUS_ERROR;
			}
		}
	}

	/* every failed send drops the connection and reconnects before retrying */
	for (tries = 0;
		 send_recovery_packet(host->recovery_sock,packet) != STATUS_OK;
		 tries ++)
	{
		close(host->recovery_sock);
		host->recovery_sock = -1;
		PGR_Create_Socket_Connect(&(host->recovery_sock), host->hostName , host->recoveryPort);
		if (tries > PGR_CONNECT_RETRY_TIME )
		{
			show_error("%s:send failed and PGR_Create_Socket_Connect failed",func);
			return STATUS_ERROR;
		}
	}
	return STATUS_OK;
}

/* Set the recovery status to RECOVERY_PREPARE_START. */
static void
start_recovery_prepare(void)
{
	PGRset_recovery_status (RECOVERY_PREPARE_START);	
}

/* Put the recovery status back to RECOVERY_INIT (used on the error paths). */
static void
reset_recovery_prepare(void)
{
	PGRset_recovery_status (RECOVERY_INIT);
}

/* Set the recovery status to RECOVERY_START. */
static void
start_recovery(void)
{
	PGRset_recovery_status (RECOVERY_START);
}

/*
 * Set the recovery status to RECOVERY_INIT once the recovery is over.
 * Same status value as reset_recovery_prepare(); the two names only
 * distinguish the call sites.
 */
static void
finish_recovery(void)
{
	PGRset_recovery_status (RECOVERY_INIT);
}

/*
 * PGRsend_load_balance_packet()
 *
 * Send 'packet' to every entry of LoadBalanceTbl.  The table is walked
 * until an entry with an empty host name is found.  A failed send to one
 * entry does not stop the walk.
 *
 * Returns STATUS_ERROR when LoadBalanceTbl is NULL, STATUS_OK otherwise
 * (individual send failures are not reported).
 */
int
PGRsend_load_balance_packet(RecoveryPacket * packet)
{
	char * func = "PGRsend_load_balance_packet()";
	RecoveryTbl * entry = LoadBalanceTbl;

	if (entry == NULL)
	{
		show_error("%s:recovery table is NULL",func);
		return STATUS_ERROR;
	}
	for (; entry->hostName[0] != 0; entry ++)
	{
		if (send_packet(entry,packet) != STATUS_OK)
		{
			continue;
		}
		/*
		 * NOTE(review): send_packet() works on recovery_sock, yet only
		 * 'sock' is closed here -- confirm which descriptor is meant.
		 */
		if (entry->sock != -1)
		{
			close(entry->sock);
			entry->sock = -1;
		}
	}
	return STATUS_OK;
}

/*
 * send_vacuum()
 *
 * Send a vacuum command to 'host' as a replicated query against the
 * "template1" database: "VACUUM" for PGR_RECOVERY_1ST_STAGE, "VACUUM FULL"
 * for any other stage.
 *
 * 'userName' is copied into the packet header; a NULL user name leaves the
 * header field empty.  Both name fields are always left NUL-terminated.
 *
 * Returns the status of PGRsend_replicate_packet_to_server(), or
 * STATUS_ERROR when the query string cannot be allocated.
 */
static int
send_vacuum(HostTbl *host, char * userName, int stage)
{
	int rtn = STATUS_OK;
	ReplicateHeader header;
	char * query = NULL;

	query = strdup((stage == PGR_RECOVERY_1ST_STAGE) ? "VACUUM" : "VACUUM FULL");
	if (query == NULL)
	{
		return STATUS_ERROR;
	}

	/* header is zero-filled, so copying at most size-1 bytes keeps the NUL */
	memset(&header,0,sizeof(header));
	header.query_size = strlen(query) + 1;
	strncpy(header.dbName,"template1",sizeof(header.dbName) - 1);
	if (userName != NULL)
	{
		strncpy(header.userName,userName,sizeof(header.userName) - 1);
	}
	header.cmdSys = CMD_SYS_REPLICATE;
	header.cmdSts = CMD_STS_QUERY;
	header.cmdType = CMD_TYPE_VACUUM;
	header.pid = getpid();
	header.query_id = getpid();
	rtn = PGRsend_replicate_packet_to_server(host,&header,query,PGR_Result, RECOVERY_QUERY_TYPE);
	free(query);
	return rtn;
}

/*
 * first_setup_recovery()
 *
 * Handle RECOVERY_PREPARE_REQ, the first stage of a recovery.
 *
 *   1. remember the requesting server in Target; answers go back over *sock
 *   2. refuse with RECOVERY_ERROR_OCCUPIED when the recovery status is not
 *      RECOVERY_INIT
 *   3. register the target in the host table with DATA_INIT and forward the
 *      request to the load balancers
 *   4. pick a master, send it the first stage vacuum and RECOVERY_PGDATA_REQ;
 *      a master that cannot be reached is marked DATA_ERR and the next one
 *      is tried
 *   5. on RECOVERY_PGDATA_ANS forward the answer to the load balancers, keep
 *      a copy in MasterPacketData and answer the target with
 *      RECOVERY_PREPARE_ANS
 *
 * ARGS
 *    int * sock : socket connected to the recovery target
 *    RecoveryPacket * packet : received request; reused as send buffer
 * RETURN
 *    true  : the caller must close the connection (request refused/failed)
 *    false : keep the connection and wait for the next packet
 */
static bool
first_setup_recovery(int * sock, RecoveryPacket * packet)
{
	char * func = "first_setup_recovery()";
	int status;
	HostTbl * master = (HostTbl *)NULL;
	bool loop_end = false;
	HostTbl host_tbl;
	char * userName = NULL;

	strncpy(Target.hostName,packet->hostName,sizeof(Target.hostName));
	Target.port = ntohs(packet->port);
	Target.recoveryPort = ntohs(packet->recoveryPort);
	Target.sock = *sock;
	Target.recovery_sock = *sock;
#ifdef PRINT_DEBUG
	show_debug("%s:1st setup target %s",func,Target.hostName);
	show_debug("%s:1st setup port %d",func,Target.port);
#endif			
	/*
	 * check another recovery process 
	 */
	if (PGRget_recovery_status() != RECOVERY_INIT)
	{
		/*
		 * recovery process is already running
		 */
		show_error("%s:already recovery job runing",func);
		/* clear the whole packet, not just sizeof(pointer) bytes */
		memset(packet,0,sizeof(RecoveryPacket));
		PGRset_recovery_packet_no(packet, RECOVERY_ERROR_OCCUPIED) ;
		(void) send_packet(&Target,packet);
		return true;
	}
	/*
	 * add recovery target to host table
	 * (zero the entry first so no indeterminate field is handed over)
	 */
	memset(&host_tbl,0,sizeof(host_tbl));
	strncpy(host_tbl.hostName,packet->hostName,sizeof(host_tbl.hostName));
	host_tbl.port = ntohs(packet->port);
	PGRset_recovered_host(&host_tbl,DATA_INIT);
	PGRadd_HostTbl(&host_tbl,DATA_INIT);
	/*
	 * send prepare recovery to load balancer
	 */
	PGRsend_load_balance_packet(packet);
	/* the packet is overwritten below, so keep a private copy of the name */
	userName = strdup(packet->userName);

	/*
	 * set RECOVERY_PGDATA_REQ packet data
	 */
	memset(packet,0,sizeof(RecoveryPacket));
	PGRset_recovery_packet_no(packet, RECOVERY_PGDATA_REQ );

retry_connect_master:
	master = PGRget_master();
	if (master == (HostTbl *)NULL)
	{
		/*
		 * connection error , master may be down
		 */
		show_error("%s:get master info error , master may be down",func);
		goto notify_target_failure;
	}
	/* send vacuum command to master server */
	status = send_vacuum(master, userName, PGR_RECOVERY_1ST_STAGE );
	if (status != STATUS_OK)
	{
		show_error("%s:vacuum error , master may be down",func);
		goto notify_target_failure;
	}

	strncpy(Master.hostName,master->hostName,sizeof(Master.hostName));
	Master.sock = -1;
	Master.recovery_sock = -1;
	Master.port = master->port;
	Master.recoveryPort = master->recoveryPort;
	status = send_packet(&Master, packet);
	if (status != STATUS_OK)
	{
		/*
		 * connection error , master may be down
		 */
		show_error("%s:connection error , master may be down",func);
		/* send_packet() may leave a reconnected socket behind: do not leak it */
		if (Master.recovery_sock != -1)
		{
			close(Master.recovery_sock);
			Master.recovery_sock = -1;
		}
		PGRset_host_status(master,DATA_ERR);
		goto retry_connect_master ;
	}
	
	/*
	 * start prepare of recovery
	 *     set recovery status to "prepare start"
	 *     start transaction count up
	 */
	start_recovery_prepare();
	/*
	 * wait answer from master server 
	 * (the packet is zeroed first, so a failed read leaves packet_no == 0)
	 */
	memset(packet,0,sizeof(RecoveryPacket));
	read_packet_from_master(&Master, packet);
	/*
	 * NOTE(review): any other answer leaves the status at "prepare start"
	 * and returns false -- confirm that this is intended.
	 */
	if (ntohs(packet->packet_no) == RECOVERY_PGDATA_ANS)
	{
		/*
		 * send a packet to load balancer that is stopped master's 
		 * load balancing until all recovery process is finished
		 */
		PGRsend_load_balance_packet(packet);
		memcpy((char *)&MasterPacketData,packet,sizeof(RecoveryPacket));

		/*
		 * prepare answer from master DB
		 */
		PGRset_recovery_packet_no(packet, RECOVERY_PREPARE_ANS );
		strncpy(packet->hostName,Master.hostName,sizeof(packet->hostName));
		status = send_packet(&Target, packet);
		if (status != STATUS_OK)
		{
			/* target is unreachable: tell the master and give up */
			PGRset_recovery_packet_no(packet, RECOVERY_ERROR_CONNECTION);
			status = send_packet(&Master,packet);
			reset_recovery_prepare();
			loop_end = true;
		}
	}
	free(userName);
	return loop_end;

notify_target_failure:
	/* no usable master: report the failure to the target and reset */
	PGRset_recovery_packet_no(packet, RECOVERY_ERROR_CONNECTION);
	(void) send_packet(&Target, packet);
	reset_recovery_prepare();
	free(userName);
	return true;
}

/*
 * second_setup_recovery()
 *
 * Handle RECOVERY_START_REQ, the second stage of a recovery.
 *
 *   1. set the recovery status to RECOVERY_START and wait until the
 *      transaction count is cleared
 *   2. send the second stage vacuum to the master
 *   3. send RECOVERY_FSYNC_REQ to the master and wait for its answer
 *   4. on RECOVERY_FSYNC_ANS answer the target with RECOVERY_START_ANS
 *
 * Every failure is reported with RECOVERY_ERROR_CONNECTION to the target
 * and to the master, and the recovery status is reset.
 *
 * ARGS
 *    RecoveryPacket * packet : received request; reused as send buffer
 * RETURN
 *    always true : the caller closes the connection after this stage
 */
static bool
second_setup_recovery (RecoveryPacket * packet)
{
	char * func = "second_setup_recovery()";
	HostTbl * master = (HostTbl *)NULL;
	char * userName = NULL;
	int status;

	start_recovery();
	/*
	 * wait until all started transactions are going to finish
	 */
	status = PGRwait_transaction_count_clear();
	if (status != STATUS_OK)
	{
		show_error("%s:transaction is too busy, please try again after",func);
		goto abort_recovery;
	}

	/* send vacuum command to master server */
	master = PGRget_master();
	if (master == (HostTbl *)NULL)
	{
		/* same check as in the first stage: there is no master to talk to */
		show_error("%s:get master info error , master may be down",func);
		goto abort_recovery;
	}
	userName = strdup(packet->userName);
	status = send_vacuum(master, userName, PGR_RECOVERY_2ND_STAGE );
	free(userName);
	if (status != STATUS_OK)
	{
		show_error("%s:vacuum error , master may be down",func);
		goto abort_recovery;
	}

	/*
	 * then, send fsync request to master DB
	 */
	PGRset_recovery_packet_no(packet, RECOVERY_FSYNC_REQ );
	status = send_packet(&Master,packet);
	if (status != STATUS_OK)
	{
		/*
		 * connection error , master may be down
		 */
		show_error("%s:connection error , master may be down",func);
		goto abort_recovery;
	}
	/*
	 * wait answer from master server 
	 * (the packet is zeroed first, so a failed read leaves packet_no == 0)
	 */
	memset(packet,0,sizeof(RecoveryPacket));
	read_packet_from_master(&Master,packet);
	if (ntohs(packet->packet_no) != RECOVERY_FSYNC_ANS )
	{
		show_error("%s:failer answer returned",func);
		goto abort_recovery;
	}

	/*
	 * master DB finished fsync
	 */
	PGRset_recovery_packet_no(packet, RECOVERY_START_ANS );
	strncpy(packet->hostName,Master.hostName,sizeof(packet->hostName));
	status = send_packet(&Target,packet);
	if (status != STATUS_OK)
	{
		/* the target cannot be told to start: end the recovery */
		finish_recovery();
	}
	return true;

abort_recovery:
	/* common failure path: notify both sides and reset the status */
	PGRset_recovery_packet_no(packet, RECOVERY_ERROR_CONNECTION);
	(void) send_packet(&Target,packet);
	(void) send_packet(&Master,packet);
	reset_recovery_prepare();
	return true;
}

/*
 * PGRsend_queue()
 *
 * Replay the queries stored in the recovery queue files.  Every record
 * (a ReplicateHeader followed by header.query_size bytes of query text) is
 * sent to the master and, when 'target' is not NULL, to the target as well.
 * When the end of a file is reached the next queue file is requested; when
 * there is none the queue file is cleared and the replay ends.
 *
 * Each record is read while holding the recovery queue semaphore.  The
 * semaphore is always released before the next queue file is requested,
 * because PGRget_recovery_queue_file_for_read() takes it itself.
 *
 * ARGS
 *    RecoveryTbl * master : master server (required)
 *    RecoveryTbl * target : recovery target, or NULL to replay to the
 *                           master only
 * RETURN
 *    STATUS_OK    : nothing to replay, or the last record was sent
 *    STATUS_ERROR : setup failure, unknown host, allocation failure,
 *                   read error, or the last send failed
 */
static int
PGRsend_queue(RecoveryTbl * master, RecoveryTbl * target)
{
	char * func = "PGRsend_queue()";
	HostTbl * master_ptr = NULL;
	HostTbl * target_ptr = NULL;
	ReplicateHeader header;
	char * query = NULL;
	char * grown = NULL;
	int status = STATUS_OK;
	int query_size = 0;
	int alloc_size = 0;
	int read_lock = 0;

	if (RecoverySemID == 0)
	{
		show_error("%s:initial setup should be failed",func);
		return STATUS_ERROR;
	}
	if (master == (RecoveryTbl *)NULL)
	{
		show_error("%s:there is no master ",func);
		return STATUS_ERROR;
	}
#ifdef PRINT_DEBUG
	show_debug("%s:master %s - %d\n",func,master->hostName,master->port);
#endif			
	master_ptr = PGRget_HostTbl(master->hostName,master->port);
	if (master_ptr == (HostTbl *)NULL)
	{
		show_error("%s:master table is null",func);
		return STATUS_ERROR;
	}
	if (target != (RecoveryTbl *)NULL)
	{
#ifdef PRINT_DEBUG
		show_debug("%s:target %s - %d",func,target->hostName,target->port);
#endif			
		target_ptr = PGRget_HostTbl(target->hostName,target->port);
		if (target_ptr == (HostTbl *)NULL)
		{
			show_error("%s:target table is null",func);
			return STATUS_ERROR;
		}
	}
	RecoveryQueue.queue_fp = PGRget_recovery_queue_file_for_read(0);
	if (RecoveryQueue.queue_fp == NULL)
	{
		/* no queue file: nothing was queued */
		return STATUS_OK;
	}
	clearerr(RecoveryQueue.queue_fp);
	fseek(RecoveryQueue.queue_fp,0,SEEK_SET);
	while (RecoveryQueue.queue_fp != NULL)
	{
		if(feof(RecoveryQueue.queue_fp) != 0)
		{
			/* current file is exhausted: move on to the next one */
			RecoveryQueue.queue_fp = PGRget_recovery_queue_file_for_read(1);
			if (RecoveryQueue.queue_fp == NULL)
			{
				clear_recovery_queue_file();
				break;
			}
			clearerr(RecoveryQueue.queue_fp);
			fseek(RecoveryQueue.queue_fp,0,SEEK_SET);
		}
		if (!read_lock)
		{
			read_lock = 1;
			PGRsem_lock(RecoverySemID, SEM_NUM_OF_RECOVERY_QUEUE);
		}
		if (fread(&header,sizeof(ReplicateHeader),1,RecoveryQueue.queue_fp) >0)
		{
			query_size = ntohl(header.query_size);
			/* records with a non-positive size carry no query and are skipped */
			if (query_size > 0)
			{
				if (alloc_size < query_size +4)
				{
					/*
					 * grow the buffer; realloc(NULL, n) acts as malloc(n).
					 * The old pointer is kept until the call has succeeded
					 * and the new capacity is recorded so the buffer is
					 * reused for the following records.
					 */
					grown = (char *)realloc(query,query_size+4);
					if (grown == (char*)NULL)
					{
						show_error("%s:malloc or realloc faild: (%s)",func,strerror(errno));
						PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY_QUEUE);
						free(query);
						return STATUS_ERROR;
					}
					query = grown;
					alloc_size = query_size + 4;
				}
				memset(query,0,query_size+4);
				if (fread(query,query_size,1, RecoveryQueue.queue_fp) >0)
				{
#ifdef PRINT_DEBUG
	show_debug("%s: send_recovery_queue[%s]",func,query);
#endif			
					read_lock = 0;
					status = send_recovery_queue(master_ptr,target_ptr, &header, query);
					PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY_QUEUE);
				}
			}
		}
		/* nothing was sent for this record: release the semaphore here */
		if (read_lock)
		{
			read_lock = 0;
			PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY_QUEUE);
		}
		if (ferror(RecoveryQueue.queue_fp) != 0)
		{
			status = STATUS_ERROR;
			break;
		}
	}
	free(query);
	return status;
}

/*
 * send_recovery_queue()
 *
 * Send one queued query to the master and then, when target_ptr is not
 * NULL, to the recovery target.  The target is not contacted when the send
 * to the master fails.
 *
 * Returns STATUS_OK when every send succeeded, STATUS_ERROR otherwise.
 */
static int
send_recovery_queue( HostTbl * master_ptr, HostTbl * target_ptr, ReplicateHeader *header, char * query)
{
	char * func = "send_recovery_queue()";
	int rc;

	rc = PGRsend_replicate_packet_to_server(master_ptr,header,query,PGR_Result, RECOVERY_QUERY_TYPE);
	if (rc != STATUS_OK)
	{
		show_error("%s:PGRsend_replicate_packet_to_server to master error",func);
		return STATUS_ERROR;
	}

	if (target_ptr == NULL)
	{
		/* master-only replay */
		return STATUS_OK;
	}

	rc = PGRsend_replicate_packet_to_server(target_ptr,header,query,PGR_Result, RECOVERY_QUERY_TYPE);
	if (rc != STATUS_OK)
	{
		show_error("%s:PGRsend_replicate_packet_to_server to target error",func);
		return STATUS_ERROR;
	}
	return STATUS_OK;
}

static void
pgrecovery_loop(int fd)
{
	char * func = "pgrecovery_loop()";
	int count = 0;
	int sock = -1;
	int status = STATUS_OK;
	bool loop_end = false;
	RecoveryPacket packet;
	HostTbl new_host;

	count = 0;
	while ((status = PGR_Create_Acception(fd,&sock,"",Recovery_Port_Number)) != STATUS_OK)
	{
		show_error("%s:PGR_Create_Acception failed",func);
		close(sock);
		sock = -1;
		if ( count > PGR_CONNECT_RETRY_TIME)
		{
			return;
		}
		count ++;
	}
	for(;;)
	{
		int read_size = 0;
		int rtn = 0;
		fd_set	  rmask;
		struct timeval timeout;

		timeout.tv_sec = RECOVERY_TIMEOUT;
		timeout.tv_usec = 0;

		/*
		 * Wait for something to happen.
		 */
		FD_ZERO(&rmask);
		FD_SET(sock,&rmask);
		/*
		 * read packet from target cluster server
		 */
		rtn = select(sock+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
		if (rtn && FD_ISSET(sock, &rmask))
		{
			read_size = read_packet(sock, &packet);
		}
		else
		{
			continue;
		}

#ifdef PRINT_DEBUG
		show_debug("%s:[%d]receive packet no:%d",func,sock,ntohs(packet.packet_no));
#endif			

		switch (ntohs(packet.packet_no))
		{
			case RECOVERY_PREPARE_REQ :
				/*
				 * start prepare of recovery
				 */
				loop_end = first_setup_recovery(&sock, &packet);
#ifdef PRINT_DEBUG
				show_debug("%s:1st master %s - %d",
					func,Master.hostName,Master.port);
				show_debug("%s:1st target %s - %d",
					func,Target.hostName,Target.port);
				show_debug("%s:first_setup_recovery end :%d ",
					func,loop_end);
#endif			
				break;
			case RECOVERY_START_REQ : 
				/*
				 * now, recovery process will start
				 *    stop the transaction count up
				 *    start queueing and stop send all queries for master DB
				 */
				loop_end = second_setup_recovery (&packet);
#ifdef PRINT_DEBUG
				show_debug("%s:2nd master %s - %d",
					func,Master.hostName,Master.port);
				show_debug("%s:2nd target %s - %d",
					func,Target.hostName,Target.port);
				show_debug("%s:second_setup_recovery end :%d ",
					func,loop_end);
#endif			
				break;
			case RECOVERY_FINISH : 
				/*
				 * finished rsync DB datas from master to target 
				 */
				/*
				 * send all queries in queue
				 */
#ifdef PRINT_DEBUG
				show_debug("%s:last master %s - %d",
					func,Master.hostName,Master.port);
				show_debug("%s:last target %s - %d",
					func,Target.hostName,Target.port);
#endif			
				status = PGRsend_queue(&Master,&Target);
				if (status == STATUS_OK)
				{
					strncpy(new_host.hostName,Target.hostName,sizeof(new_host.hostName));
					new_host.port = Target.port;
					new_host.recoveryPort = Target.recoveryPort;
					PGRset_recovered_host(&new_host,DATA_USE);
					PGRadd_HostTbl(&new_host,DATA_USE);
				}
				else
				{
					/* connection error , master or target may be down */
					show_error("%s:PGRsend_queue failed",func);
					PGRset_recovery_packet_no(&packet, RECOVERY_ERROR_CONNECTION);
					status = send_packet(&Target,&packet);
				}
				/*
				 * stop queueing, and initiarise recovery status
				 */
				finish_recovery();
				/*
				 * send finish recovery to load balancer
				 */
				send_packet(&Master, &packet);
				MasterPacketData.packet_no = packet.packet_no;
				PGRsend_load_balance_packet(&MasterPacketData);
				PGRsend_load_balance_packet(&packet);
				memset((char *)&MasterPacketData,0,sizeof(RecoveryPacket));
				loop_end = true;
				break;
			case RECOVERY_ERROR_ANS : 
#ifdef PRINT_DEBUG
				show_debug("%s:recovery error accept. top queueing and initiarse recovery status",func);
#endif			
				status = PGRsend_queue(&Master,NULL);
				memset(&packet,0,sizeof(RecoveryPacket));
				PGRset_recovery_packet_no(&packet, RECOVERY_ERROR_ANS);
				send_packet(&Master, &packet);
				finish_recovery();
				PGRset_recovery_packet_no(&MasterPacketData, RECOVERY_FINISH );
				PGRsend_load_balance_packet(&MasterPacketData);
				memset((char *)&MasterPacketData,0,sizeof(RecoveryPacket));
				loop_end = true;
				break;
		}
		if (loop_end)
		{
			if (Master.sock != -1)
			{
				close (Master.sock);
				Master.sock = -1;
			}
			if (Master.recovery_sock != -1)
			{
				close (Master.recovery_sock);
				Master.recovery_sock = -1;
			}
			close(sock);
			sock = -1;
			return;
		}
	}
}

/*
 * PGRrecovery_main()
 *
 * Entry point of the recovery process.  Binds the recovery port, clears the
 * recovery tables and then waits for incoming connections; every connection
 * is served by pgrecovery_loop().  The function does not return: it exits
 * with status 1 when the port cannot be bound and with status 0 once
 * Exit_Request is set.
 *
 * Exit_Request is checked at the top of each iteration, i.e. after every
 * select() timeout, after an interrupted select() and after each served
 * connection.  Only a positive select() result is treated as "connection
 * pending"; on error the descriptor set is undefined and must not be
 * inspected.
 */
void
PGRrecovery_main(void)
{
	char * func = "PGRrecovery_main()";
	int status;
	int fd = -1;
	int rtn;

	status = PGR_Create_Socket_Bind(&fd, "", Recovery_Port_Number);
	if (status != STATUS_OK)
	{
		show_error("%s:PGR_Create_Socket_Bind failed",func);
		exit(1);
	}
	memset(&MasterPacketData,0,sizeof(RecoveryPacket));
	memset(&Master,0,sizeof(RecoveryTbl));
	memset(&Target,0,sizeof(RecoveryTbl));
	for (;;)
	{
		fd_set	  rmask;
		struct timeval timeout;

		Idle_Flag = IDLE_MODE;
		if (Exit_Request)
		{
			exit(0);
		}

		timeout.tv_sec = RECOVERY_TIMEOUT;
		timeout.tv_usec = 0;

		/*
		 * Wait for something to happen.
		 */
		FD_ZERO(&rmask);
		FD_SET(fd,&rmask);
		rtn = select(fd+1, &rmask, (fd_set *)NULL, (fd_set *)NULL, &timeout);
		if (rtn > 0 && FD_ISSET(fd, &rmask))
		{
			Idle_Flag = BUSY_MODE;
			pgrecovery_loop(fd);
		}
	}
}

/*
 * show_recovery_packet()
 *
 * Debug helper: print the fields of a recovery packet with show_debug().
 * The numeric fields are converted with ntohs() before printing.  Does
 * nothing unless the file is compiled with PRINT_DEBUG.
 */
static void
show_recovery_packet(RecoveryPacket * packet)
{
#ifdef PRINT_DEBUG
	char * func = "show_recovery_packet()";

	show_debug("%s:no = %d",func,ntohs(packet->packet_no));
	show_debug("%s:max_connect = %d",func,ntohs(packet->max_connect));
	show_debug("%s:port = %d",func,ntohs(packet->port));
	show_debug("%s:recoveryPort = %d",func,ntohs(packet->recoveryPort));
	if (packet->hostName != NULL)
	{
		show_debug("%s:hostName = %s",func,packet->hostName);
	}
	if (packet->pg_data != NULL)
	{
		show_debug("%s:pg_data = %s",func,packet->pg_data);
	}
#else
	/* nothing to print in a non-debug build */
	(void) packet;
#endif
}

/*
 * PGRget_recovery_queue_file_for_write()
 *
 * Return the stream of the recovery queue file that new queries have to be
 * appended to, opening it when necessary.  Runs under the recovery queue
 * semaphore.
 *
 * When the write queue number equals the read queue number, the write
 * queue number is advanced so that a new file is started.  Whenever the
 * write queue number differs from the one this process has open, the open
 * stream is closed and the file for the current number is opened with
 * mode "w+".
 *
 * Returns the open stream, or NULL when the file cannot be opened.
 */
FILE *
PGRget_recovery_queue_file_for_write(void)
{
	char * func = "PGRget_recovery_queue_file_for_write()";
	char path[256];
	int wr_no;
	int rd_no;
	int must_reopen = 0;

	PGRsem_lock(RecoverySemID, SEM_NUM_OF_RECOVERY_QUEUE);
	wr_no = Recovery_Status_Inf->write_queue_no;
	rd_no = Recovery_Status_Inf->read_queue_no;

	if (wr_no == rd_no)
	{
		/* start a new file number */
		wr_no ++;
		Recovery_Status_Inf->write_queue_no = wr_no;
		must_reopen = 1;
	}
	if (wr_no != RecoveryQueue.current_queue_no)
	{
		/* the stream held by this process belongs to another file number */
		RecoveryQueue.current_queue_no = wr_no;
		must_reopen = 1;
	}
	if (must_reopen && RecoveryQueue.queue_fp != (FILE *)NULL)
	{
		fflush(RecoveryQueue.queue_fp);
		fclose(RecoveryQueue.queue_fp);
		RecoveryQueue.queue_fp = (FILE *)NULL;
	}

	if (RecoveryQueue.queue_fp == (FILE *)NULL)
	{
		snprintf(path, sizeof(path), "%s/%s%d", PGR_Write_Path, RECOVERY_QUEUE_FILE,wr_no);
		RecoveryQueue.queue_fp = fopen(path, "w+");
		if (RecoveryQueue.queue_fp == (FILE *)NULL)
		{
			show_error("%s:could not open recovery queue file as %s. reason: %s",
					   func,path, strerror(errno));
			PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY_QUEUE);
			return (FILE*)NULL;
		}
	}
	PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY_QUEUE);

	return RecoveryQueue.queue_fp;
}

/*
 * PGRget_recovery_queue_file_for_read()
 *
 * Return the stream of the recovery queue file to read queued queries
 * from, opening it when necessary.  Runs under the recovery queue
 * semaphore.
 *
 * ARGS
 *    int next : 0     - return the file for the current read queue number
 *               non-0 - the current file has been consumed: close and
 *                       remove it and advance to the next queue number
 * RETURN
 *    the open stream, or NULL when
 *      - 'next' is set and the reader has caught up with the writer
 *        (read queue number is not behind the write queue number); the
 *        current stream is closed but its file is left for
 *        clear_recovery_queue_file() to remove
 *      - the queue file cannot be opened
 */
FILE *
PGRget_recovery_queue_file_for_read(int next)
{
	char * func = "PGRget_recovery_queue_file_for_read()";
	int write_queue_no = 0;
	int read_queue_no = 0;
	char fname[256];

	PGRsem_lock(RecoverySemID, SEM_NUM_OF_RECOVERY_QUEUE);
	/*
	 * open recovery queue file
	 */
	write_queue_no = Recovery_Status_Inf->write_queue_no;
	read_queue_no = Recovery_Status_Inf->read_queue_no;
#ifdef PRINT_DEBUG
	show_debug("%s: read_queue_no[%d]",func,read_queue_no);
#endif			

	if (read_queue_no == 0)
	{
		/* queue files are numbered from 1 */
		read_queue_no ++;
		Recovery_Status_Inf->read_queue_no = read_queue_no;
	}
	if (next)
	{
		if (read_queue_no >= write_queue_no)
		{
			/*
			 * Caught up with the writer: there is no further file.
			 * Returning the same stream again would make the caller
			 * rewind it and replay the same queries.
			 */
			if (RecoveryQueue.queue_fp != NULL)
			{
				fflush(RecoveryQueue.queue_fp);
				fclose(RecoveryQueue.queue_fp);
				RecoveryQueue.queue_fp = NULL;
			}
			PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY_QUEUE);
			return (FILE*)NULL;
		}
		if (RecoveryQueue.queue_fp != NULL)
		{
			/* the consumed file is no longer needed */
			fflush(RecoveryQueue.queue_fp);
			fclose(RecoveryQueue.queue_fp);
			snprintf(fname, sizeof(fname), "%s/%s%d", PGR_Write_Path, RECOVERY_QUEUE_FILE,read_queue_no);
			unlink(fname);
			RecoveryQueue.queue_fp = NULL;
		}
		read_queue_no ++;
		Recovery_Status_Inf->read_queue_no = read_queue_no;
	}

	if (RecoveryQueue.queue_fp == NULL)
	{
		snprintf(fname, sizeof(fname), "%s/%s%d", PGR_Write_Path, RECOVERY_QUEUE_FILE,read_queue_no);
#ifdef PRINT_DEBUG
		show_debug("%s: fopen[%s]",func,fname);
#endif			
		RecoveryQueue.queue_fp = fopen(fname, "r");
		if (!RecoveryQueue.queue_fp)
		{
			show_error("%s:could not open recovery queue file as %s. reason: %s",
					   func,fname, strerror(errno));
			PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY_QUEUE);
			return (FILE*)NULL;
		}
	}
	PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY_QUEUE);

	return RecoveryQueue.queue_fp;
}

/*
 * clear_recovery_queue_file()
 *
 * Remove the last recovery queue file once the reader has caught up with
 * the writer.  When the read and write queue numbers differ nothing is
 * removed.  Otherwise the stream held by this process is closed,
 * current_queue_no is reset to 0 and the file of the current read queue
 * number is unlinked.
 *
 * Runs under the recovery queue semaphore, which is released on every
 * return path.
 */
static void
clear_recovery_queue_file(void)
{
	int write_queue_no = 0;
	int read_queue_no = 0;
	char fname[256];

	PGRsem_lock(RecoverySemID, SEM_NUM_OF_RECOVERY_QUEUE);
	write_queue_no = Recovery_Status_Inf->write_queue_no;
	read_queue_no = Recovery_Status_Inf->read_queue_no;
	if (write_queue_no != read_queue_no)
	{
		/* queue is still in use: leave it alone, but do not keep the lock */
		PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY_QUEUE);
		return;
	}
	if (RecoveryQueue.queue_fp != NULL)
	{
		fflush(RecoveryQueue.queue_fp);
		fclose(RecoveryQueue.queue_fp);
		RecoveryQueue.queue_fp = (FILE *)NULL;
	}
	RecoveryQueue.current_queue_no = 0;
	snprintf(fname, sizeof(fname), "%s/%s%d", PGR_Write_Path, RECOVERY_QUEUE_FILE,read_queue_no);
	unlink(fname);
	PGRsem_unlock(RecoverySemID, SEM_NUM_OF_RECOVERY_QUEUE);
}

#endif /* USE_REPLICATION */
