Crossfire Server, Branch 1.12
R12190
|
/* This is a watchdog program from Christian Stieber. It is not an
 * officially supported piece of crossfire (I hope it works, but I am not
 * going to be spending time debugging problems in this piece of code). The
 * idea is that it periodically sends/gets udp messages to the server - if
 * the server isn't responding, it kills it off and starts a new one. There
 * is a bit more logic to it - From Christian:
 *
 * The wrapper is just a hack. I'm using it on a Solaris machine, and it
 * seems to work fine. Notable features:
 * - uses the watchdog interface
 * - if the server crashes more than 10 times, with the time between
 *   successive crashes being less than 30 seconds, the wrapper terminates
 *   itself (to prevent bringing down the machine in case of startup problems)
 * - the server runs at nice 10
 *
 * Note that the main advantage the watchdog has over just the simple
 * crossloop scripts is in the case of infinite loops. For simple crashes,
 * the crossloop programs do a fine job.
 */

/*
 * Version 1
 */

/************************************************************************/
/*
 * Configuration options
 */

/* server executable */
#define CROSSFIRE_SERVER "/usr/stud/stieber/bin/server"

/* directory to cd to before starting the server */
#define CROSSFIRE_TMPDIR "/usr/stud/stieber/crossfire/tmp/"

/* if the server crashes more than CRASH_COUNT times, with less than
 * CRASH_INTERVAL seconds between two successive crashes, the loop
 * program is terminated.
 */
#define CRASH_COUNT 10
#define CRASH_INTERVAL 30

#define USE_WATCHDOG
#define ERROR_SLEEP 30

/************************************************************************/

#include <sys/unistd.h>
#include <sys/wait.h>
#include <stdlib.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/select.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>
#include <errno.h>

/************************************************************************/

#ifdef USE_WATCHDOG
/* Self-pipe used to turn SIGCHLD into a readable file descriptor:
 * SignalHandler() writes one byte to Pipe[1], main() select()s on Pipe[0].
 * A fresh pipe is created for every server start.
 */
int Pipe[2];
#endif

/************************************************************************/

/**
 * SIGCHLD handler: signals the main loop that the child has changed state
 * by writing a single byte into the self-pipe.
 *
 * Only async-signal-safe functions (write, _exit) are used here; errno is
 * saved and restored so that the interrupted code in main() still sees the
 * value set by its own system call.
 *
 * @param Unused signal number (ignored).
 */
void SignalHandler(int Unused) {
#ifdef USE_WATCHDOG
    static const char Message[] = "Pipe: write failed\n";
    int SavedErrno = errno;

    (void)Unused;
    if (write(Pipe[1], "", 1) != 1) {
        /* Cannot notify the main loop; report and give up. */
        if (write(STDERR_FILENO, Message, sizeof(Message)-1) < 0) {
            /* nothing more can be done from inside a signal handler */
        }
        _exit(EXIT_FAILURE);
    }
    errno = SavedErrno;
#else
    (void)Unused;
#endif
}

/************************************************************************/

/**
 * Runs the crossfire server in a restart loop.
 *
 * With USE_WATCHDOG defined, a UDP socket is bound to port 13325 and the
 * server is expected to send a datagram to it at least every five minutes;
 * if none arrives in that time the server is killed with SIGKILL. Whenever
 * the server terminates it is restarted, unless it terminated less than
 * CRASH_INTERVAL seconds after being started CRASH_COUNT times in a row.
 *
 * @return EXIT_FAILURE on setup/select/kill errors, 0 once the crash limit
 *         has been reached.
 */
int main(void) {
    int CrashCount;
#ifdef USE_WATCHDOG
    struct protoent *protoent;
    struct sockaddr_in insock;
    int fd;

    memset(&insock, 0, sizeof(insock));

    if ((protoent = getprotobyname("udp")) == NULL) {
        perror("Can't get protobyname");
        return EXIT_FAILURE;
    }
    if ((fd = socket(PF_INET, SOCK_DGRAM, protoent->p_proto)) == -1) {
        perror("Can't create socket");
        return EXIT_FAILURE;
    }
    insock.sin_family = AF_INET;
    insock.sin_port = htons((unsigned short)13325);
    if (bind(fd, (struct sockaddr *)&insock, sizeof(insock)) == -1) {
        perror("Error on bind");
        close(fd);
        return EXIT_FAILURE;
    }
#endif

    CrashCount = 0;

    /* nice(0) yields the current nice value, so this raises it to 10.
     * -1 is a legal return value, hence the errno check. Failure is not
     * fatal: the server simply runs at the current priority.
     */
    errno = 0;
    if (nice(10-nice(0)) == -1 && errno != 0) {
        perror("Can't change nice value");
    }

    while (CrashCount < CRASH_COUNT) {
        time_t StartTime;
        time_t EndTime;
        pid_t Server;

        /* Best effort, as before: the server is started even if this fails. */
        if (chdir(CROSSFIRE_TMPDIR) != 0) {
            perror("Can't chdir to " CROSSFIRE_TMPDIR);
        }
        time(&StartTime);
#ifdef USE_WATCHDOG
        if (pipe(Pipe) == 0) {
            void (*OldHandler)(int);

            OldHandler = signal(SIGCHLD, SignalHandler);
#endif
            switch (Server = fork()) {
            case 0:
                /* Child: becomes the server. */
#ifdef USE_WATCHDOG
                /* The server has no use for the wrapper's descriptors. */
                close(Pipe[0]);
                close(Pipe[1]);
                close(fd);
#endif
                execl(CROSSFIRE_SERVER, CROSSFIRE_SERVER, "-server", (char *)NULL);
                perror("Can't exec " CROSSFIRE_SERVER);
                /* _exit: do not flush stdio buffers inherited from the parent. */
                _exit(EXIT_FAILURE);

            case -1:
                perror("Can't fork");
                sleep(ERROR_SLEEP);
                break;

            default:
#ifdef USE_WATCHDOG
                while (1) {
                    fd_set Files;
                    struct timeval Timeout;
                    int Max;
                    int Ready;

                    if (fd > Pipe[0]) {
                        Max = fd+1;
                    } else {
                        Max = Pipe[0]+1;
                    }
                    /* Rebuild the sets and the timeout before every call:
                     * their contents are not reliable after select() fails.
                     */
                    do {
                        FD_ZERO(&Files);
                        FD_SET(Pipe[0], &Files);
                        FD_SET(fd, &Files);
                        Timeout.tv_sec = 5*60;
                        Timeout.tv_usec = 0;
                        Ready = select(Max, &Files, NULL, NULL, &Timeout);
                    } while (Ready == -1 && errno == EINTR);
                    if (Ready == -1) {
                        perror("Error on select");
                        return EXIT_FAILURE;
                    }
                    if (FD_ISSET(Pipe[0], &Files)) {
                        /* crash */
                        unlink("core");
                        while (waitpid(Server, NULL, 0) == -1 && errno == EINTR) {
                            /* retry */
                        }
                        printf("Server crash!\n");
                        break;
                    } else if (FD_ISSET(fd, &Files)) {
                        /* watchdog */
                        char t;

                        /* Only the arrival matters; the payload is discarded. */
                        recv(fd, &t, 1, 0);
                    } else {
                        /* timeout */
                        printf("Watchdog timeout!\n");
                        if (kill(Server, SIGKILL) != 0) {
                            perror("Error on kill");
                            return EXIT_FAILURE;
                        }
                        /* The resulting SIGCHLD is picked up through the
                         * pipe on the next pass of this loop.
                         */
                    }
                }
#else
                while (waitpid(Server, NULL, 0) == -1 && errno == EINTR) {
                    /* retry */
                }
#endif
                time(&EndTime);
                if (EndTime-StartTime < CRASH_INTERVAL) {
                    CrashCount++;
                } else {
                    CrashCount = 0;
                }
                break;
            }
#ifdef USE_WATCHDOG
            /* Restore the previous handler before the pipe goes away so the
             * handler can never write to a closed descriptor.
             */
            signal(SIGCHLD, OldHandler);
            close(Pipe[0]);
            close(Pipe[1]);
        } else {
            perror("Can't create pipe");
            sleep(ERROR_SLEEP);
        }
#endif
    }
#ifdef USE_WATCHDOG
    close(fd);
#endif
    return 0;
}