Crossfire Server, Branch 1.12  R12190
crossfire-loop.c
Go to the documentation of this file.
00001 /* This is a watchdog program from Christian Stieber.  It is not an
00002  * officially supported piece of crossfire (I hope it works, but I am not
00003  * going to be spending time debugging problems in this piece of code).  The
00004  * idea is that it periodically sends/gets udp messages to the server - if
00005  * the server isn't responding, it kills it off and starts a new one.  There
00006  * is a bit more logic to it - From Christian:
00007  *
00008  * The wrapper is just a hack. I'm using it on a Solaris machine, and it
00009  * seems to work fine. Notable features:
00010  * - uses the watchdog interface
00011  * - if the server crashes more than 10 times, with the time between
00012  *   successive crashes being less than 30 seconds, the wrapper terminates
00013  *   itself (to prevent bringing down the machine in case of startup problems)
00014  * - the server runs at nice 10
00015  *
00016  * Note that the main advantage the watchdog has over just the simple
00017  * crossloop scripts is in the case of infinite loops.  For simple crashes,
00018  * the crossloop programs do a fine job.
00019  */
00020 
00021 /*
00022  * Version 1
00023  */
00024 
00025 /************************************************************************/
00026 /*
00027  * Configuration options
00028  */
00029 
00030 /* server executable */
00031 #define CROSSFIRE_SERVER        "/usr/stud/stieber/bin/server"
00032 
00033 /* directory to cd to before starting the server */
00034 #define CROSSFIRE_TMPDIR        "/usr/stud/stieber/crossfire/tmp/"
00035 
00036 /* if the server crashes more than CRASH_COUNT times, with less than
00037  * CRASH_INTERVAL seconds between two successive crashes, the loop
00038  * program is terminated. */
00039 #define CRASH_COUNT             10
00040 #define CRASH_INTERVAL          30
00041 
00042 #define USE_WATCHDOG            /* compile in the UDP watchdog (socket, pipe and select() loop in main); without it main simply waits for the server to exit */
00043 #define ERROR_SLEEP             30  /* seconds to sleep before retrying after pipe() or fork() fails */
00044 
00045 /************************************************************************/
00046 
#include <sys/types.h>
#include <sys/select.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/unistd.h>
#include <sys/wait.h>
#include <netinet/in.h>
#include <netdb.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
00058 
00059 /************************************************************************/
00060 
#ifdef USE_WATCHDOG
/* Self-pipe used to turn SIGCHLD into something select() can wait on:
 * Pipe[0] is watched by main, Pipe[1] is written by SignalHandler. */
int Pipe[2];
#endif

/************************************************************************/

/*
 * SIGCHLD handler.
 *
 * Writes a single byte to Pipe[1] so that the select() loop in main sees
 * Pipe[0] become readable when the server process changes state.
 *
 * Only async-signal-safe calls (write, _exit) are used, and errno is
 * preserved because the interrupted code (the select() retry loop) tests
 * errno right after the handler returns.
 *
 * If the byte cannot be written the wrapper can no longer notice a dying
 * server, so the process is terminated with EXIT_FAILURE.
 *
 * When USE_WATCHDOG is not defined there is no pipe and the handler does
 * nothing; it is kept so that the symbol exists in both configurations.
 *
 * Unused: the signal number, ignored.
 */
void SignalHandler(int Unused) {
    (void)Unused;
#ifdef USE_WATCHDOG
    {
        static const char Message[] = "Pipe: write failed\n";
        const int SavedErrno = errno;

        if (write(Pipe[1], "", 1) != 1) {
            /* perror()/exit() are not async-signal-safe, so report with a
             * raw write() and leave without running stdio/atexit cleanup. */
            if (write(STDERR_FILENO, Message, sizeof(Message) - 1) < 0) {
                /* nothing more can be done; we are exiting anyway */
            }
            _exit(EXIT_FAILURE);
        }
        errno = SavedErrno;
    }
#endif
}
00073 
00074 /************************************************************************/
00075 
00076 int main(void) {
00077     int CrashCount;
00078 #ifdef USE_WATCHDOG
00079     struct protoent *protoent;
00080     struct sockaddr_in insock;
00081     int fd;
00082 
00083     memset(&insock, 0, sizeof(insock));
00084 
00085     if ((protoent = getprotobyname("udp")) == NULL) {
00086         perror("Can't get protobyname");
00087         return EXIT_FAILURE;
00088     }
00089     if ((fd = socket(PF_INET, SOCK_DGRAM, protoent->p_proto)) == -1) {
00090         perror("Can't create socket");
00091         return EXIT_FAILURE;
00092     }
00093     insock.sin_family = AF_INET;
00094     insock.sin_port = htons((unsigned short)13325);
00095     if (bind(fd, (struct sockaddr *)&insock, sizeof(insock)) == -1) {
00096         perror("Error on bind");
00097         return EXIT_FAILURE;
00098     }
00099 #endif
00100 
00101     CrashCount = 0;
00102     nice(10-nice(0));
00103     while (CrashCount < CRASH_COUNT) {
00104         time_t StartTime;
00105         time_t EndTime;
00106         pid_t Server;
00107 
00108         chdir(CROSSFIRE_TMPDIR);
00109         time(&StartTime);
00110 #ifdef USE_WATCHDOG
00111         if (pipe(Pipe) == 0) {
00112             void (*OldHandler)(int);
00113 
00114             OldHandler = signal(SIGCHLD, SignalHandler);
00115 #endif
00116             switch (Server = fork()) {
00117             case 0:
00118                 execl(CROSSFIRE_SERVER, CROSSFIRE_SERVER, "-server", NULL);
00119                 return EXIT_FAILURE;
00120 
00121             case -1:
00122                 sleep(ERROR_SLEEP);
00123                 break;
00124 
00125             default:
00126 #ifdef USE_WATCHDOG
00127                 while (1) {
00128                     fd_set Files;
00129                     struct timeval Timeout;
00130                     int Max;
00131 
00132                     FD_ZERO(&Files);
00133                     FD_SET(Pipe[0], &Files);
00134                     FD_SET(fd, &Files);
00135                     Timeout.tv_sec = 5*60;
00136                     Timeout.tv_usec = 0;
00137                     if (fd > Pipe[0]) {
00138                         Max = fd+1;
00139                     } else {
00140                         Max = Pipe[0]+1;
00141                     }
00142                     while (select(Max, &Files, NULL, NULL, &Timeout) == -1) {
00143                         if (errno != EINTR) {
00144                             perror("Error on select");
00145                             return EXIT_FAILURE;
00146                         }
00147                     }
00148                     if (FD_ISSET(Pipe[0], &Files)) {
00149                         /* crash */
00150                         unlink("core");
00151                         waitpid(Server, NULL, 0);
00152                         printf("Server crash!\n");
00153                         break;
00154                     } else if (FD_ISSET(fd, &Files)) {
00155                         /* watchdog */
00156                         char t;
00157 
00158                         recv(fd, &t, 1, 0);
00159                     } else {
00160                         /* timeout */
00161                         printf("Watchdog timeout!\n");
00162                         if (kill(Server, SIGKILL) != 0) {
00163                             perror("Error on kill");
00164                             return EXIT_FAILURE;
00165                         }
00166                     }
00167                 }
00168 #else
00169                 waitpid(Server, NULL, 0);
00170 #endif
00171 #ifdef USE_WATCHDOG
00172                 signal(SIGCHLD, OldHandler);
00173 #endif
00174                 time(&EndTime);
00175                 if (EndTime-StartTime < CRASH_INTERVAL) {
00176                     CrashCount++;
00177                 } else {
00178                     CrashCount = 0;
00179                 }
00180                 break;
00181             }
00182 #ifdef USE_WATCHDOG
00183             close(Pipe[0]);
00184             close(Pipe[1]);
00185         } else {
00186             sleep(ERROR_SLEEP);
00187         }
00188 #endif
00189     }
00190 #ifdef USE_WATCHDOG
00191     close(fd);
00192 #endif
00193     return 0;
00194 }