[telepathy-ashes/master] Reliably restart on failure.

David Laban <david.laban at collabora.co.uk>
Thu Nov 12 11:52:48 PST 2009


---
 ashes/tools/commands.py    |    2 +
 ashes/tools/echo_bot.py    |   11 +++++++-
 ashes/tools/echo_daemon.py |   57 +++++++++++++++++++++++++++++++------------
 3 files changed, 53 insertions(+), 17 deletions(-)

diff --git a/ashes/tools/commands.py b/ashes/tools/commands.py
index cb60eae..a4f70c8 100644
--- a/ashes/tools/commands.py
+++ b/ashes/tools/commands.py
@@ -4,6 +4,7 @@ import traceback
 import inspect
 import types
 import os
+import sys
 
 import gobject
 import telepathy
@@ -44,6 +45,7 @@ class CommandExecutor(TextChannelEchoer):
             reply = self.identify(timestamp, text)
         print "Responding:", reply
         self.channel[telepathy.CHANNEL_TYPE_TEXT].Send(type, reply)
+        sys.stdout.flush()
 
     def execute_command(self, id, timestamp, sender, type, flags, text):
         """
diff --git a/ashes/tools/echo_bot.py b/ashes/tools/echo_bot.py
index 325c844..20d6464 100644
--- a/ashes/tools/echo_bot.py
+++ b/ashes/tools/echo_bot.py
@@ -58,6 +58,7 @@ class EchoBotRunner(object):
         self.owned_connections = [] # Connections we own.
         self.account_files = [] # Account files used to [re]create connections.
         self.config = config
+        self.should_restart = True
         self.mainloop = gobject.MainLoop(is_running=True)
         dmainloop = dbus.mainloop.glib.DBusGMainLoop()
         dbus.set_default_main_loop(dmainloop)
@@ -109,7 +110,14 @@ class EchoBotRunner(object):
         self.connections.remove(conn)
         if len(self.connections) == 0 and not prevent_quit:
             print "No longer handling any connections. Exiting."
-            gobject.idle_add(quit)
+            if not self.should_restart:
+                # Return a magic number so we don't get restarted.
+                print "Please don't restart me. I may have been replaced."
+                sys.stdout.flush()
+                gobject.idle_add(quit, 123)
+            else:
+                # Quit normally, and echo_daemon will restart us.
+                gobject.idle_add(quit, 0)
 
     def reconnect(self, conn):
         """
@@ -130,6 +138,7 @@ class EchoBotRunner(object):
         elif reason == 5: # name in use.
             print "The service is being run elsewhere. Removing."
             self.remove_connection(conn)
+            self.should_restart = False
         else:
             print "Disconnected because %s. Removing." % reason
             self.remove_connection(conn)
diff --git a/ashes/tools/echo_daemon.py b/ashes/tools/echo_daemon.py
index 313ca48..c71e8fb 100644
--- a/ashes/tools/echo_daemon.py
+++ b/ashes/tools/echo_daemon.py
@@ -101,41 +101,54 @@ def initial_program_setup(account_file):
 
     subprocesses['gabble'] = gabble
 
-def program_cleanup(signum, stack_):
-    for process in subprocesses.values():
+def kill_everything():
+    for name, process in subprocesses.items():
         try:
             os.kill(process.pid, signal.SIGTERM)
         except OSError:
-            print "process not killed."
+            print name, "not killed."
     if os.environ.get("DBUS_SESSION_BUS_PID", None):
         os.kill(int(os.environ["DBUS_SESSION_BUS_PID"]),  signal.SIGTERM)
         del os.environ["DBUS_SESSION_BUS_PID"]
+
+def program_cleanup(signum, stack_):
+    kill_everything()
     os.remove(PIDFILENAME)
     exit(-signum)
 
-
-def do_daemon(account_file):
+def start_echo_bot(account_file, pipe_stdout):
     global PIDFILENAME
     global ACCOUNT_NAME
+    global DATE
+    DATE = time.strftime("%Y.%m.%d-%H.%M.%S")
     account_name = between(account_file, '/', '.account')
     ACCOUNT_NAME = account_name
     PIDFILENAME = '/tmp/%s.pid' % account_name
 
     initial_program_setup(account_file)
-    
+
+    if pipe_stdout:
+        stdout = PIPE
+    else:
+        stdout = open("%s/log/echobot-%s-%s.log" %
+                        (HOME, ACCOUNT_NAME, DATE), 'w')
     echo_bot = Popen(['/usr/bin/python',
                       os.path.join(THIS_DIR, 'echo_bot.py'),
                       '.*', '.*', account_file],
                      env=os.environ,
-                     stdout=PIPE, stderr=STDOUT)
+                     stdout=stdout, stderr=STDOUT)
     subprocesses['echo_bot'] = echo_bot
-    
+    return echo_bot
+
+def do_daemon(account_file):
+    echo_bot = start_echo_bot(account_file, True)
+
+    # Don't go into the background until we're sure we're set up correctly.
     echobot_logfile = open("%s/log/echobot-%s-%s.log" %
                         (HOME, ACCOUNT_NAME, DATE), 'w')
-
-    # Don't go into the background until we're sure we're alive.
     forward_stream_until(echo_bot.stdout, echobot_logfile, "List Handled:")
-
+    assert echo_bot.poll() is None
+    # Bot is alive and set up correctly.
     # Now we can go into the background.
     context = daemon.DaemonContext(
         working_directory='/home/echobot',
@@ -144,19 +157,29 @@ def do_daemon(account_file):
         stdout=sys.stdout,
         stderr=sys.stderr,
         )
-
     context.signal_map = {
         signal.SIGTERM: program_cleanup,
         signal.SIGINT: program_cleanup,
         signal.SIGHUP: 'terminate',
         #signal.SIGUSR1: reload_program_config,
         }
+
     with context:
         pidfile = open(PIDFILENAME, 'w')
         pidfile.write("%s\n" % os.getpid ())
         pidfile.close()
-        # Block until echo bot closes stdout.
-        forward_stream(echo_bot.stdout, echobot_logfile)
+        # Block until echo bot closes stdout. Since we can't get the return code
+        # of sister processes, we have to pass exit status information as the
+        # last line of input.
+        last_line = forward_stream(echo_bot.stdout, echobot_logfile)
+        # Re-start the bot until it tells us not to.
+        while 'replaced' not in last_line:
+            kill_everything()
+            echo_bot = start_echo_bot(account_file, True)
+            echobot_logfile = open("%s/log/echobot-%s-%s.log" %
+                                (HOME, ACCOUNT_NAME, DATE), 'w')
+            last_line = forward_stream(echo_bot.stdout, echobot_logfile)
+            print last_line
         program_cleanup(0, None)
 
 def forward_stream_until(from_stream, to_stream, stop):
@@ -164,14 +187,16 @@ def forward_stream_until(from_stream, to_stream, stop):
     assert '\n' not in stop # Can't handle things split over multiple lines easily.
     line = 1
     while line:
+        last_line = line
         line = from_stream.readline()
         to_stream.write(line)
         to_stream.flush()
         if stop and stop in line:
-            return
+            return line
+    return last_line
 
 def forward_stream(from_stream, to_stream):
-    forward_stream_until(from_stream, to_stream, '')
+    return forward_stream_until(from_stream, to_stream, '')
 
 if __name__ == "__main__":
     do_daemon(*sys.argv[1:])
-- 
1.5.6.5



More information about the telepathy-commits mailing list