[telepathy-ashes/master] Reliably restart on fail.
David Laban
david.laban at collabora.co.uk
Thu Nov 12 11:52:48 PST 2009
---
ashes/tools/commands.py | 2 +
ashes/tools/echo_bot.py | 11 +++++++-
ashes/tools/echo_daemon.py | 57 +++++++++++++++++++++++++++++++------------
3 files changed, 53 insertions(+), 17 deletions(-)
diff --git a/ashes/tools/commands.py b/ashes/tools/commands.py
index cb60eae..a4f70c8 100644
--- a/ashes/tools/commands.py
+++ b/ashes/tools/commands.py
@@ -4,6 +4,7 @@ import traceback
import inspect
import types
import os
+import sys
import gobject
import telepathy
@@ -44,6 +45,7 @@ class CommandExecutor(TextChannelEchoer):
reply = self.identify(timestamp, text)
print "Responding:", reply
self.channel[telepathy.CHANNEL_TYPE_TEXT].Send(type, reply)
+ sys.stdout.flush()
def execute_command(self, id, timestamp, sender, type, flags, text):
"""
diff --git a/ashes/tools/echo_bot.py b/ashes/tools/echo_bot.py
index 325c844..20d6464 100644
--- a/ashes/tools/echo_bot.py
+++ b/ashes/tools/echo_bot.py
@@ -58,6 +58,7 @@ class EchoBotRunner(object):
self.owned_connections = [] # Connections we own.
self.account_files = [] # Account files used to [re]create connections.
self.config = config
+ self.should_restart = True
self.mainloop = gobject.MainLoop(is_running=True)
dmainloop = dbus.mainloop.glib.DBusGMainLoop()
dbus.set_default_main_loop(dmainloop)
@@ -109,7 +110,14 @@ class EchoBotRunner(object):
self.connections.remove(conn)
if len(self.connections) == 0 and not prevent_quit:
print "No longer handling any connections. Exiting."
- gobject.idle_add(quit)
+ if not self.should_restart:
+ # Return a magic number so we don't get restarted.
+ print "Please don't restart me. I may have been replaced."
+ sys.stdout.flush()
+ gobject.idle_add(quit, 123)
+ else:
+ # Quit normally, and echo_daemon will restart us.
+ gobject.idle_add(quit, 0)
def reconnect(self, conn):
"""
@@ -130,6 +138,7 @@ class EchoBotRunner(object):
elif reason == 5: # name in use.
print "The service is being run elsewhere. Removing."
self.remove_connection(conn)
+ self.should_restart = False
else:
print "Disconnected because %s. Removing." % reason
self.remove_connection(conn)
diff --git a/ashes/tools/echo_daemon.py b/ashes/tools/echo_daemon.py
index 313ca48..c71e8fb 100644
--- a/ashes/tools/echo_daemon.py
+++ b/ashes/tools/echo_daemon.py
@@ -101,41 +101,54 @@ def initial_program_setup(account_file):
subprocesses['gabble'] = gabble
-def program_cleanup(signum, stack_):
- for process in subprocesses.values():
+def kill_everything():
+ for name, process in subprocesses.items():
try:
os.kill(process.pid, signal.SIGTERM)
except OSError:
- print "process not killed."
+ print name, "not killed."
if os.environ.get("DBUS_SESSION_BUS_PID", None):
os.kill(int(os.environ["DBUS_SESSION_BUS_PID"]), signal.SIGTERM)
del os.environ["DBUS_SESSION_BUS_PID"]
+
+def program_cleanup(signum, stack_):
+ kill_everything()
os.remove(PIDFILENAME)
exit(-signum)
-
-def do_daemon(account_file):
+def start_echo_bot(account_file, pipe_stdout):
global PIDFILENAME
global ACCOUNT_NAME
+ global DATE
+ DATE = time.strftime("%Y.%m.%d-%H.%M.%S")
account_name = between(account_file, '/', '.account')
ACCOUNT_NAME = account_name
PIDFILENAME = '/tmp/%s.pid' % account_name
initial_program_setup(account_file)
-
+
+ if pipe_stdout:
+ stdout = PIPE
+ else:
+ stdout = open("%s/log/echobot-%s-%s.log" %
+ (HOME, ACCOUNT_NAME, DATE), 'w')
echo_bot = Popen(['/usr/bin/python',
os.path.join(THIS_DIR, 'echo_bot.py'),
'.*', '.*', account_file],
env=os.environ,
- stdout=PIPE, stderr=STDOUT)
+ stdout=stdout, stderr=STDOUT)
subprocesses['echo_bot'] = echo_bot
-
+ return echo_bot
+
+def do_daemon(account_file):
+ echo_bot = start_echo_bot(account_file, True)
+
+ # Don't go into the background until we're sure we're set up correctly.
echobot_logfile = open("%s/log/echobot-%s-%s.log" %
(HOME, ACCOUNT_NAME, DATE), 'w')
-
- # Don't go into the background until we're sure we're alive.
forward_stream_until(echo_bot.stdout, echobot_logfile, "List Handled:")
-
+ assert echo_bot.poll() is None
+ # Bot is alive and set up correctly.
# Now we can go into the background.
context = daemon.DaemonContext(
working_directory='/home/echobot',
@@ -144,19 +157,29 @@ def do_daemon(account_file):
stdout=sys.stdout,
stderr=sys.stderr,
)
-
context.signal_map = {
signal.SIGTERM: program_cleanup,
signal.SIGINT: program_cleanup,
signal.SIGHUP: 'terminate',
#signal.SIGUSR1: reload_program_config,
}
+
with context:
pidfile = open(PIDFILENAME, 'w')
pidfile.write("%s\n" % os.getpid ())
pidfile.close()
- # Block until echo bot closes stdout.
- forward_stream(echo_bot.stdout, echobot_logfile)
+ # Block until echo bot closes stdout. Since we can't get the return code
+ # of sister processes, the bot passes its exit status information as the
+ # last line of its output (which is what we read here).
+ last_line = forward_stream(echo_bot.stdout, echobot_logfile)
+ # Re-start the bot until it tells us not to.
+ while 'replaced' not in last_line:
+ kill_everything()
+ echo_bot = start_echo_bot(account_file, True)
+ echobot_logfile = open("%s/log/echobot-%s-%s.log" %
+ (HOME, ACCOUNT_NAME, DATE), 'w')
+ last_line = forward_stream(echo_bot.stdout, echobot_logfile)
+ print last_line
program_cleanup(0, None)
def forward_stream_until(from_stream, to_stream, stop):
@@ -164,14 +187,16 @@ def forward_stream_until(from_stream, to_stream, stop):
assert '\n' not in stop # Can't handle things split over multiple lines easily.
line = 1
while line:
+ last_line = line
line = from_stream.readline()
to_stream.write(line)
to_stream.flush()
if stop and stop in line:
- return
+ return line
+ return last_line
def forward_stream(from_stream, to_stream):
- forward_stream_until(from_stream, to_stream, '')
+ return forward_stream_until(from_stream, to_stream, '')
if __name__ == "__main__":
do_daemon(*sys.argv[1:])
--
1.5.6.5
More information about the telepathy-commits
mailing list