daemon: prevent races by opening with O_EXCL

Rework the way the daemon and server are created.  If we might
want to create a daemon, try to grab the lockfile with O_EXCL.
If we got it, create a server and daemon, otherwise try and
connect to the server.  If connecting to the server fails (including
after sleeping a little in case the server was recently created),
remove the lockfile and start over.

Other fix: shut down network gracefully on daemon exit.

Fixes #1638
This commit is contained in:
Neil Booth 2016-01-31 14:40:22 +09:00
parent 69da96feb4
commit e6020975a5
1 changed files with 110 additions and 47 deletions

View File

@ -19,6 +19,7 @@
import ast import ast
import os import os
import sys import sys
import time
import jsonrpclib import jsonrpclib
from jsonrpclib.SimpleJSONRPCServer import SimpleJSONRPCServer, SimpleJSONRPCRequestHandler from jsonrpclib.SimpleJSONRPCServer import SimpleJSONRPCServer, SimpleJSONRPCRequestHandler
@ -49,7 +50,6 @@ def get_daemon(config):
except: except:
pass pass
class RequestHandler(SimpleJSONRPCRequestHandler): class RequestHandler(SimpleJSONRPCRequestHandler):
def do_OPTIONS(self): def do_OPTIONS(self):
@ -66,7 +66,7 @@ class RequestHandler(SimpleJSONRPCRequestHandler):
class Daemon(DaemonThread): class Daemon(DaemonThread):
def __init__(self, config): def __init__(self, config, server):
DaemonThread.__init__(self) DaemonThread.__init__(self)
self.config = config self.config = config
if config.get('offline'): if config.get('offline'):
@ -78,18 +78,15 @@ class Daemon(DaemonThread):
self.wallets = {} self.wallets = {}
self.wallet = None self.wallet = None
self.cmd_runner = Commands(self.config, self.wallet, self.network) self.cmd_runner = Commands(self.config, self.wallet, self.network)
host = config.get('rpchost', 'localhost') self.server = server
port = config.get('rpcport', 0) # Setup server
self.server = SimpleJSONRPCServer((host, port), requestHandler=RequestHandler, logRequests=False) server.timeout = 0.1
with open(lockfile(config), 'w') as f:
f.write(repr(self.server.socket.getsockname()))
self.server.timeout = 0.1
for cmdname in known_commands: for cmdname in known_commands:
self.server.register_function(getattr(self.cmd_runner, cmdname), cmdname) server.register_function(getattr(self.cmd_runner, cmdname), cmdname)
self.server.register_function(self.run_cmdline, 'run_cmdline') server.register_function(self.run_cmdline, 'run_cmdline')
self.server.register_function(self.ping, 'ping') server.register_function(self.ping, 'ping')
self.server.register_function(self.run_daemon, 'daemon') server.register_function(self.run_daemon, 'daemon')
self.server.register_function(self.run_gui, 'gui') server.register_function(self.run_gui, 'gui')
def ping(self): def ping(self):
return True return True
@ -178,11 +175,16 @@ class Daemon(DaemonThread):
def run(self): def run(self):
while self.is_running(): while self.is_running():
self.server.handle_request() self.server.handle_request()
os.unlink(lockfile(self.config))
def stop(self):
for k, wallet in self.wallets.items(): for k, wallet in self.wallets.items():
wallet.stop_threads() wallet.stop_threads()
if self.network:
self.print_error("shutting down network")
self.network.stop()
self.network.join()
def stop(self):
self.print_error("stopping, removing lockfile")
Daemon.remove_lockfile(Daemon.lockfile(self.config))
DaemonThread.stop(self) DaemonThread.stop(self)
def init_gui(self, config, plugins): def init_gui(self, config, plugins):
@ -194,19 +196,78 @@ class Daemon(DaemonThread):
self.gui.main() self.gui.main()
@staticmethod @staticmethod
def gui_command(config, config_options, plugins): def lockfile(config):
server = get_daemon(config) return os.path.join(config.path, 'daemon')
if server is not None:
return server.gui(config_options)
daemon = Daemon(config) @staticmethod
def remove_lockfile(lockfile):
os.unlink(lockfile)
@staticmethod
def get_fd_or_server(lockfile):
'''If create is True, tries to create the lockfile, using O_EXCL to
prevent races. If it succeeds it returns the FD.
Otherwise try and connect to the server specified in the lockfile.
If this succeeds, the server is returned. Otherwise remove the
lockfile and try again.'''
while True:
try:
return os.open(lockfile, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
except OSError:
pass
server = Daemon.get_server(lockfile)
if server is not None:
return server
# Couldn't connect; remove lockfile and try again.
Daemon.remove_lockfile(lockfile)
@staticmethod
def get_server(lockfile):
while True:
create_time = None
try:
with open(lockfile) as f:
(host, port), create_time = ast.literal_eval(f.read())
server = jsonrpclib.Server('http://%s:%d' % (host, port))
# Test daemon is running
server.ping()
return server
except:
pass
if not create_time or create_time < time.time() - 1.0:
return None
# Sleep a bit and try again; it might have just been started
time.sleep(1.0)
@staticmethod
def create_daemon(config, fd):
'''Create a daemon and server when they don't exist.'''
host = config.get('rpchost', 'localhost')
port = config.get('rpcport', 0)
server = SimpleJSONRPCServer((host, port), logRequests=False,
requestHandler=RequestHandler)
os.write(fd, repr((server.socket.getsockname(), time.time())))
os.close(fd)
daemon = Daemon(config, server)
daemon.start() daemon.start()
daemon.init_gui(config, plugins) return daemon
sys.exit(0)
@staticmethod
def gui_command(config, config_options, plugins):
lockfile = Daemon.lockfile(config)
fd = Daemon.get_fd_or_server(lockfile)
if isinstance(fd, int):
daemon = Daemon.create_daemon(config, fd)
daemon.init_gui(config, plugins)
sys.exit(0)
server = fd
return server.gui(config_options)
@staticmethod @staticmethod
def cmdline_command(config, config_options): def cmdline_command(config, config_options):
server = get_daemon(config) server = get_server(Daemon.lockfile(config))
if server is not None: if server is not None:
return False, server.run_cmdline(config_options) return False, server.run_cmdline(config_options)
@ -214,29 +275,31 @@ class Daemon(DaemonThread):
@staticmethod @staticmethod
def daemon_command(config, config_options): def daemon_command(config, config_options):
server = get_daemon(config) lockfile = Daemon.lockfile(config)
if server is not None: fd = Daemon.get_fd_or_server(lockfile)
return server.daemon(config_options) if isinstance(fd, int):
subcommand = config.get('subcommand')
subcommand = config.get('subcommand') if subcommand != 'start':
if subcommand in ['status', 'stop']: if subcommand in ['status', 'stop']:
print_msg("Daemon not running") print_msg("Daemon not running")
sys.exit(1) else:
elif subcommand == 'start': print_msg("syntax: electrum daemon <start|status|stop>")
os.close(fd)
Daemon.remove_lockfile(lockfile)
sys.exit(1)
pid = os.fork() pid = os.fork()
if pid == 0: if pid:
daemon = Daemon(config)
if config.get('websocket_server'):
from electrum import websockets
websockets.WebSocketServer(config, daemon.network).start()
if config.get('requests_dir'):
check_www_dir(config.get('requests_dir'))
daemon.start()
daemon.join()
sys.exit(0)
else:
print_stderr("starting daemon (PID %d)" % pid) print_stderr("starting daemon (PID %d)" % pid)
sys.exit(0) sys.exit(0)
else: daemon = Daemon.create_daemon(config, fd)
print_msg("syntax: electrum daemon <start|status|stop>") if config.get('websocket_server'):
sys.exit(1) from electrum import websockets
websockets.WebSocketServer(config, daemon.network).start()
if config.get('requests_dir'):
check_www_dir(config.get('requests_dir'))
daemon.join()
sys.exit(0)
server = fd
if server is not None:
return server.daemon(config_options)