daemon: prevent races by opening with O_EXCL

Rework the way the daemon and server are created.  If we might
want to create a daemon, try to grab the lockfile with O_EXCL.
If we got it, create a server and daemon, otherwise try and
connect to the server.  If connecting to the server fails (including
after sleeping a little in case the server was recently created),
remove the lockfile and start over.

Other fix: shut down network gracefully on daemon exit.

Fixes #1638
This commit is contained in:
Neil Booth 2016-01-31 14:40:22 +09:00
parent 69da96feb4
commit e6020975a5
1 changed files with 110 additions and 47 deletions

View File

@ -19,6 +19,7 @@
import ast
import os
import sys
import time
import jsonrpclib
from jsonrpclib.SimpleJSONRPCServer import SimpleJSONRPCServer, SimpleJSONRPCRequestHandler
@ -49,7 +50,6 @@ def get_daemon(config):
except:
pass
class RequestHandler(SimpleJSONRPCRequestHandler):
def do_OPTIONS(self):
@ -66,7 +66,7 @@ class RequestHandler(SimpleJSONRPCRequestHandler):
class Daemon(DaemonThread):
def __init__(self, config):
def __init__(self, config, server):
DaemonThread.__init__(self)
self.config = config
if config.get('offline'):
@ -78,18 +78,15 @@ class Daemon(DaemonThread):
self.wallets = {}
self.wallet = None
self.cmd_runner = Commands(self.config, self.wallet, self.network)
host = config.get('rpchost', 'localhost')
port = config.get('rpcport', 0)
self.server = SimpleJSONRPCServer((host, port), requestHandler=RequestHandler, logRequests=False)
with open(lockfile(config), 'w') as f:
f.write(repr(self.server.socket.getsockname()))
self.server.timeout = 0.1
self.server = server
# Setup server
server.timeout = 0.1
for cmdname in known_commands:
self.server.register_function(getattr(self.cmd_runner, cmdname), cmdname)
self.server.register_function(self.run_cmdline, 'run_cmdline')
self.server.register_function(self.ping, 'ping')
self.server.register_function(self.run_daemon, 'daemon')
self.server.register_function(self.run_gui, 'gui')
server.register_function(getattr(self.cmd_runner, cmdname), cmdname)
server.register_function(self.run_cmdline, 'run_cmdline')
server.register_function(self.ping, 'ping')
server.register_function(self.run_daemon, 'daemon')
server.register_function(self.run_gui, 'gui')
def ping(self):
return True
@ -178,11 +175,16 @@ class Daemon(DaemonThread):
def run(self):
while self.is_running():
self.server.handle_request()
os.unlink(lockfile(self.config))
def stop(self):
for k, wallet in self.wallets.items():
wallet.stop_threads()
if self.network:
self.print_error("shutting down network")
self.network.stop()
self.network.join()
def stop(self):
self.print_error("stopping, removing lockfile")
Daemon.remove_lockfile(Daemon.lockfile(self.config))
DaemonThread.stop(self)
def init_gui(self, config, plugins):
@ -194,19 +196,78 @@ class Daemon(DaemonThread):
self.gui.main()
@staticmethod
def gui_command(config, config_options, plugins):
server = get_daemon(config)
if server is not None:
return server.gui(config_options)
def lockfile(config):
return os.path.join(config.path, 'daemon')
daemon = Daemon(config)
@staticmethod
def remove_lockfile(lockfile):
os.unlink(lockfile)
@staticmethod
def get_fd_or_server(lockfile):
'''If create is True, tries to create the lockfile, using O_EXCL to
prevent races. If it succeeds it returns the FD.
Otherwise try and connect to the server specified in the lockfile.
If this succeeds, the server is returned. Otherwise remove the
lockfile and try again.'''
while True:
try:
return os.open(lockfile, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
except OSError:
pass
server = Daemon.get_server(lockfile)
if server is not None:
return server
# Couldn't connect; remove lockfile and try again.
Daemon.remove_lockfile(lockfile)
@staticmethod
def get_server(lockfile):
while True:
create_time = None
try:
with open(lockfile) as f:
(host, port), create_time = ast.literal_eval(f.read())
server = jsonrpclib.Server('http://%s:%d' % (host, port))
# Test daemon is running
server.ping()
return server
except:
pass
if not create_time or create_time < time.time() - 1.0:
return None
# Sleep a bit and try again; it might have just been started
time.sleep(1.0)
@staticmethod
def create_daemon(config, fd):
'''Create a daemon and server when they don't exist.'''
host = config.get('rpchost', 'localhost')
port = config.get('rpcport', 0)
server = SimpleJSONRPCServer((host, port), logRequests=False,
requestHandler=RequestHandler)
os.write(fd, repr((server.socket.getsockname(), time.time())))
os.close(fd)
daemon = Daemon(config, server)
daemon.start()
daemon.init_gui(config, plugins)
sys.exit(0)
return daemon
@staticmethod
def gui_command(config, config_options, plugins):
lockfile = Daemon.lockfile(config)
fd = Daemon.get_fd_or_server(lockfile)
if isinstance(fd, int):
daemon = Daemon.create_daemon(config, fd)
daemon.init_gui(config, plugins)
sys.exit(0)
server = fd
return server.gui(config_options)
@staticmethod
def cmdline_command(config, config_options):
server = get_daemon(config)
server = get_server(Daemon.lockfile(config))
if server is not None:
return False, server.run_cmdline(config_options)
@ -214,29 +275,31 @@ class Daemon(DaemonThread):
@staticmethod
def daemon_command(config, config_options):
server = get_daemon(config)
if server is not None:
return server.daemon(config_options)
subcommand = config.get('subcommand')
if subcommand in ['status', 'stop']:
print_msg("Daemon not running")
sys.exit(1)
elif subcommand == 'start':
lockfile = Daemon.lockfile(config)
fd = Daemon.get_fd_or_server(lockfile)
if isinstance(fd, int):
subcommand = config.get('subcommand')
if subcommand != 'start':
if subcommand in ['status', 'stop']:
print_msg("Daemon not running")
else:
print_msg("syntax: electrum daemon <start|status|stop>")
os.close(fd)
Daemon.remove_lockfile(lockfile)
sys.exit(1)
pid = os.fork()
if pid == 0:
daemon = Daemon(config)
if config.get('websocket_server'):
from electrum import websockets
websockets.WebSocketServer(config, daemon.network).start()
if config.get('requests_dir'):
check_www_dir(config.get('requests_dir'))
daemon.start()
daemon.join()
sys.exit(0)
else:
if pid:
print_stderr("starting daemon (PID %d)" % pid)
sys.exit(0)
else:
print_msg("syntax: electrum daemon <start|status|stop>")
sys.exit(1)
daemon = Daemon.create_daemon(config, fd)
if config.get('websocket_server'):
from electrum import websockets
websockets.WebSocketServer(config, daemon.network).start()
if config.get('requests_dir'):
check_www_dir(config.get('requests_dir'))
daemon.join()
sys.exit(0)
server = fd
if server is not None:
return server.daemon(config_options)