From 0fa26c1cf37ca376bdee7d66620faafc9d8be39b Mon Sep 17 00:00:00 2001 From: Seth Schoen Date: Wed, 3 Oct 2012 12:29:14 -0700 Subject: [PATCH 01/13] push notification implementation with one-shot lock --- server-ca/chocolate.py | 21 +++++++++++++++++++++ server-ca/daemon.py | 29 ++++++++--------------------- server-ca/redis_lock.py | 9 ++++++++- 3 files changed, 37 insertions(+), 22 deletions(-) diff --git a/server-ca/chocolate.py b/server-ca/chocolate.py index 2c8b38a6f..ff50a6615 100755 --- a/server-ca/chocolate.py +++ b/server-ca/chocolate.py @@ -81,6 +81,7 @@ class session(object): if timestamp is None: timestamp = int(time.time()) if not self.exists(): sessions.hset(self.id, "created", timestamp) + sessions.hset(self.id, "lastpoll", 0) sessions.hset(self.id, "live", True) sessions.lpush("active-requests", self.id) else: @@ -101,6 +102,15 @@ class session(object): def age(self): return int(time.time()) - int(sessions.hget(self.id, "created")) + def poll_age(self): + return float(time.time()) - float(sessions.hget(self.id, "lastpoll")) + + def request_test(self): + """Ask a daemon to test challenges.""" + # TODO: check whether this session is already in pending-testchallenge? + sessions.lpush("pending-testchallenge", self.id) + sessions.publish("requests", "testchallenge") + def request_made(self): """Has there already been a signing request made in this session?""" return sessions.hget(self.id, "state") is not None @@ -310,6 +320,17 @@ class session(object): # If we're in testchallenge, tell the client about the challenges and their # current status. if state == "testchallenge": + if m.completedchallenge: + try: + with redis_lock(sessions, "lock-" + self.id, one_shot=True): + if self.poll_age() < poll_interval: + # Too recent! + pass + else: + sessions.hset(self.id, "lastpoll", time.time()) + self.request_test() + except KeyError: + pass self.send_challenges(m, r) return # If we're in done, tell the client about the successfully issued cert. diff --git a/server-ca/daemon.py b/server-ca/daemon.py index 28c80a16b..82f024749 100644 --- a/server-ca/daemon.py +++ b/server-ca/daemon.py @@ -150,10 +150,6 @@ def makechallenge(session): if debug: print "created new challenge", short(challenge) if True: # challenges have been created r.hset(session, "state", "testchallenge") - r.lpush("pending-testchallenge", session) - # TODO: this causes the daemon to immediately attempt to test the - # challenge for completion, with no delay. - r.publish("requests", "testchallenge") else: r.lpush("pending-makechallenge", session) r.publish("requests", "makechallenge") @@ -170,13 +166,8 @@ def testchallenge(session): if debug: print "removing expired session", short(session) r.lrem("pending-requests", session) return - # Note that we can push this back into the original queue. - # TODO: need to add a way to make sure we don't test the same - # session too often. - # Conceivably, this could wait until the client announces - # that it has completed the challenges. Information about - # the client's reporting could be stored in the database. - # Then the CA doesn't need to poll prematurely. + if r.hget(session, "state") != "testchallenge": + return all_satisfied = True for i, name in enumerate(r.lrange("%s:names" % session, 0, -1)): challenge = "%s:%d" % (session, i) @@ -227,14 +218,7 @@ def testchallenge(session): r.publish("requests", "issue") else: # Some challenges are not verified. - # Put this session back on the stack to try to verify again. - r.lpush("pending-testchallenge", session) - # TODO: if we wanted the client to tell us when it believes - # it has completed the challenge, we should take this out and - # have the server publish the message in response to the message - # from the client. Also, the current version will cause the - # server to retest over and over again as fast as it's able. - r.publish("requests", "testchallenge") + pass def issue(session): if r.hget(session, "live") != "True": @@ -262,6 +246,8 @@ def issue(session): if debug: print "removing expired (issue-state!?) session", short(session) r.lrem("pending-requests", session) return + if r.hget(session, "state") != "issue": + return csr = r.hget(session, "csr") names = r.lrange("%s:names" % session, 0, -1) with issue_lock: @@ -322,9 +308,10 @@ for message in ps.listen(): if debug: print "expiring ancient session", short(session) r.hset(session, "live", False) else: - # if debug: print "going to %s for %s" % (queue, short(session)) if queue == "makechallenge": makechallenge(session) - elif queue == "testchallenge": testchallenge(session) + elif queue == "testchallenge": + with redis_lock(r, "lock-" + session): + testchallenge(session) elif queue == "issue": issue(session) if inactive: break diff --git a/server-ca/redis_lock.py b/server-ca/redis_lock.py index cdda4ff40..ac990e511 100644 --- a/server-ca/redis_lock.py +++ b/server-ca/redis_lock.py @@ -17,6 +17,10 @@ # implemented, only one process succeds in clearing and acquiring a # particular expired lock, even "when multiple clients detected an # expired lock and are trying to release it". +# +# The optional one_shot parameter causes the attempt to acquire the +# lock to instead raise a KeyError exception if someone else is already +# holding a valid lock. import time, random @@ -27,9 +31,10 @@ def valid(t): return float(t) > time.time() class redis_lock(object): - def __init__(self, redis, lock_name): + def __init__(self, redis, lock_name, one_shot=False): self.redis = redis self.lock_name = lock_name + self.one_shot = one_shot def __enter__(self): while True: @@ -40,6 +45,8 @@ class redis_lock(object): # "C4 sends GET lock.foo to check if the lock expired." existing_lock = self.redis.get(self.lock_name) if (not existing_lock) or valid(existing_lock): + if self.one_shot: + raise KeyError # "If it is not, it will sleep for some time and retry from # the start." time.sleep(1 + random.random()) From b4eaf10a490d01d770d49e005eb2b92d9492c237 Mon Sep 17 00:00:00 2001 From: Seth Schoen Date: Wed, 3 Oct 2012 12:33:55 -0700 Subject: [PATCH 02/13] poll_interval variable to control frequency of client requests --- server-ca/chocolate.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/server-ca/chocolate.py b/server-ca/chocolate.py index ff50a6615..ae40a88ae 100755 --- a/server-ca/chocolate.py +++ b/server-ca/chocolate.py @@ -13,6 +13,8 @@ from CONFIG import max_names, max_csr_size, maximum_session_age from CONFIG import maximum_challenge_age, hashcash_expiry, extra_name_blacklist from CONFIG import cert_chain_file, debug +poll_interval = 10 + try: chocolate_server_name = open("SERVERNAME").read().rstrip() except IOError: @@ -320,6 +322,8 @@ class session(object): # If we're in testchallenge, tell the client about the challenges and their # current status. if state == "testchallenge": + # If the client claims to have completed some challenges, try to test + # them, if the client hasn't asked us to do so too recently. if m.completedchallenge: try: with redis_lock(sessions, "lock-" + self.id, one_shot=True): From 9cebf1e26815e872b56e4f4996a53cd6dbb15fa1 Mon Sep 17 00:00:00 2001 From: Seth Schoen Date: Fri, 5 Oct 2012 14:44:02 -0700 Subject: [PATCH 03/13] make the client actually send completedchallenge --- trustify/client/client.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/trustify/client/client.py b/trustify/client/client.py index 1851ac75c..4d30477b4 100644 --- a/trustify/client/client.py +++ b/trustify/client/client.py @@ -455,14 +455,32 @@ def authenticate(): sys.exit(1) logger.info("Configured Apache for challenge; waiting for verification...") - logger.debug("waiting 3") - time.sleep(3) + did_it = chocolatemessage() + init(did_it) + did_it.session = r.session + # This will blindly assert that all of the challenges have been + # complied with, by simply copying them from the challenge data + # structure into a new completedchallenge structure. This is + # kind of crude, because the client could instead actually build up + # a completedchallenge structure piece-by-piece as it actually + # complies with challenges (and then send that structure for the + # server to look at). In the existing client, completedchallenge + # is only ever sent once _all_ of the (assumed to be dvsni) + # challenges have been met, and client-side failure to meet any + # challenge is immediately fatal to the client. In the existing + # server, the client's assertion that the client has met any + # (assumed to be dvsni) challenge(s) will result in the server + # scheduling a test of all challenges. + did_it.completedchallenge.extend(r.challenge) - r=decode(do(upstream, k)) + r=decode(do(upstream, did_it)) logger.debug(r) + delay = 5 while r.challenge or r.proceed.IsInitialized(): - logger.debug("waiting 5") - time.sleep(5) + if r.proceed.IsInitialized(): + delay = min(r.proceed.polldelay, 60) + logger.debug("waiting %d" % delay) + time.sleep(delay) k.session = r.session r = decode(do(upstream, k)) logger.debug(r) From ed9a02af659b49c96bf8ffe6d6d421786e127ef4 Mon Sep 17 00:00:00 2001 From: Seth Schoen Date: Fri, 5 Oct 2012 17:47:54 -0700 Subject: [PATCH 04/13] only schedule sessions for testing if they aren't already scheduled for testing --- server-ca/chocolate.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/server-ca/chocolate.py b/server-ca/chocolate.py index ae40a88ae..f7c6aefd8 100755 --- a/server-ca/chocolate.py +++ b/server-ca/chocolate.py @@ -109,9 +109,15 @@ class session(object): def request_test(self): """Ask a daemon to test challenges.""" - # TODO: check whether this session is already in pending-testchallenge? - sessions.lpush("pending-testchallenge", self.id) - sessions.publish("requests", "testchallenge") + # There is a race condition between testing for membership and + # adding it, but it's quite difficult to "exploit" and the result + # of triggering it is just that the same session will be scheduled + # for testing twice. We use locking in the daemon to exclude the + # possibility of two daemon processes testing the same session at + # once, and check the session's state before beginning to test it. + if self.id not in sessions.lrange("pending-testchallenge", 0, -1): + sessions.lpush("pending-testchallenge", self.id) + sessions.publish("requests", "testchallenge") def request_made(self): """Has there already been a signing request made in this session?""" From 8ea8f361a126225769ea7aaf34f676e1628e247e Mon Sep 17 00:00:00 2001 From: Seth Schoen Date: Mon, 8 Oct 2012 18:05:02 -0700 Subject: [PATCH 05/13] more detailed comment --- server-ca/redis_lock.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server-ca/redis_lock.py b/server-ca/redis_lock.py index ac990e511..530cd0a4e 100644 --- a/server-ca/redis_lock.py +++ b/server-ca/redis_lock.py @@ -20,7 +20,8 @@ # # The optional one_shot parameter causes the attempt to acquire the # lock to instead raise a KeyError exception if someone else is already -# holding a valid lock. +# holding a valid lock. This is used in situations where a process +# doesn't insist on doing the actions guarded by the lock. import time, random From 4dfd31a3153942def5aa31f77f9cfc6c06a0e205 Mon Sep 17 00:00:00 2001 From: Seth Schoen Date: Wed, 10 Oct 2012 12:15:33 -0700 Subject: [PATCH 06/13] implement requirement of hashcash *per subject name* --- server-ca/chocolate.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/server-ca/chocolate.py b/server-ca/chocolate.py index f7c6aefd8..13423cf5d 100755 --- a/server-ca/chocolate.py +++ b/server-ca/chocolate.py @@ -159,10 +159,11 @@ class session(object): self.die(r, r.BadRequest, uri="https://ca.example.com/failures/internalerror") return - def check_hashcash(self, h): - """Is the hashcash string h valid for a request to this server?""" + def check_hashcash(self, h, n): + """Is the hashcash string h valid for a request to this server for + signing n names?""" if hashcash.check(stamp=h, resource=chocolate_server_name, \ - bits=difficulty, check_expiration=hashcash_expiry): + bits=difficulty*n, check_expiration=hashcash_expiry): # sessions.sadd returns True upon adding to a set and # False if the item was already in the set. return sessions.sadd("spent-hashcash", h) @@ -243,7 +244,8 @@ class session(object): self.die(r, r.BadRequest, uri="https://ca.example.com/failures/recipient") return # Check hashcash before doing any crypto or database access. - if not m.request.clientpuzzle or not self.check_hashcash(m.request.clientpuzzle): + names = CSR.subject_names(csr) + if not m.request.clientpuzzle or not self.check_hashcash(m.request.clientpuzzle, len(names)): self.die(r, r.NeedClientPuzzle, uri="https://ca.example.com/failures/hashcash") return if self.request_made(): @@ -275,7 +277,6 @@ class session(object): if not CSR.csr_goodkey(csr): self.die(r, r.UnsafeKey) return - names = CSR.subject_names(csr) if len(names) == 0: self.die(r, r.BadCSR) return From 90944557bfe657147fcad916b3efda0090caae5f Mon Sep 17 00:00:00 2001 From: Seth Schoen Date: Wed, 10 Oct 2012 17:19:00 -0700 Subject: [PATCH 07/13] update client to send hashcash per-name --- trustify/client/client.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/trustify/client/client.py b/trustify/client/client.py index 4d30477b4..9a9602a9b 100644 --- a/trustify/client/client.py +++ b/trustify/client/client.py @@ -174,11 +174,11 @@ def drop_privs(): os.setgroups([]) os.setuid(nobody) -def make_request(server, m, csr, quiet=False): +def make_request(server, m, csr, names, quiet=False): m.request.recipient = server m.request.timestamp = int(time.time()) m.request.csr = csr - hashcash_cmd = ["hashcash", "-P", "-m", "-z", "12", "-b", `difficulty`, "-r", server] + hashcash_cmd = ["hashcash", "-P", "-m", "-z", "12", "-b", `difficulty*len(names)`, "-r", server] if quiet: hashcash = subprocess.Popen(hashcash_cmd, preexec_fn=drop_privs, shell= False, stdout=subprocess.PIPE, stderr=open("/dev/null", "w")).communicate()[0].rstrip() else: @@ -287,7 +287,7 @@ def challenge_factory(r, req_filepath, key_filepath, config): return challenges, dn -def send_request(key_pem, csr_pem, quiet=curses): +def send_request(key_pem, csr_pem, names, quiet=curses): global server upstream = "https://%s/chocolate.py" % server k=chocolatemessage() @@ -295,7 +295,7 @@ def send_request(key_pem, csr_pem, quiet=curses): init(k) init(m) logger.info("Creating request; generating hashcash...") - make_request(server, m, csr_pem, quiet=curses) + make_request(server, m, csr_pem, names, quiet=curses) sign(key_pem, m) logger.info("Created request; sending to server...") logger.debug(m) @@ -437,7 +437,7 @@ def authenticate(): logger.info("Generating key: " + key_file) logger.info("Creating CSR: " + req_file) - r, k = send_request(key_pem, csr_pem) + r, k = send_request(key_pem, csr_pem, names) challenges, dn = challenge_factory(r, os.path.abspath(req_file), os.path.abspath(key_file), config) From de867e26c8e36fadafa74608bbaea654009e3625 Mon Sep 17 00:00:00 2001 From: Seth Schoen Date: Wed, 17 Oct 2012 18:36:24 -0700 Subject: [PATCH 08/13] reorganize daemons! This splits out the single daemon into four different daemons (for makechallenge, testchallenge, and issue, and for logging), switches all but the logging daemon to no longer use pubsub (but instead use brpop, which is a blocking queue pop), and tracks the number of times that a given session has been tested (limited to 3). There are also new scripts to try to start and stop all the daemons with a single command. --- server-ca/README | 2 + server-ca/TODO | 37 +++ server-ca/chocolate.py | 3 +- server-ca/daemon.py | 321 ---------------------- server-ca/daemons/daemon_common.py | 44 +++ server-ca/daemons/issue-daemon.py | 72 +++++ server-ca/daemons/logging-daemon.py | 29 ++ server-ca/daemons/makechallenge-daemon.py | 70 +++++ server-ca/daemons/testchallenge-daemon.py | 111 ++++++++ server-ca/start_daemons | 22 ++ server-ca/stop_daemons | 7 + 11 files changed, 395 insertions(+), 323 deletions(-) create mode 100644 server-ca/TODO delete mode 100644 server-ca/daemon.py create mode 100644 server-ca/daemons/daemon_common.py create mode 100755 server-ca/daemons/issue-daemon.py create mode 100755 server-ca/daemons/logging-daemon.py create mode 100755 server-ca/daemons/makechallenge-daemon.py create mode 100755 server-ca/daemons/testchallenge-daemon.py create mode 100755 server-ca/start_daemons create mode 100755 server-ca/stop_daemons diff --git a/server-ca/README b/server-ca/README index 123dc4921..6a5868631 100644 --- a/server-ca/README +++ b/server-ca/README @@ -14,6 +14,8 @@ chocolate.py - server-side, requires web.py (python-webpy), build-essential, python-dev, and swig) probably wants to run under a web server like lighttpd with fastcgi +daemons/{makechallenge,testchallenge,issue,logging}-daemon.py - + daemons to handle back-end implementation of protocol state transitions chocolate_protocol.proto - protocol definition; needs protobuf-compiler diff --git a/server-ca/TODO b/server-ca/TODO new file mode 100644 index 000000000..67f77c14f --- /dev/null +++ b/server-ca/TODO @@ -0,0 +1,37 @@ +# The queue mechanism with pending-* is supposed to control +# concurrency issues properly, but this needs verification +# to ensure that there are no possible race conditions. +# Generally, the server process (as distinct from the daemon) +# is not supposed to change sessions at all once they have +# been added to a queue, except for marking them no longer +# live if the server realizes that something bad has happened +# to them. There may be some exceptions, and they should all +# be analyzed for possible races. + +# TODO: check sessions' internal evidence for consistency +# with their queue membership (in case of crashes or bugs). +# In particular, check that a session in pending-makechallenge +# does not actually contain any challenges and that a +# session in pending-issue does not actually contain an +# issued cert. + +# TODO: write queue rebuilding script that uses sessions' +# internal state to decide which queue they go in (to +# run when starting daemon, in case there was a crash +# that caused a session not to be in any pending queue +# because the daemon was actively working on it during +# the crash); consider marking sessions "dirty" when +# beginning to actually modify their contents in order +# to allow dirty sessions to be deleted after a crash instead +# of placing them back on a queue. Or, we could just +# decide that a crash invalidates each and every pending +# request, period, while still allowing clients to look +# up successfully issued certs. + +# NOTE: The daemon enforces its own timeouts, which are +# defined in the ancient() function. These timeouts apply +# to any session that has been placed in a queue and can +# be completely independent of the session timeout policy +# in the server. Being marked as dead at any time by either +# the server or the daemon (due to timeout or error) causes +# a session to be treated as dead by both. diff --git a/server-ca/chocolate.py b/server-ca/chocolate.py index 13423cf5d..caa914667 100755 --- a/server-ca/chocolate.py +++ b/server-ca/chocolate.py @@ -84,6 +84,7 @@ class session(object): if not self.exists(): sessions.hset(self.id, "created", timestamp) sessions.hset(self.id, "lastpoll", 0) + sessions.hset(self.id, "times-tested", 0) sessions.hset(self.id, "live", True) sessions.lpush("active-requests", self.id) else: @@ -117,7 +118,6 @@ class session(object): # once, and check the session's state before beginning to test it. if self.id not in sessions.lrange("pending-testchallenge", 0, -1): sessions.lpush("pending-testchallenge", self.id) - sessions.publish("requests", "testchallenge") def request_made(self): """Has there already been a signing request made in this session?""" @@ -137,7 +137,6 @@ class session(object): sessions.hset(self.id, "client-addr", web.ctx.ip) sessions.hset(self.id, "state", "makechallenge") sessions.lpush("pending-makechallenge", self.id) - sessions.publish("requests", "makechallenge") return True def challenges(self): diff --git a/server-ca/daemon.py b/server-ca/daemon.py deleted file mode 100644 index 82f024749..000000000 --- a/server-ca/daemon.py +++ /dev/null @@ -1,321 +0,0 @@ -#!/usr/bin/env python - -# This daemon runs on the CA side to look for requests in -# the database that are waiting for actions to be taken: -# generating challenges, testing whether challenges have -# been met, and issuing certs when the challenges have been -# met. The daemon does not communicate with the client at -# all; it just notes changes to request state in the database, -# which the server will inform the client about when the -# client subsequently checks in. - -# The queue mechanism with pending-* is supposed to control -# concurrency issues properly, but this needs verification -# to ensure that there are no possible race conditions. -# Generally, the server process (as distinct from the daemon) -# is not supposed to change sessions at all once they have -# been added to a queue, except for marking them no longer -# live if the server realizes that something bad has happened -# to them. There may be some exceptions, and they should all -# be analyzed for possible races. - -# TODO: check sessions' internal evidence for consistency -# with their queue membership (in case of crashes or bugs). -# In particular, check that a session in pending-makechallenge -# does not actually contain any challenges and that a -# session in pending-issue does not actually contain an -# issued cert. -# TODO: write queue rebuilding script that uses sessions' -# internal state to decide which queue they go in (to -# run when starting daemon, in case there was a crash -# that caused a session not to be in any pending queue -# because the daemon was actively working on it during -# the crash); consider marking sessions "dirty" when -# beginning to actually modify their contents in order -# to allow dirty sessions to be deleted after a crash instead -# of placing them back on a queue. Or, we could just -# decide that a crash invalidates each and every pending -# request, period, while still allowing clients to look -# up successfully issued certs. -# TODO: implement multithreading to allow several parallel -# worker processes. -# -# NOTE: The daemon enforces its own timeouts, which are -# defined in the ancient() function. These timeouts apply -# to any session that has been placed in a queue and can -# be completely independent of the session timeout policy -# in the server. Being marked as dead at any time by either -# the server or the daemon (due to timeout or error) causes -# a session to be treated as dead by both. - -import redis, redis_lock, time, CSR, sys, signal, binascii -from sni_challenge.verify import verify_challenge -from Crypto import Random - -r = redis.Redis() -ps = r.pubsub() -issue_lock = redis_lock.redis_lock(r, "issue_lock") -# This lock guards the ability to issue certificates with "openssl ca", -# which has no locking of its own. We don't need locking for the updates -# that the daemon performs on the sessions in the database because the -# queues pending-makechallenge, pending-testchallenge, and pending-issue -# are updated atomically and the daemon only ever acts on sessions that it -# has removed from a queue. -# TODO: in a deployed system, the queue for issuing certs should probably -# be treated a first-come, first-issue fashion, so that a request doesn't -# time out while waiting to acquire the lock just because other requests -# happened to get it first. Another way of putting this is that there -# could be only one thread/process that deals with pending-issue sessions, -# even though there could be many that deal with pending-makechallenge and -# pending-testchallenge. Then we can guarantee that the oldest pending-issue -# requests are dealt with first, which is impossible to guarantee when -# multiple daemons may be opportunistically acquiring this lock. - -debug = "debug" in sys.argv -clean_shutdown = False - -def signal_handler(a, b): - global clean_shutdown - clean_shutdown = True - r.publish("exit", "clean-exit") - -signal.signal(signal.SIGTERM, signal_handler) -signal.signal(signal.SIGINT, signal_handler) - -def short(session): - """Return the first 12 bytes of a session ID, or, for a - challenge ID, the challenge ID with the session ID truncated.""" - tmp = session.partition(":") - return tmp[0][:12] + "..." + tmp[1] + tmp[2] - -def ancient(session, state): - """Given that this session is in the specified named state, - decide whether the daemon should forcibly expire it for being too - old, even if no client request has caused the serve to mark the - session as expired. This is most relevant to truly abandoned - sessions that no client ever asks about.""" - age = int(time.time()) - int(r.hget(session, "created")) - if state == "makechallenge" and age > 120: - if debug: print "considered", short(session), "ancient" - return True - if state == "testchallenge" and age > 600: - if debug: print "considered", short(session), "ancient" - return True - return False - -def random(): - """Return 64 hex digits representing a new 32-byte random number.""" - return binascii.hexlify(Random.get_random_bytes(32)) - -def random_raw(): - """Return 32 random bytes.""" - return Random.get_random_bytes(32) - -def makechallenge(session): - if r.hget(session, "live") != "True": - # This session has died due to some other reason, like an - # illegal request or timeout, since it entered makechallenge - # state. Consequently, we're not allowed to advance its - # state any further, and it should be removed from the - # pending-requests queue and not pushed into any other queue. - # We don't have to remove it from pending-makechallenge - # because the caller has already done so. - if debug: print "removing expired session", short(session) - r.lrem("pending-requests", session) - return - # Currently only makes challenges of type 0 (DomainValidateSNI) - # This challenge type has three internal data parameters: - # dvsni:nonce, dvsni:r, dvsni:ext - # This challenge type sends three data parameters to the client: - # nonce, y = E(r), ext - # - # Make one challenge for each name. (This one-to-one relationship - # is not an inherent protocol requirement!) - names = r.lrange("%s:names" % session, 0, -1) - if debug: print "%s: new valid request" % session - if debug: print "%s: from requesting client at %s" % (short(session), r.hget(session, "client-addr")) - if debug: print "%s: for %d names: %s" % (short(session), len(names), ", ".join(names)) - for i, name in enumerate(names): - challenge = "%s:%d" % (session, i) - r.hset(challenge, "challtime", int(time.time())) - r.hset(challenge, "type", 0) # DomainValidateSNI - r.hset(challenge, "name", name) - r.hset(challenge, "satisfied", False) - r.hset(challenge, "failed", False) - r.hset(challenge, "dvsni:nonce", random()) - r.hset(challenge, "dvsni:r", random_raw()) - r.hset(challenge, "dvsni:ext", "1.3.3.7") - # Keep accurate count of how many challenges exist in this session. - r.hincrby(session, "challenges", 1) - if debug: print "created new challenge", short(challenge) - if True: # challenges have been created - r.hset(session, "state", "testchallenge") - else: - r.lpush("pending-makechallenge", session) - r.publish("requests", "makechallenge") - -def testchallenge(session): - if r.hget(session, "live") != "True": - # This session has died due to some other reason, like an - # illegal request or timeout, since it entered testchallenge - # state. Consequently, we're not allowed to advance its - # state any further, and it should be removed from the - # pending-requests queue and not pushed into any other queue. - # We don't have to remove it from pending-testchallenge - # because the caller has already done so. - if debug: print "removing expired session", short(session) - r.lrem("pending-requests", session) - return - if r.hget(session, "state") != "testchallenge": - return - all_satisfied = True - for i, name in enumerate(r.lrange("%s:names" % session, 0, -1)): - challenge = "%s:%d" % (session, i) - if debug: print "testing challenge", short(challenge) - challtime = int(r.hget(challenge, "challtime")) - challtype = int(r.hget(challenge, "type")) - name = r.hget(challenge, "name") - satisfied = r.hget(challenge, "satisfied") == "True" - failed = r.hget(challenge, "failed") == "True" - # TODO: check whether this challenge is too old - if not satisfied and not failed: - # if debug: print "challenge", short(challenge), "being tested" - if challtype == 0: # DomainValidateSNI - if debug: print "\tbeginning dvsni test to %s" % name - dvsni_nonce = r.hget(challenge, "dvsni:nonce") - dvsni_r = r.hget(challenge, "dvsni:r") - dvsni_ext = r.hget(challenge, "dvsni:ext") - direct_result, direct_reason = verify_challenge(name, dvsni_r, dvsni_nonce, False) - proxy_result, proxy_reason = verify_challenge(name, dvsni_r, dvsni_nonce, True) - if debug: - print "\t...direct probe: %s (%s)" % (direct_result, direct_reason) - print "\tTor proxy probe: %s (%s)" % (proxy_result, proxy_reason) - if direct_result and proxy_result: - r.hset(challenge, "satisfied", True) - else: - all_satisfied = False - # TODO: distinguish permanent and temporarily failures - # can cause a permanent failure under some conditions, causing - # the session to become dead. TODO: need to articulate what - # those conditions are - else: - # Don't know how to handle this challenge type - all_satisfied = False - elif not satisfied: - if debug: print "\tchallenge was not attempted" - all_satisfied = False - if all_satisfied: - # Challenges all succeeded, so we should prepare to issue - # the requested cert. - # TODO: double-check that there were > 0 challenges, - # so that we don't somehow mistakenly issue a cert in - # response to an empty list of challenges (even though - # the daemon that put this session on the queue should - # also have implicitly guaranteed this). - if debug: print "\t** All challenges satisfied; request %s GRANTED" % short(session) - r.hset(session, "state", "issue") - r.lpush("pending-issue", session) - r.publish("requests", "issue") - else: - # Some challenges are not verified. - pass - -def issue(session): - if r.hget(session, "live") != "True": - # This session has died due to some other reason, like an - # illegal request or timeout, since it entered testchallenge - # state. Consequently, we're not allowed to advance its - # state any further, and it should be removed from the - # pending-requests queue and not pushed into any other queue. - # We don't have to remove it from pending-testchallenge - # because the caller has already done so. - # - # Having a session in pending-issue die is a very weird case - # that probably suggests that timeouts are set incorrectly - # or that the client is misbehaving very badly. This means - # that a request passed all of its challenges but the - # session nonetheless died for some reason unrelated to failing - # challenges before the cert could be issued. Normally, this - # should never happen. - # - # TODO: This can definitely happen when there are extremely many - # sessions stuck in testchallenge state compared to the number of - # daemon processes to handle them, because each session in - # testchallenge gets tested once before any daemon gets around to - # issuing the cert. This is a bug. - if debug: print "removing expired (issue-state!?) session", short(session) - r.lrem("pending-requests", session) - return - if r.hget(session, "state") != "issue": - return - csr = r.hget(session, "csr") - names = r.lrange("%s:names" % session, 0, -1) - with issue_lock: - cert = CSR.issue(csr, names) - r.hset(session, "cert", cert) - if cert: # once issuing cert succeeded - if debug: print "%s: issued certificate for names: %s" % (short(session), ", ".join(names)) - r.hset(session, "state", "done") - r.lpush("pending-done", session) - # TODO: Note that we do not publish a pubsub message when - # the session enters done state, so the daemon will not - # actually act on it. Is that OK? - else: # should not be reached in deployed version - if debug: print "issuing for", short(session), "failed" - r.lpush("pending-issue", session) - r.publish("requests", "issue") - -# Dispatch table for how to react to pubsub messages. The key is -# the pubsub message and the value is a tuple of (queue name, function). -# The main loop will look in the specified queue for a pending session, -# and, if it finds one, it will call the specified function on it. -# Since the queue names are systematically related to the message names, -# we could probably remove the queue name field entirely. -dispatch = { "makechallenge": ("pending-makechallenge", makechallenge), - "testchallenge": ("pending-testchallenge", testchallenge), - "issue": ("pending-issue", issue), - "done": ("pending-done", lambda x: None) } - -# Main loop: act on queues notified via Redis pubsub mechanism. -# Currently, we ignore the specific details of which queue was -# notified and, upon any notification, repeatedly process a single -# item from each queue until all queues are empty. - -ps.subscribe(["requests"]) -ps.subscribe(["logs"]) -ps.subscribe(["exit"]) -for message in ps.listen(): - if message["type"] != "message": - continue - if message["channel"] == "logs": - if debug: print message["data"] - continue - if message["channel"] == "exit": - break - if message["channel"] == "requests": - # populated_queue would be used by a more sophisticated scheduler - populated_queue = message["data"] - while True: - inactive = True - for queue in ("makechallenge", "testchallenge", "issue"): - if clean_shutdown: - inactive = True - break - session = r.rpop("pending-" + queue) - if session: - inactive = False - if ancient(session, queue) and queue != "issue": - if debug: print "expiring ancient session", short(session) - r.hset(session, "live", False) - else: - if queue == "makechallenge": makechallenge(session) - elif queue == "testchallenge": - with redis_lock(r, "lock-" + session): - testchallenge(session) - elif queue == "issue": issue(session) - if inactive: - break - - if clean_shutdown: - print "daemon exiting cleanly" - break diff --git a/server-ca/daemons/daemon_common.py b/server-ca/daemons/daemon_common.py new file mode 100644 index 000000000..39746dc29 --- /dev/null +++ b/server-ca/daemons/daemon_common.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python + +# functions common to the various kinds of daemon + +# TODO: define a log function that sends a pubsub message to the +# logger daemon + +import time, binascii +from Crypto import Random + +def signal_handler(a, b): + global clean_shutdown + clean_shutdown = True + r.publish("exit", "clean-exit") + r.lpush("exit", "clean-exit") + +def short(session): + """Return the first 12 bytes of a session ID, or, for a + challenge ID, the challenge ID with the session ID truncated.""" + tmp = session.partition(":") + return tmp[0][:12] + "..." + tmp[1] + tmp[2] + +def ancient(session, state): + """Given that this session is in the specified named state, + decide whether the daemon should forcibly expire it for being too + old, even if no client request has caused the serve to mark the + session as expired. This is most relevant to truly abandoned + sessions that no client ever asks about.""" + age = int(time.time()) - int(r.hget(session, "created")) + if state == "makechallenge" and age > 120: + if debug: print "considered", short(session), "ancient" + return True + if state == "testchallenge" and age > 600: + if debug: print "considered", short(session), "ancient" + return True + return False + +def random(): + """Return 64 hex digits representing a new 32-byte random number.""" + return binascii.hexlify(Random.get_random_bytes(32)) + +def random_raw(): + """Return 32 random bytes.""" + return Random.get_random_bytes(32) diff --git a/server-ca/daemons/issue-daemon.py b/server-ca/daemons/issue-daemon.py new file mode 100755 index 000000000..0b5a8a9f6 --- /dev/null +++ b/server-ca/daemons/issue-daemon.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python + +# This daemon runs on the CA side to look for requests in +# the database that are waiting for a cert to be issued. + +import redis, redis_lock, CSR, sys, signal +from sni_challenge.verify import verify_challenge +from Crypto import Random + +r = redis.Redis() +ps = r.pubsub() +issue_lock = redis_lock.redis_lock(r, "issue_lock") +# This lock guards the ability to issue certificates with "openssl ca", +# which has no locking of its own. We don't need locking for the updates +# that the daemon performs on the sessions in the database because the +# queues pending-makechallenge, pending-testchallenge, and pending-issue +# are updated atomically and the daemon only ever acts on sessions that it +# has removed from a queue. + +debug = "debug" in sys.argv +clean_shutdown = False + +from daemon_common import signal_handler, short, ancient, random, random_raw + +signal.signal(signal.SIGTERM, signal_handler) +signal.signal(signal.SIGINT, signal_handler) + +def issue(session): + if r.hget(session, "live") != "True": + # This session has died due to some other reason, like an + # illegal request or timeout, since it entered testchallenge + # state. Consequently, we're not allowed to advance its + # state any further, and it should be removed from the + # pending-requests queue and not pushed into any other queue. + # We don't have to remove it from pending-testchallenge + # because the caller has already done so. + # + # Having a session in pending-issue die is a very weird case + # that probably suggests that timeouts are set incorrectly + # or that the client is misbehaving very badly. This means + # that a request passed all of its challenges but the + # session nonetheless died for some reason unrelated to failing + # challenges before the cert could be issued. Normally, this + # should never happen. + if debug: print "removing expired (issue-state!?) session", short(session) + r.lrem("pending-requests", session) + return + if r.hget(session, "state") != "issue": + return + csr = r.hget(session, "csr") + names = r.lrange("%s:names" % session, 0, -1) + with issue_lock: + cert = CSR.issue(csr, names) + r.hset(session, "cert", cert) + if cert: # once issuing cert succeeded + if debug: print "%s: issued certificate for names: %s" % (short(session), ", ".join(names)) + r.hset(session, "state", "done") + # r.lpush("pending-done", session) + else: # should not be reached in deployed version + if debug: print "issuing for", short(session), "failed" + r.lpush("pending-issue", session) + +while True: + (where, what) = r.brpop(["exit", "pending-issue"]) + if where == "exit": + r.lpush("exit", "exit") + break + elif where == "pending-issue": + issue(what) + if clean_shutdown: + print "daemon exiting cleanly" + break diff --git a/server-ca/daemons/logging-daemon.py b/server-ca/daemons/logging-daemon.py new file mode 100755 index 000000000..af0b2aee3 --- /dev/null +++ b/server-ca/daemons/logging-daemon.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python + +# This daemon runs on the CA side to handle logging. + +import redis, signal + +r = redis.Redis() +ps = r.pubsub() + +debug = "debug" in sys.argv +clean_shutdown = False + +from daemon_common import signal_handler + +signal.signal(signal.SIGTERM, signal_handler) +signal.signal(signal.SIGINT, signal_handler) + +ps.subscribe(["logs", "exit"]) +for message in ps.listen(): + if message["type"] != "message": + continue + if message["channel"] == "logs": + if debug: print message["data"] + continue + if message["channel"] == "exit": + break + if clean_shutdown: + print "daemon exiting cleanly" + break diff --git a/server-ca/daemons/makechallenge-daemon.py b/server-ca/daemons/makechallenge-daemon.py new file mode 100755 index 000000000..3260dba42 --- /dev/null +++ b/server-ca/daemons/makechallenge-daemon.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python + +# This daemon runs on the CA side to look for requests in +# the database that are waiting for challenges to be issued. + +import redis, redis_lock, time, sys, signal + +r = redis.Redis() +ps = r.pubsub() + +debug = "debug" in sys.argv +clean_shutdown = False + +from daemon_common import signal_handler, short, ancient, random, random_raw + +signal.signal(signal.SIGTERM, signal_handler) +signal.signal(signal.SIGINT, signal_handler) + +def makechallenge(session): + if r.hget(session, "live") != "True": + # This session has died due to some other reason, like an + # illegal request or timeout, since it entered makechallenge + # state. Consequently, we're not allowed to advance its + # state any further, and it should be removed from the + # pending-requests queue and not pushed into any other queue. + # We don't have to remove it from pending-makechallenge + # because the caller has already done so. + if debug: print "removing expired session", short(session) + r.lrem("pending-requests", session) + return + # Currently only makes challenges of type 0 (DomainValidateSNI) + # This challenge type has three internal data parameters: + # dvsni:nonce, dvsni:r, dvsni:ext + # This challenge type sends three data parameters to the client: + # nonce, y = E(r), ext + # + # Make one challenge for each name. (This one-to-one relationship + # is not an inherent protocol requirement!) + names = r.lrange("%s:names" % session, 0, -1) + if debug: print "%s: new valid request" % session + if debug: print "%s: from requesting client at %s" % (short(session), r.hget(session, "client-addr")) + if debug: print "%s: for %d names: %s" % (short(session), len(names), ", ".join(names)) + for i, name in enumerate(names): + challenge = "%s:%d" % (session, i) + r.hset(challenge, "challtime", int(time.time())) + r.hset(challenge, "type", 0) # DomainValidateSNI + r.hset(challenge, "name", name) + r.hset(challenge, "satisfied", False) + r.hset(challenge, "failed", False) + r.hset(challenge, "dvsni:nonce", random()) + r.hset(challenge, "dvsni:r", random_raw()) + r.hset(challenge, "dvsni:ext", "1.3.3.7") + # Keep accurate count of how many challenges exist in this session. + r.hincrby(session, "challenges", 1) + if debug: print "created new challenge", short(challenge) + if True: # challenges have been created + r.hset(session, "state", "testchallenge") + else: + r.lpush("pending-makechallenge", session) + +while True: + (where, what) = r.brpop(["exit", "pending-makechallenge"]) + if where == "exit": + r.lpush("exit", "exit") + break + elif where == "pending-makechallenge": + makechallenge(what) + if clean_shutdown: + print "daemon exiting cleanly" + break diff --git a/server-ca/daemons/testchallenge-daemon.py b/server-ca/daemons/testchallenge-daemon.py new file mode 100755 index 000000000..f7a95cc16 --- /dev/null +++ b/server-ca/daemons/testchallenge-daemon.py @@ -0,0 +1,111 @@ +#!/usr/bin/env python + +# This daemon runs on the CA side to look for requests in +# the database that are waiting for the CA to test whether +# challenges have been met, and to perform this test. + +import redis, redis_lock, time, sys, signal +from sni_challenge.verify import verify_challenge + +r = redis.Redis() +ps = r.pubsub() + +debug = "debug" in sys.argv +clean_shutdown = False + +from daemon_common import signal_handler, short, ancient, random, random_raw + +def signal_handler(a, b): + global clean_shutdown + clean_shutdown = True + r.publish("exit", "clean-exit") + +signal.signal(signal.SIGTERM, signal_handler) +signal.signal(signal.SIGINT, signal_handler) + +def testchallenge(session): + if r.hget(session, "live") != "True": + # This session has died due to some other reason, like an + # illegal request or timeout, since it entered testchallenge + # state. Consequently, we're not allowed to advance its + # state any further, and it should be removed from the + # pending-requests queue and not pushed into any other queue. + # We don't have to remove it from pending-testchallenge + # because the caller has already done so. + if debug: print "removing expired session", short(session) + r.lrem("pending-requests", session) + return + if r.hget(session, "state") != "testchallenge": + return + if int(r.hincrby(session, "times-tested", 1)) > 3: + # This session has already been unsuccessfully tested three + # times. Clearly, something has gone wrong or the client is + # just trying to annoy us. Do not allow it to be tested again. + r.hset(session, "live", False) + r.lrem("pending-requests", session) + return + all_satisfied = True + for i, name in enumerate(r.lrange("%s:names" % session, 0, -1)): + challenge = "%s:%d" % (session, i) + if debug: print "testing challenge", short(challenge) + challtime = int(r.hget(challenge, "challtime")) + challtype = int(r.hget(challenge, "type")) + name = r.hget(challenge, "name") + satisfied = r.hget(challenge, "satisfied") == "True" + failed = r.hget(challenge, "failed") == "True" + # TODO: check whether this challenge is too old + if not satisfied and not failed: + # if debug: print "challenge", short(challenge), "being tested" + if challtype == 0: # DomainValidateSNI + if debug: print "\tbeginning dvsni test to %s" % name + dvsni_nonce = r.hget(challenge, "dvsni:nonce") + dvsni_r = r.hget(challenge, "dvsni:r") + dvsni_ext = r.hget(challenge, "dvsni:ext") + direct_result, direct_reason = verify_challenge(name, dvsni_r, dvsni_nonce, False) + proxy_result, proxy_reason = verify_challenge(name, dvsni_r, dvsni_nonce, True) + if debug: + print "\t...direct probe: %s (%s)" % (direct_result, direct_reason) + print "\tTor proxy probe: %s (%s)" % (proxy_result, proxy_reason) + if direct_result and proxy_result: + r.hset(challenge, "satisfied", True) + else: + all_satisfied = False + # TODO: distinguish permanent and temporarily failures + # can cause a permanent failure under some conditions, causing + # the session to become dead. TODO: need to articulate what + # those conditions are + else: + # Don't know how to handle this challenge type + all_satisfied = False + elif not satisfied: + if debug: print "\tchallenge was not attempted" + all_satisfied = False + if all_satisfied: + # Challenges all succeeded, so we should prepare to issue + # the requested cert. + # TODO: double-check that there were > 0 challenges, + # so that we don't somehow mistakenly issue a cert in + # response to an empty list of challenges (even though + # the daemon that put this session on the queue should + # also have implicitly guaranteed this). + if debug: print "\t** All challenges satisfied; request %s GRANTED" % short(session) + r.hset(session, "state", "issue") + r.lpush("pending-issue", session) + else: + # Some challenges were not verified. In the current + # design of this daemon, the client must contact + # us again to request that the session be placed back + # in pending-testchallenge! + pass + +while True: + (where, what) = r.brpop(["exit", "pending-testchallenge"]) + if where == "exit": + r.lpush("exit", "exit") + break + elif where == "pending-testchallenge": + with redis_lock(r, "lock-" + what): + testchallenge(what) + if clean_shutdown: + print "daemon exiting cleanly" + break diff --git a/server-ca/start_daemons b/server-ca/start_daemons new file mode 100755 index 000000000..21ccc299f --- /dev/null +++ b/server-ca/start_daemons @@ -0,0 +1,22 @@ +#!/bin/sh + +# By default, daemons are not being told to exit! +redis-cli del exit + +echo "Starting logger daemon..." +nohup ./logging-daemon.py & + +# TODO: an attempt to reconstruct or expire sessions from previous +# runs of the daemon should occur here. + +echo "Starting issue daemon..." +nohup daemons/issue-daemon.py & + +for instance in a b c +do + echo "Starting testchallenge daemon $instance..." + nohup daemons/testchallenge-daemon.py & +done + +echo "Starting makechallenge daemon..." +nohup daemons/makechallenge-daemon.py & diff --git a/server-ca/stop_daemons b/server-ca/stop_daemons new file mode 100755 index 000000000..7cb1310ba --- /dev/null +++ b/server-ca/stop_daemons @@ -0,0 +1,7 @@ +#!/bin/sh + +redis-cli lpush exit exit +redis-cli publish exit clean-exit + +# TODO: sleep a bit and then actually kill the daemon processes if they +# don't exit From 900120de95dc8ad66f7fa7aad0a26f933dca84f8 Mon Sep 17 00:00:00 2001 From: Seth Schoen Date: Thu, 18 Oct 2012 17:20:59 -0700 Subject: [PATCH 09/13] move docstring to the right place --- server-ca/CSR.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server-ca/CSR.py b/server-ca/CSR.py index 85bffcd9a..1e907d130 100644 --- a/server-ca/CSR.py +++ b/server-ca/CSR.py @@ -37,8 +37,8 @@ def parse(csr): return False def modulusbits(key): - key = str(key) """How many bits are in the modulus of this key?""" + key = str(key) bio = M2Crypto.BIO.MemoryBuffer(key) pubkey = M2Crypto.RSA.load_pub_key_bio(bio) return len(pubkey) From 6fc950f28c25e7cc1181b5ce66d5def0ad6da262 Mon Sep 17 00:00:00 2001 From: Seth Schoen Date: Thu, 18 Oct 2012 17:29:15 -0700 Subject: [PATCH 10/13] TODO on telling server when challenges couldn't be met --- trustify/client/client.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/trustify/client/client.py b/trustify/client/client.py index 9a9602a9b..0c1b26694 100644 --- a/trustify/client/client.py +++ b/trustify/client/client.py @@ -451,6 +451,8 @@ def authenticate(): for challenge in challenges: if not challenge.perform(quiet=curses): + # TODO: In this case the client should probably send a failure + # to the server. logger.fatal("challenge failed") sys.exit(1) logger.info("Configured Apache for challenge; waiting for verification...") From a70cda636a68c6641e29efe4df32beb1f3f3ac0e Mon Sep 17 00:00:00 2001 From: Seth Schoen Date: Mon, 22 Oct 2012 16:39:47 -0700 Subject: [PATCH 11/13] formalize disabling Observatory check for now --- server-ca/chocolate.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/server-ca/chocolate.py b/server-ca/chocolate.py index caa914667..4e90fe81c 100755 --- a/server-ca/chocolate.py +++ b/server-ca/chocolate.py @@ -289,11 +289,13 @@ class session(object): return try: # Check whether the SSL Observatory has seen a valid cert for this name. - if urllib2.urlopen("https://observatory.eff.org/check_name?domain_name=%s" % san).read().strip() != "False": + # XXX: This has been disabled because this API is unavailable + # or unreliable. + if False and urllib2.urlopen("https://observatory.eff.org/check_name?domain_name=%s" % san).read().strip() != "False": self.die(r, r.CannotIssueThatName, uri="https://ca.example.com/failures/observatory?%s" % san) return wildcard_variant = "*." + san.partition(".")[2] - if urllib2.urlopen("https://observatory.eff.org/check_name?domain_name=%s" % wildcard_variant).read().strip() != "False": + if False and urllib2.urlopen("https://observatory.eff.org/check_name?domain_name=%s" % wildcard_variant).read().strip() != "False": self.die(r, r.CannotIssueThatName, uri="https://ca.example.com/failures/observatory?%s" % san) return except urllib2.HTTPError: From ef7489d0800f4896bb876dc6023cead99e9ce6e2 Mon Sep 17 00:00:00 2001 From: Seth Schoen Date: Wed, 24 Oct 2012 23:33:45 -0700 Subject: [PATCH 12/13] warning: we apparently need to replace urllib2 --- trustify/client/client.py | 1 + 1 file changed, 1 insertion(+) diff --git a/trustify/client/client.py b/trustify/client/client.py index 0c1b26694..9cd18343f 100644 --- a/trustify/client/client.py +++ b/trustify/client/client.py @@ -4,6 +4,7 @@ import M2Crypto # It is OK to use the upstream M2Crypto here instead of our modified # version. import urllib2 +# XXX TODO: per https://docs.google.com/document/pub?id=1roBIeSJsYq3Ntpf6N0PIeeAAvu4ddn7mGo6Qb7aL7ew, urllib2 is unsafe (!) and must be replaced import os, grp, pwd, sys, time, random, sys import hashlib import subprocess From 5fb6a5b07dd58f0c7d8edf65ee24635a685287f5 Mon Sep 17 00:00:00 2001 From: Seth Schoen Date: Mon, 5 Nov 2012 17:32:35 -0800 Subject: [PATCH 13/13] som eminor changes, fixes, and reorganization --- server-ca/CONFIG.py | 3 +- server-ca/chocolate.py | 3 +- server-ca/daemons/daemon_common.py | 44 --------- server-ca/daemons/issue-daemon.py | 72 -------------- server-ca/daemons/logging-daemon.py | 29 ------ server-ca/daemons/makechallenge-daemon.py | 70 -------------- server-ca/daemons/testchallenge-daemon.py | 111 ---------------------- server-ca/sni_challenge/verify.py | 4 +- server-ca/start_daemons | 6 +- 9 files changed, 9 insertions(+), 333 deletions(-) delete mode 100644 server-ca/daemons/daemon_common.py delete mode 100755 server-ca/daemons/issue-daemon.py delete mode 100755 server-ca/daemons/logging-daemon.py delete mode 100755 server-ca/daemons/makechallenge-daemon.py delete mode 100755 server-ca/daemons/testchallenge-daemon.py diff --git a/server-ca/CONFIG.py b/server-ca/CONFIG.py index 75017aa93..faaabe7c6 100644 --- a/server-ca/CONFIG.py +++ b/server-ca/CONFIG.py @@ -28,4 +28,5 @@ hashcash_expiry = 60*60 extra_name_blacklist = ["eff.org", "www.eff.org"] # Name of file containing cert chain -cert_chain_file = "chain.pem" +cert_chain_file = "demoCA/cacert.pem" +debug = True diff --git a/server-ca/chocolate.py b/server-ca/chocolate.py index 4e90fe81c..980790815 100755 --- a/server-ca/chocolate.py +++ b/server-ca/chocolate.py @@ -2,6 +2,7 @@ import web, redis, time, binascii, re, urllib2 import CSR +from redis_lock import redis_lock from trustify.protocol import hashcash from CSR import M2Crypto from Crypto import Random @@ -162,7 +163,7 @@ class session(object): """Is the hashcash string h valid for a request to this server for signing n names?""" if hashcash.check(stamp=h, resource=chocolate_server_name, \ - bits=difficulty*n, check_expiration=hashcash_expiry): + bits=difficulty, check_expiration=hashcash_expiry): # sessions.sadd returns True upon adding to a set and # False if the item was already in the set. return sessions.sadd("spent-hashcash", h) diff --git a/server-ca/daemons/daemon_common.py b/server-ca/daemons/daemon_common.py deleted file mode 100644 index 39746dc29..000000000 --- a/server-ca/daemons/daemon_common.py +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env python - -# functions common to the various kinds of daemon - -# TODO: define a log function that sends a pubsub message to the -# logger daemon - -import time, binascii -from Crypto import Random - -def signal_handler(a, b): - global clean_shutdown - clean_shutdown = True - r.publish("exit", "clean-exit") - r.lpush("exit", "clean-exit") - -def short(session): - """Return the first 12 bytes of a session ID, or, for a - challenge ID, the challenge ID with the session ID truncated.""" - tmp = session.partition(":") - return tmp[0][:12] + "..." + tmp[1] + tmp[2] - -def ancient(session, state): - """Given that this session is in the specified named state, - decide whether the daemon should forcibly expire it for being too - old, even if no client request has caused the serve to mark the - session as expired. This is most relevant to truly abandoned - sessions that no client ever asks about.""" - age = int(time.time()) - int(r.hget(session, "created")) - if state == "makechallenge" and age > 120: - if debug: print "considered", short(session), "ancient" - return True - if state == "testchallenge" and age > 600: - if debug: print "considered", short(session), "ancient" - return True - return False - -def random(): - """Return 64 hex digits representing a new 32-byte random number.""" - return binascii.hexlify(Random.get_random_bytes(32)) - -def random_raw(): - """Return 32 random bytes.""" - return Random.get_random_bytes(32) diff --git a/server-ca/daemons/issue-daemon.py b/server-ca/daemons/issue-daemon.py deleted file mode 100755 index 0b5a8a9f6..000000000 --- a/server-ca/daemons/issue-daemon.py +++ /dev/null @@ -1,72 +0,0 @@ -#!/usr/bin/env python - -# This daemon runs on the CA side to look for requests in -# the database that are waiting for a cert to be issued. - -import redis, redis_lock, CSR, sys, signal -from sni_challenge.verify import verify_challenge -from Crypto import Random - -r = redis.Redis() -ps = r.pubsub() -issue_lock = redis_lock.redis_lock(r, "issue_lock") -# This lock guards the ability to issue certificates with "openssl ca", -# which has no locking of its own. We don't need locking for the updates -# that the daemon performs on the sessions in the database because the -# queues pending-makechallenge, pending-testchallenge, and pending-issue -# are updated atomically and the daemon only ever acts on sessions that it -# has removed from a queue. - -debug = "debug" in sys.argv -clean_shutdown = False - -from daemon_common import signal_handler, short, ancient, random, random_raw - -signal.signal(signal.SIGTERM, signal_handler) -signal.signal(signal.SIGINT, signal_handler) - -def issue(session): - if r.hget(session, "live") != "True": - # This session has died due to some other reason, like an - # illegal request or timeout, since it entered testchallenge - # state. Consequently, we're not allowed to advance its - # state any further, and it should be removed from the - # pending-requests queue and not pushed into any other queue. - # We don't have to remove it from pending-testchallenge - # because the caller has already done so. - # - # Having a session in pending-issue die is a very weird case - # that probably suggests that timeouts are set incorrectly - # or that the client is misbehaving very badly. This means - # that a request passed all of its challenges but the - # session nonetheless died for some reason unrelated to failing - # challenges before the cert could be issued. Normally, this - # should never happen. - if debug: print "removing expired (issue-state!?) session", short(session) - r.lrem("pending-requests", session) - return - if r.hget(session, "state") != "issue": - return - csr = r.hget(session, "csr") - names = r.lrange("%s:names" % session, 0, -1) - with issue_lock: - cert = CSR.issue(csr, names) - r.hset(session, "cert", cert) - if cert: # once issuing cert succeeded - if debug: print "%s: issued certificate for names: %s" % (short(session), ", ".join(names)) - r.hset(session, "state", "done") - # r.lpush("pending-done", session) - else: # should not be reached in deployed version - if debug: print "issuing for", short(session), "failed" - r.lpush("pending-issue", session) - -while True: - (where, what) = r.brpop(["exit", "pending-issue"]) - if where == "exit": - r.lpush("exit", "exit") - break - elif where == "pending-issue": - issue(what) - if clean_shutdown: - print "daemon exiting cleanly" - break diff --git a/server-ca/daemons/logging-daemon.py b/server-ca/daemons/logging-daemon.py deleted file mode 100755 index af0b2aee3..000000000 --- a/server-ca/daemons/logging-daemon.py +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env python - -# This daemon runs on the CA side to handle logging. - -import redis, signal - -r = redis.Redis() -ps = r.pubsub() - -debug = "debug" in sys.argv -clean_shutdown = False - -from daemon_common import signal_handler - -signal.signal(signal.SIGTERM, signal_handler) -signal.signal(signal.SIGINT, signal_handler) - -ps.subscribe(["logs", "exit"]) -for message in ps.listen(): - if message["type"] != "message": - continue - if message["channel"] == "logs": - if debug: print message["data"] - continue - if message["channel"] == "exit": - break - if clean_shutdown: - print "daemon exiting cleanly" - break diff --git a/server-ca/daemons/makechallenge-daemon.py b/server-ca/daemons/makechallenge-daemon.py deleted file mode 100755 index 3260dba42..000000000 --- a/server-ca/daemons/makechallenge-daemon.py +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env python - -# This daemon runs on the CA side to look for requests in -# the database that are waiting for challenges to be issued. - -import redis, redis_lock, time, sys, signal - -r = redis.Redis() -ps = r.pubsub() - -debug = "debug" in sys.argv -clean_shutdown = False - -from daemon_common import signal_handler, short, ancient, random, random_raw - -signal.signal(signal.SIGTERM, signal_handler) -signal.signal(signal.SIGINT, signal_handler) - -def makechallenge(session): - if r.hget(session, "live") != "True": - # This session has died due to some other reason, like an - # illegal request or timeout, since it entered makechallenge - # state. Consequently, we're not allowed to advance its - # state any further, and it should be removed from the - # pending-requests queue and not pushed into any other queue. - # We don't have to remove it from pending-makechallenge - # because the caller has already done so. - if debug: print "removing expired session", short(session) - r.lrem("pending-requests", session) - return - # Currently only makes challenges of type 0 (DomainValidateSNI) - # This challenge type has three internal data parameters: - # dvsni:nonce, dvsni:r, dvsni:ext - # This challenge type sends three data parameters to the client: - # nonce, y = E(r), ext - # - # Make one challenge for each name. (This one-to-one relationship - # is not an inherent protocol requirement!) - names = r.lrange("%s:names" % session, 0, -1) - if debug: print "%s: new valid request" % session - if debug: print "%s: from requesting client at %s" % (short(session), r.hget(session, "client-addr")) - if debug: print "%s: for %d names: %s" % (short(session), len(names), ", ".join(names)) - for i, name in enumerate(names): - challenge = "%s:%d" % (session, i) - r.hset(challenge, "challtime", int(time.time())) - r.hset(challenge, "type", 0) # DomainValidateSNI - r.hset(challenge, "name", name) - r.hset(challenge, "satisfied", False) - r.hset(challenge, "failed", False) - r.hset(challenge, "dvsni:nonce", random()) - r.hset(challenge, "dvsni:r", random_raw()) - r.hset(challenge, "dvsni:ext", "1.3.3.7") - # Keep accurate count of how many challenges exist in this session. - r.hincrby(session, "challenges", 1) - if debug: print "created new challenge", short(challenge) - if True: # challenges have been created - r.hset(session, "state", "testchallenge") - else: - r.lpush("pending-makechallenge", session) - -while True: - (where, what) = r.brpop(["exit", "pending-makechallenge"]) - if where == "exit": - r.lpush("exit", "exit") - break - elif where == "pending-makechallenge": - makechallenge(what) - if clean_shutdown: - print "daemon exiting cleanly" - break diff --git a/server-ca/daemons/testchallenge-daemon.py b/server-ca/daemons/testchallenge-daemon.py deleted file mode 100755 index f7a95cc16..000000000 --- a/server-ca/daemons/testchallenge-daemon.py +++ /dev/null @@ -1,111 +0,0 @@ -#!/usr/bin/env python - -# This daemon runs on the CA side to look for requests in -# the database that are waiting for the CA to test whether -# challenges have been met, and to perform this test. - -import redis, redis_lock, time, sys, signal -from sni_challenge.verify import verify_challenge - -r = redis.Redis() -ps = r.pubsub() - -debug = "debug" in sys.argv -clean_shutdown = False - -from daemon_common import signal_handler, short, ancient, random, random_raw - -def signal_handler(a, b): - global clean_shutdown - clean_shutdown = True - r.publish("exit", "clean-exit") - -signal.signal(signal.SIGTERM, signal_handler) -signal.signal(signal.SIGINT, signal_handler) - -def testchallenge(session): - if r.hget(session, "live") != "True": - # This session has died due to some other reason, like an - # illegal request or timeout, since it entered testchallenge - # state. Consequently, we're not allowed to advance its - # state any further, and it should be removed from the - # pending-requests queue and not pushed into any other queue. - # We don't have to remove it from pending-testchallenge - # because the caller has already done so. - if debug: print "removing expired session", short(session) - r.lrem("pending-requests", session) - return - if r.hget(session, "state") != "testchallenge": - return - if int(r.hincrby(session, "times-tested", 1)) > 3: - # This session has already been unsuccessfully tested three - # times. Clearly, something has gone wrong or the client is - # just trying to annoy us. Do not allow it to be tested again. - r.hset(session, "live", False) - r.lrem("pending-requests", session) - return - all_satisfied = True - for i, name in enumerate(r.lrange("%s:names" % session, 0, -1)): - challenge = "%s:%d" % (session, i) - if debug: print "testing challenge", short(challenge) - challtime = int(r.hget(challenge, "challtime")) - challtype = int(r.hget(challenge, "type")) - name = r.hget(challenge, "name") - satisfied = r.hget(challenge, "satisfied") == "True" - failed = r.hget(challenge, "failed") == "True" - # TODO: check whether this challenge is too old - if not satisfied and not failed: - # if debug: print "challenge", short(challenge), "being tested" - if challtype == 0: # DomainValidateSNI - if debug: print "\tbeginning dvsni test to %s" % name - dvsni_nonce = r.hget(challenge, "dvsni:nonce") - dvsni_r = r.hget(challenge, "dvsni:r") - dvsni_ext = r.hget(challenge, "dvsni:ext") - direct_result, direct_reason = verify_challenge(name, dvsni_r, dvsni_nonce, False) - proxy_result, proxy_reason = verify_challenge(name, dvsni_r, dvsni_nonce, True) - if debug: - print "\t...direct probe: %s (%s)" % (direct_result, direct_reason) - print "\tTor proxy probe: %s (%s)" % (proxy_result, proxy_reason) - if direct_result and proxy_result: - r.hset(challenge, "satisfied", True) - else: - all_satisfied = False - # TODO: distinguish permanent and temporarily failures - # can cause a permanent failure under some conditions, causing - # the session to become dead. TODO: need to articulate what - # those conditions are - else: - # Don't know how to handle this challenge type - all_satisfied = False - elif not satisfied: - if debug: print "\tchallenge was not attempted" - all_satisfied = False - if all_satisfied: - # Challenges all succeeded, so we should prepare to issue - # the requested cert. - # TODO: double-check that there were > 0 challenges, - # so that we don't somehow mistakenly issue a cert in - # response to an empty list of challenges (even though - # the daemon that put this session on the queue should - # also have implicitly guaranteed this). - if debug: print "\t** All challenges satisfied; request %s GRANTED" % short(session) - r.hset(session, "state", "issue") - r.lpush("pending-issue", session) - else: - # Some challenges were not verified. In the current - # design of this daemon, the client must contact - # us again to request that the session be placed back - # in pending-testchallenge! - pass - -while True: - (where, what) = r.brpop(["exit", "pending-testchallenge"]) - if where == "exit": - r.lpush("exit", "exit") - break - elif where == "pending-testchallenge": - with redis_lock(r, "lock-" + what): - testchallenge(what) - if clean_shutdown: - print "daemon exiting cleanly" - break diff --git a/server-ca/sni_challenge/verify.py b/server-ca/sni_challenge/verify.py index 627ed4494..334b778a6 100644 --- a/server-ca/sni_challenge/verify.py +++ b/server-ca/sni_challenge/verify.py @@ -68,8 +68,8 @@ def verify_challenge(address, r, nonce, socksify=False): sni_support.set_sni_ext(conn.ssl, sni_name) try: conn.connect((address, 443)) - except: - return False, "Connection to SSL Server failed" + except Exception, e: + return False, "Connection to SSL Server failed (%s)" % str(e) cert_chain = conn.get_peer_cert_chain() diff --git a/server-ca/start_daemons b/server-ca/start_daemons index 21ccc299f..3457c4c53 100755 --- a/server-ca/start_daemons +++ b/server-ca/start_daemons @@ -10,13 +10,13 @@ nohup ./logging-daemon.py & # runs of the daemon should occur here. echo "Starting issue daemon..." -nohup daemons/issue-daemon.py & +nohup ./issue-daemon.py & for instance in a b c do echo "Starting testchallenge daemon $instance..." - nohup daemons/testchallenge-daemon.py & + nohup ./testchallenge-daemon.py & done echo "Starting makechallenge daemon..." -nohup daemons/makechallenge-daemon.py & +nohup ./makechallenge-daemon.py &