From 204a6adc4854b95b52b6143712463b082f015067 Mon Sep 17 00:00:00 2001 From: emdee Date: Sun, 27 Nov 2022 01:10:18 +0000 Subject: [PATCH] Async added and removed --- exclude_badExits.bash | 2 +- exclude_badExits.py | 631 ++++++++++++++++++++++++++---------------- support_onions.py | 4 +- trustor_poc.py | 104 +++++-- 4 files changed, 483 insertions(+), 258 deletions(-) diff --git a/exclude_badExits.bash b/exclude_badExits.bash index d5d46aa..fd8eafb 100644 --- a/exclude_badExits.bash +++ b/exclude_badExits.bash @@ -14,7 +14,7 @@ LARGS=( # you may have a special python for installed packages EXE=`which python3.bash` LARGS+=( - --strict_nodes 0 + --strict_nodes 1 --points_timeout 120 --proxy-host 127.0.0.1 --proxy-port $SOCKS_PORT diff --git a/exclude_badExits.py b/exclude_badExits.py index 4561c6b..ad72d6d 100644 --- a/exclude_badExits.py +++ b/exclude_badExits.py @@ -136,8 +136,14 @@ from support_onions import (bAreWeConnected, icheck_torrc, lIntroductionPoints, yKNOWN_NODNS, zResolveDomain) from trustor_poc import TrustorError, idns_validate -from trustor_poc import oDownloadUrlUrllib3 as oDownloadUrl - +try: + import xxxhttpx + import asyncio + from trustor_poc import oDownloadUrlHttpx +except: + httpx = None + from trustor_poc import oDownloadUrlUrllib3Socks as oDownloadUrl + global LOG import logging import warnings @@ -157,16 +163,32 @@ aTRUST_DB_INDEX = {} aRELAYS_DB = {} aRELAYS_DB_INDEX = {} aFP_EMAIL = {} +aDOMAIN_FPS = {} sDETAILS_URL = "https://metrics.torproject.org/rs.html#details/" # You can call this while bootstrapping sEXCLUDE_EXIT_KEY = 'ExcludeNodes' sINCLUDE_EXIT_KEY = 'ExitNodes' -sINCLUDE_GUARD_KEY = 'EntryNodes' -oBAD_NODES = {} oBAD_ROOT = 'BadNodes' -oBAD_NODES[oBAD_ROOT] = {} -oBAD_NODES[oBAD_ROOT]['ExcludeNodes'] = {} +oBAD_NODES = safe_load(""" +BadNodes: + ExcludeDomains: [] + ExcludeNodes: + BadExit: [] +""") + +sGOOD_ROOT = 'GoodNodes' +sINCLUDE_GUARD_KEY = 'EntryNodes' +sEXCLUDE_DOMAINS = 'ExcludeDomains' +oGOOD_NODES = safe_load(""" +GoodNodes: + EntryNodes: [] + Relays: + ExitNodes: [] + IntroductionPoints: [] + Onions: [] + Services: [] +""") lKNOWN_NODNS = [] tMAYBE_NODNS = set() @@ -184,21 +206,19 @@ def lYamlBadNodes(sFile, oBAD_NODES = safe_load(oFd) # BROKEN -# root = 'ExcludeNodes' +# root = sEXCLUDE_EXIT_KEY # for elt in o[oBAD_ROOT][root][section].keys(): # if lWanted and elt not in lWanted: continue # # l += o[oBAD_ROOT][root][section][elt] - l = oBAD_NODES[oBAD_ROOT]['ExcludeNodes']['BadExit'] + l = oBAD_NODES[oBAD_ROOT][sEXCLUDE_EXIT_KEY]['BadExit'] tMAYBE_NODNS = set(safe_load(StringIO(yKNOWN_NODNS))) - root = 'ExcludeDomains' + root = sEXCLUDE_DOMAINS if root in oBAD_NODES[oBAD_ROOT] and oBAD_NODES[oBAD_ROOT][root]: tMAYBE_NODNS.extend(oBAD_NODES[oBAD_ROOT][root]) return l -oGOOD_NODES = {} -oGOOD_ROOT = 'GoodNodes' def lYamlGoodNodes(sFile='/etc/tor/torrc-goodnodes.yaml'): global oGOOD_NODES l = [] @@ -207,8 +227,8 @@ def lYamlGoodNodes(sFile='/etc/tor/torrc-goodnodes.yaml'): with open(sFile, 'rt') as oFd: o = safe_load(oFd) oGOOD_NODES = o - if 'GuardNodes' in o[oGOOD_ROOT].keys(): - l = o[oGOOD_ROOT]['GuardNodes'] + if 'EntryNodes' in o[sGOOD_ROOT].keys(): + l = o[sGOOD_ROOT]['EntryNodes'] # yq '.Nodes.IntroductionPoints|.[]' < /etc/tor/torrc-goodnodes.yaml return l @@ -236,18 +256,20 @@ lAT_REPS = ['[]', ' at ', '(at)', '[at]', '', '(att)', '_at_', lDOT_REPS = [' point ', ' dot ', '[dot]', '(dot)', '_dot_', '!dot!', '<.>', '<:dot:>', '|dot--|', ] -lNO_EMAIL = ['', - 'not@needed.com', +lNO_EMAIL = [ + '', '', '', - 'not a person ', - r'', 
'@snowden',
     'ano ano@fu.dk',
     'anonymous',
     'anonymous@buzzzz.com',
     'check http://highwaytohoell.de',
+    'no-spam@tor.org',
     'no@no.no',
+    'noreply@bytor.com',
+    'not a person ',
     'not@needed.com',
     'not@re.al',
     'nothanks',
@@ -255,6 +277,7 @@ lNO_EMAIL = ['',
     'ur@mom.com',
     'your@e-mail',
     'your@email.com',
+    r'',
     ]
 def sCleanEmail(s):
     s = s.lower()
@@ -297,17 +320,26 @@ def aCleanContact(a):
         a.update({'fps': []})
     return a
 
-def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050):
+def bVerifyContact(a=None, fp=None, https_cafile=None):
+    global aFP_EMAIL
     global tBAD_URLS
     global lKNOWN_NODNS
+    global aTRUST_DB
+    global aTRUST_DB_INDEX
+
+    assert a
+    assert fp
+    assert https_cafile
+
     keys = list(a.keys())
     a = aCleanContact(a)
+    a['fp'] = fp
     if 'email' not in keys:
         a['email'] = ''
     if 'ciissversion' not in keys:
         aFP_EMAIL[fp] = a['email']
         LOG.warn(f"{fp} 'ciissversion' not in {keys}")
-        a['ciissversion'] = 2
+        return a
+
     # test the url for fps and add it to the array
     if 'proof' not in keys:
         aFP_EMAIL[fp] = a['email']
@@ -343,7 +375,7 @@ def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050)
             aFP_EMAIL[fp] = a['email']
             LOG.debug(f"{fp} {domain} does not resolve")
             lKNOWN_NODNS.append(domain)
-            return {}
+            return a
 
     if a['proof'] in ['dns-rsa']:
         # only support uri for now
@@ -354,16 +386,56 @@ def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050)
                          dnssec_DS_file='dnssec-root-trust',
                          ) == 0:
             pass
-        LOG.warn(f"{fp} proof={a['proof']} not supported yet")
+        LOG.warn(f"{fp} proof={a['proof']} - assumed good")
+        a['fps'] = [fp]
+        aTRUST_DB_INDEX[fp] = a
         return a
+    return True
 
+# async
+# If we keep a cache of FPs that we have gotten by downloading a URL,
+# we can avoid re-downloading the same URL for the other FPs in the list of relays.
+# If we parallelize the gathering of the URLs, we may have simultaneous
+# gathers of the same URL from different relays, defeating the advantage
+# of going parallel. The cache is global aDOMAIN_FPS.
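+#
+# A minimal sketch (illustration only, not part of this change) of how the
+# aDOMAIN_FPS cache could be guarded by per-domain asyncio locks so that
+# parallel tasks never fetch the same URL twice; fetch_fps and _aLOCKS are
+# hypothetical names, not identifiers from this codebase:
+#
+#   _aLOCKS = {}
+#   async def fetch_fps(domain, url, https_cafile):
+#       lock = _aLOCKS.setdefault(domain, asyncio.Lock())
+#       async with lock:
+#           # a second task for the same domain waits here, then hits the cache
+#           if domain not in aDOMAIN_FPS:
+#               o = await oDownloadUrlHttpx(url, https_cafile)
+#               aDOMAIN_FPS[domain] = o.text.upper().strip().split('\n')
+#           return aDOMAIN_FPS[domain]
+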
+def aVerifyContact(a=None, fp=None, https_cafile=None, timeout=20, host='127.0.0.1', port=9050, oargs=None):
+    global aFP_EMAIL
+    global tBAD_URLS
+    global lKNOWN_NODNS
+    global aDOMAIN_FPS
+
+    assert a
+    assert fp
+    assert https_cafile
+
+    r = bVerifyContact(a=a, fp=fp, https_cafile=https_cafile)
+    if r is not True:
+        return r
+
+    domain = a['url'].replace('https://', '').replace('http://', '').rstrip('/')
+    if domain in aDOMAIN_FPS.keys():
+        a['fps'] = aDOMAIN_FPS[domain]
+        return a
+
     # LOG.debug(f"{len(keys)} contact fields for {fp}")
     url = a['url'] + "/.well-known/tor-relay/rsa-fingerprint.txt"
+    if url in aDOMAIN_FPS.keys():
+        a['fps'] = aDOMAIN_FPS[url]
+        return a
+
+    if bAreWeConnected() is False:
+        raise SystemExit("we are not connected")
     try:
-        LOG.debug(f"Downloading from {domain} for {fp}")
-        o = oDownloadUrl(url, https_cafile,
-                         timeout=timeout, host=host, port=port,
-                         content_type='text/plain')
+        if httpx:
+            LOG.debug(f"Downloading from {domain} for {fp}")
+            # await
+            o = oDownloadUrlHttpx(url, https_cafile,
+                                  timeout=timeout, host=host, port=port,
+                                  content_type='text/plain')
+        else:
+            LOG.debug(f"Downloading from {domain} for {fp}")
+            o = oDownloadUrl(url, https_cafile,
+                             timeout=timeout, host=host, port=port,
+                             content_type='text/plain')
     # requests response: text "reason", "status_code"
     except AttributeError as e:
         LOG.exception(f"AttributeError downloading from {domain} {e}")
@@ -384,34 +456,57 @@ def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050)
     except (BaseException) as e:
         LOG.error(f"Exception {type(e)} downloading from {domain} {e}")
     else:
-        if hasattr(o, 'status'):
-            status_code = o.status
-        else:
-            status_code = o.status_code
-        if status_code >= 300:
-            aFP_EMAIL[fp] = a['email']
-            LOG.warn(f"Error from {domain} {status_code} {o.reason}")
-            # any reason retry?
-            tBAD_URLS.add(a['url'])
-            return a
+        a = aContactFps(oargs, a, o, domain)
+        LOG.debug(f"Downloaded from {domain} {len(a['fps'])} FPs for {fp}")
+        aDOMAIN_FPS[domain] = a['fps']
+        url = a['url']
+        aDOMAIN_FPS[url] = a['fps']
+    return a
 
-        if hasattr(o, 'text'):
-            data = o.text
-        else:
-            data = str(o.data, 'UTF-8')
-        l = data.upper().strip().split('\n')
-        LOG.debug(f"Downloaded from {domain} {len(l)} lines {len(data)} bytes")
+def aContactFps(oargs, a, o, domain):
+    global aFP_EMAIL
+    global tBAD_URLS
+    global lKNOWN_NODNS
+    global aDOMAIN_FPS
 
-        a['modified'] = int(time.time())
-        if not l:
-            LOG.warn(f"Downloading from {domain} empty for {fp}")
-        else:
-            a['fps'] = [elt.strip() for elt in l if elt \
-                        and not elt.startswith('#')]
-            LOG.info(f"Downloaded from {domain} {len(a['fps'])} FPs")
-            for elt in a['fps']:
-                if len(elt) != 40:
-                    LOG.warn(f"len !=40 from {domain} '{elt}'")
+    if hasattr(o, 'status'):
+        status_code = o.status
+    else:
+        status_code = o.status_code
+    if status_code >= 300:
+        aFP_EMAIL[a['fp']] = a['email']
+        LOG.warn(f"Error from {domain} {status_code} {o.reason}")
+        # any reason retry?
+        tBAD_URLS.add(a['url'])
+        return a
+
+    if hasattr(o, 'text'):
+        data = o.text
+    else:
+        data = str(o.data, 'UTF-8')
+    l = data.upper().strip().split('\n')
+    LOG.debug(f"Downloaded from {domain} {len(l)} lines {len(data)} bytes")
+    if oargs.wellknown_output:
+        sdir = os.path.join(oargs.wellknown_output, domain,
+                            '.well-known', 'tor-relay')
+        try:
+            if not os.path.isdir(sdir):
+                os.makedirs(sdir)
+            sfile = os.path.join(sdir, "rsa-fingerprint.txt")
+            with open(sfile, 'wt') as oFd:
+                oFd.write(data)
+        except Exception as e:
+            LOG.warn(f"Error writing {sfile} {e}")
+
+    a['modified'] = int(time.time())
+    if not l:
+        LOG.warn(f"Downloaded from {domain} empty for {a['fp']}")
+    else:
+        a['fps'] = [elt.strip() for elt in l if elt \
+                    and len(elt) == 40 \
+                    and not elt.startswith('#')]
+        LOG.info(f"Downloaded from {domain} {len(a['fps'])} FPs")
+    aDOMAIN_FPS[domain] = a['fps']
     return a
 
 def aParseContact(contact, fp):
@@ -432,16 +527,18 @@ def aParseContact(contact, fp):
         return {}
     for elt in lelts:
         if ':' not in elt:
+            if elt == 'DFRI':
+                # oddball
+                continue
             # hoster:Quintex Alliance Consulting
             LOG.warn(f"no : in {elt} for {contact} in {fp}")
-            continue
+            return {}
         (key , val,) = elt.split(':', 1)
         if key == '':
             continue
         key = key.rstrip(':')
         a[key] = val
     a = aCleanContact(a)
-#    LOG.debug(f"{fp} {len(a.keys())} fields")
     return a
 
 def aParseContactYaml(contact, fp):
@@ -508,8 +605,9 @@ def oMainArgparser(_=None):
                         default=os.path.join(ETC_DIR, 'badcontacts.yaml'),
                         help="Yaml file of bad contacts that bad FPs are using")
 
-    parser.add_argument('--strict_nodes', type=int, default=0, choices=[0, 1],
-                        help="Set StrictNodes: 1 is less anonymous but more secure, although some sites may be unreachable")
+    parser.add_argument('--strict_nodes', type=str, default='0',
+                        choices=['0', '1'],
+                        help="Set StrictNodes: 1 is less anonymous but more secure, although some onion sites may be unreachable")
     parser.add_argument('--wait_boot', type=int, default=120,
                         help="Seconds to wait for Tor to booststrap")
     parser.add_argument('--points_timeout', type=int, default=0,
@@ -528,10 +626,25 @@ def oMainArgparser(_=None):
     parser.add_argument('--relays_output', type=str,
                         default=os.path.join(ETC_DIR, 'relays.json'),
                         help="Write the download relays in json to a file")
+    parser.add_argument('--wellknown_output', type=str,
+                        default=os.path.join(ETC_DIR, 'https'),
+                        help="Write the well-known files to a directory")
     parser.add_argument('--good_contacts', type=str,
                         default=os.path.join(ETC_DIR, 'goodcontacts.yaml'),
                         help="Write the proof data of the included nodes to a YAML file")
     return parser
 
+def vwrite_good_contacts(oargs):
+    global aTRUST_DB
+    good_contacts_tmp = oargs.good_contacts + '.tmp'
+    with open(good_contacts_tmp, 'wt') as oFYaml:
+        yaml.dump(aTRUST_DB, oFYaml)
+        oFYaml.close()
+    if os.path.exists(oargs.good_contacts):
+        bak = oargs.good_contacts +'.bak'
+        os.rename(oargs.good_contacts, bak)
+    os.rename(good_contacts_tmp, oargs.good_contacts)
+    LOG.info(f"Wrote {len(list(aTRUST_DB.keys()))} good contact details to {oargs.good_contacts}")
+
 def vwrite_badnodes(oargs, oBAD_NODES, slen):
     if oargs.bad_nodes:
         tmp = oargs.bad_nodes +'.tmp'
@@ -560,6 +673,7 @@ def lget_onionoo_relays(oargs):
     import requests
     adata = {}
     if oargs.relays_output and os.path.exists(oargs.relays_output):
+        # and less than a day old?
LOG.info(f"Getting OO relays from {oargs.relays_output}") try: with open(oargs.relays_output, 'rt') as ofd: @@ -581,9 +695,9 @@ def lget_onionoo_relays(oargs): port=oargs.proxy_port, content_type='') if hasattr(o, 'text'): - data = o.text + sdata = o.text else: - data = str(o.data, 'UTF-8') + sdata = str(o.data, 'UTF-8') except Exception as e: # simplejson.errors.JSONDecodeError # urllib3.exceptions import ConnectTimeoutError, NewConnectionError @@ -592,7 +706,7 @@ def lget_onionoo_relays(oargs): return [] else: LOG.debug(f"Downloaded {surl} {len(sdata)} bytes") - adata = json.loads(data) + adata = json.loads(sdata) else: odata = requests.get(surl, verify=sCAfile) try: @@ -675,15 +789,101 @@ def vwritefinale(oargs, lNotInaRELAYS_DB): # https://onionoo.torproject.org/details LOG.info(f"although it's often broken") -def iMain(lArgs): +def bProcessContact(b, texclude_set, aBadContacts, iFakeContact=0): global aTRUST_DB global aTRUST_DB_INDEX - global oBAD_NODES - global oGOOD_NODES - global lKNOWN_NODNS - global aRELAYS_DB - global aRELAYS_DB_INDEX + sofar = '' + fp = b['fp'] + # need to skip urllib3.exceptions.MaxRetryError + if not b or 'fps' not in b or not b['fps'] or not b['url']: + LOG.warn(f"{fp} did NOT VERIFY {sofar}") + LOG.debug(f"{fp} {b} {sofar}") + # If it's giving contact info that doesnt check out + # it could be a bad exit with fake contact info + texclude_set.add(fp) + aBadContacts[fp] = b + return None + if fp not in b['fps']: + LOG.warn(f"{fp} the FP IS NOT in the list of fps {sofar}") + # assume a fp is using a bogus contact + texclude_set.add(fp) + aBadContacts[fp] = b + return False + + LOG.info(f"{fp} GOOD {b['url']} {sofar}") + # add our contact info to the trustdb + aTRUST_DB[fp] = b + for elt in b['fps']: + aTRUST_DB_INDEX[elt] = b + + return True + +def bCheckFp(relay, sofar, lConds, texclude_set, lNotInaRELAYS_DB): + global aTRUST_DB + global aTRUST_DB_INDEX + + if not is_valid_fingerprint(relay.fingerprint): + LOG.warn('Invalid Fingerprint: %s' % relay.fingerprint) + return None + + fp = relay.fingerprint + if aRELAYS_DB and fp not in aRELAYS_DB.keys(): + LOG.warn(f"{fp} not in aRELAYS_DB") + lNotInaRELAYS_DB += [fp] + + if not relay.exit_policy.is_exiting_allowed(): + if sEXCLUDE_EXIT_KEY == sEXCLUDE_EXIT_KEY: + pass # LOG.debug(f"{fp} not an exit {sofar}") + else: + pass # LOG.warn(f"{fp} not an exit {sofar}") + # return None + + # great contact had good fps and we are in them + if fp in aTRUST_DB_INDEX.keys(): + # a cached entry + return None + + if type(relay.contact) == bytes: + # dunno + relay.contact = str(relay.contact, 'UTF-8') + + # fail if the contact is empty + if ('Empty' in lConds and not relay.contact): + LOG.info(f"{fp} skipping empty contact - Empty {sofar}") + texclude_set.add(fp) + return None + + contact = sCleanEmail(relay.contact) + # fail if the contact has no email - unreliable + if ('NoEmail' in lConds and relay.contact and + ('@' not in contact and 'email:' not in contact)): + LOG.info(f"{fp} skipping contact - NoEmail {contact} {sofar}") + LOG.debug(f"{fp} {relay.contact} {sofar}") + texclude_set.add(fp) + return None + + # fail if the contact does not pass + if ('NotGood' in lConds and relay.contact and + ('ciissversion:' not in relay.contact)): + LOG.info(f"{fp} skipping no ciissversion in contact {sofar}") + LOG.debug(f"{fp} {relay.contact} {sofar}") + texclude_set.add(fp) + return None + + # fail if the contact does not have url: to pass + if relay.contact and 'url' not in relay.contact: + LOG.info(f"{fp} skipping unfetchable 
contact - no url {sofar}") + LOG.debug(f"{fp} {relay.contact} {sofar}") + if ('NotGood' in lConds): texclude_set.add(fp) + return None + + return True + +def oMainPreamble(lArgs): + global aTRUST_DB + global aTRUST_DB_INDEX + parser = oMainArgparser() oargs = parser.parse_args(lArgs) @@ -691,21 +891,12 @@ def iMain(lArgs): if bAreWeConnected() is False: raise SystemExit("we are not connected") - if os.path.exists(oargs.proxy_ctl): - controller = oGetStemController(log_level=oargs.log_level, sock_or_pair=oargs.proxy_ctl) - else: - port =int(oargs.proxy_ctl) - controller = oGetStemController(log_level=oargs.log_level, sock_or_pair=port) - - vwait_for_controller(controller, oargs.wait_boot) - sFile = oargs.torrc if sFile and os.path.exists(sFile): icheck_torrc(sFile, oargs) - twhitelist_set = set() sFile = oargs.good_contacts - if False and sFile and os.path.exists(sFile): + if sFile and os.path.exists(sFile): try: with open(sFile, 'rt') as oFd: aTRUST_DB = safe_load(oFd) @@ -726,8 +917,16 @@ def iMain(lArgs): except Exception as e: LOG.exception(f"Error reading YAML TrustDB {sFile} {e}") - if oargs.good_contacts: - good_contacts_tmp = oargs.good_contacts + '.tmp' + return oargs + +def oStemController(oargs): + if os.path.exists(oargs.proxy_ctl): + controller = oGetStemController(log_level=oargs.log_level, sock_or_pair=oargs.proxy_ctl) + else: + port =int(oargs.proxy_ctl) + controller = oGetStemController(log_level=oargs.log_level, sock_or_pair=port) + + vwait_for_controller(controller, oargs.wait_boot) elt = controller.get_conf('UseMicrodescriptors') if elt != '0': @@ -740,25 +939,31 @@ def iMain(lArgs): if elt and elt != '{??}': LOG.warn(f"{sEXCLUDE_EXIT_KEY} is in use already") + return controller + +def tWhitelistSet(oargs, controller): + twhitelist_set = set() + twhitelist_set.update(set(lYamlGoodNodes(oargs.good_nodes))) - LOG.info(f"lYamlGoodNodes {len(twhitelist_set)} GuardNodes from {oargs.good_nodes}") + LOG.info(f"lYamlGoodNodes {len(twhitelist_set)} EntryNodes from {oargs.good_nodes}") - global oGOOD_NODES t = set() - if 'IntroductionPoints' in oGOOD_NODES[oGOOD_ROOT]['Relays'].keys(): - t = set(oGOOD_NODES[oGOOD_ROOT]['Relays']['IntroductionPoints']) + if sGOOD_ROOT in oGOOD_NODES and 'Relays' in oGOOD_NODES[sGOOD_ROOT] and \ + 'IntroductionPoints' in oGOOD_NODES[sGOOD_ROOT]['Relays'].keys(): + t = set(oGOOD_NODES[sGOOD_ROOT]['Relays']['IntroductionPoints']) + w = set() - if 'Services' in oGOOD_NODES[oGOOD_ROOT].keys(): - w = set(oGOOD_NODES[oGOOD_ROOT]['Services']) + if sGOOD_ROOT in oGOOD_NODES and 'Services' in oGOOD_NODES[sGOOD_ROOT].keys(): + w = set(oGOOD_NODES[sGOOD_ROOT]['Services']) twhitelist_set.update(w) if len(w) > 0: LOG.info(f"Whitelist {len(t)} relays from Services") w = set() - if 'Onions' in oGOOD_NODES[oGOOD_ROOT].keys(): + if 'Onions' in oGOOD_NODES[sGOOD_ROOT].keys(): # Provides the descriptor for a hidden service. 
The **address** is the # '.onion' address of the hidden service - w = set(oGOOD_NODES[oGOOD_ROOT]['Onions']) + w = set(oGOOD_NODES[sGOOD_ROOT]['Onions']) if oargs.white_onions: w.update(oargs.white_onions.split(',')) if oargs.points_timeout > 0: @@ -768,6 +973,9 @@ def iMain(lArgs): LOG.info(f"IntroductionPoints {len(t)} relays from {len(w)} IPs for onions") twhitelist_set.update(t) + return twhitelist_set + +def tExcludeSet(oargs): texclude_set = set() if oargs.bad_nodes and os.path.exists(oargs.bad_nodes): if False and oargs.bad_sections: @@ -778,150 +986,117 @@ def iMain(lArgs): section=sEXCLUDE_EXIT_KEY)) LOG.info(f"Preloaded {len(texclude_set)} bad fps") + return texclude_set + +# async +def iMain(lArgs): + global aTRUST_DB + global aTRUST_DB_INDEX + global oBAD_NODES + global oGOOD_NODES + global lKNOWN_NODNS + global aRELAYS_DB + global aRELAYS_DB_INDEX + global tBAD_URLS + + oargs = oMainPreamble(lArgs) + controller = oStemController(oargs) + twhitelist_set = tWhitelistSet(oargs, controller) + texclude_set = tExcludeSet(oargs) + ttrust_db_index = aTRUST_DB_INDEX.keys() tdns_urls = set() iFakeContact = 0 iTotalContacts = 0 aBadContacts = {} lNotInaRELAYS_DB = [] - aRELAYS_DB = {elt['fingerprint'].upper(): elt for - elt in lget_onionoo_relays(oargs) - if 'fingerprint' in elt} - lConds = oargs.bad_on.split(',') iR = 0 relays = controller.get_server_descriptors() + lqueue = [] + socksu = f"socks5://{oargs.proxy_host}:{oargs.proxy_port}" for relay in relays: iR += 1 - if not is_valid_fingerprint(relay.fingerprint): - LOG.warn('Invalid Fingerprint: %s' % relay.fingerprint) - continue - relay.fingerprint = relay.fingerprint.upper() + fp = relay.fingerprint = relay.fingerprint.upper() sofar = f"G:{len(aTRUST_DB.keys())} U:{len(tdns_urls)} F:{iFakeContact} BF:{len(texclude_set)} GF:{len(ttrust_db_index)} TC:{iTotalContacts} #{iR}" - fp = relay.fingerprint - if aRELAYS_DB and fp not in aRELAYS_DB.keys(): - LOG.warn(f"{fp} not in aRELAYS_DB") - lNotInaRELAYS_DB += [fp] - - if not relay.exit_policy.is_exiting_allowed(): - if sEXCLUDE_EXIT_KEY == 'ExcludeNodes': - pass # LOG.debug(f"{relay.fingerprint} not an exit {sofar}") - else: - pass # LOG.warn(f"{relay.fingerprint} not an exit {sofar}") - # continue - - # great contact had good fps and we are in them - if relay.fingerprint in aTRUST_DB_INDEX.keys(): - # a cached entry - continue - - if type(relay.contact) == bytes: - # dunno - relay.contact = str(relay.contact, 'UTF-8') - - # fail if the contact is empty - if ('Empty' in lConds and not relay.contact): - LOG.info(f"{fp} skipping empty contact - Empty {sofar}") - texclude_set.add(relay.fingerprint) - continue - - contact = sCleanEmail(relay.contact) - # fail if the contact has no email - unreliable - if ('NoEmail' in lConds and relay.contact and - ('@' not in contact and 'email:' not in contact)): - LOG.info(f"{fp} skipping contact - NoEmail {contact} {sofar}") - LOG.debug(f"{fp} {relay.contact} {sofar}") - texclude_set.add(relay.fingerprint) - continue - - # fail if the contact does not pass - if ('NotGood' in lConds and relay.contact and - ('ciissversion:' not in relay.contact)): - LOG.info(f"{fp} skipping no ciissversion in contact {sofar}") - LOG.debug(f"{fp} {relay.contact} {sofar}") - texclude_set.add(relay.fingerprint) - continue - + lConds = oargs.bad_on.split(',') + r = bCheckFp(relay, sofar, lConds, texclude_set, lNotInaRELAYS_DB) + if r is not True: continue # if it has a ciissversion in contact we count it in total iTotalContacts += 1 - - # fail if the contact does not have url: 
to pass - if relay.contact and 'url' not in relay.contact: - LOG.info(f"{fp} skipping unfetchable contact - no url {sofar}") - LOG.debug(f"{fp} {relay.contact} {sofar}") - if ('NotGood' in lConds): texclude_set.add(fp) - continue - + # only proceed if 'NotGood' not in lConds: if 'NotGood' not in lConds: continue # fail if the contact does not have url: to pass - a = aParseContact(relay.contact, relay.fingerprint) + a = aParseContact(relay.contact, fp) if not a: - LOG.warn(f"{relay.fingerprint} contact did not parse {sofar}") + LOG.warn(f"{fp} contact did not parse {sofar}") texclude_set.add(fp) continue if 'url' in a and a['url']: # fail if the contact uses a url we already know is bad if a['url'] in tBAD_URLS: - LOG.info(f"{relay.fingerprint} skipping in tBAD_URLS {a['url']} {sofar}") - LOG.debug(f"{relay.fingerprint} {a} {sofar}") - # The fp is using a contact with a URL we know is bad - iFakeContact += 1 - texclude_set.add(relay.fingerprint) + LOG.info(f"{fp} skipping in tBAD_URLS {a['url']} {sofar}") + LOG.debug(f"{fp} {a} {sofar}") + texclude_set.add(fp) continue domain = a['url'].replace('https://', '').replace('http://', '') # fail if the contact uses a domain we already know does not resolve if domain in lKNOWN_NODNS: # The fp is using a contact with a URL we know is bogus - LOG.info(f"{relay.fingerprint} skipping in lKNOWN_NODNS {a} {sofar}") - LOG.debug(f"{relay.fingerprint} {relay} {sofar}") - iFakeContact += 1 - texclude_set.add(relay.fingerprint) + LOG.info(f"{fp} skipping in lKNOWN_NODNS {a} {sofar}") + LOG.debug(f"{fp} {relay} {sofar}") + texclude_set.add(fp) continue - + # drop through + if 'dns-rsa' in relay.contact.lower(): # skip if the contact uses a dns-rsa url we dont handle - target = f"{relay.fingerprint}.{domain}" + target = f"{fp}.{domain}" LOG.info(f"skipping 'dns-rsa' {target} {sofar}") tdns_urls.add(target) continue if 'proof:uri-rsa' in relay.contact.lower(): - # list(a.values())[0] - b = aVerifyContact(a, - relay.fingerprint, - oargs.https_cafile, - timeout=oargs.timeout, - host=oargs.proxy_host, - port=oargs.proxy_port) - # need to skip urllib3.exceptions.MaxRetryError - if not b or 'fps' not in b or not b['fps'] or not b['url']: - LOG.warn(f"{relay.fingerprint} did NOT VERIFY {sofar}") - LOG.debug(f"{relay.fingerprint} {b} {sofar}") - # If it's giving contact info that doesnt check out - # it could be a bad exit with fake contact info - texclude_set.add(relay.fingerprint) - aBadContacts[relay.fingerprint] = b - continue - - if relay.fingerprint not in b['fps']: - LOG.warn(f"{relay.fingerprint} the FP IS NOT in the list of fps {sofar}") - # assume a fp is using a bogus contact - texclude_set.add(relay.fingerprint) + if domain in aDOMAIN_FPS.keys(): continue + a['fp'] = fp + if httpx: + lqueue.append(asyncio.create_task( + aVerifyContact(a=a, + fp=fp, + https_cafile=oargs.https_cafile, + timeout=oargs.timeout, + host=oargs.proxy_host, + port=oargs.proxy_port, + oargs=oargs))) + else: + b = aVerifyContact(a=a, + fp=fp, + https_cafile=oargs.https_cafile, + timeout=oargs.timeout, + host=oargs.proxy_host, + port=oargs.proxy_port, + oargs=oargs) + r = bProcessContact(b, texclude_set, aBadContacts, iFakeContact) + if r is False: + iFakeContact += 1 + + if httpx: + # for b in asyncio.as_completed(lqueue): + for b in lqueue: + # r = await b + r = b + r = bProcessContact(r, texclude_set, aBadContacts, iFakeContact) + if r is False: iFakeContact += 1 - aBadContacts[relay.fingerprint] = b - continue - - LOG.info(f"{relay.fingerprint} GOOD {b['url']} {sofar}") - # add 
our contact info to the trustdb
-            aTRUST_DB[relay.fingerprint] = b
-            for elt in b['fps']:
-                aTRUST_DB_INDEX[elt] = b
-
+            elif r is True:
+                # iGoodContact += 1
+                pass
 
     LOG.info(f"Filtered {len(twhitelist_set)} whitelisted relays")
     texclude_set = texclude_set.difference(twhitelist_set)
     # accept the dns-rsa urls for now until we test them
@@ -932,7 +1107,7 @@ def iMain(lArgs):
         with open(oargs.torrc_output, 'wt') as oFTorrc:
             oFTorrc.write(f"{sEXCLUDE_EXIT_KEY} {','.join(texclude_set)}\n")
             oFTorrc.write(f"{sINCLUDE_EXIT_KEY} {','.join(aTRUST_DB_INDEX.keys())}\n")
-            oFTorrc.write(f"{sINCLUDE_GUARD_KEY} {','.join(oGOOD_NODES[oGOOD_ROOT]['GuardNodes'])}\n")
+            oFTorrc.write(f"{sINCLUDE_GUARD_KEY} {','.join(oGOOD_NODES[sGOOD_ROOT]['EntryNodes'])}\n")
             LOG.info(f"Wrote tor configuration to {oargs.torrc_output}")
             oFTorrc.close()
@@ -943,21 +1118,14 @@ def iMain(lArgs):
         oFYaml.close()
 
     if oargs.good_contacts != '' and aTRUST_DB:
-        with open(good_contacts_tmp, 'wt') as oFYaml:
-            yaml.dump(aTRUST_DB, oFYaml)
-            oFYaml.close()
-        if os.path.exists(oargs.good_contacts):
-            bak = oargs.good_contacts +'.bak'
-            os.rename(oargs.good_contacts, bak)
-        os.rename(good_contacts_tmp, oargs.good_contacts)
-        LOG.info(f"Wrote {len(list(aTRUST_DB.keys()))} good contact details to {oargs.good_contacts}")
+        vwrite_good_contacts(oargs)
 
-    oBAD_NODES[oBAD_ROOT]['ExcludeNodes']['BadExit'] = list(texclude_set)
-    oBAD_NODES[oBAD_ROOT]['ExcludeDomains'] = lKNOWN_NODNS
+    oBAD_NODES[oBAD_ROOT][sEXCLUDE_EXIT_KEY]['BadExit'] = list(texclude_set)
+    oBAD_NODES[oBAD_ROOT][sEXCLUDE_DOMAINS] = lKNOWN_NODNS
     vwrite_badnodes(oargs, oBAD_NODES, str(len(texclude_set)))
 
     oGOOD_NODES['GoodNodes']['Relays']['ExitNodes'] = list(aTRUST_DB_INDEX.keys())
-    # GuardNodes are readonl
+    # EntryNodes are readonly
     vwrite_goodnodes(oargs, oGOOD_NODES, len(aTRUST_DB_INDEX.keys()))
 
     vwritefinale(oargs, lNotInaRELAYS_DB)
 
     retval = 0
     try:
         logging.getLogger('stem').setLevel(30)
-        try:
-            if texclude_set:
+        if texclude_set:
+            try:
                 LOG.info(f"{sEXCLUDE_EXIT_KEY} {len(texclude_set)} net bad exit relays")
-                controller.set_conf(sEXCLUDE_EXIT_KEY, texclude_set)
+                controller.set_conf(sEXCLUDE_EXIT_KEY, list(texclude_set))
+            except (Exception, stem.InvalidRequest, stem.SocketClosed,) as e: # noqa
+                LOG.error(f"Failed setting {sEXCLUDE_EXIT_KEY} bad exit relays in Tor {e}")
+                LOG.debug(repr(texclude_set))
+                retval += 1
 
-        except stem.SocketClosed as e: # noqa
-            LOG.error(f"Failed setting {sEXCLUDE_EXIT_KEY} bad exit relays in Tor")
-            retval += 1
+        if aTRUST_DB_INDEX.keys():
+            l = [elt for elt in aTRUST_DB_INDEX.keys() if len(elt) == 40]
+            try:
+                LOG.info(f"{sINCLUDE_EXIT_KEY} {len(l)} good relays")
+                controller.set_conf(sINCLUDE_EXIT_KEY, l)
+            except (Exception, stem.InvalidRequest, stem.SocketClosed) as e: # noqa
+                LOG.error(f"Failed setting {sINCLUDE_EXIT_KEY} good exit nodes in Tor {e}")
+                LOG.debug(repr(l))
+                retval += 1
 
-        try:
-            if aTRUST_DB_INDEX.keys():
-                LOG.info(f"{sINCLUDE_EXIT_KEY} {len(aTRUST_DB_INDEX.keys())} good relays")
-                controller.set_conf(sINCLUDE_EXIT_KEY, aTRUST_DB_INDEX.keys())
-        except stem.SocketClosed as e: # noqa
-            LOG.error(f"Failed setting {sINCLUDE_EXIT_KEY} good exit nodes in Tor")
-            retval += 1
-
-        try:
-            if 'GuardNodes' in oGOOD_NODES[oGOOD_ROOT].keys():
-                LOG.info(f"{sINCLUDE_GUARD_KEY} {len(oGOOD_NODES[oGOOD_ROOT]['GuardNodes'])} guard nodes")
+        if 'EntryNodes' in oGOOD_NODES[sGOOD_ROOT].keys():
+            try:
+                LOG.info(f"{sINCLUDE_GUARD_KEY} {len(oGOOD_NODES[sGOOD_ROOT]['EntryNodes'])} guard nodes")
                 # FixMe for
now override StrictNodes it may be unusable otherwise
                 controller.set_conf(sINCLUDE_GUARD_KEY,
-                                    oGOOD_NODES[oGOOD_ROOT]['GuardNodes'])
-            cur = controller.get_conf('StrictNodes')
-            if oargs.strict_nodes and int(cur) != oargs.strict_nodes:
-                LOG.info(f"OVERRIDING StrictNodes to {oargs.strict_nodes}")
-                controller.set_conf('StrictNodes', oargs.strict_nodes)
-            else:
-                LOG.info(f"StrictNodes is set to {cur}")
-        except stem.SocketClosed as e: # noqa
-            LOG.errro(f"Failed setting {sINCLUDE_EXIT_KEY} good exit nodes in Tor")
-            retval += 1
+                                    oGOOD_NODES[sGOOD_ROOT]['EntryNodes'])
+            except (Exception, stem.InvalidRequest, stem.SocketClosed,) as e: # noqa
+                LOG.error(f"Failed setting {sINCLUDE_GUARD_KEY} guard nodes in Tor {e}")
+                LOG.debug(repr(list(oGOOD_NODES[sGOOD_ROOT]['EntryNodes'])))
+                retval += 1
+
+        cur = controller.get_conf('StrictNodes')
+        if oargs.strict_nodes and int(cur) != int(oargs.strict_nodes):
+            LOG.info(f"OVERRIDING StrictNodes to {oargs.strict_nodes}")
+            controller.set_conf('StrictNodes', oargs.strict_nodes)
+        else:
+            LOG.info(f"StrictNodes is set to {cur}")
 
-    except InvalidRequest as e:
-        # Unacceptable option value: Invalid router list.
-        LOG.error(str(e))
-        retval = 1
-        return retval
     except KeyboardInterrupt:
         return 0
     except Exception as e:
         LOG.exception(str(e))
         retval = 2
-        return retval
     finally:
         # wierd we are getting stem errors during the final return
         # with a traceback that doesnt correspond to any real flow
@@ -1027,6 +1193,7 @@
 if __name__ == '__main__':
     try:
+        # i = asyncio.run(iMain(sys.argv[1:]))
         i = iMain(sys.argv[1:])
     except IncorrectPassword as e:
         LOG.error(e)
diff --git a/support_onions.py b/support_onions.py
index 6da6529..426c1fd 100644
--- a/support_onions.py
+++ b/support_onions.py
@@ -33,9 +33,12 @@ bHAVE_TORR = shutil.which('tor-resolve')
 # in the wild we'll keep a copy here so we can avoid restesting
 yKNOWN_NODNS = """
 ---
+  - a9.wtf
   - heraldonion.org
   - linkspartei.org
   - pineapple.cx
+  - privacylayer.xyz
+  - prsv.ch
   - thingtohide.nl
   - tor-exit-2.aa78i2efsewr0neeknk.xyz
   - tor-exit-3.aa78i2efsewr0neeknk.xyz
@@ -44,7 +47,6 @@ yKNOWN_NODNS = """
   - verification-for-nusenu.net
 """
 # - 0x0.is
-# - a9.wtf
 # - aklad5.com
 # - artikel5ev.de
 # - arvanode.net
diff --git a/trustor_poc.py b/trustor_poc.py
index 4527e0c..7638c71 100644
--- a/trustor_poc.py
+++ b/trustor_poc.py
@@ -7,10 +7,15 @@ import datetime
 import os
 import re
 import sys
+import ipaddress
+import warnings
+
+
+import urllib3.util
+from urllib3.util import parse_url as urlparse
 
 from stem.control import Controller
 # from stem.util.tor_tools import *
-from urllib3.util import parse_url as urlparse
 
 try:
     # unbound is not on pypi
@@ -20,11 +25,13 @@ except:
 
 global LOG
 import logging
-import warnings
-
 warnings.filterwarnings('ignore')
 LOG = logging.getLogger()
 
+logging.getLogger("urllib3").setLevel(logging.INFO)
+# import urllib3.contrib.pyopenssl
+# urllib3.contrib.pyopenssl.inject_into_urllib3()
+
 # download this python library from
 # https://github.com/erans/torcontactinfoparser
 # sys.path.append('/home/....')
@@ -211,7 +218,7 @@ def find_validation_candidates(controller,
                 result[domain] = {prooftype: [fingerprint]}
     return result
 
-def oDownloadUrlRequests(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050):
+def oDownloadUrlRequests(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050, content_type='text/plain', session=None):
     import requests
     # socks proxy used for outbound web requests (for validation of proofs)
     proxy = {'https': "socks5h://{host}:{port}"}
@@ -225,6 +232,7 @@ def
oDownloadUrlRequests(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050):
         # urllib3.connection WARNING Certificate did not match expected hostname:
         head = requests.head(uri, timeout=timeout, proxies=proxy, headers=headers)
     except Exception as e:
+        LOG.exception(f"{e}")
         raise TrustorError(f"HTTP HEAD request failed for {uri} {e}")
 
     if head.status_code >= 300:
@@ -234,15 +242,15 @@ def oDownloadUrlRequests(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050):
     if not os.path.exists(sCAfile):
         raise TrustorError(f"File not found CAfile {sCAfile}")
 
+    if session is None: session = requests.sessions.Session()
     try:
-        with requests.sessions.Session() as session:
-            oReqResp = session.request(method="get", url=uri,
-                                       proxies=proxy,
-                                       timeout=timeout,
-                                       headers=headers,
-                                       allow_redirects=False,
-                                       verify=True
-                                       )
+        oReqResp = session.request(method="get", url=uri,
+                                   proxies=proxy,
+                                   timeout=timeout,
+                                   headers=headers,
+                                   allow_redirects=False,
+                                   verify=True
+                                   )
     except:
         LOG.warn("HTTP GET request failed for %s" % uri)
         raise
@@ -257,13 +265,61 @@ def oDownloadUrlRequests(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050):
         raise TrustorError(f'Redirect detected {uri} vs %s (final)' % (oReqResp.url))
     return oReqResp
 
-logging.getLogger("urllib3").setLevel(logging.INFO)
-# import urllib3.contrib.pyopenssl
-# urllib3.contrib.pyopenssl.inject_into_urllib3()
+# There's no point in using asyncio because of duplicate urls in the tasks
+async def oDownloadUrlHttpx(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050, content_type='text/plain'):
+    import httpcore
+    import asyncio
+    import httpx
+
+    # socks proxy used for outbound web requests (for validation of proofs)
+    if host and port:
+        proxy = f"socks5://{host}:{port}"
+    else:
+        proxy = ''
+    # we use this UA string when connecting to webservers to fetch rsa-fingerprint.txt proof files
+    # https://nusenu.github.io/ContactInfo-Information-Sharing-Specification/#uri-rsa
+    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0'}
 
-import ipaddress
+    LOG.debug("fetching %s...."
% uri)
+    async with httpx.AsyncClient(proxies=proxy) as client:
+        try:
+            # https://www.python-httpx.org/advanced/
+            head = await client.head(uri, timeout=timeout, headers=headers)
+        except Exception as e:
+            LOG.exception(f"{e}")
+            raise TrustorError(f"HTTP HEAD request failed for {uri} {e}")
 
-import urllib3.util
+    if head.status_code >= 300:
+        raise TrustorError(f"HTTP Errorcode {head.status_code}")
+    if content_type and not head.headers['Content-Type'].startswith(content_type):
+        raise TrustorError(f"HTTP Content-Type != {content_type}")
+    if not os.path.exists(sCAfile):
+        raise TrustorError(f"File not found CAfile {sCAfile}")
+
+    try:
+        oReqResp = await client.get(url=uri,
+                                    timeout=timeout,
+                                    headers=headers,
+                                    max_redirects=0,
+                                    verify=sCAfile,
+                                    )
+    except (asyncio.exceptions.CancelledError,
+            httpcore.PoolTimeout,
+            Exception,) as e:
+        LOG.warn(f"HTTP GET request failed for {uri} {e}")
+        raise
+    if oReqResp.status_code != 200:
+        LOG.warn(f"HTTP Errorcode {oReqResp.status_code}")
+        raise TrustorError(f"HTTP Errorcode {oReqResp.status_code}")
+    if not oReqResp.headers['Content-Type'].startswith('text/plain'):
+        LOG.warn("HTTP Content-Type != text/plain")
+        raise TrustorError("HTTP Content-Type != text/plain")
+
+    # check for redirects (not allowed as per spec)
+    if oReqResp.url != uri:
+        LOG.error(f'Redirect detected {uri} vs %s (final)' % (oReqResp.url))
+        raise TrustorError(f'Redirect detected {uri} vs %s (final)' % (oReqResp.url))
+    return oReqResp
 
 def ballow_subdomain_matching(hostname, dnsnames):
@@ -276,7 +332,6 @@ def ballow_subdomain_matching(hostname, dnsnames):
 
 from urllib3.util.ssl_match_hostname import (CertificateError,
                                              _dnsname_match, _ipaddress_match)
-
 def my_match_hostname(cert, hostname):
     """Verify that *cert* (in decoded format as returned by
     SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125
@@ -370,13 +425,14 @@ urllib3.connection._match_hostname = _my_match_hostname
 
 from urllib3.contrib.socks import SOCKSProxyManager
-
 # from urllib3 import Retry
-def oDownloadUrlUrllib3(uri, sCAfile,
-                        timeout=30,
-                        host='127.0.0.1',
-                        port=9050,
-                        content_type=''):
+def oDownloadUrlUrllib3Socks(uri,
+                             sCAfile,
+                             timeout=30,
+                             host='127.0.0.1',
+                             port=9050,
+                             session=None,
+                             content_type='text/plain'):
     """Theres no need to use requests here and it adds too
     many layers on the SSL to be able to get at things
     """
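+    # For reference, a minimal sketch of the SOCKSProxyManager call this
+    # function builds on (standard urllib3 API; the parameter choices shown
+    # here are illustrative, not taken from this patch):
+    #
+    #   proxy = SOCKSProxyManager(f"socks5h://{host}:{port}/",
+    #                             ca_certs=sCAfile,
+    #                             cert_reqs='CERT_REQUIRED')
+    #   oReqResp = proxy.request('GET', uri, redirect=False)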