diff --git a/exclude_badExits.py b/exclude_badExits.py index f19c34c..74ec82b 100644 --- a/exclude_badExits.py +++ b/exclude_badExits.py @@ -86,6 +86,7 @@ import re import socket import time import argparse +import string from io import StringIO import ipaddr # list(ipaddress._find_address_range(ipaddress.IPv4Network('172.16.0.0/12')) @@ -112,9 +113,6 @@ try: import coloredlogs except ImportError as e: coloredlogs = False - -from trustor_poc import oDownloadUrl, idns_validate, TrustorError -from support_onions import icheck_torrc, bAreWeConnected, lIntroductionPoints, zResolveDomain, vwait_for_controller, yKNOWN_NODNS global LOG import logging @@ -122,6 +120,12 @@ import warnings warnings.filterwarnings('ignore') LOG = logging.getLogger() +import requests +from trustor_poc import oDownloadUrlUrllib3 as oDownloadUrl +from trustor_poc import idns_validate, TrustorError +from support_onions import icheck_torrc, bAreWeConnected, lIntroductionPoints, zResolveDomain, vwait_for_controller, yKNOWN_NODNS +LOG.info("imported HTTPSAdapter") + ETC_DIR = '/etc/tor/yaml' aTRUST_DB = {} aTRUST_DB_INDEX = {} @@ -145,28 +149,37 @@ def oMakeController(sSock='', port=9051): oBAD_NODES = {} oBAD_ROOT = 'BadNodes' +oBAD_NODES[oBAD_ROOT] = {} +oBAD_NODES[oBAD_ROOT]['ExcludeNodes'] = {} + +lKNOWN_NODNS = [] +lMAYBE_NODNS = [] def lYamlBadNodes(sFile, section=sEXCLUDE_EXIT_KEY, lWanted=['BadExit']): global oBAD_NODES + global lKNOWN_NODNS + global lMAYBE_NODNS + l = [] if not yaml: return l if os.path.exists(sFile): with open(sFile, 'rt') as oFd: oBAD_NODES = yaml.safe_load(oFd) - # BROKEN + # BROKEN # root = 'ExcludeNodes' # for elt in o[oBAD_ROOT][root][section].keys(): # if lWanted and elt not in lWanted: continue # # l += o[oBAD_ROOT][root][section][elt] - global lKNOWN_NODNS + l = oBAD_NODES[oBAD_ROOT]['ExcludeNodes']['BadExit'] + root = 'ExcludeDomains' if root not in oBAD_NODES[oBAD_ROOT] or not oBAD_NODES[oBAD_ROOT][root]: - oBAD_NODES[oBAD_ROOT][root] = yaml.safe_load(StringIO(yKNOWN_NODNS)) + lMAYBE_NODNS = yaml.safe_load(StringIO(yKNOWN_NODNS)) else: - lKNOWN_NODNS = oBAD_NODES[oBAD_ROOT][root] + lMAYBE_NODNS = oBAD_NODES[oBAD_ROOT][root] return l oGOOD_NODES = {} @@ -185,6 +198,22 @@ def lYamlGoodNodes(sFile='/etc/tor/torrc-goodnodes.yaml'): # yq '.Nodes.IntroductionPoints|.[]' < /etc/tor/torrc-goodnodes.yaml return l +def bdomain_is_bad(domain): + global lKNOWN_NODNS + if domain in lKNOWN_NODNS: return True + if domain in lMAYBE_NODNS: + ip = zResolveDomain(domain) + if ip == '': + LOG.debug(f"{fp} {domain} does not resolve") + lKNOWN_NODNS.append(domain) + lMAYBE_NODNS.remove(domain) + return True + + if '@' in domain: + LOG.warn(f"@ in domain {domain}") + return True + return False + tBAD_URLS = set() lATS = ['abuse', 'email'] lINTS = ['ciissversion', 'uplinkbw', 'signingkeylifetime', 'memory'] @@ -225,6 +254,7 @@ def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050) if aTRUST_DB_INDEX and fp in aTRUST_DB_INDEX.keys(): aCachedContact = aTRUST_DB_INDEX[fp] if aCachedContact['email'] == a['email']: + LOG.info(f"{fp} in aTRUST_DB_INDEX") return aCachedContact if 'url' not in keys: @@ -237,18 +267,17 @@ def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050) aFP_EMAIL[fp] = a['email'] LOG.debug(f"{fp} 'uri' but not 'url' in {keys}") # drop through - - if a['url'].startswith('http:'): - a['url'] = 'https:' +a['url'][5:] - elif not a['url'].startswith('https:'): - a['url'] = 'https:' +a['url'] + + c = a['url'].lstrip('https://').lstrip('http://').strip('/') + a['url'] = 'https://' +c # domain should be a unique key for contacts - domain = a['url'][8:].strip('/') - if lKNOWN_NODNS and domain in lKNOWN_NODNS: - LOG.warn(f"{domain} in lKNOWN_NODNS") - return {} - ip = zResolveDomain(domain, lKNOWN_NODNS) + domain = a['url'][8:] + if bdomain_is_bad(domain): + LOG.warn(f"{domain} is bad from {a['url']}") + LOG.info(f"{domain} is bad from {a}") + return a + ip = zResolveDomain(domain) if ip == '': aFP_EMAIL[fp] = a['email'] LOG.debug(f"{fp} {domain} does not resolve") @@ -268,9 +297,10 @@ def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050) return a LOG.debug(f"{len(keys)} contact fields for {fp}") + url="https://"+domain+"/.well-known/tor-relay/rsa-fingerprint.txt" try: LOG.debug(f"Downloading from {domain} for {fp}") - o = oDownloadUrl(domain, https_cafile, + o = oDownloadUrl(url, https_cafile, timeout=timeout, host=host, port=port) # requests response: text "reason", "status_code" except AttributeError as e: @@ -288,20 +318,52 @@ def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050) except (BaseException ) as e: LOG.error(f"Exception {type(e)} downloading from {domain} {e}") else: - if o.status_code >= 300: + if hasattr(o, 'status'): + status_code = o.status + else: + status_code = o.status_code + if status_code >= 300: aFP_EMAIL[fp] = a['email'] - LOG.warn(f"Error from {domain} {o.status_code} {o.reason}") + LOG.warn(f"Error from {domain} {status_code} {o.reason}") # any reason retry? tBAD_URLS.add(a['url']) return a - l = o.text.upper().strip().split('\n') - a['modified'] = time.time() + if hasattr(o, 'text'): + data = o.text + else: + data = str(o.data, 'UTF-8') + l = data.upper().strip().split('\n') + LOG.debug(f"Downloaded from {domain} {len(l)} lines {len(data)} bytes") + + a['modified'] = int(time.time()) if not l: LOG.warn(f"Downloading from {domain} empty for {fp}") else: a['fps'] = [elt for elt in l if elt and len(elt) == 40 and not elt.startswith('#')] + LOG.info(f"Downloaded from {domain} {len(a['fps'])} FPs") + return a + +def aParseContactYaml(contact, fp): + """ + See the Tor ContactInfo Information Sharing Specification v2 + https://nusenu.github.io/ContactInfo-Information-Sharing-Specification/ + """ + lelts = contact.split() + a = {} + if len(lelts) % 1 != 0: + LOG.warn(f"bad contact for {fp} odd number of components") + LOG.debug(f"{fp} {a}") + return a + key = '' + for elt in lets: + if key == '': + key = elt + continue + a[key] = elt + key = '' + LOG.debug(f"{fp} {len(a.keys())} fields") return a def aParseContact(contact, fp): @@ -419,7 +481,7 @@ def oMainArgparser(_=None): parser.add_argument('--good_nodes', type=str, default=os.path.join(ETC_DIR, 'torrc-goodnodes.yaml'), - help="Yaml file of good nodes that should not be excluded") + help="Yaml file of good info that should not be excluded") parser.add_argument('--bad_nodes', type=str, default=os.path.join(ETC_DIR, 'torrc-badnodes.yaml'), help="Yaml file of bad nodes that should also be excluded") @@ -434,7 +496,9 @@ def oMainArgparser(_=None): help="Set StrictNodes: 1 is less anonymous but more secure, although some sites may be unreachable") parser.add_argument('--wait_boot', type=int, default=120, help="Seconds to wait for Tor to booststrap") - parser.add_argument('--log_level', type=int, default=20, + parser.add_argument('--points_timeout', type=int, default=0, + help="Timeout for getting introduction points - must be long >120sec. 0 means disabled looking for IPs") + parser.add_argument('--log_level', type=int, default=10, help="10=debug 20=info 30=warn 40=error") parser.add_argument('--bad_sections', type=str, default='MyBadExit', @@ -501,7 +565,7 @@ def iMain(lArgs): # but... for k,v in aTRUST_DB.items(): if 'modified' not in v.keys(): - v['modified'] = time.time() + v['modified'] = int(time.time()) aTRUST_DB_INDEX[k] = v if 'fps' in aTRUST_DB[k].keys(): for fp in aTRUST_DB[k]['fps']: @@ -546,9 +610,11 @@ def iMain(lArgs): w = set(oGOOD_NODES[oGOOD_ROOT]['Services']) if oArgs.white_services: w.update(oArgs.white_services.split(',')) - t.update(lIntroductionPoints(controller, w)) + if oArgs.points_timeout > 0: + LOG.info(f"{len(w)} services will be checked from IntroductionPoints") + t.update(lIntroductionPoints(controller, w, itimeout=oArgs.points_timeout)) if len(t) > 0: - LOG.info(f"IntroductionPoints {len(t)} nodes") + LOG.info(f"IntroductionPoints {len(t)} relays from {len(w)} services") twhitelist_set.update(t) texclude_set = set() @@ -556,17 +622,15 @@ def iMain(lArgs): if False and oArgs.bad_sections: # BROKEN sections = oArgs.bad_sections.split(',') - lexclude_list = set(lYamlBadNodes(oArgs.bad_nodes, + texclude_set = set(lYamlBadNodes(oArgs.bad_nodes, lWanted=sections, section=sEXCLUDE_EXIT_KEY)) - else: - texclude_set = set(lYamlBadNodes(oArgs.bad_nodes)) - - LOG.info(f"lYamlBadNodes {len(texclude_set)}") + LOG.info(f"Preloaded {len(texclude_set)} bad fps") ttrust_db_index = aTRUST_DB_INDEX.keys() - iDnsContact = 0 + tdns_contacts = set() iFakeContact = 0 + iTotalContacts = 0 aBadContacts = {} lConds = oArgs.contact.split(',') @@ -580,12 +644,12 @@ def iMain(lArgs): continue relay.fingerprint = relay.fingerprint.upper() - sofar = f"G:{len(aTRUST_DB.keys())} U:{iDnsContact} F:{iFakeContact} BF:{len(texclude_set)} GF:{len(ttrust_db_index)} #{iR}" + sofar = f"G:{len(aTRUST_DB.keys())} U:{len(tdns_contacts)} F:{iFakeContact} BF:{len(texclude_set)} GF:{len(ttrust_db_index)} TC:{iTotalContacts} #{iR}" if not relay.exit_policy.is_exiting_allowed(): if sEXCLUDE_EXIT_KEY == 'ExcludeNodes': - LOG.debug(f"{relay.fingerprint} not an exit {sofar}") + pass # LOG.debug(f"{relay.fingerprint} not an exit {sofar}") else: - LOG.warn(f"{relay.fingerprint} not an exit {sofar}") + pass # LOG.warn(f"{relay.fingerprint} not an exit {sofar}") # continue # great contact had good fps and we are in them @@ -598,43 +662,51 @@ def iMain(lArgs): relay.contact = str(relay.contact, 'UTF-8') if ('Empty' in lConds and not relay.contact) or \ - ('NoEmail' in lConds and relay.contact and not '@' in relay.contact): + ('NoEmail' in lConds and relay.contact and not 'email:' in relay.contact): texclude_set.add(relay.fingerprint) continue if not relay.contact or not 'ciissversion:' in relay.contact: # should be unreached 'Empty' should always be in lConds continue + iTotalContacts += 1 + + if relay.contact and not 'url:' in relay.contact: + LOG.info(f"{relay.fingerprint} skipping bad contact - no url: {sofar}") + LOG.debug(f"{relay.fingerprint} {relay.contact} {sofar}") + texclude_set.add(relay.fingerprint) + continue c = relay.contact.lower() # first rough cut i = c.find('url:') - if i >=0: c = c[i+4:] + if i >=0: + c = c[i+4:] i = c.find(' ') if i >=0: c = c[:i] - c = c.replace('https://', '').replace('http://', '').strip('/') + c = c.lstrip('https://').lstrip('http://').strip('/') i = c.find('/') if i >=0: c = c[:i] domain = c - LOG.info(f"{relay.fingerprint} domain={domain}") - if domain and domain in lKNOWN_NODNS: - LOG.info(f"{relay.fingerprint} skipping in lKNOWN_NODNS {domain} {sofar}") + if domain and bdomain_is_bad(domain): + LOG.info(f"{relay.fingerprint} skipping bad {domain} {sofar}") + LOG.debug(f"{relay.fingerprint} {relay.contact} {sofar}") texclude_set.add(relay.fingerprint) continue if domain: - ip = zResolveDomain(domain, lKNOWN_NODNS) + ip = zResolveDomain(domain) if not ip: LOG.warn(f"{relay.fingerprint} {domain} did not resolve {sofar}") texclude_set.add(relay.fingerprint) + lKNOWN_NODNS.append(domain) iFakeContact += 1 continue if 'dns-rsa' in relay.contact.lower(): - target = f"{relay.fingerprint}.{domain}" - + target = f"{relay.fingerprint}.{domain}" LOG.info(f"skipping 'dns-rsa' {target} {sofar}") - iDnsContact += 1 + tdns_contacts.add(target) elif 'proof:uri-rsa' in relay.contact.lower(): a = aParseContact(relay.contact, relay.fingerprint) @@ -646,6 +718,7 @@ def iMain(lArgs): if a['url'] in tBAD_URLS: # The fp is using a contact with a URL we know is bad LOG.info(f"{relay.fingerprint} skipping in tBAD_URLS {a['url']} {sofar}") + LOG.debug(f"{relay.fingerprint} {a} {sofar}") iFakeContact += 1 texclude_set.add(relay.fingerprint) continue @@ -653,6 +726,7 @@ def iMain(lArgs): if domain in lKNOWN_NODNS: # The fp is using a contact with a URL we know is bogus LOG.info(f"{relay.fingerprint} skipping in lKNOWN_NODNS {a['url']} {sofar}") + LOG.debug(f"{relay.fingerprint} {a} {sofar}") iFakeContact += 1 texclude_set.add(relay.fingerprint) continue @@ -667,6 +741,7 @@ def iMain(lArgs): if not b or not 'fps' in b or not b['fps'] or not b['url']: LOG.warn(f"{relay.fingerprint} did NOT VERIFY {sofar}") + LOG.debug(f"{relay.fingerprint} {b} {sofar}") # If it's giving contact info that doesnt check out # it could be a bad exit with fake contact info texclude_set.add(relay.fingerprint) @@ -692,17 +767,19 @@ def iMain(lArgs): yaml.dump(aTRUST_DB, indent=2, stream=oFYaml) oFYaml.close() + LOG.info(f"Filtered {len(twhitelist_set)} whitelisted relays") texclude_set = texclude_set.difference(twhitelist_set) + LOG.info(f"{len(list(aTRUST_DB.keys()))} good contacts out of {iTotalContacts}") if oArgs.proof_output and aTRUST_DB: with open(proof_output_tmp, 'wt') as oFYaml: yaml.dump(aTRUST_DB, indent=2, stream=oFYaml) - LOG.info(f"Wrote {len(list(aTRUST_DB.keys()))} good contact details to {oArgs.proof_output}") oFYaml.close() if os.path.exists(oArgs.proof_output): bak = oArgs.proof_output +'.bak' os.rename(oArgs.proof_output, bak) os.rename(proof_output_tmp, oArgs.proof_output) + LOG.info(f"Wrote {len(list(aTRUST_DB.keys()))} good contact details to {oArgs.proof_output}") if oArgs.torrc_output and texclude_set: with open(oArgs.torrc_output, 'wt') as oFTorrc: @@ -725,7 +802,6 @@ def iMain(lArgs): oGOOD_NODES['GoodNodes']['Relays']['ExitNodes'] = list(aTRUST_DB_INDEX.keys()) # GuardNodes are readonl vwrite_goodnodes(oArgs, oGOOD_NODES, str(len(ttrust_db_index))) - retval = 0 try: logging.getLogger('stem').setLevel(30) @@ -762,6 +838,7 @@ def iMain(lArgs): LOG.errro(f"Failed setting {sINCLUDE_EXIT_KEY} good exit nodes in Tor") retval += 1 + LOG.info("dns-rsa domains:\n{'\n'.join(tdns_contacts)}") return retval except InvalidRequest as e: diff --git a/https_adapter.py b/https_adapter.py index 2d490b1..3cee1e8 100644 --- a/https_adapter.py +++ b/https_adapter.py @@ -13,7 +13,6 @@ import urllib3 from urllib3.util import parse_url from urllib3.util.retry import Retry from urllib3.util import Timeout as TimeoutSauce -from urllib3.util.ssl_match_hostname import match_hostname as match_hostname DEFAULT_POOLBLOCK = False DEFAULT_POOLSIZE = 10 @@ -264,78 +263,3 @@ class HTTPSAdapter(HTTPAdapter): return self.build_response(request, resp) -def ballow_subdomain_matching(hostname, dnsnames): - for elt in dnsnames: - if len(split(hostname, '.')) > len(split(elt, '.')) and \ - hostname.endswith(elt): - # parent - return True - return False - -def my_match_hostname(cert, hostname): - """Verify that *cert* (in decoded format as returned by - SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 - rules are followed, but IP addresses are not accepted for *hostname*. - - CertificateError is raised on failure. On success, the function - returns nothing. - """ - if not cert: - raise ValueError( - "empty or no certificate, match_hostname needs a " - "SSL socket or SSL context with either " - "CERT_OPTIONAL or CERT_REQUIRED" - ) - try: - # Divergence from upstream: ipaddress can't handle byte str - host_ip = ipaddress.ip_address(_to_unicode(hostname)) - except (UnicodeError, ValueError): - # ValueError: Not an IP address (common case) - # UnicodeError: Divergence from upstream: Have to deal with ipaddress not taking - # byte strings. addresses should be all ascii, so we consider it not - # an ipaddress in this case - host_ip = None - except AttributeError: - # Divergence from upstream: Make ipaddress library optional - if ipaddress is None: - host_ip = None - else: # Defensive - raise - dnsnames = [] - san = cert.get("subjectAltName", ()) - for key, value in san: - if key == "DNS": - if host_ip is None and _dnsname_match(value, hostname): - return - dnsnames.append(value) - elif key == "IP Address": - if host_ip is not None and _ipaddress_match(value, host_ip): - return - dnsnames.append(value) - if not dnsnames: - # The subject is only checked when there is no dNSName entry - # in subjectAltName - for sub in cert.get("subject", ()): - for key, value in sub: - # XXX according to RFC 2818, the most specific Common Name - # must be used. - if key == "commonName": - if _dnsname_match(value, hostname): - return - dnsnames.append(value) - if len(dnsnames) > 1: - # soften this to allow subdomain matching - if ballow_subdomain_matching(hostname, dnsnames): - return - raise CertificateError( - "hostname %r " - "doesn't match any of %s" % (hostname, ", ".join(map(repr, dnsnames))) - ) - elif len(dnsnames) == 1: - raise CertificateError("hostname %r doesn't match %r" % (hostname, dnsnames[0])) - else: - raise CertificateError( - "no appropriate commonName or subjectAltName fields were found" - ) - -urllib3.util.ssl_match_hostname.match_hostnaem = my_match_hostname diff --git a/support_onions.py b/support_onions.py index 4be62e0..93ae123 100644 --- a/support_onions.py +++ b/support_onions.py @@ -157,43 +157,48 @@ def lIntroductionPoints(controller=None, lOnions=[], itimeout=120, log_level=10) from stem.descriptor.hidden_service import HiddenServiceDescriptorV3 from stem.client.datatype import LinkByFingerprint - + from stem import Timeout + from queue import Empty + if type(lOnions) not in [set, tuple, list]: lOnions = list(lOnions) if controller is None: controller = oGetStemController(log_level=log_level) l = [] - try: - for elt in lOnions: + for elt in lOnions: LOG.info(f"controller.get_hidden_service_descriptor {elt}") - desc = controller.get_hidden_service_descriptor(elt, - await_result=True, - timeout=itimeout) -# LOG.log(40, f"{dir(desc)} get_hidden_service_descriptor") - # timeouts 20 sec - # mistakenly a HSv2 descriptor - hs_address = HiddenServiceDescriptorV3.from_str(str(desc)) # reparse as HSv3 - oInnerLayer = hs_address.decrypt(elt) -# LOG.log(40, f"{dir(oInnerLayer)}") + try: + desc = controller.get_hidden_service_descriptor(elt, + await_result=True, + timeout=itimeout) + # LOG.log(40, f"{dir(desc)} get_hidden_service_descriptor") + # timeouts 20 sec + # mistakenly a HSv2 descriptor + hs_address = HiddenServiceDescriptorV3.from_str(str(desc)) # reparse as HSv3 + oInnerLayer = hs_address.decrypt(elt) + # LOG.log(40, f"{dir(oInnerLayer)}") - # IntroductionPointV3 - n = oInnerLayer.introduction_points - if not n: - LOG.warn(f"NO introduction points for {elt}") + # IntroductionPointV3 + n = oInnerLayer.introduction_points + if not n: + LOG.warn(f"NO introduction points for {elt}") + continue + LOG.info(f"{elt} {len(n)} introduction points") + lp = [] + for introduction_point in n: + for linkspecifier in introduction_point.link_specifiers: + if isinstance(linkspecifier, LinkByFingerprint): + # LOG.log(40, f"Getting fingerprint for {linkspecifier}") + if hasattr(linkspecifier, 'fingerprint'): + assert len(linkspecifier.value) == 20 + lp += [bin_to_hex(linkspecifier.value)] + LOG.info(f"{len(lp)} introduction points for {elt}") + l += lp + except (Empty, Timeout, ) as e: + LOG.warn(f"Timed out getting introduction points for {elt}") continue - LOG.info(f"{elt} {len(n)} introduction points") - lp = [] - for introduction_point in n: - for linkspecifier in introduction_point.link_specifiers: - if isinstance(linkspecifier, LinkByFingerprint): -# LOG.log(40, f"Getting fingerprint for {linkspecifier}") - if hasattr(linkspecifier, 'fingerprint'): - assert len(linkspecifier.value) == 20 - lp += [bin_to_hex(linkspecifier.value)] - LOG.info(f"{len(lp)} introduction points for {elt}") - l += lp - except Exception as e: - LOG.exception(e) + except Exception as e: + LOG.exception(e) return l def zResolveDomain(domain): diff --git a/trustor_poc.py b/trustor_poc.py index 0cc1cb1..965e0ad 100644 --- a/trustor_poc.py +++ b/trustor_poc.py @@ -10,8 +10,8 @@ import datetime import requests from stem.control import Controller from stem.util.tor_tools import * -from urllib.parse import urlparse -from urllib3.util.retry import Retry +# from urllib.parse import urlparse +from urllib3.util import parse_url as urlparse try: # unbound is not on pypi @@ -207,8 +207,7 @@ def find_validation_candidates(controller, trusted_domains=[],validation_cache=[ result[domain] = {prooftype : [fingerprint]} return result -def oDownloadUrl(domain, sCAfile, timeout=30, host='127.0.0.1', port=9050): - uri="https://"+domain+"/.well-known/tor-relay/rsa-fingerprint.txt" +def oDownloadUrlRequests(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050): # socks proxy used for outbound web requests (for validation of proofs) proxy = {'https': 'socks5h://' +host +':' +str(port)} # we use this UA string when connecting to webservers to fetch rsa-fingerprint.txt proof files @@ -229,24 +228,12 @@ def oDownloadUrl(domain, sCAfile, timeout=30, host='127.0.0.1', port=9050): raise TrustorError(f"HTTP Content-Type != text/plain") if not os.path.exists(sCAfile): raise TrustorError(f"File not found CAfile {sCAfile}") - try: - from https_adapter import HTTPSAdapter - except Exception as e: - LOG.warn(f"Could not import HTTPSAdapter {e}") - HTTPSAdapter = None - raise SystemExit(f"{e}") - else: - LOG.info(f"Loaded HTTPSAdapter") try: with requests.sessions.Session() as session: - if HTTPSAdapter: - retries = Retry(connect=3, read=2, redirect=0) - # FixMe: upgrade to TLS1.3 - session.mount("https://", HTTPSAdapter(pool_maxsize=1, - max_retries=retries,)) oReqResp = session.request(method="get", url=uri, - proxies=proxy, timeout=timeout, + proxies=proxy, + timeout=timeout, headers=headers, allow_redirects=False, verify=True @@ -265,8 +252,182 @@ def oDownloadUrl(domain, sCAfile, timeout=30, host='127.0.0.1', port=9050): raise TrustorError(f'Redirect detected %s vs %s (final)' % (uri, oReqResp.url)) return oReqResp +logging.getLogger("urllib3").setLevel(logging.INFO) +#import urllib3.contrib.pyopenssl +#urllib3.contrib.pyopenssl.inject_into_urllib3() + +import urllib3.util +import ipaddress +def ballow_subdomain_matching(hostname, dnsnames): + for elt in dnsnames: + if len(hostname.split('.')) > len(elt.split('.')) and \ + hostname.endswith(elt): + # parent + return True + return False + +from urllib3.util.ssl_match_hostname import (CertificateError, + match_hostname, + _dnsname_match, + _ipaddress_match, + ) +def my_match_hostname(cert, hostname): + """Verify that *cert* (in decoded format as returned by + SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125 + rules are followed, but IP addresses are not accepted for *hostname*. + + CertificateError is raised on failure. On success, the function + returns nothing. + """ + if not cert: + raise ValueError( + "empty or no certificate, match_hostname needs a " + "SSL socket or SSL context with either " + "CERT_OPTIONAL or CERT_REQUIRED" + ) + try: + # Divergence from upstream: ipaddress can't handle byte str + host_ip = ipaddress.ip_address(hostname) + except (UnicodeError, ValueError): + # ValueError: Not an IP address (common case) + # UnicodeError: Divergence from upstream: Have to deal with ipaddress not taking + # byte strings. addresses should be all ascii, so we consider it not + # an ipaddress in this case + host_ip = None + except AttributeError: + # Divergence from upstream: Make ipaddress library optional + if ipaddress is None: + host_ip = None + else: # Defensive + raise + dnsnames = [] + san = cert.get("subjectAltName", ()) + for key, value in san: + if key == "DNS": + if host_ip is None and _dnsname_match(value, hostname): + return + dnsnames.append(value) + elif key == "IP Address": + if host_ip is not None and _ipaddress_match(value, host_ip): + return + dnsnames.append(value) + if not dnsnames: + # The subject is only checked when there is no dNSName entry + # in subjectAltName + for sub in cert.get("subject", ()): + for key, value in sub: + # XXX according to RFC 2818, the most specific Common Name + # must be used. + if key == "commonName": + if _dnsname_match(value, hostname): + return + dnsnames.append(value) + if len(dnsnames) > 1: + # soften this to allow subdomain matching + if ballow_subdomain_matching(hostname, dnsnames): + LOG.warn(f"Allowing {hostname} in {dnsnames}") + return + raise CertificateError( + "hostname %r " + "doesn't match any of %s" % (hostname, ", ".join(map(repr, dnsnames))) + ) + elif len(dnsnames) == 1: + raise CertificateError("hostname %r doesn't match %r" % (hostname, dnsnames[0])) + else: + raise CertificateError( + "no appropriate commonName or subjectAltName fields were found" + ) +match_hostname = my_match_hostname +from urllib3.util.ssl_ import ( + is_ipaddress, +) +def _my_match_hostname(cert, asserted_hostname): + # Our upstream implementation of ssl.match_hostname() + # only applies this normalization to IP addresses so it doesn't + # match DNS SANs so we do the same thing! + stripped_hostname = asserted_hostname.strip("u[]") + if is_ipaddress(stripped_hostname): + asserted_hostname = stripped_hostname + try: + my_match_hostname(cert, asserted_hostname) + except CertificateError as e: + log.warning( + "Certificate did not match hostname: %s. Certificate: %s", + asserted_hostname, + cert, + ) + # Add cert to exception and reraise so client code can inspect + # the cert when catching the exception, if they want to + e._peer_cert = cert + raise +from urllib3.connection import _match_hostname, HTTPSConnection +urllib3.connection._match_hostname = _my_match_hostname + +from urllib3.contrib.socks import SOCKSProxyManager +from urllib3 import Retry +def oDownloadUrlUrllib3(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050): + """Theres no need to use requests here and it + adds too many layers on the SSL to be able to get at things + """ + domain = urlparse(uri).netloc + # socks proxy used for outbound web requests (for validation of proofs) + proxy = SOCKSProxyManager(f'socks5h://{host}:{port}/', + num_pools=1, + timeout=timeout, + cert_reqs='CERT_REQUIRED', + assert_hostname=domain, + ca_certs=sCAfile) + + # we use this UA string when connecting to webservers to fetch rsa-fingerprint.txt proof files + # https://nusenu.github.io/ContactInfo-Information-Sharing-Specification/#uri-rsa + headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0'} + + LOG.debug("fetching %s...." % uri) + try: + # grr. fix urllib3 + # Errors will be wrapped in :class:`~urllib3.exceptions.MaxRetryError` unless + # retries are disabled, in which case the causing exception will be raised. + head = proxy.request('HEAD', uri, + headers=headers, + redirect=False, + retries=False) + except Exception as e: + LOG.error(f"HTTP HEAD request failed for {uri} {e}") + raise + + if head.status >= 300: + raise TrustorError(f"HTTP Errorcode {head.status}") + if not head.headers['Content-Type'].startswith('text/plain'): + raise TrustorError(f"HTTP Content-Type != text/plain") + if not os.path.exists(sCAfile): + raise TrustorError(f"File not found CAfile {sCAfile}") + + try: + oReqResp = proxy.request("GET", uri, + headers=headers, + redirect=False, + ) + except Exception as e: + LOG.warn(f"HTTP GET request failed for {uri} {e}") + raise + if oReqResp.status != 200: + raise TrustorError(f"HTTP Errorcode {head.status}") + if not oReqResp.headers['Content-Type'].startswith('text/plain'): + raise TrustorError(f"HTTP Content-Type != text/plain") + + #check for redirects (not allowed as per spec) + if oReqResp.geturl() != uri: + LOG.error(f'Redirect detected %s vs %s (final)' % (uri, oReqResp.geturl())) + raise TrustorError(f'Redirect detected %s vs %s (final)' % (uri, oReqResp.geturl())) + oReqResp.decode_content = True + + return oReqResp +import urllib3.connectionpool +urllib3.connectionpool.VerifiedHTTPSConnection = HTTPSConnection + def lDownloadUrlFps(domain, sCAfile, timeout=30, host='127.0.0.1', port=9050): - o = oDownloadUrl(domain, sCAfile, timeout=timeout, host=host, port=port) + uri="https://"+domain+"/.well-known/tor-relay/rsa-fingerprint.txt" + o = oDownloadUrlRequests(uri, sCAfile, timeout=timeout, host=host, port=port) well_known_content = o.text.upper().strip().split('\n') well_known_content = [i for i in well_known_content if i and len(i) == 40] return well_known_content @@ -365,6 +526,8 @@ def configure_tor(controller, trusted_fingerprints, exitonly=True): LOG.exception('Failed to set ExitNodes tor config to trusted relays') sys.exit(20) + + if __name__ == '__main__': trust_config = 'trust_config' assert os.path.exists(trust_config)