using System; using System.Collections.Generic; using System.Diagnostics; using System.IO; using System.Linq; using System.Reflection; namespace Bit.Core.Utilities { // ref: https://github.com/danesparza/domainname-parser public class DomainName { private readonly string _subDomain = string.Empty; private readonly string _domain = string.Empty; private readonly string _tld = string.Empty; private readonly TLDRule _tldRule = null; public string SubDomain => _subDomain; public string Domain => _domain; public string Sld => _domain; public string Tld => _tld; public TLDRule Rule => _tldRule; public string BaseDomain => $"{_domain}.{_tld}"; public DomainName(string tld, string sld, string subDdomain, TLDRule tldRule) { _tld = tld; _domain = sld; _subDomain = subDdomain; _tldRule = tldRule; } public static bool TryParse(string domainString, out DomainName result) { bool retval = false; // Our temporary domain parts: var tld = string.Empty; var sld = string.Empty; var subdomain = string.Empty; TLDRule _tldrule = null; result = null; try { // Try parsing the domain name ... this might throw formatting exceptions ParseDomainName(domainString, out tld, out sld, out subdomain, out _tldrule); // Construct a new DomainName object and return it result = new DomainName(tld, sld, subdomain, _tldrule); // Return 'true' retval = true; } catch { // Looks like something bad happened -- return 'false' retval = false; } return retval; } public static bool TryParseBaseDomain(string domainString, out string result) { var retVal = TryParse(domainString, out DomainName domain); result = domain?.BaseDomain; return retVal; } private static void ParseDomainName(string domainString, out string TLD, out string SLD, out string SubDomain, out TLDRule MatchingRule) { // Make sure domain is all lowercase domainString = domainString.ToLower(); TLD = string.Empty; SLD = string.Empty; SubDomain = string.Empty; MatchingRule = null; // If the fqdn is empty, we have a problem already if (domainString.Trim() == string.Empty) { throw new ArgumentException("The domain cannot be blank"); } // Next, find the matching rule: MatchingRule = FindMatchingTLDRule(domainString); // At this point, no rules match, we have a problem if (MatchingRule == null) { throw new FormatException("The domain does not have a recognized TLD"); } // Based on the tld rule found, get the domain (and possibly the subdomain) var tempSudomainAndDomain = string.Empty; var tldIndex = 0; // First, determine what type of rule we have, and set the TLD accordingly switch (MatchingRule.Type) { case TLDRule.RuleType.Normal: tldIndex = domainString.LastIndexOf("." + MatchingRule.Name); tempSudomainAndDomain = domainString.Substring(0, tldIndex); TLD = domainString.Substring(tldIndex + 1); break; case TLDRule.RuleType.Wildcard: // This finds the last portion of the TLD... tldIndex = domainString.LastIndexOf("." + MatchingRule.Name); tempSudomainAndDomain = domainString.Substring(0, tldIndex); // But we need to find the wildcard portion of it: tldIndex = tempSudomainAndDomain.LastIndexOf("."); tempSudomainAndDomain = domainString.Substring(0, tldIndex); TLD = domainString.Substring(tldIndex + 1); break; case TLDRule.RuleType.Exception: tldIndex = domainString.LastIndexOf("."); tempSudomainAndDomain = domainString.Substring(0, tldIndex); TLD = domainString.Substring(tldIndex + 1); break; } // See if we have a subdomain: List lstRemainingParts = new List(tempSudomainAndDomain.Split('.')); // If we have 0 parts left, there is just a tld and no domain or subdomain // If we have 1 part, it's the domain, and there is no subdomain // If we have 2+ parts, the last part is the domain, the other parts (combined) are the subdomain if (lstRemainingParts.Count > 0) { // Set the domain: SLD = lstRemainingParts[lstRemainingParts.Count - 1]; // Set the subdomain, if there is one to set: if (lstRemainingParts.Count > 1) { // We strip off the trailing period, too SubDomain = tempSudomainAndDomain.Substring(0, tempSudomainAndDomain.Length - SLD.Length - 1); } } } private static TLDRule FindMatchingTLDRule(string domainString) { // Split our domain into parts (based on the '.') // ...Put these parts in a list // ...Make sure these parts are in reverse order (we'll be checking rules from the // right -most pat of the domain) var lstDomainParts = domainString.Split('.').ToList(); lstDomainParts.Reverse(); // Begin building our partial domain to check rules with: var checkAgainst = string.Empty; // Our 'matches' collection: var ruleMatches = new List(); foreach (string domainPart in lstDomainParts) { // Add on our next domain part: checkAgainst = string.Format("{0}.{1}", domainPart, checkAgainst); // If we end in a period, strip it off: if (checkAgainst.EndsWith(".")) { checkAgainst = checkAgainst.Substring(0, checkAgainst.Length - 1); } var rules = Enum.GetValues(typeof(TLDRule.RuleType)).Cast(); foreach (var rule in rules) { // Try to match rule: if (TLDRulesCache.Instance.TLDRuleLists[rule].TryGetValue(checkAgainst, out TLDRule result)) { ruleMatches.Add(result); } //Debug.WriteLine(string.Format("Domain part {0} matched {1} {2} rules", // checkAgainst, result == null ? 0 : 1, rule)); } } // Sort our matches list (longest rule wins, according to : var results = from match in ruleMatches orderby match.Name.Length descending select match; // Take the top result (our primary match): TLDRule primaryMatch = results.Take(1).SingleOrDefault(); if (primaryMatch != null) { //Debug.WriteLine(string.Format("Looks like our match is: {0}, which is a(n) {1} rule.", // primaryMatch.Name, primaryMatch.Type)); } else { //Debug.WriteLine(string.Format("No rules matched domain: {0}", domainString)); } return primaryMatch; } public class TLDRule : IComparable { public string Name { get; private set; } public RuleType Type { get; private set; } public TLDRule(string ruleInfo) { // Parse the rule and set properties accordingly: if (ruleInfo.StartsWith("*")) { Type = RuleType.Wildcard; Name = ruleInfo.Substring(2); } else if (ruleInfo.StartsWith("!")) { Type = RuleType.Exception; Name = ruleInfo.Substring(1); } else { Type = RuleType.Normal; Name = ruleInfo; } } public int CompareTo(TLDRule other) { if (other == null) { return -1; } return Name.CompareTo(other.Name); } public enum RuleType { Normal, Wildcard, Exception } } private class TLDRulesCache { private static volatile TLDRulesCache _uniqueInstance; private static object _syncObj = new object(); private static object _syncList = new object(); private IDictionary> _lstTLDRules; private TLDRulesCache() { // Initialize our internal list: _lstTLDRules = GetTLDRules(); } public static TLDRulesCache Instance { get { if (_uniqueInstance == null) { lock (_syncObj) { if (_uniqueInstance == null) { _uniqueInstance = new TLDRulesCache(); } } } return (_uniqueInstance); } } public IDictionary> TLDRuleLists { get { return _lstTLDRules; } set { _lstTLDRules = value; } } public static void Reset() { lock (_syncObj) { _uniqueInstance = null; } } private IDictionary> GetTLDRules() { var results = new Dictionary>(); var rules = Enum.GetValues(typeof(TLDRule.RuleType)).Cast(); foreach (var rule in rules) { results[rule] = new Dictionary(StringComparer.CurrentCultureIgnoreCase); } var ruleStrings = ReadRulesData(); // Strip out any lines that are: // a.) A comment // b.) Blank var filteredRuleString = ruleStrings.Where(ruleString => !ruleString.StartsWith("//") && ruleString.Trim().Length != 0); foreach (var ruleString in filteredRuleString) { var result = new TLDRule(ruleString); results[result.Type][result.Name] = result; } // Return our results: if (CoreHelpers.InDebugMode()) { Debug.WriteLine(string.Format("Loaded {0} rules into cache.", results.Values.Sum(r => r.Values.Count))); } return results; } private IEnumerable ReadRulesData() { var assembly = typeof(TLDRulesCache).GetTypeInfo().Assembly; var stream = assembly.GetManifestResourceStream("Bit.Core.Resources.public_suffix_list.dat"); string line; using (var reader = new StreamReader(stream)) { while ((line = reader.ReadLine()) != null) { yield return line; } } } } } }