//===============================================================================
// RFC2047 (Encoded Word) Decoder
// https://github.com/grumpydev/RFC2047-Encoded-Word-Encoder-Decoder/blob/master/EncodedWord/RFC2047.cs
// http://tools.ietf.org/html/rfc2047
//===============================================================================
// Copyright © Steven Robbins. All rights reserved.
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY
// OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT
// LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
// FITNESS FOR A PARTICULAR PURPOSE.
//===============================================================================
namespace EncodedWord
{
    using System;
    using System.Collections.Generic;
    using System.Globalization;
    using System.Linq;
    using System.Text;
    using System.Text.RegularExpressions;

    /// <summary>
    /// Provides support for encoding and decoding RFC2047 (Encoded Word) text.
    /// </summary>
    public static class RFC2047
    {
        /// <summary>
        /// Regex for parsing encoded word sections.
        /// From http://tools.ietf.org/html/rfc2047#section-3
        /// encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
        /// </summary>
        private static readonly Regex EncodedWordFormatRegEx = new Regex(
            @"=\?(?<charset>.*?)\?(?<encoding>[qQbB])\?(?<encodedtext>.*?)\?=",
            RegexOptions.Singleline | RegexOptions.Compiled);

        /// <summary>
        /// Regex for removing the linear white space (including CRLF SPACE folds)
        /// that separates two adjacent encoded words; RFC 2047 section 6.2 says
        /// such white space is ignored when displaying.
        /// </summary>
        private static readonly Regex EncodedWordSeparatorRegEx = new Regex(
            @"\?=[ \t\r\n]+=\?",
            RegexOptions.Compiled);

        /// <summary>
        /// Replacement string for removing separators between encoded words.
        /// </summary>
        private const string SeparatorReplacement = @"?==?";

        /// <summary>
        /// Separator written between encoded words when the output is folded.
        /// </summary>
        private const string FoldingSeparator = "\r\n ";

        /// <summary>
        /// The maximum length allowed for a single encoded word (wrapper included).
        /// </summary>
        private const int MaxLineLength = 75;

        /// <summary>
        /// Character set used by <see cref="Decode"/> when an encoded word names
        /// a character set that cannot be resolved.
        /// </summary>
        private const string FallbackCharacterSet = @"iso-8859-1";

        /// <summary>
        /// Format for an encoded string: charset, encoding character, encoded text.
        /// </summary>
        private const string EncodedStringFormat = @"=?{0}?{1}?{2}?=";

        /// <summary>
        /// Characters that are always hex-escaped by the Q encoder: the RFC 2047
        /// "especials" plus '_' (which would otherwise decode as a space) and tab.
        /// </summary>
        private static readonly char[] SpecialCharacters =
        {
            '(', ')', '<', '>', '@', ',', ';', ':', '\\', '"', '/', '[', ']', '?', '.', '=', '_', '\t'
        };

        /// <summary>
        /// Represents a content encoding type defined in RFC2047.
        /// </summary>
        public enum ContentEncoding
        {
            /// <summary>
            /// Unknown / invalid encoding.
            /// </summary>
            Unknown,

            /// <summary>
            /// "Q Encoding" (reduced character set) encoding.
            /// http://tools.ietf.org/html/rfc2047#section-4.2
            /// </summary>
            QEncoding,

            /// <summary>
            /// Base 64 encoding.
            /// http://tools.ietf.org/html/rfc2047#section-4.1
            /// </summary>
            Base64
        }

        /// <summary>
        /// Encode a string into RFC2047 encoded words.
        /// </summary>
        /// <param name="plainString">Plain string to encode.</param>
        /// <param name="contentEncoding">Content encoding to use.</param>
        /// <param name="characterSet">Character set used by plainString.</param>
        /// <returns>
        /// Encoded string; <see cref="String.Empty"/> when <paramref name="plainString"/>
        /// is null or empty. Content that does not fit in one encoded word is
        /// emitted as several encoded words separated by CRLF SPACE.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// The content encoding is not Q or Base64, the character set cannot be
        /// resolved or is too long to leave room for content, or Q encoding is
        /// requested for text that needs more than one byte per character.
        /// </exception>
        public static string Encode(string plainString, ContentEncoding contentEncoding = ContentEncoding.QEncoding, string characterSet = "iso-8859-1")
        {
            if (String.IsNullOrEmpty(plainString))
            {
                return String.Empty;
            }

            // Reject Unknown and any undefined enum value; neither has a wire representation.
            if (contentEncoding != ContentEncoding.QEncoding && contentEncoding != ContentEncoding.Base64)
            {
                throw new ArgumentException("contentEncoding cannot be unknown for encoding.", "contentEncoding");
            }

            Encoding textEncoding;
            if (!TryGetEncoding(characterSet, out textEncoding))
            {
                throw new ArgumentException("characterSet is not supported", "characterSet");
            }

            var encoder = GetContentEncoder(contentEncoding);
            var encodedContent = encoder(plainString, textEncoding);

            return BuildEncodedString(characterSet, contentEncoding, encodedContent);
        }

        /// <summary>
        /// Decode a string containing RFC2047 encoded sections.
        /// </summary>
        /// <param name="encodedString">String containing encoded sections.</param>
        /// <returns>
        /// Decoded string; text outside encoded words is returned unchanged.
        /// <see cref="String.Empty"/> when the input is null or empty.
        /// </returns>
        /// <exception cref="FormatException">
        /// A Base64 encoded word contains text that is not valid Base64.
        /// </exception>
        public static string Decode(string encodedString)
        {
            if (String.IsNullOrEmpty(encodedString))
            {
                return String.Empty;
            }

            // Drop the white space between adjacent encoded words so they decode back to back.
            var unfolded = EncodedWordSeparatorRegEx.Replace(encodedString, SeparatorReplacement);

            return EncodedWordFormatRegEx.Replace(unfolded, DecodeEncodedWord);
        }

        /// <summary>
        /// Splits a string into consecutive chunks of at most <paramref name="chunkSize"/> characters.
        /// </summary>
        /// <param name="inputString">Input string.</param>
        /// <param name="chunkSize">Size of each chunk; must be greater than zero.</param>
        /// <returns>Lazily evaluated sequence of chunks, in order.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="inputString"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="chunkSize"/> is zero or negative.</exception>
        public static IEnumerable<string> SplitStringByLength(this string inputString, int chunkSize)
        {
            // Validate eagerly; inside an iterator these checks would not run until enumeration.
            if (inputString == null)
            {
                throw new ArgumentNullException("inputString");
            }

            if (chunkSize <= 0)
            {
                throw new ArgumentOutOfRangeException("chunkSize", chunkSize, "chunkSize must be greater than zero.");
            }

            return SplitStringByLengthIterator(inputString, chunkSize);
        }

        /// <summary>
        /// Iterator behind <see cref="SplitStringByLength"/>; arguments are already validated.
        /// </summary>
        private static IEnumerable<string> SplitStringByLengthIterator(string inputString, int chunkSize)
        {
            for (int index = 0; index < inputString.Length; index += chunkSize)
            {
                yield return inputString.Substring(index, Math.Min(chunkSize, inputString.Length - index));
            }
        }

        /// <summary>
        /// Decodes a single encoded-word match into plain text.
        /// </summary>
        /// <param name="match">Match produced by the encoded word regex.</param>
        /// <returns>Decoded text for the encoded word.</returns>
        private static string DecodeEncodedWord(Match match)
        {
            var contentEncoding = GetContentEncodingType(match.Groups["encoding"].Value);
            if (contentEncoding == ContentEncoding.Unknown)
            {
                // Regex should never match, but return anyway
                return String.Empty;
            }

            Encoding textEncoding;
            if (!TryGetEncoding(match.Groups["charset"].Value, out textEncoding))
            {
                // Fall back to iso-8859-1 if invalid/unsupported character set found
                textEncoding = Encoding.GetEncoding(FallbackCharacterSet);
            }

            var contentDecoder = GetContentDecoder(contentEncoding);

            return contentDecoder(match.Groups["encodedtext"].Value, textEncoding);
        }

        /// <summary>
        /// Resolves a character set name (including aliases) to an <see cref="Encoding"/>.
        /// </summary>
        /// <param name="characterSet">Character set name.</param>
        /// <param name="encoding">Resolved encoding, or null when the name is not supported.</param>
        /// <returns>True when the character set could be resolved.</returns>
        private static bool TryGetEncoding(string characterSet, out Encoding encoding)
        {
            try
            {
                encoding = Encoding.GetEncoding(characterSet);
                return true;
            }
            catch (ArgumentException)
            {
                // Unknown or null name (ArgumentNullException derives from ArgumentException).
                encoding = null;
                return false;
            }
            catch (NotSupportedException)
            {
                encoding = null;
                return false;
            }
        }

        /// <summary>
        /// Gets the content encoding type from the encoding character.
        /// </summary>
        /// <param name="contentEncodingCharacter">Content encoding character ("Q" or "B", any case).</param>
        /// <returns>ContentEncoding type.</returns>
        private static ContentEncoding GetContentEncodingType(string contentEncodingCharacter)
        {
            switch (contentEncodingCharacter)
            {
                case "Q":
                case "q":
                    return ContentEncoding.QEncoding;
                case "B":
                case "b":
                    return ContentEncoding.Base64;
                default:
                    return ContentEncoding.Unknown;
            }
        }

        /// <summary>
        /// Gets the content decoder delegate for the given content encoding type.
        /// </summary>
        /// <param name="contentEncoding">Content encoding type.</param>
        /// <returns>Decoding delegate.</returns>
        private static Func<string, Encoding, string> GetContentDecoder(ContentEncoding contentEncoding)
        {
            switch (contentEncoding)
            {
                case ContentEncoding.Base64:
                    return DecodeBase64;
                case ContentEncoding.QEncoding:
                    return DecodeQEncoding;
                default:
                    // Will never get here, but return a "null" delegate anyway
                    return (s, e) => String.Empty;
            }
        }

        /// <summary>
        /// Gets the content encoder delegate for the given content encoding type.
        /// </summary>
        /// <param name="contentEncoding">Content encoding type.</param>
        /// <returns>Encoding delegate.</returns>
        private static Func<string, Encoding, string> GetContentEncoder(ContentEncoding contentEncoding)
        {
            switch (contentEncoding)
            {
                case ContentEncoding.Base64:
                    return EncodeBase64;
                case ContentEncoding.QEncoding:
                    return EncodeQEncoding;
                default:
                    // Will never get here, but return a "null" delegate anyway
                    return (s, e) => String.Empty;
            }
        }

        /// <summary>
        /// Decodes a base64 encoded string.
        /// </summary>
        /// <param name="encodedText">Encoded text.</param>
        /// <param name="textEncoder">Encoding instance for the code page required.</param>
        /// <returns>Decoded string.</returns>
        private static string DecodeBase64(string encodedText, Encoding textEncoder)
        {
            var encodedBytes = Convert.FromBase64String(encodedText);

            return textEncoder.GetString(encodedBytes);
        }

        /// <summary>
        /// Encodes a string as base64.
        /// </summary>
        /// <param name="plainText">Plain text.</param>
        /// <param name="textEncoder">Encoding instance for the code page required.</param>
        /// <returns>Encoded string.</returns>
        private static string EncodeBase64(string plainText, Encoding textEncoder)
        {
            var plainTextBytes = textEncoder.GetBytes(plainText);

            return Convert.ToBase64String(plainTextBytes);
        }

        /// <summary>
        /// Decodes a "Q encoded" string.
        /// </summary>
        /// <param name="encodedText">Encoded text.</param>
        /// <param name="textEncoder">Encoding instance for the code page required.</param>
        /// <returns>Decoded string.</returns>
        private static string DecodeQEncoding(string encodedText, Encoding textEncoder)
        {
            var decoded = new StringBuilder(encodedText.Length);

            // Bytes are buffered and decoded together so that multi-byte
            // sequences (e.g. UTF-8 "=C3=A9") are not decoded one byte at a time.
            var pendingBytes = new List<byte>(encodedText.Length);

            for (int i = 0; i < encodedText.Length; i++)
            {
                char current = encodedText[i];

                if (current == '_')
                {
                    // '_' always represents byte 0x20 (RFC 2047 section 4.2)
                    pendingBytes.Add(0x20);
                }
                else if (current == '=' && i + 2 < encodedText.Length
                    && HexValue(encodedText[i + 1]) >= 0 && HexValue(encodedText[i + 2]) >= 0)
                {
                    pendingBytes.Add((byte)((HexValue(encodedText[i + 1]) << 4) | HexValue(encodedText[i + 2])));
                    i += 2;
                }
                else if (current <= 127)
                {
                    // Literal ASCII (including a '=' that does not start a valid escape)
                    pendingBytes.Add((byte)current);
                }
                else
                {
                    // Non-ASCII is not valid encoded-text; pass it through unchanged.
                    FlushBytes(pendingBytes, textEncoder, decoded);
                    decoded.Append(current);
                }
            }

            FlushBytes(pendingBytes, textEncoder, decoded);

            return decoded.ToString();
        }

        /// <summary>
        /// Decodes any buffered bytes into <paramref name="target"/> and empties the buffer.
        /// </summary>
        private static void FlushBytes(List<byte> pendingBytes, Encoding textEncoder, StringBuilder target)
        {
            if (pendingBytes.Count == 0)
            {
                return;
            }

            target.Append(textEncoder.GetString(pendingBytes.ToArray()));
            pendingBytes.Clear();
        }

        /// <summary>
        /// Gets the numeric value of a hexadecimal digit.
        /// </summary>
        /// <param name="digit">Character to interpret.</param>
        /// <returns>Value 0-15, or -1 when the character is not a hex digit.</returns>
        private static int HexValue(char digit)
        {
            if (digit >= '0' && digit <= '9')
            {
                return digit - '0';
            }

            if (digit >= 'a' && digit <= 'f')
            {
                return digit - 'a' + 10;
            }

            if (digit >= 'A' && digit <= 'F')
            {
                return digit - 'A' + 10;
            }

            return -1;
        }

        /// <summary>
        /// Encodes a string using "Q encoding".
        /// </summary>
        /// <param name="plainText">Plain text.</param>
        /// <param name="textEncoder">Encoding instance for the code page required.</param>
        /// <returns>Encoded string.</returns>
        /// <exception cref="ArgumentException">
        /// The text needs more than one byte per character in the given encoding.
        /// </exception>
        private static string EncodeQEncoding(string plainText, Encoding textEncoder)
        {
            var plainBytes = textEncoder.GetBytes(plainText);
            if (plainBytes.Length != plainText.Length)
            {
                throw new ArgumentException("Q encoding only supports single byte encodings", "textEncoder");
            }

            var sb = new StringBuilder(plainBytes.Length);

            foreach (byte value in plainBytes)
            {
                if (value == 0x20)
                {
                    sb.Append('_');
                }
                else if (IsQEncodingLiteral(value))
                {
                    sb.Append((char)value);
                }
                else
                {
                    // Always two upper-case hex digits; the decoder requires exactly two.
                    sb.Append('=');
                    sb.Append(value.ToString("X2", CultureInfo.InvariantCulture));
                }
            }

            return sb.ToString();
        }

        /// <summary>
        /// Determines whether a byte may appear unescaped in Q encoded text:
        /// printable ASCII (0x21-0x7E) that is not one of the special characters.
        /// </summary>
        private static bool IsQEncodingLiteral(byte value)
        {
            return value > 0x20
                && value < 0x7F
                && Array.IndexOf(SpecialCharacters, (char)value) < 0;
        }

        /// <summary>
        /// Builds the full encoded string representation, splitting the content
        /// over several encoded words when it does not fit in one.
        /// </summary>
        /// <param name="characterSet">Characterset to use.</param>
        /// <param name="contentEncoding">Content encoding to use.</param>
        /// <param name="encodedContent">Content, encoded to the above parameters.</param>
        /// <returns>Valid RFC2047 string.</returns>
        /// <remarks>
        /// Splitting is done on the already encoded text. Base64 chunks are kept to
        /// a multiple of four characters and Q chunks never end inside an "=XX"
        /// escape, so every chunk decodes on its own. Characters that occupy more
        /// than one byte can still straddle two Base64 chunks.
        /// </remarks>
        /// <exception cref="ArgumentException">
        /// The character set name leaves no room for content within the length limit.
        /// </exception>
        private static string BuildEncodedString(string characterSet, ContentEncoding contentEncoding, string encodedContent)
        {
            var encodingCharacter = String.Empty;

            switch (contentEncoding)
            {
                case ContentEncoding.Base64:
                    encodingCharacter = "B";
                    break;
                case ContentEncoding.QEncoding:
                    encodingCharacter = "Q";
                    break;
            }

            var wrapperLength = String.Format(CultureInfo.InvariantCulture, EncodedStringFormat, characterSet, encodingCharacter, String.Empty).Length;
            var chunkLength = MaxLineLength - wrapperLength;

            if (encodedContent.Length <= chunkLength)
            {
                return String.Format(CultureInfo.InvariantCulture, EncodedStringFormat, characterSet, encodingCharacter, encodedContent);
            }

            IEnumerable<string> chunks;
            if (contentEncoding == ContentEncoding.Base64)
            {
                // Base64 text only decodes in groups of four characters.
                var base64ChunkLength = chunkLength - (chunkLength % 4);
                if (base64ChunkLength < 4)
                {
                    throw new ArgumentException("characterSet is too long to leave room for encoded content", "characterSet");
                }

                chunks = SplitStringByLength(encodedContent, base64ChunkLength);
            }
            else
            {
                // Need room for at least one complete "=XX" escape per chunk.
                if (chunkLength < 3)
                {
                    throw new ArgumentException("characterSet is too long to leave room for encoded content", "characterSet");
                }

                chunks = SplitQEncodedText(encodedContent, chunkLength);
            }

            var sb = new StringBuilder();

            foreach (var chunk in chunks)
            {
                // Separator goes between encoded words only, never after the last one.
                if (sb.Length > 0)
                {
                    sb.Append(FoldingSeparator);
                }

                sb.AppendFormat(CultureInfo.InvariantCulture, EncodedStringFormat, characterSet, encodingCharacter, chunk);
            }

            return sb.ToString();
        }

        /// <summary>
        /// Splits Q encoded text into chunks of at most <paramref name="chunkSize"/>
        /// characters without cutting through an "=XX" escape sequence.
        /// </summary>
        /// <param name="encodedText">Q encoded text.</param>
        /// <param name="chunkSize">Maximum chunk size; callers pass at least 3.</param>
        /// <returns>Lazily evaluated sequence of chunks, in order.</returns>
        private static IEnumerable<string> SplitQEncodedText(string encodedText, int chunkSize)
        {
            int index = 0;

            while (index < encodedText.Length)
            {
                int length = Math.Min(chunkSize, encodedText.Length - index);

                // Only a chunk that is followed by more text can cut an escape in half.
                if (index + length < encodedText.Length)
                {
                    if (encodedText[index + length - 1] == '=')
                    {
                        length -= 1;
                    }
                    else if (length >= 2 && encodedText[index + length - 2] == '=')
                    {
                        length -= 2;
                    }
                }

                yield return encodedText.Substring(index, length);
                index += length;
            }
        }
    }
}