網友提問,希望了解中文編碼解析工具關於信件標題(如=?x-gbk?q?=B5=C4=B7=AD=D2=EB?=)的解碼邏輯,特整理為程式範例並加註說明如下:

using System;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
 
namespace ConsoleApplication1
{
    /// <summary>
    /// Demonstrates decoding of RFC 2047 "encoded-word" mail headers such as
    /// <c>=?big5?Q?=B6=C2?=</c> or <c>=?utf-8?B?5Lit5paH?=</c>.
    /// </summary>
    class Program
    {
        // One encoded-word: =?charset?encoding?text?=
        // None of the three fields may contain '?' or whitespace, so the closing
        // "?=" is unambiguous and no look-ahead character has to be consumed.
        static readonly Regex encodedWordPattern = new Regex(
            @"=\?(?<enc>[^?\s]+)\?(?<type>[^?\s]+)\?(?<body>[^?\s]*)\?=");

        /// <summary>
        /// Decodes one Base64 and one Quoted-Printable Big5 sample subject, prints
        /// both, then waits for input before exiting.
        /// </summary>
        static void Main(string[] args)
        {
            // "B" (Base64) sample.
            string s = "=?big5?B?UmU6IKZVpuyl66bxLKdAt36o06RGLi4u?=";
            Console.WriteLine(decodeMailSubject(s));
            // "Q" (Quoted-Printable style) sample.
            s = "=?big5?Q?=B6=C2=B7t=B0=F5=A6=E6=BA=FC?=";
            Console.WriteLine(decodeMailSubject(s));
            Console.Read();
        }

        /// <summary>
        /// Decodes every RFC 2047 encoded-word found in a mail header value.
        /// </summary>
        /// <param name="raw">
        /// The raw header text; may mix plain text and encoded-words.
        /// <c>null</c> is treated as an empty string.
        /// </param>
        /// <returns>
        /// The text with each encoded-word replaced by its decoded form. Plain
        /// text is copied unchanged, except that whitespace separating two
        /// adjacent encoded-words is dropped (RFC 2047 section 6.2). Tokens with
        /// an unknown transfer encoding (neither B nor Q) or an invalid Base64
        /// payload are left as-is. If a charset cannot be resolved, the whole
        /// result is the message "不支援編碼格式[charset]!".
        /// </returns>
        internal static string decodeMailSubject(string raw)
        {
            if (string.IsNullOrEmpty(raw))
                return string.Empty;

            StringBuilder sb = new StringBuilder(raw.Length);
            int pos = 0;                 // first character of raw not yet copied
            bool prevDecoded = false;    // was the previous token successfully decoded?

            foreach (Match m in encodedWordPattern.Matches(raw))
            {
                string gap = raw.Substring(pos, m.Index - pos);
                pos = m.Index + m.Length;

                // Invariant lower-casing: culture-sensitive ToLower() turns "BIG5"
                // into "bıg5" under Turkish culture and breaks the lookup.
                string enc = m.Groups["enc"].Value.ToLowerInvariant();
                string type = m.Groups["type"].Value.ToLowerInvariant();
                string body = m.Groups["body"].Value;

                // RFC 2231 allows "charset*language"; only the charset matters here.
                int star = enc.IndexOf('*');
                if (star >= 0)
                    enc = enc.Substring(0, star);

                Encoding encoder = resolveEncoding(enc);
                if (encoder == null)
                {
                    return "不支援編碼格式[" + m.Groups["enc"].Value + "]!";
                }

                string decoded = null;
                if (type == "q")
                {
                    decoded = decodeQ(body, encoder);
                }
                else if (type == "b")
                {
                    try
                    {
                        decoded = encoder.GetString(Convert.FromBase64String(body));
                    }
                    catch (FormatException)
                    {
                        // Not valid Base64: keep the token verbatim rather than
                        // failing the whole header.
                        decoded = null;
                    }
                }

                if (decoded == null)
                {
                    // Treated as ordinary text.
                    sb.Append(gap).Append(m.Value);
                    prevDecoded = false;
                    continue;
                }

                // Whitespace between two encoded-words is only a separator and is
                // not part of the displayed text; any other gap is kept.
                if (!(prevDecoded && gap.Trim().Length == 0))
                    sb.Append(gap);
                sb.Append(decoded);
                prevDecoded = true;
            }

            sb.Append(raw, pos, raw.Length - pos);
            return sb.ToString();
        }

        /// <summary>
        /// Maps a lower-cased charset name to an <see cref="Encoding"/>, or returns
        /// <c>null</c> when the name is not recognised.
        /// </summary>
        static Encoding resolveEncoding(string name)
        {
            if (name == "gbk" || name == "x-gbk")
                return Encoding.GetEncoding(936);
            if (name == "big5")
                return Encoding.GetEncoding(950);
            if (name == "utf-8")
                return Encoding.UTF8;
            try
            {
                // Any other charset name the runtime knows (iso-8859-1, gb2312, ...).
                return Encoding.GetEncoding(name);
            }
            catch (ArgumentException)
            {
                return null;
            }
        }

        /// <summary>
        /// Decodes the text part of a "Q" encoded-word: '_' is a space, "=XX" is
        /// the byte with hex value XX, and every other character stands for itself.
        /// </summary>
        static string decodeQ(string body, Encoding encoder)
        {
            // Every input character yields at most one output byte.
            byte[] buff = new byte[body.Length];
            int count = 0;
            for (int i = 0; i < body.Length; i++)
            {
                char c = body[i];
                if (c == '_')
                {
                    buff[count++] = 0x20;
                }
                else if (c == '=' && i + 2 < body.Length
                    && hexValue(body[i + 1]) >= 0 && hexValue(body[i + 2]) >= 0)
                {
                    buff[count++] = (byte)(hexValue(body[i + 1]) * 16 + hexValue(body[i + 2]));
                    i += 2;
                }
                else
                {
                    // Literal character (including a stray '='). Encoded-words are
                    // ASCII-only, so anything else is replaced by '?'.
                    buff[count++] = c < 128 ? (byte)c : (byte)'?';
                }
            }
            return encoder.GetString(buff, 0, count);
        }

        /// <summary>Returns the value of a hex digit, or -1 if <paramref name="c"/> is not one.</summary>
        static int hexValue(char c)
        {
            if (c >= '0' && c <= '9') return c - '0';
            if (c >= 'A' && c <= 'F') return c - 'A' + 10;
            if (c >= 'a' && c <= 'f') return c - 'a' + 10;
            return -1;
        }
    }
}

Comments

Be the first to post a comment

Post a comment