網友發問:「 LINQ 可以對物件陣列進行 Pivot 轉換嗎?」 Sure, Of Course, Why Not?

對 Pivot 印象模糊的同學可先溫習這篇:Using PIVOT In SQL 2005,而本文也直接用該文的 Log 統計當例子。

假設原始資料長這樣:

LogTime  StatusCode Cnt
-------- ---------- -----------
06:40:05 200        5
06:40:05 302        1
06:40:06 304        1
06:40:06 200        10
06:40:07 200        5
06:40:07 500        3
06:40:08 200        11
06:40:08 404        1

我們希望以時間為 Key,將資料整合成一秒鐘一筆,欄位則為 200、302、304... 等各狀態出現的次數。

LogTime  200         302         304         401         404         500
-------- ----------- ----------- ----------- ----------- ----------- -----------
06:40:05 5           1           NULL        NULL        NULL        NULL
06:40:06 10          NULL        1           NULL        NULL        NULL
06:40:07 5           NULL        NULL        NULL        NULL        3
06:40:08 11          NULL        NULL        NULL        1           NULL

要用 LINQ 達成此一效果,可先 GroupBy(o => o.LogTime) 以 LogTime 分群,再找出該群資料中狀態分別為 200、320、304... 之次數加總,例如:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Newtonsoft.Json;

namespace LinqPivot
{
    class Program
    {
        class LogEntry
        {
            public string LogTime { get; set; }
            public string StatusCode { get; set; }
            public int Count { get; set; }

            public LogEntry(string logTime, string statusCode, int count)
            {
                LogTime = logTime;
                StatusCode = statusCode;
                Count = count;
            }
        }

        static List<LogEntry> rawData = new List<LogEntry>()
        {
            new LogEntry("06:40:05", "200", 5),
            new LogEntry("06:40:05", "302", 1),
            new LogEntry("06:40:06", "304", 1),
            new LogEntry("06:40:06", "200", 10),
            new LogEntry("06:40:07", "200", 5),
            new LogEntry("06:40:07", "500", 3),
            new LogEntry("06:40:08", "200", 11),
            new LogEntry("06:40:08", "404", 1),
        };

        static void Main(string[] args)
        {
            var res = rawData.GroupBy(o => o.LogTime)
                .Select(o => new
                {
                    LogTime = o.Key,
                    S200 = o.Where(p => p.StatusCode == "200").Sum(p => p.Count),
                    S302 = o.Where(p => p.StatusCode == "302").Sum(p => p.Count),
                    S304 = o.Where(p => p.StatusCode == "304").Sum(p => p.Count),
                    S401 = o.Where(p => p.StatusCode == "401").Sum(p => p.Count),
                    S404 = o.Where(p => p.StatusCode == "404").Sum(p => p.Count),
                    S500 = o.Where(p => p.StatusCode == "500").Sum(p => p.Count)
                }).ToArray();
            Console.WriteLine(JsonConvert.SerializeObject(res, Formatting.Indented));
            Console.ReadLine();
        }

    }
}

這樣就能輕鬆完成樞紐轉換:

[
  {
    "LogTime": "06:40:05",
    "S200": 5,
    "S302": 1,
    "S304": 0,
    "S401": 0,
    "S404": 0,
    "S500": 0
  },
  {
    "LogTime": "06:40:06",
    "S200": 10,
    "S302": 0,
    "S304": 1,
    "S401": 0,
    "S404": 0,
    "S500": 0
  },
  {
    "LogTime": "06:40:07",
    "S200": 5,
    "S302": 0,
    "S304": 0,
    "S401": 0,
    "S404": 0,
    "S500": 3
  },
  {
    "LogTime": "06:40:08",
    "S200": 11,
    "S302": 0,
    "S304": 0,
    "S401": 0,
    "S404": 1,
    "S500": 0
  }
]

若嫌寫死 SXXX = o.Where(p => p.StatusCode == "XXX").Sum(p => p.Count) 太醜,且項目一有變動得改程式太麻煩。沒關係,C# 的武器很多,我們可以招喚 ExpandoObject 上場殺敵。(延伸閱讀:既然要動態就動個痛快 - ExpandoObject)

static void Main(string[] args)
{
    var allCols = rawData.Select(o => o.StatusCode).Distinct().OrderBy(o => o).ToList();
    var res = rawData.GroupBy(o => o.LogTime)
        .Select(o =>
        {
            dynamic d = new ExpandoObject();
            d.LogTime = o.Key;
            var dict =
                d as IDictionary<string, object>;
            allCols.ForEach(c =>
            {
                dict["S" + c] = o.Where(p => p.StatusCode == c).Sum(p => p.Count);
            });
            return d;
        }).ToArray();
    Console.WriteLine(JsonConvert.SerializeObject(res, Formatting.Indented));
    Console.ReadLine();
}

這樣子,SXXX 項目會依資料裡出現過的種類自動生成,讓程式再清爽一些,日後若增加新項目也不用改程式。(但這做法有個缺點,因資料未出現 StatusCode = '404',故結果不會包含 S404。如規格要求不管有無資料都要維持固定項目,可改成 var allCols = "200,302,304,401,404,500".Split(',').ToList())

[
  {
    "LogTime": "06:40:05",
    "S200": 5,
    "S302": 1,
    "S304": 0,
    "S404": 0,
    "S500": 0
  },
  {
    "LogTime": "06:40:06",
    "S200": 10,
    "S302": 0,
    "S304": 1,
    "S404": 0,
    "S500": 0
  },
  {
    "LogTime": "06:40:07",
    "S200": 5,
    "S302": 0,
    "S304": 0,
    "S404": 0,
    "S500": 3
  },
  {
    "LogTime": "06:40:08",
    "S200": 11,
    "S302": 0,
    "S304": 0,
    "S404": 1,
    "S500": 0
  }
]

等等,用 JSON 顯示就想了事,別打混,給我老老實實劇一張像 SQL 查詢結果的表格!

好吧,請看示範,一樣靠 LINQ 打通關。DumpResult() 函式接收 IEnumerable<object> 作為輸入參數,強型別類別或 ExpandoObject 通知,強型別物件用 Reflection .GetProperties() 列舉屬性名及取得屬性內容,ExpandoObject 則轉為 IDictionary<string, object> 進行操作。 為了使結果更美觀,我還加入依欄位最長內容自動調欄寬功能,邏輯有點小複雜,但照樣難不倒 LINQ:

static void Main(string[] args)
{
    var allCols = rawData.Select(o => o.StatusCode).Distinct().OrderBy(o => o).ToList();

    var res1 = rawData.GroupBy(o => o.LogTime)
        .Select(o => new
        {
            LogTime = o.Key,
            S200 = o.Where(p => p.StatusCode == "200").Sum(p => p.Count),
            S302 = o.Where(p => p.StatusCode == "302").Sum(p => p.Count),
            S304 = o.Where(p => p.StatusCode == "304").Sum(p => p.Count),
            S401 = o.Where(p => p.StatusCode == "401").Sum(p => p.Count),
            S404 = o.Where(p => p.StatusCode == "404").Sum(p => p.Count),
            S500 = o.Where(p => p.StatusCode == "500").Sum(p => p.Count)
        }).ToArray();
    DumpResult(res1);
    Console.WriteLine();
    var res2 = rawData.GroupBy(o => o.LogTime)
        .Select(o =>
        {
            dynamic d = new ExpandoObject();
            d.LogTime = o.Key;
            var dict =
                d as IDictionary<string, object>;
            allCols.ForEach(c =>
            {
                dict["S" + c] = o.Where(p => p.StatusCode == c).Sum(p => p.Count);
            });
            return d;
        }).ToArray();
    DumpResult(res2);
    Console.ReadLine();
}

static void DumpResult(IEnumerable<object> result)
{
    if (result == null || !result.Any())
        Console.WriteLine("No Data");
    var isExpando = result.First() is IDictionary<string, object>;
    Dictionary<string, int> cols;
    Func<object, string, string> getPropValue;
    if (isExpando)
    {
        cols = (result.First() as IDictionary<string, object>)
            .Keys.ToDictionary(o => o, o => o.Length);
        getPropValue = (obj, propName) =>
            (obj as IDictionary<string, object>)[propName].ToString();
    }
    else
    {
        var props = result.First().GetType().GetProperties().ToDictionary(o => o.Name, o => o);
        cols = props.ToDictionary(o => o.Key, o => o.Key.Length);
        getPropValue = (obj, propName) => props[propName].GetValue(obj).ToString();
    }

    //巡迴欄位值找出最大寬度
    var data = result.Select(o =>
    {
        return cols.Keys.ToList().Select(c =>
        {
            var value = getPropValue(o, c);
            cols[c] = Math.Max(cols[c], value.Length);
            return value;
        }).ToArray();
    }).ToList();

    Console.WriteLine(string.Join(" ", cols.Select(o => o.Key.PadLeft(o.Value))));
    Console.WriteLine(string.Join(" ", cols.Select(o => new string('-', o.Value))));
    var colWidths = cols.Values.ToArray();
    foreach (var row in data)
    {
        var idx = 0;
        Console.WriteLine(string.Join(" ", 
            row.Select(o => o.PadLeft(colWidths[idx++])).ToArray()));
    }

}

實測結果,完美!

 LogTime S200 S302 S304 S401 S404 S500
-------- ---- ---- ---- ---- ---- ----
06:40:05    5    1    0    0    0    0
06:40:06   10    0    1    0    0    0
06:40:07    5    0    0    0    0    3
06:40:08   11    0    0    0    1    0

 LogTime S200 S302 S304 S404 S500
-------- ---- ---- ---- ---- ----
06:40:05    5    1    0    0    0
06:40:06   10    0    1    0    0
06:40:07    5    0    0    0    3
06:40:08   11    0    0    1    0

前一篇 LINQ 推坑文發表後,不少讀者迴響都集中在用 LINQ 查詢資料庫的效能問題,我還是想強調,把 LINQ 侷限於查詢資料庫是件很可惜的事,接連這兩篇文章的重點都在如何活用 LINQ 操作 .NET 物件、陣列取代繁瑣的 for 迴圈及資料轉換及比對, 以寫出更簡潔的程式。跟 Regular Expression 一樣,LINQ 有點難度需要學習,但上手後絕對值回票價。

回到 LINQ 查詢資料庫的效能上,我認為 LINQ 查資料庫效能不佳的狀況並非無法避免,只是得累積足夠的知識經驗(意思是要知道很多眉角),我個人偏好直接控制 SQL 語法而非透過 LINQ 操作,基本上是一種取捨,犠牲跨資料庫優勢及學習底層 SQL 語法的心力換取對 SQL 效能的直接掌控。這部分在舊文 閒聊:用 LINQ 還是自己寫 SQL? 有更詳細的闡述,有興趣的朋友可以參考。

該不該用 LINQ 查詢資料庫見仁見智,但放著 LINQ 不用徒手寫迴圈搞物件肯定不智,推薦大家趕快上車。

To answering reader's question, this article demostrates how to use LINQ to convert object array to pivot table.


Comments

# by wellxion

LINQ to Object才是重點沒錯啊XDD LINQ to SQL只是LINQ一部份而已 怎麼用簡潔有效率的方式操作資料集合才是LINQ的精隨

Post a comment