【答客問】用 LINQ 實作 Pivot 轉換
1 |
網友發問:「 LINQ 可以對物件陣列進行 Pivot 轉換嗎?」 Sure, Of Course, Why Not?
對 Pivot 印象模糊的同學可先溫習這篇:Using PIVOT In SQL 2005,而本文也直接用該文的 Log 統計當例子。
假設原始資料長這樣:
LogTime StatusCode Cnt
-------- ---------- -----------
06:40:05 200 5
06:40:05 302 1
06:40:06 304 1
06:40:06 200 10
06:40:07 200 5
06:40:07 500 3
06:40:08 200 11
06:40:08 404 1
我們希望以時間為 Key,將資料整合成一秒鐘一筆,欄位則為 200、302、304... 等各狀態出現的次數。
LogTime 200 302 304 401 404 500
-------- ----------- ----------- ----------- ----------- ----------- -----------
06:40:05 5 1 NULL NULL NULL NULL
06:40:06 10 NULL 1 NULL NULL NULL
06:40:07 5 NULL NULL NULL NULL 3
06:40:08 11 NULL NULL NULL 1 NULL
要用 LINQ 達成此一效果,可先 GroupBy(o => o.LogTime) 以 LogTime 分群,再找出該群資料中狀態分別為 200、320、304... 之次數加總,例如:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Newtonsoft.Json;
namespace LinqPivot
{
class Program
{
class LogEntry
{
public string LogTime { get; set; }
public string StatusCode { get; set; }
public int Count { get; set; }
public LogEntry(string logTime, string statusCode, int count)
{
LogTime = logTime;
StatusCode = statusCode;
Count = count;
}
}
static List<LogEntry> rawData = new List<LogEntry>()
{
new LogEntry("06:40:05", "200", 5),
new LogEntry("06:40:05", "302", 1),
new LogEntry("06:40:06", "304", 1),
new LogEntry("06:40:06", "200", 10),
new LogEntry("06:40:07", "200", 5),
new LogEntry("06:40:07", "500", 3),
new LogEntry("06:40:08", "200", 11),
new LogEntry("06:40:08", "404", 1),
};
static void Main(string[] args)
{
var res = rawData.GroupBy(o => o.LogTime)
.Select(o => new
{
LogTime = o.Key,
S200 = o.Where(p => p.StatusCode == "200").Sum(p => p.Count),
S302 = o.Where(p => p.StatusCode == "302").Sum(p => p.Count),
S304 = o.Where(p => p.StatusCode == "304").Sum(p => p.Count),
S401 = o.Where(p => p.StatusCode == "401").Sum(p => p.Count),
S404 = o.Where(p => p.StatusCode == "404").Sum(p => p.Count),
S500 = o.Where(p => p.StatusCode == "500").Sum(p => p.Count)
}).ToArray();
Console.WriteLine(JsonConvert.SerializeObject(res, Formatting.Indented));
Console.ReadLine();
}
}
}
這樣就能輕鬆完成樞紐轉換:
[
{
"LogTime": "06:40:05",
"S200": 5,
"S302": 1,
"S304": 0,
"S401": 0,
"S404": 0,
"S500": 0
},
{
"LogTime": "06:40:06",
"S200": 10,
"S302": 0,
"S304": 1,
"S401": 0,
"S404": 0,
"S500": 0
},
{
"LogTime": "06:40:07",
"S200": 5,
"S302": 0,
"S304": 0,
"S401": 0,
"S404": 0,
"S500": 3
},
{
"LogTime": "06:40:08",
"S200": 11,
"S302": 0,
"S304": 0,
"S401": 0,
"S404": 1,
"S500": 0
}
]
若嫌寫死 SXXX = o.Where(p => p.StatusCode == "XXX").Sum(p => p.Count) 太醜,且項目一有變動得改程式太麻煩。沒關係,C# 的武器很多,我們可以招喚 ExpandoObject 上場殺敵。(延伸閱讀:既然要動態就動個痛快 - ExpandoObject)
static void Main(string[] args)
{
var allCols = rawData.Select(o => o.StatusCode).Distinct().OrderBy(o => o).ToList();
var res = rawData.GroupBy(o => o.LogTime)
.Select(o =>
{
dynamic d = new ExpandoObject();
d.LogTime = o.Key;
var dict =
d as IDictionary<string, object>;
allCols.ForEach(c =>
{
dict["S" + c] = o.Where(p => p.StatusCode == c).Sum(p => p.Count);
});
return d;
}).ToArray();
Console.WriteLine(JsonConvert.SerializeObject(res, Formatting.Indented));
Console.ReadLine();
}
這樣子,SXXX 項目會依資料裡出現過的種類自動生成,讓程式再清爽一些,日後若增加新項目也不用改程式。(但這做法有個缺點,因資料未出現 StatusCode = '404',故結果不會包含 S404。如規格要求不管有無資料都要維持固定項目,可改成 var allCols = "200,302,304,401,404,500".Split(',').ToList()
)
[
{
"LogTime": "06:40:05",
"S200": 5,
"S302": 1,
"S304": 0,
"S404": 0,
"S500": 0
},
{
"LogTime": "06:40:06",
"S200": 10,
"S302": 0,
"S304": 1,
"S404": 0,
"S500": 0
},
{
"LogTime": "06:40:07",
"S200": 5,
"S302": 0,
"S304": 0,
"S404": 0,
"S500": 3
},
{
"LogTime": "06:40:08",
"S200": 11,
"S302": 0,
"S304": 0,
"S404": 1,
"S500": 0
}
]
等等,用 JSON 顯示就想了事,別打混,給我老老實實劇一張像 SQL 查詢結果的表格!
好吧,請看示範,一樣靠 LINQ 打通關。DumpResult() 函式接收 IEnumerable<object> 作為輸入參數,強型別類別或 ExpandoObject 通知,強型別物件用 Reflection .GetProperties() 列舉屬性名及取得屬性內容,ExpandoObject 則轉為 IDictionary<string, object> 進行操作。 為了使結果更美觀,我還加入依欄位最長內容自動調欄寬功能,邏輯有點小複雜,但照樣難不倒 LINQ:
static void Main(string[] args)
{
var allCols = rawData.Select(o => o.StatusCode).Distinct().OrderBy(o => o).ToList();
var res1 = rawData.GroupBy(o => o.LogTime)
.Select(o => new
{
LogTime = o.Key,
S200 = o.Where(p => p.StatusCode == "200").Sum(p => p.Count),
S302 = o.Where(p => p.StatusCode == "302").Sum(p => p.Count),
S304 = o.Where(p => p.StatusCode == "304").Sum(p => p.Count),
S401 = o.Where(p => p.StatusCode == "401").Sum(p => p.Count),
S404 = o.Where(p => p.StatusCode == "404").Sum(p => p.Count),
S500 = o.Where(p => p.StatusCode == "500").Sum(p => p.Count)
}).ToArray();
DumpResult(res1);
Console.WriteLine();
var res2 = rawData.GroupBy(o => o.LogTime)
.Select(o =>
{
dynamic d = new ExpandoObject();
d.LogTime = o.Key;
var dict =
d as IDictionary<string, object>;
allCols.ForEach(c =>
{
dict["S" + c] = o.Where(p => p.StatusCode == c).Sum(p => p.Count);
});
return d;
}).ToArray();
DumpResult(res2);
Console.ReadLine();
}
static void DumpResult(IEnumerable<object> result)
{
if (result == null || !result.Any())
Console.WriteLine("No Data");
var isExpando = result.First() is IDictionary<string, object>;
Dictionary<string, int> cols;
Func<object, string, string> getPropValue;
if (isExpando)
{
cols = (result.First() as IDictionary<string, object>)
.Keys.ToDictionary(o => o, o => o.Length);
getPropValue = (obj, propName) =>
(obj as IDictionary<string, object>)[propName].ToString();
}
else
{
var props = result.First().GetType().GetProperties().ToDictionary(o => o.Name, o => o);
cols = props.ToDictionary(o => o.Key, o => o.Key.Length);
getPropValue = (obj, propName) => props[propName].GetValue(obj).ToString();
}
//巡迴欄位值找出最大寬度
var data = result.Select(o =>
{
return cols.Keys.ToList().Select(c =>
{
var value = getPropValue(o, c);
cols[c] = Math.Max(cols[c], value.Length);
return value;
}).ToArray();
}).ToList();
Console.WriteLine(string.Join(" ", cols.Select(o => o.Key.PadLeft(o.Value))));
Console.WriteLine(string.Join(" ", cols.Select(o => new string('-', o.Value))));
var colWidths = cols.Values.ToArray();
foreach (var row in data)
{
var idx = 0;
Console.WriteLine(string.Join(" ",
row.Select(o => o.PadLeft(colWidths[idx++])).ToArray()));
}
}
實測結果,完美!
LogTime S200 S302 S304 S401 S404 S500
-------- ---- ---- ---- ---- ---- ----
06:40:05 5 1 0 0 0 0
06:40:06 10 0 1 0 0 0
06:40:07 5 0 0 0 0 3
06:40:08 11 0 0 0 1 0
LogTime S200 S302 S304 S404 S500
-------- ---- ---- ---- ---- ----
06:40:05 5 1 0 0 0
06:40:06 10 0 1 0 0
06:40:07 5 0 0 0 3
06:40:08 11 0 0 1 0
前一篇 LINQ 推坑文發表後,不少讀者迴響都集中在用 LINQ 查詢資料庫的效能問題,我還是想強調,把 LINQ 侷限於查詢資料庫是件很可惜的事,接連這兩篇文章的重點都在如何活用 LINQ 操作 .NET 物件、陣列取代繁瑣的 for 迴圈及資料轉換及比對, 以寫出更簡潔的程式。跟 Regular Expression 一樣,LINQ 有點難度需要學習,但上手後絕對值回票價。
回到 LINQ 查詢資料庫的效能上,我認為 LINQ 查資料庫效能不佳的狀況並非無法避免,只是得累積足夠的知識經驗(意思是要知道很多眉角),我個人偏好直接控制 SQL 語法而非透過 LINQ 操作,基本上是一種取捨,犠牲跨資料庫優勢及學習底層 SQL 語法的心力換取對 SQL 效能的直接掌控。這部分在舊文 閒聊:用 LINQ 還是自己寫 SQL? 有更詳細的闡述,有興趣的朋友可以參考。
該不該用 LINQ 查詢資料庫見仁見智,但放著 LINQ 不用徒手寫迴圈搞物件肯定不智,推薦大家趕快上車。
To answering reader's question, this article demostrates how to use LINQ to convert object array to pivot table.
Comments
# by wellxion
LINQ to Object才是重點沒錯啊XDD LINQ to SQL只是LINQ一部份而已 怎麼用簡潔有效率的方式操作資料集合才是LINQ的精隨