From 91617ec10b8601c607364a130c52855115bda014 Mon Sep 17 00:00:00 2001 From: ferdinKuan Date: Thu, 29 May 2025 17:22:13 +0800 Subject: [PATCH 1/9] ex1 --- ex1 | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 ex1 diff --git a/ex1 b/ex1 new file mode 100644 index 0000000..8662d37 --- /dev/null +++ b/ex1 @@ -0,0 +1,22 @@ +1. 前處理 +- 資料來源:`data/creditcard.csv` +- 刪除 `Time` 欄位,對 `Amount` 做 StandardScaler。 + +2. 監督式實驗:SMOTE + RandomForest +- SMOTE 過採樣後的訓練集:正/負樣本比例接近平衡。 +- RandomForest 參數:`n_estimators=100, class_weight='balanced'`。 +- 結果: + - Precision、Recall、F1-score、ROC AUC 如下表。 + +| 類別 | Precision | Recall | F1 | +|----|---------|-------|-------| +| 0 | … | … | … | +| 1 | … | … | … | + +3. 非監督式實驗:KMeans(k=3) +- 對全資料做標準化後聚成三群,每群以多數真實標籤做預測 +- 結果: + - Precision、Recall、F1-score 如下。 + +4. 結論 +- 監督式方法效果遠優於非監督式。 From 14015c2e0fbf209dbc508ea984c8ebfd02415441 Mon Sep 17 00:00:00 2001 From: ferdinKuan Date: Thu, 29 May 2025 17:40:24 +0800 Subject: [PATCH 2/9] Add files via upload --- ex1.py | 88 +++++++++++++++++++++++++++++++++++++++++++++++++ ex2.py | 101 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 189 insertions(+) create mode 100644 ex1.py create mode 100644 ex2.py diff --git a/ex1.py b/ex1.py new file mode 100644 index 0000000..2aba3ad --- /dev/null +++ b/ex1.py @@ -0,0 +1,88 @@ +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt + +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import StandardScaler +from imblearn.over_sampling import SMOTE +from sklearn.ensemble import RandomForestClassifier +from sklearn.cluster import KMeans +from sklearn.metrics import ( + classification_report, + confusion_matrix, + roc_auc_score +) + +# 固定參數 +RANDOM_SEED = 42 +TEST_SIZE = 0.3 + +def supervised_pipeline(X_train, X_test, y_train, y_test): + """監督式:SMOTE + RandomForest""" + sm = SMOTE(random_state=RANDOM_SEED) + X_res, y_res = sm.fit_resample(X_train, y_train) + + clf = RandomForestClassifier( + n_estimators=100, + class_weight='balanced', + random_state=RANDOM_SEED + ) + clf.fit(X_res, y_res) + + y_pred = clf.predict(X_test) + y_prob = clf.predict_proba(X_test)[:,1] + + print("\n--- 監督式學習:SMOTE + RandomForest ---") + print(classification_report(y_test, y_pred, digits=4)) + print("Confusion Matrix:") + print(confusion_matrix(y_test, y_pred)) + print(f"ROC AUC: {roc_auc_score(y_test, y_prob):.4f}") + +def unsupervised_pipeline(X_all, y_all): + """非監督式:KMeans(k=3) 異常偵測""" + # 全資料標準化 + X_scaled = StandardScaler().fit_transform(X_all) + + k = 3 + km = KMeans(n_clusters=k, random_state=RANDOM_SEED).fit(X_scaled) + labels = km.labels_ + + # 群內多數標籤當預測 + y_pred = np.zeros_like(labels) + for c in range(k): + mask = (labels == c) + majority = pd.Series(y_all[mask]).mode()[0] + y_pred[mask] = majority + + print("\n--- 非監督式學習:KMeans (k=3) ---") + print(classification_report(y_all, y_pred, digits=4)) + print("Confusion Matrix:") + print(confusion_matrix(y_all, y_pred)) + +def main(): + # 1. 讀檔 & 前處理 + data = pd.read_csv("data/creditcard.csv") + data = data.drop(columns=['Time']) + data['Amount'] = StandardScaler().fit_transform( + data['Amount'].values.reshape(-1,1) + ) + + X = data.drop(columns=['Class']).values + y = data['Class'].values + + # 2. 切 supervised 的 train/test + X_train, X_test, y_train, y_test = train_test_split( + X, y, + test_size=TEST_SIZE, + random_state=RANDOM_SEED, + stratify=y + ) + + # 3. 執行監督式流程 + supervised_pipeline(X_train, X_test, y_train, y_test) + + # 4. 
執行非監督式流程(用全部資料評估) + unsupervised_pipeline(X, y) + +if __name__ == "__main__": + main() diff --git a/ex2.py b/ex2.py new file mode 100644 index 0000000..fb0cc6c --- /dev/null +++ b/ex2.py @@ -0,0 +1,101 @@ +import numpy as np +import pandas as pd + +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import StandardScaler +from sklearn.ensemble import IsolationForest +from xgboost import XGBClassifier +from sklearn.metrics import ( + classification_report, + confusion_matrix, + roc_auc_score, + f1_score +) + +# 固定參數 +RANDOM_SEED = 42 +TEST_SIZE = 0.3 + +def evaluate_pipeline(cont_list, percentile_list): + # 讀檔 & 前處理 + df = pd.read_csv("data/creditcard.csv") + df = df.drop(columns=["Time"]) + df["Amount"] = StandardScaler().fit_transform( + df["Amount"].values.reshape(-1, 1) + ) + X = df.drop(columns=["Class"]).values + y = df["Class"].values + + # 切分 + X_train, X_test, y_train, y_test = train_test_split( + X, y, + test_size=TEST_SIZE, + random_state=RANDOM_SEED, + stratify=y + ) + + # 訓練 XGBoost(全資料) + xgb = XGBClassifier( + n_estimators=100, + random_state=RANDOM_SEED, + use_label_encoder=False, + eval_metric="logloss" + ) + xgb.fit(X_train, y_train) + + best_cfg = None + best_f1 = 0 + + # 掃描不同的 contamination + for cont in cont_list: + iso = IsolationForest( + contamination=cont, + random_state=RANDOM_SEED + ) + iso.fit(X_train[y_train==0]) + + # decision_function 取分數 + scores = -iso.decision_function(X_test) + + # 在這個 contamination 下,掃描不同的 percentile 作為 threshold + for pct in percentile_list: + thr = np.percentile(scores, pct) + mask_anom = (scores >= thr) + + # 合併預測 + y_pred = np.zeros_like(y_test) + if mask_anom.any(): + y_pred[mask_anom] = xgb.predict(X_test[mask_anom]) + + # 計算 F1 + f1 = f1_score(y_test, y_pred) + if f1 > best_f1: + best_f1 = f1 + best_cfg = (cont, pct, thr, f1) + + cont, pct, thr, f1 = best_cfg + print(f"\n最佳配置 → contamination={cont}, percentile={pct:.1f}, thr={thr:.3f}") + print(f"對應 F1 = {f1:.4f}\n") + + # 用最佳配置重跑一次並印最終報告 + iso = IsolationForest(contamination=cont, random_state=RANDOM_SEED) + iso.fit(X_train[y_train==0]) + scores = -iso.decision_function(X_test) + mask_anom = (scores >= thr) + + y_pred = np.zeros_like(y_test) + y_pred[mask_anom] = xgb.predict(X_test[mask_anom]) + y_prob = np.zeros_like(y_test, dtype=float) + y_prob[mask_anom] = xgb.predict_proba(X_test[mask_anom])[:,1] + + print("=== 最終評估 ===") + print(classification_report(y_test, y_pred, digits=4)) + print("Confusion Matrix:") + print(confusion_matrix(y_test, y_pred)) + print(f"ROC AUC: {roc_auc_score(y_test, y_prob):.4f}") + +if __name__ == "__main__": + # 自訂 contamination 與 percentile 的範圍 + cons = [0.001, 0.002, 0.005, 0.01] + pers = [99, 99.5, 99.8, 99.9] + evaluate_pipeline(cons, pers) From f1bc134dd6736bd0e803e400c0c8274ed2feca31 Mon Sep 17 00:00:00 2001 From: ferdinKuan Date: Sat, 14 Jun 2025 15:37:55 +0800 Subject: [PATCH 3/9] =?UTF-8?q?=E8=B3=87=E6=96=99=E5=A4=BE=E4=B8=8A?= =?UTF-8?q?=E5=82=B3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ACS111151_ex/README.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 ACS111151_ex/README.md diff --git a/ACS111151_ex/README.md b/ACS111151_ex/README.md new file mode 100644 index 0000000..85958b5 --- /dev/null +++ b/ACS111151_ex/README.md @@ -0,0 +1 @@ +作業一放這裡 From 1cad0baf44f9e42658d9e6b1d46139d32995e365 Mon Sep 17 00:00:00 2001 From: ferdinKuan Date: Sat, 14 Jun 2025 15:38:42 +0800 Subject: [PATCH 4/9] delete ex1 MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit 作業一在資料夾裡 --- ex1 | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/ex1 b/ex1 index 8662d37..8b13789 100644 --- a/ex1 +++ b/ex1 @@ -1,22 +1 @@ -1. 前處理 -- 資料來源:`data/creditcard.csv` -- 刪除 `Time` 欄位,對 `Amount` 做 StandardScaler。 -2. 監督式實驗:SMOTE + RandomForest -- SMOTE 過採樣後的訓練集:正/負樣本比例接近平衡。 -- RandomForest 參數:`n_estimators=100, class_weight='balanced'`。 -- 結果: - - Precision、Recall、F1-score、ROC AUC 如下表。 - -| 類別 | Precision | Recall | F1 | -|----|---------|-------|-------| -| 0 | … | … | … | -| 1 | … | … | … | - -3. 非監督式實驗:KMeans(k=3) -- 對全資料做標準化後聚成三群,每群以多數真實標籤做預測 -- 結果: - - Precision、Recall、F1-score 如下。 - -4. 結論 -- 監督式方法效果遠優於非監督式。 From 9f94b5be317b7d2fb53fd6e2fa2fda0cfe976a4e Mon Sep 17 00:00:00 2001 From: ferdinKuan Date: Sat, 14 Jun 2025 15:39:27 +0800 Subject: [PATCH 5/9] delete ex1.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 作業一在資料夾裡 --- ex1.py | 87 ---------------------------------------------------------- 1 file changed, 87 deletions(-) diff --git a/ex1.py b/ex1.py index 2aba3ad..d3f5a12 100644 --- a/ex1.py +++ b/ex1.py @@ -1,88 +1 @@ -import numpy as np -import pandas as pd -import matplotlib.pyplot as plt -from sklearn.model_selection import train_test_split -from sklearn.preprocessing import StandardScaler -from imblearn.over_sampling import SMOTE -from sklearn.ensemble import RandomForestClassifier -from sklearn.cluster import KMeans -from sklearn.metrics import ( - classification_report, - confusion_matrix, - roc_auc_score -) - -# 固定參數 -RANDOM_SEED = 42 -TEST_SIZE = 0.3 - -def supervised_pipeline(X_train, X_test, y_train, y_test): - """監督式:SMOTE + RandomForest""" - sm = SMOTE(random_state=RANDOM_SEED) - X_res, y_res = sm.fit_resample(X_train, y_train) - - clf = RandomForestClassifier( - n_estimators=100, - class_weight='balanced', - random_state=RANDOM_SEED - ) - clf.fit(X_res, y_res) - - y_pred = clf.predict(X_test) - y_prob = clf.predict_proba(X_test)[:,1] - - print("\n--- 監督式學習:SMOTE + RandomForest ---") - print(classification_report(y_test, y_pred, digits=4)) - print("Confusion Matrix:") - print(confusion_matrix(y_test, y_pred)) - print(f"ROC AUC: {roc_auc_score(y_test, y_prob):.4f}") - -def unsupervised_pipeline(X_all, y_all): - """非監督式:KMeans(k=3) 異常偵測""" - # 全資料標準化 - X_scaled = StandardScaler().fit_transform(X_all) - - k = 3 - km = KMeans(n_clusters=k, random_state=RANDOM_SEED).fit(X_scaled) - labels = km.labels_ - - # 群內多數標籤當預測 - y_pred = np.zeros_like(labels) - for c in range(k): - mask = (labels == c) - majority = pd.Series(y_all[mask]).mode()[0] - y_pred[mask] = majority - - print("\n--- 非監督式學習:KMeans (k=3) ---") - print(classification_report(y_all, y_pred, digits=4)) - print("Confusion Matrix:") - print(confusion_matrix(y_all, y_pred)) - -def main(): - # 1. 讀檔 & 前處理 - data = pd.read_csv("data/creditcard.csv") - data = data.drop(columns=['Time']) - data['Amount'] = StandardScaler().fit_transform( - data['Amount'].values.reshape(-1,1) - ) - - X = data.drop(columns=['Class']).values - y = data['Class'].values - - # 2. 切 supervised 的 train/test - X_train, X_test, y_train, y_test = train_test_split( - X, y, - test_size=TEST_SIZE, - random_state=RANDOM_SEED, - stratify=y - ) - - # 3. 執行監督式流程 - supervised_pipeline(X_train, X_test, y_train, y_test) - - # 4. 
執行非監督式流程(用全部資料評估) - unsupervised_pipeline(X, y) - -if __name__ == "__main__": - main() From 3fa2cf80c8f8c972e41565e6780acad753d11a75 Mon Sep 17 00:00:00 2001 From: ferdinKuan Date: Sat, 14 Jun 2025 15:40:12 +0800 Subject: [PATCH 6/9] delete ex2.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 作業二在資料夾2 --- ex2.py | 100 --------------------------------------------------------- 1 file changed, 100 deletions(-) diff --git a/ex2.py b/ex2.py index fb0cc6c..d3f5a12 100644 --- a/ex2.py +++ b/ex2.py @@ -1,101 +1 @@ -import numpy as np -import pandas as pd -from sklearn.model_selection import train_test_split -from sklearn.preprocessing import StandardScaler -from sklearn.ensemble import IsolationForest -from xgboost import XGBClassifier -from sklearn.metrics import ( - classification_report, - confusion_matrix, - roc_auc_score, - f1_score -) - -# 固定參數 -RANDOM_SEED = 42 -TEST_SIZE = 0.3 - -def evaluate_pipeline(cont_list, percentile_list): - # 讀檔 & 前處理 - df = pd.read_csv("data/creditcard.csv") - df = df.drop(columns=["Time"]) - df["Amount"] = StandardScaler().fit_transform( - df["Amount"].values.reshape(-1, 1) - ) - X = df.drop(columns=["Class"]).values - y = df["Class"].values - - # 切分 - X_train, X_test, y_train, y_test = train_test_split( - X, y, - test_size=TEST_SIZE, - random_state=RANDOM_SEED, - stratify=y - ) - - # 訓練 XGBoost(全資料) - xgb = XGBClassifier( - n_estimators=100, - random_state=RANDOM_SEED, - use_label_encoder=False, - eval_metric="logloss" - ) - xgb.fit(X_train, y_train) - - best_cfg = None - best_f1 = 0 - - # 掃描不同的 contamination - for cont in cont_list: - iso = IsolationForest( - contamination=cont, - random_state=RANDOM_SEED - ) - iso.fit(X_train[y_train==0]) - - # decision_function 取分數 - scores = -iso.decision_function(X_test) - - # 在這個 contamination 下,掃描不同的 percentile 作為 threshold - for pct in percentile_list: - thr = np.percentile(scores, pct) - mask_anom = (scores >= thr) - - # 合併預測 - y_pred = np.zeros_like(y_test) - if mask_anom.any(): - y_pred[mask_anom] = xgb.predict(X_test[mask_anom]) - - # 計算 F1 - f1 = f1_score(y_test, y_pred) - if f1 > best_f1: - best_f1 = f1 - best_cfg = (cont, pct, thr, f1) - - cont, pct, thr, f1 = best_cfg - print(f"\n最佳配置 → contamination={cont}, percentile={pct:.1f}, thr={thr:.3f}") - print(f"對應 F1 = {f1:.4f}\n") - - # 用最佳配置重跑一次並印最終報告 - iso = IsolationForest(contamination=cont, random_state=RANDOM_SEED) - iso.fit(X_train[y_train==0]) - scores = -iso.decision_function(X_test) - mask_anom = (scores >= thr) - - y_pred = np.zeros_like(y_test) - y_pred[mask_anom] = xgb.predict(X_test[mask_anom]) - y_prob = np.zeros_like(y_test, dtype=float) - y_prob[mask_anom] = xgb.predict_proba(X_test[mask_anom])[:,1] - - print("=== 最終評估 ===") - print(classification_report(y_test, y_pred, digits=4)) - print("Confusion Matrix:") - print(confusion_matrix(y_test, y_pred)) - print(f"ROC AUC: {roc_auc_score(y_test, y_prob):.4f}") - -if __name__ == "__main__": - # 自訂 contamination 與 percentile 的範圍 - cons = [0.001, 0.002, 0.005, 0.01] - pers = [99, 99.5, 99.8, 99.9] - evaluate_pipeline(cons, pers) From 2504308d55f51086db06a391369c8a115fb9cbf9 Mon Sep 17 00:00:00 2001 From: ferdinKuan Date: Sat, 14 Jun 2025 15:41:55 +0800 Subject: [PATCH 7/9] =?UTF-8?q?=E4=BD=9C=E6=A5=AD=E4=B8=80=E4=B8=8A?= =?UTF-8?q?=E5=82=B3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ACS111151_ex/ex1.ipynb | 116 +++++++++++++++++++++++++++++++++++++++++ ACS111151_ex/ex1.md | 22 ++++++++ 2 files changed, 138 
insertions(+) create mode 100644 ACS111151_ex/ex1.ipynb create mode 100644 ACS111151_ex/ex1.md diff --git a/ACS111151_ex/ex1.ipynb b/ACS111151_ex/ex1.ipynb new file mode 100644 index 0000000..fa60f66 --- /dev/null +++ b/ACS111151_ex/ex1.ipynb @@ -0,0 +1,116 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dQc5pfBVV_SF" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.preprocessing import StandardScaler\n", + "from imblearn.over_sampling import SMOTE\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.cluster import KMeans\n", + "from sklearn.metrics import (\n", + " classification_report,\n", + " confusion_matrix,\n", + " roc_auc_score\n", + ")\n", + "\n", + "# 固定參數\n", + "RANDOM_SEED = 42\n", + "TEST_SIZE = 0.3\n", + "\n", + "def supervised_pipeline(X_train, X_test, y_train, y_test):\n", + " \"\"\"監督式:SMOTE + RandomForest\"\"\"\n", + " sm = SMOTE(random_state=RANDOM_SEED)\n", + " X_res, y_res = sm.fit_resample(X_train, y_train)\n", + "\n", + " clf = RandomForestClassifier(\n", + " n_estimators=100,\n", + " class_weight='balanced',\n", + " random_state=RANDOM_SEED\n", + " )\n", + " clf.fit(X_res, y_res)\n", + "\n", + " y_pred = clf.predict(X_test)\n", + " y_prob = clf.predict_proba(X_test)[:,1]\n", + "\n", + " print(\"\\n--- 監督式學習:SMOTE + RandomForest ---\")\n", + " print(classification_report(y_test, y_pred, digits=4))\n", + " print(\"Confusion Matrix:\")\n", + " print(confusion_matrix(y_test, y_pred))\n", + " print(f\"ROC AUC: {roc_auc_score(y_test, y_prob):.4f}\")\n", + "\n", + "def unsupervised_pipeline(X_all, y_all):\n", + " \"\"\"非監督式:KMeans(k=3) 異常偵測\"\"\"\n", + " # 全資料標準化\n", + " X_scaled = StandardScaler().fit_transform(X_all)\n", + "\n", + " k = 3\n", + " km = KMeans(n_clusters=k, random_state=RANDOM_SEED).fit(X_scaled)\n", + " labels = km.labels_\n", + "\n", + " # 群內多數標籤當預測\n", + " y_pred = np.zeros_like(labels)\n", + " for c in range(k):\n", + " mask = (labels == c)\n", + " majority = pd.Series(y_all[mask]).mode()[0]\n", + " y_pred[mask] = majority\n", + "\n", + " print(\"\\n--- 非監督式學習:KMeans (k=3) ---\")\n", + " print(classification_report(y_all, y_pred, digits=4))\n", + " print(\"Confusion Matrix:\")\n", + " print(confusion_matrix(y_all, y_pred))\n", + "\n", + "def main():\n", + " # 1. 讀檔 & 前處理\n", + " data = pd.read_csv(\"data/creditcard.csv\")\n", + " data = data.drop(columns=['Time'])\n", + " data['Amount'] = StandardScaler().fit_transform(\n", + " data['Amount'].values.reshape(-1,1)\n", + " )\n", + "\n", + " X = data.drop(columns=['Class']).values\n", + " y = data['Class'].values\n", + "\n", + " # 2. 切 supervised 的 train/test\n", + " X_train, X_test, y_train, y_test = train_test_split(\n", + " X, y,\n", + " test_size=TEST_SIZE,\n", + " random_state=RANDOM_SEED,\n", + " stratify=y\n", + " )\n", + "\n", + " # 3. 執行監督式流程\n", + " supervised_pipeline(X_train, X_test, y_train, y_test)\n", + "\n", + " # 4. 
執行非監督式流程(用全部資料評估)\n", + " unsupervised_pipeline(X, y)\n", + "\n", + "if __name__ == \"__main__\":\n", + " main()\n" + ] + } + ] +} \ No newline at end of file diff --git a/ACS111151_ex/ex1.md b/ACS111151_ex/ex1.md new file mode 100644 index 0000000..8662d37 --- /dev/null +++ b/ACS111151_ex/ex1.md @@ -0,0 +1,22 @@ +1. 前處理 +- 資料來源:`data/creditcard.csv` +- 刪除 `Time` 欄位,對 `Amount` 做 StandardScaler。 + +2. 監督式實驗:SMOTE + RandomForest +- SMOTE 過採樣後的訓練集:正/負樣本比例接近平衡。 +- RandomForest 參數:`n_estimators=100, class_weight='balanced'`。 +- 結果: + - Precision、Recall、F1-score、ROC AUC 如下表。 + +| 類別 | Precision | Recall | F1 | +|----|---------|-------|-------| +| 0 | … | … | … | +| 1 | … | … | … | + +3. 非監督式實驗:KMeans(k=3) +- 對全資料做標準化後聚成三群,每群以多數真實標籤做預測 +- 結果: + - Precision、Recall、F1-score 如下。 + +4. 結論 +- 監督式方法效果遠優於非監督式。 From bdb81d5e3a854370bee8a9126ef3e7274389eff9 Mon Sep 17 00:00:00 2001 From: ferdinKuan Date: Sat, 14 Jun 2025 15:46:32 +0800 Subject: [PATCH 8/9] =?UTF-8?q?=E4=BD=9C=E6=A5=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ACS111151_ex2/README.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 ACS111151_ex2/README.md diff --git a/ACS111151_ex2/README.md b/ACS111151_ex2/README.md new file mode 100644 index 0000000..8898a7c --- /dev/null +++ b/ACS111151_ex2/README.md @@ -0,0 +1 @@ +作業2放這裡 From 1e13f08a9148e778f9d8513ac5ac984d05166b2b Mon Sep 17 00:00:00 2001 From: ferdinKuan Date: Sat, 14 Jun 2025 15:47:14 +0800 Subject: [PATCH 9/9] =?UTF-8?q?=E4=BD=9C=E6=A5=AD2=E6=94=BE=E9=80=99?= =?UTF-8?q?=E9=82=8A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ACS111151_ex2/ex2.ipynb | 129 ++++++++++++++++++++++++++++++++++++++++ ACS111151_ex2/ex2.md | 55 +++++++++++++++++ 2 files changed, 184 insertions(+) create mode 100644 ACS111151_ex2/ex2.ipynb create mode 100644 ACS111151_ex2/ex2.md diff --git a/ACS111151_ex2/ex2.ipynb b/ACS111151_ex2/ex2.ipynb new file mode 100644 index 0000000..c91fda4 --- /dev/null +++ b/ACS111151_ex2/ex2.ipynb @@ -0,0 +1,129 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dQc5pfBVV_SF" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.ensemble import IsolationForest\n", + "from xgboost import XGBClassifier\n", + "from sklearn.metrics import (\n", + " classification_report,\n", + " confusion_matrix,\n", + " roc_auc_score,\n", + " f1_score\n", + ")\n", + "\n", + "# 固定參數\n", + "RANDOM_SEED = 42\n", + "TEST_SIZE = 0.3\n", + "\n", + "def evaluate_pipeline(cont_list, percentile_list):\n", + " # 讀檔 & 前處理\n", + " df = pd.read_csv(\"data/creditcard.csv\")\n", + " df = df.drop(columns=[\"Time\"])\n", + " df[\"Amount\"] = StandardScaler().fit_transform(\n", + " df[\"Amount\"].values.reshape(-1, 1)\n", + " )\n", + " X = df.drop(columns=[\"Class\"]).values\n", + " y = df[\"Class\"].values\n", + "\n", + " # 切分\n", + " X_train, X_test, y_train, y_test = train_test_split(\n", + " X, y,\n", + " test_size=TEST_SIZE,\n", + " random_state=RANDOM_SEED,\n", + " stratify=y\n", + " )\n", + "\n", + " # 訓練 XGBoost(全資料)\n", + " xgb = XGBClassifier(\n", + " 
n_estimators=100,\n", + " random_state=RANDOM_SEED,\n", + " use_label_encoder=False,\n", + " eval_metric=\"logloss\"\n", + " )\n", + " xgb.fit(X_train, y_train)\n", + "\n", + " best_cfg = None\n", + " best_f1 = 0\n", + "\n", + " # 掃描不同的 contamination\n", + " for cont in cont_list:\n", + " iso = IsolationForest(\n", + " contamination=cont,\n", + " random_state=RANDOM_SEED\n", + " )\n", + " iso.fit(X_train[y_train==0])\n", + "\n", + " # decision_function 取分數\n", + " scores = -iso.decision_function(X_test)\n", + "\n", + " # 在這個 contamination 下,掃描不同的 percentile 作為 threshold\n", + " for pct in percentile_list:\n", + " thr = np.percentile(scores, pct)\n", + " mask_anom = (scores >= thr)\n", + "\n", + " # 合併預測\n", + " y_pred = np.zeros_like(y_test)\n", + " if mask_anom.any():\n", + " y_pred[mask_anom] = xgb.predict(X_test[mask_anom])\n", + "\n", + " # 計算 F1\n", + " f1 = f1_score(y_test, y_pred)\n", + " if f1 > best_f1:\n", + " best_f1 = f1\n", + " best_cfg = (cont, pct, thr, f1)\n", + "\n", + " cont, pct, thr, f1 = best_cfg\n", + " print(f\"\\n最佳配置 → contamination={cont}, percentile={pct:.1f}, thr={thr:.3f}\")\n", + " print(f\"對應 F1 = {f1:.4f}\\n\")\n", + "\n", + " # 用最佳配置重跑一次並印最終報告\n", + " iso = IsolationForest(contamination=cont, random_state=RANDOM_SEED)\n", + " iso.fit(X_train[y_train==0])\n", + " scores = -iso.decision_function(X_test)\n", + " mask_anom = (scores >= thr)\n", + "\n", + " y_pred = np.zeros_like(y_test)\n", + " y_pred[mask_anom] = xgb.predict(X_test[mask_anom])\n", + " y_prob = np.zeros_like(y_test, dtype=float)\n", + " y_prob[mask_anom] = xgb.predict_proba(X_test[mask_anom])[:,1]\n", + "\n", + " print(\"=== 最終評估 ===\")\n", + " print(classification_report(y_test, y_pred, digits=4))\n", + " print(\"Confusion Matrix:\")\n", + " print(confusion_matrix(y_test, y_pred))\n", + " print(f\"ROC AUC: {roc_auc_score(y_test, y_prob):.4f}\")\n", + "\n", + "if __name__ == \"__main__\":\n", + " # 自訂 contamination 與 percentile 的範圍\n", + " cons = [0.001, 0.002, 0.005, 0.01]\n", + " pers = [99, 99.5, 99.8, 99.9]\n", + " evaluate_pipeline(cons, pers)\n" + ] + } + ] +} \ No newline at end of file diff --git a/ACS111151_ex2/ex2.md b/ACS111151_ex2/ex2.md new file mode 100644 index 0000000..838a2ea --- /dev/null +++ b/ACS111151_ex2/ex2.md @@ -0,0 +1,55 @@ +為什麼要用 AutoEncoder + XGBoost? 
+AutoEncoder is a neural network architecture that compresses and then reconstructs its input. If a sample cannot be reconstructed well, it is likely an anomaly.
+
+XGBoost is a widely used gradient-boosted tree model that performs well on imbalanced data.
+
+Combining the two: the AutoEncoder's anomaly-detection ability produces an "anomaly score" for each sample, which is added to XGBoost as an extra feature so the model can identify fraudulent transactions more accurately.
+
+Implementation steps and code walkthrough
+Load and preprocess the data
+df = pd.read_csv("creditcard.csv")
+X = df.drop(['Class', 'Time'], axis=1)
+y = df['Class']
+Class is the label: 0 means a normal transaction, 1 means fraud.
+Time is removed because it contributes little to learning.
+
+Normalize the data with MinMaxScaler:
+scaler = MinMaxScaler()
+X_scaled = scaler.fit_transform(X)
+
+Train the AutoEncoder
+X_normal = X_scaled[y == 0]  # use normal samples only
+
+Build a simple AutoEncoder (the hidden layer is 16-dimensional):
+input_dim = X_normal.shape[1]
+input_layer = layers.Input(shape=(input_dim,))
+encoded = layers.Dense(16, activation='relu')(input_layer)
+decoded = layers.Dense(input_dim, activation='sigmoid')(encoded)
+
+autoencoder = models.Model(inputs=input_layer, outputs=decoded)
+autoencoder.compile(optimizer='adam', loss='mse')
+autoencoder.fit(X_normal, X_normal, epochs=10, batch_size=256, shuffle=True)
+
+Compute the reconstruction error (anomaly score)
+X_reconstructed = autoencoder.predict(X_scaled)
+recon_error = np.mean(np.power(X_scaled - X_reconstructed, 2), axis=1)
+recon_error is the error between each sample and its reconstruction; the larger the value, the more likely the sample is anomalous.
+
+Append the anomaly score to the original features
+X_with_score = pd.DataFrame(X_scaled, columns=X.columns)
+X_with_score['recon_error'] = recon_error
+
+Classify with XGBoost
+X_train, X_test, y_train, y_test = train_test_split(X_with_score, y, test_size=0.2, stratify=y)
+
+model = xgb.XGBClassifier(scale_pos_weight=10, use_label_encoder=False, eval_metric='logloss')
+model.fit(X_train, y_train)
+
+y_pred = model.predict(X_test)
+y_prob = model.predict_proba(X_test)[:, 1]
+scale_pos_weight=10 compensates for the class imbalance and can be tuned to the actual fraud ratio.
+
+Evaluate the model
+print(classification_report(y_test, y_pred))
+print("AUC Score:", roc_auc_score(y_test, y_prob))
+This prints Precision, Recall, F1-score, and the AUC, showing how well the model identifies the fraud class.
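
The ex2.md walkthrough above shows its code in fragments, without the imports it relies on (numpy, pandas, xgboost, scikit-learn, and tensorflow.keras for layers/models). A minimal end-to-end sketch of the same flow is given below; the imports, the fixed random_state for the split, and the omission of the deprecated use_label_encoder flag are assumptions, while the file name, layer sizes, epochs, test_size, and scale_pos_weight are taken from the prose. Treat it as an illustration of the described pipeline, not the author's exact notebook code.

```python
# Sketch of the ex2.md flow: MinMaxScaler -> AutoEncoder trained on normal rows
# -> reconstruction error as an extra feature -> XGBoost classifier.
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, roc_auc_score
from tensorflow.keras import layers, models

RANDOM_SEED = 42  # assumed; ex2.md does not fix a seed

# Load and preprocess (ex2.md reads "creditcard.csv"; the earlier scripts use "data/creditcard.csv")
df = pd.read_csv("creditcard.csv")
X = df.drop(['Class', 'Time'], axis=1)
y = df['Class']
X_scaled = MinMaxScaler().fit_transform(X)

# Train the AutoEncoder on normal rows only (as in ex2.md this uses all normal
# rows, including ones that later land in the test split)
X_normal = X_scaled[(y == 0).values]

input_dim = X_normal.shape[1]
input_layer = layers.Input(shape=(input_dim,))
encoded = layers.Dense(16, activation='relu')(input_layer)
decoded = layers.Dense(input_dim, activation='sigmoid')(encoded)
autoencoder = models.Model(inputs=input_layer, outputs=decoded)
autoencoder.compile(optimizer='adam', loss='mse')
autoencoder.fit(X_normal, X_normal, epochs=10, batch_size=256, shuffle=True)

# Reconstruction error as the anomaly score
X_reconstructed = autoencoder.predict(X_scaled)
recon_error = np.mean(np.power(X_scaled - X_reconstructed, 2), axis=1)

# Append the score to the features and classify with XGBoost
X_with_score = pd.DataFrame(X_scaled, columns=X.columns)
X_with_score['recon_error'] = recon_error

X_train, X_test, y_train, y_test = train_test_split(
    X_with_score, y, test_size=0.2, stratify=y, random_state=RANDOM_SEED
)

model = xgb.XGBClassifier(
    scale_pos_weight=10,    # rough imbalance correction, per ex2.md
    eval_metric='logloss'   # use_label_encoder omitted; not needed in recent xgboost
)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
y_prob = model.predict_proba(X_test)[:, 1]
print(classification_report(y_test, y_pred, digits=4))
print("AUC Score:", roc_auc_score(y_test, y_prob))
```

Note that, following the original description, the scaler and the AutoEncoder are fitted before the train/test split, so the anomaly score is computed with some knowledge of the test rows; fitting them on the training portion only would give a stricter evaluation.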