diff --git a/ACS111150_ex1/ex1.ipynb b/ACS111150_ex1/ex1.ipynb
new file mode 100644
index 0000000..e4e5d87
--- /dev/null
+++ b/ACS111150_ex1/ex1.ipynb
@@ -0,0 +1,466 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github",
+ "colab_type": "text"
+ },
+ "source": [
+ ""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "id": "srrdv_JjhrLP"
+ },
+ "outputs": [],
+ "source": [
+ "#import\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "from sklearn.ensemble import IsolationForest\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.preprocessing import StandardScaler\n",
+ "from sklearn.ensemble import RandomForestClassifier\n",
+ "from sklearn.cluster import KMeans\n",
+ "from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report\n",
+ "import kagglehub\n",
+ "\n",
+ "from sklearn.metrics import silhouette_score\n",
+ "from sklearn.metrics import classification_report\n",
+ "#general set\n",
+ "RANDOM_SEED = 42\n",
+ "TEST_SIZE = 0.3\n",
+ "\n",
+ "#download\n",
+ "path = kagglehub.dataset_download(\"mlg-ulb/creditcardfraud\")\n",
+ "data = pd.read_csv(f\"{path}/creditcard.csv\")\n",
+ "\n",
+ "#prepare\n",
+ "data['Class'] = data['Class'].astype(int)\n",
+ "data = data.drop(['Time'], axis=1)\n",
+ "data['Amount'] = StandardScaler().fit_transform(data['Amount'].values.reshape(-1, 1))\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {
+ "id": "w0B8EoH7BjaF"
+ },
+ "outputs": [],
+ "source": [
+ "#output\n",
+ "def evaluation(y_true, y_pred, model_name=\"Model\"):\n",
+ " accuracy = accuracy_score(y_true, y_pred)\n",
+ " precision = precision_score(y_true, y_pred)\n",
+ " recall = recall_score(y_true, y_pred)\n",
+ " f1 = f1_score(y_true, y_pred)\n",
+ " print(f'\\n{model_name} Evaluation:')\n",
+ " print('=' * 30)\n",
+ " print(f' Accuracy : {accuracy:.8f}')\n",
+ " print(f' Precision Score: {precision:.8f}')\n",
+ " print(f' Recall Score : {recall:.8f}')\n",
+ " print(f' F1 Score : {f1:.8f}')\n",
+ " print('\\nClassification Report:')\n",
+ " print(classification_report(y_true, y_pred))\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "RFNW-rTlh05T",
+ "outputId": "07004d18-dbee-4d20-8464-577d01a5eec3"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\n",
+ "Random Forest Evaluation:\n",
+ "==============================\n",
+ " Accuracy : 0.99963719\n",
+ " Precision Score: 0.94117647\n",
+ " Recall Score : 0.82352941\n",
+ " F1 Score : 0.87843137\n",
+ "\n",
+ "Classification Report:\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 1.00 1.00 1.00 85307\n",
+ " 1 0.94 0.82 0.88 136\n",
+ "\n",
+ " accuracy 1.00 85443\n",
+ " macro avg 0.97 0.91 0.94 85443\n",
+ "weighted avg 1.00 1.00 1.00 85443\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "#basic\n",
+ "\n",
+ "X = data.drop(columns=['Class']).values\n",
+ "y = data['Class'].values\n",
+ "\n",
+ "#splite\n",
+ "X_train, X_test, y_train, y_test = train_test_split(\n",
+ " X, y, test_size=TEST_SIZE, random_state=RANDOM_SEED)\n",
+ "\n",
+ "#rf model\n",
+ "rf = RandomForestClassifier(n_estimators=100, random_state=RANDOM_SEED)\n",
+ "rf.fit(X_train, y_train)\n",
+ "\n",
+ "#output\n",
+ "y_pred_rf = rf.predict(X_test)\n",
+ "evaluation(y_test, y_pred_rf, model_name=\"Random Forest\")\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "Und5nd7p6GX7",
+ "outputId": "fcac0b74-22ea-4b0b-8f5b-f821b573c54f"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Requirement already satisfied: xgboost in /usr/local/lib/python3.11/dist-packages (2.1.4)\n",
+ "Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from xgboost) (2.0.2)\n",
+ "Requirement already satisfied: nvidia-nccl-cu12 in /usr/local/lib/python3.11/dist-packages (from xgboost) (2.21.5)\n",
+ "Requirement already satisfied: scipy in /usr/local/lib/python3.11/dist-packages (from xgboost) (1.15.3)\n"
+ ]
+ }
+ ],
+ "source": [
+ "!pip install xgboost"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "rRgM6dX713kB",
+ "outputId": "1c85834e-8ab1-4c0b-d5f1-b8373f18d10b"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\n",
+ "XGBoost Evaluation:\n",
+ "==============================\n",
+ " Accuracy : 0.99969570\n",
+ " Precision Score: 0.94354839\n",
+ " Recall Score : 0.86029412\n",
+ " F1 Score : 0.90000000\n",
+ "\n",
+ "Classification Report:\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 1.00 1.00 1.00 85307\n",
+ " 1 0.94 0.86 0.90 136\n",
+ "\n",
+ " accuracy 1.00 85443\n",
+ " macro avg 0.97 0.93 0.95 85443\n",
+ "weighted avg 1.00 1.00 1.00 85443\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "#XGBoost\n",
+ "from xgboost import XGBClassifier\n",
+ "\n",
+ "xgb = XGBClassifier(\n",
+ " n_estimators=600,\n",
+ " max_depth=8,\n",
+ " learning_rate=0.1,\n",
+ " scale_pos_weight=8,\n",
+ " random_state=RANDOM_SEED,\n",
+ " #use_label_encoder=False,\n",
+ " eval_metric='logloss'\n",
+ ")\n",
+ "xgb.fit(X_train, y_train.ravel())\n",
+ "y_pred_xgb = xgb.predict(X_test)\n",
+ "evaluation(y_test, y_pred_xgb, model_name=\"XGBoost\")\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "8EPc27Reh4u7",
+ "outputId": "92123952-47f2-470f-af00-1f15e4f40579"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\n",
+ "KMeans (Unsupervised) Evaluation:\n",
+ "==============================\n",
+ " Accuracy : 0.99872430\n",
+ " Precision Score: 0.78260870\n",
+ " Recall Score : 0.36486486\n",
+ " F1 Score : 0.49769585\n",
+ "\n",
+ "Classification Report:\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 1.00 1.00 1.00 85295\n",
+ " 1 0.78 0.36 0.50 148\n",
+ "\n",
+ " accuracy 1.00 85443\n",
+ " macro avg 0.89 0.68 0.75 85443\n",
+ "weighted avg 1.00 1.00 1.00 85443\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "#Kmeans\n",
+ "X = np.asarray(data.drop(columns=['Class']))\n",
+ "y = np.asarray(data['Class'])\n",
+ "\n",
+ "#Split\n",
+ "x_train, x_test, y_train, y_test = train_test_split(\n",
+ " X, y, test_size=TEST_SIZE, random_state=RANDOM_SEED, stratify=y\n",
+ ")\n",
+ "\n",
+ "#ragular\n",
+ "scaler = StandardScaler()\n",
+ "x_train = scaler.fit_transform(x_train)\n",
+ "x_test = scaler.transform(x_test)\n",
+ "\n",
+ "#train KMeans\n",
+ "n_x_train = x_train[y_train == 0]\n",
+ "n_x_train = n_x_train[:1000]\n",
+ "\n",
+ "#find best k\n",
+ "scores = []\n",
+ "for k in range(2, 5):\n",
+ " kmeans = KMeans(n_clusters=k, init='k-means++', random_state=RANDOM_SEED)\n",
+ " kmeans.fit(n_x_train)\n",
+ " score = silhouette_score(n_x_train, kmeans.labels_)\n",
+ " scores.append(score)\n",
+ "optimal_k = np.argmax(scores) + 2\n",
+ "\n",
+ "#train best k\n",
+ "kmeans = KMeans(n_clusters=optimal_k, init='k-means++', random_state=RANDOM_SEED)\n",
+ "kmeans.fit(n_x_train)\n",
+ "y_pred_test = kmeans.predict(x_test)\n",
+ "\n",
+ "def align_labels(y_true, y_pred, n_clusters):\n",
+ " labels = np.zeros_like(y_pred)\n",
+ " for i in range(n_clusters):\n",
+ " mask = (y_pred == i)\n",
+ " if np.sum(mask) > 0:\n",
+ " labels[mask] = np.bincount(y_true[mask]).argmax()\n",
+ " else:\n",
+ " labels[mask] = 0 # Default to normal class\n",
+ " return labels\n",
+ "\n",
+ "y_pred_aligned = align_labels(y_test, y_pred_test, optimal_k)\n",
+ "\n",
+ "\n",
+ "evaluation(y_test, y_pred_aligned, model_name=\"KMeans (Unsupervised)\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "!pip install --upgrade --force-reinstall --no-cache-dir jax jaxlib\n"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "FLSMZ82PaQDh",
+ "outputId": "e801ab4e-9b6f-4c6e-e5ec-4a613733f45b",
+ "collapsed": true
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Collecting jax\n",
+ " Downloading jax-0.6.1-py3-none-any.whl.metadata (13 kB)\n",
+ "Collecting jaxlib\n",
+ " Downloading jaxlib-0.6.1-cp311-cp311-manylinux2014_x86_64.whl.metadata (1.2 kB)\n",
+ "Collecting ml_dtypes>=0.5.0 (from jax)\n",
+ " Downloading ml_dtypes-0.5.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (21 kB)\n",
+ "Collecting numpy>=1.25 (from jax)\n",
+ " Downloading numpy-2.2.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.0/62.0 kB\u001b[0m \u001b[31m28.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hCollecting opt_einsum (from jax)\n",
+ " Downloading opt_einsum-3.4.0-py3-none-any.whl.metadata (6.3 kB)\n",
+ "Collecting scipy>=1.11.1 (from jax)\n",
+ " Downloading scipy-1.15.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.0/62.0 kB\u001b[0m \u001b[31m141.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading jax-0.6.1-py3-none-any.whl (2.4 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.4/2.4 MB\u001b[0m \u001b[31m50.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading jaxlib-0.6.1-cp311-cp311-manylinux2014_x86_64.whl (89.1 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m89.1/89.1 MB\u001b[0m \u001b[31m138.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading ml_dtypes-0.5.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.7 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.7/4.7 MB\u001b[0m \u001b[31m170.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading numpy-2.2.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.8 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m16.8/16.8 MB\u001b[0m \u001b[31m137.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading scipy-1.15.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (37.7 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m37.7/37.7 MB\u001b[0m \u001b[31m60.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading opt_einsum-3.4.0-py3-none-any.whl (71 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.9/71.9 kB\u001b[0m \u001b[31m126.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hInstalling collected packages: opt_einsum, numpy, scipy, ml_dtypes, jaxlib, jax\n",
+ " Attempting uninstall: opt_einsum\n",
+ " Found existing installation: opt_einsum 3.4.0\n",
+ " Uninstalling opt_einsum-3.4.0:\n",
+ " Successfully uninstalled opt_einsum-3.4.0\n",
+ " Attempting uninstall: numpy\n",
+ " Found existing installation: numpy 2.2.6\n",
+ " Uninstalling numpy-2.2.6:\n",
+ " Successfully uninstalled numpy-2.2.6\n",
+ " Attempting uninstall: scipy\n",
+ " Found existing installation: scipy 1.15.3\n",
+ " Uninstalling scipy-1.15.3:\n",
+ " Successfully uninstalled scipy-1.15.3\n",
+ " Attempting uninstall: ml_dtypes\n",
+ " Found existing installation: ml_dtypes 0.5.1\n",
+ " Uninstalling ml_dtypes-0.5.1:\n",
+ " Successfully uninstalled ml_dtypes-0.5.1\n",
+ " Attempting uninstall: jaxlib\n",
+ " Found existing installation: jaxlib 0.6.1\n",
+ " Uninstalling jaxlib-0.6.1:\n",
+ " Successfully uninstalled jaxlib-0.6.1\n",
+ " Attempting uninstall: jax\n",
+ " Found existing installation: jax 0.6.1\n",
+ " Uninstalling jax-0.6.1:\n",
+ " Successfully uninstalled jax-0.6.1\n",
+ "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+ "tensorflow 2.18.0 requires ml-dtypes<0.5.0,>=0.4.0, but you have ml-dtypes 0.5.1 which is incompatible.\n",
+ "tensorflow 2.18.0 requires numpy<2.1.0,>=1.26.0, but you have numpy 2.2.6 which is incompatible.\n",
+ "numba 0.60.0 requires numpy<2.1,>=1.22, but you have numpy 2.2.6 which is incompatible.\u001b[0m\u001b[31m\n",
+ "\u001b[0mSuccessfully installed jax-0.6.1 jaxlib-0.6.1 ml_dtypes-0.5.1 numpy-2.2.6 opt_einsum-3.4.0 scipy-1.15.3\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "#MY_KMeans\n",
+ "\n",
+ "normal = x_train[y_train == 0][:800]\n",
+ "fraud = x_train[y_train == 1][:200]\n",
+ "n_x_train = np.vstack([normal, fraud])\n",
+ "\n",
+ "# find k\n",
+ "scores = []\n",
+ "for k in range(2, 5):\n",
+ " kmeans = KMeans(n_clusters=k, init='k-means++', random_state=RANDOM_SEED)\n",
+ " kmeans.fit(n_x_train)\n",
+ " score = silhouette_score(n_x_train, kmeans.labels_)\n",
+ " scores.append(score)\n",
+ "optimal_k = np.argmax(scores) + 2\n",
+ "\n",
+ "#train with k\n",
+ "kmeans = KMeans(n_clusters=optimal_k, init='k-means++', random_state=RANDOM_SEED)\n",
+ "kmeans.fit(n_x_train)\n",
+ "\n",
+ "\n",
+ "y_pred_test = kmeans.predict(x_test)\n",
+ "def align_labels(y_true, y_pred, n_clusters):\n",
+ " labels = np.zeros_like(y_pred)\n",
+ " for i in range(n_clusters):\n",
+ " mask = (y_pred == i)\n",
+ " if np.sum(mask) > 0:\n",
+ " labels[mask] = np.bincount(y_true[mask]).argmax()\n",
+ " else:\n",
+ " labels[mask] = 0\n",
+ " return labels\n",
+ "\n",
+ "y_pred_aligned = align_labels(y_test, y_pred_test, optimal_k)\n",
+ "\n",
+ "\n",
+ "evaluation(y_test, y_pred_aligned, model_name=\"MY_KMeans\")\n",
+ "\n",
+ "\n"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "Va-AkmnhUWO3",
+ "outputId": "b63f7573-ddd4-4194-fd2d-73b4133a7579"
+ },
+ "execution_count": 16,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\n",
+ "MY_KMeans Evaluation:\n",
+ "==============================\n",
+ " Accuracy : 0.99897007\n",
+ " Precision Score: 0.83333333\n",
+ " Recall Score : 0.50675676\n",
+ " F1 Score : 0.63025210\n",
+ "\n",
+ "Classification Report:\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 1.00 1.00 1.00 85295\n",
+ " 1 0.83 0.51 0.63 148\n",
+ "\n",
+ " accuracy 1.00 85443\n",
+ " macro avg 0.92 0.75 0.81 85443\n",
+ "weighted avg 1.00 1.00 1.00 85443\n",
+ "\n"
+ ]
+ }
+ ]
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "provenance": [],
+ "authorship_tag": "ABX9TyNxw6KMHh2yrVUGkJGepcz1",
+ "include_colab_link": true
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/ACS111150_ex1/ex1.md b/ACS111150_ex1/ex1.md
new file mode 100644
index 0000000..ab4121d
--- /dev/null
+++ b/ACS111150_ex1/ex1.md
@@ -0,0 +1,107 @@
+# 信用卡詐欺檢測報告
+
+## 1. 監督式學習:XGBoost
+
+**參數設定**
+- `n_estimators=600`
+- `max_depth=8`
+- `learning_rate=0.1`
+- `scale_pos_weight=8`
+- `random_state=42`
+- `eval_metric='logloss'`
+
+> 調高 `n_estimators` 與 `max_depth`,並加大 `scale_pos_weight` 以處理嚴重不平衡問題。
+
+### 範例結果(Random Forest 基準)
+
+```
+Random Forest Evaluation:
+--------------------------------------------
+Accuracy : 0.99963719
+Precision Score: 0.94117647
+Recall Score : 0.82352941
+F1 Score : 0.87843137
+
+Classification Report:
+ precision recall f1-score support
+
+ 0 1.00 1.00 1.00 85307
+ 1 0.94 0.82 0.88 136
+
+ accuracy 1.00 85443
+ macro avg 0.97 0.91 0.94 85443
+weighted avg 1.00 1.00 1.00 85443
+--------------------------------------------
+```
+
+### 實作結果
+
+```
+XGBoost Evaluation:
+--------------------------------------------
+Accuracy : 0.99969570
+Precision Score: 0.94354839
+Recall Score : 0.86029412
+F1 Score : 0.90000000
+
+Classification Report:
+ precision recall f1-score support
+
+ 0 1.00 1.00 1.00 85307
+ 1 0.94 0.86 0.90 136
+
+ accuracy 1.00 85443
+ macro avg 0.97 0.93 0.95 85443
+weighted avg 1.00 1.00 1.00 85443
+--------------------------------------------
+```
+
+---
+
+## 2. 非監督式學習:KMeans
+
+> **改動**:原本只用正常樣本訓練,改成取 800 筆正常樣本與 200 筆詐騙樣本共同訓練。
+
+### 範例結果
+
+```
+KMeans (Unsupervised) Evaluation:
+--------------------------------------------
+Accuracy : 0.99872430
+Precision Score: 0.78260870
+Recall Score : 0.36486486
+F1 Score : 0.49769585
+
+Classification Report:
+ precision recall f1-score support
+
+ 0 1.00 1.00 1.00 85295
+ 1 0.78 0.36 0.50 148
+
+ accuracy 1.00 85443
+ macro avg 0.89 0.68 0.75 85443
+weighted avg 1.00 1.00 1.00 85443
+--------------------------------------------
+```
+
+### 實作結果
+
+```
+MY_KMeans Evaluation:
+--------------------------------------------
+Accuracy : 0.99897007
+Precision Score: 0.83333333
+Recall Score : 0.50675676
+F1 Score : 0.63025210
+
+Classification Report:
+ precision recall f1-score support
+
+ 0 1.00 1.00 1.00 85295
+ 1 0.83 0.51 0.63 148
+
+ accuracy 1.00 85443
+ macro avg 0.92 0.75 0.81 85443
+weighted avg 1.00 1.00 1.00 85443
+--------------------------------------------
+```
diff --git a/ACS111150_ex2/ex2.ipynb b/ACS111150_ex2/ex2.ipynb
new file mode 100644
index 0000000..2b2b676
--- /dev/null
+++ b/ACS111150_ex2/ex2.ipynb
@@ -0,0 +1,280 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": [],
+ "authorship_tag": "ABX9TyM2eyDjzA73AcHvxuaEcDvZ",
+ "include_colab_link": true
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github",
+ "colab_type": "text"
+ },
+ "source": [
+ ""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "oMCrMKZxURfg",
+ "outputId": "27147b6f-96f3-4ead-f122-a18e533af342"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Requirement already satisfied: xgboost in /usr/local/lib/python3.11/dist-packages (2.1.4)\n",
+ "Collecting xgboost\n",
+ " Downloading xgboost-3.0.2-py3-none-manylinux_2_28_x86_64.whl.metadata (2.1 kB)\n",
+ "Requirement already satisfied: imbalanced-learn in /usr/local/lib/python3.11/dist-packages (0.13.0)\n",
+ "Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from xgboost) (2.0.2)\n",
+ "Requirement already satisfied: nvidia-nccl-cu12 in /usr/local/lib/python3.11/dist-packages (from xgboost) (2.21.5)\n",
+ "Requirement already satisfied: scipy in /usr/local/lib/python3.11/dist-packages (from xgboost) (1.15.3)\n",
+ "Requirement already satisfied: scikit-learn<2,>=1.3.2 in /usr/local/lib/python3.11/dist-packages (from imbalanced-learn) (1.6.1)\n",
+ "Requirement already satisfied: sklearn-compat<1,>=0.1 in /usr/local/lib/python3.11/dist-packages (from imbalanced-learn) (0.1.3)\n",
+ "Requirement already satisfied: joblib<2,>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from imbalanced-learn) (1.5.0)\n",
+ "Requirement already satisfied: threadpoolctl<4,>=2.0.0 in /usr/local/lib/python3.11/dist-packages (from imbalanced-learn) (3.6.0)\n",
+ "Downloading xgboost-3.0.2-py3-none-manylinux_2_28_x86_64.whl (253.9 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m253.9/253.9 MB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hInstalling collected packages: xgboost\n",
+ " Attempting uninstall: xgboost\n",
+ " Found existing installation: xgboost 2.1.4\n",
+ " Uninstalling xgboost-2.1.4:\n",
+ " Successfully uninstalled xgboost-2.1.4\n",
+ "Successfully installed xgboost-3.0.2\n"
+ ]
+ }
+ ],
+ "source": [
+ "\n",
+ "!pip install --upgrade xgboost imbalanced-learn\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "from sklearn.model_selection import train_test_split, GridSearchCV\n",
+ "from sklearn.preprocessing import StandardScaler\n",
+ "from sklearn.ensemble import IsolationForest\n",
+ "from xgboost import XGBClassifier\n",
+ "from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report\n",
+ "import kagglehub"
+ ],
+ "metadata": {
+ "id": "m67jikTkUVDu"
+ },
+ "execution_count": 3,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def evaluation(y_true, y_pred, model_name=\"Model\"):\n",
+ " acc = accuracy_score(y_true, y_pred)\n",
+ " prec = precision_score(y_true, y_pred, zero_division=0)\n",
+ " rec = recall_score(y_true, y_pred)\n",
+ " f1 = f1_score(y_true, y_pred)\n",
+ " print(f\"\\n{model_name} Evaluation:\")\n",
+ " print(\"=\" * 40)\n",
+ " print(f\" Accuracy : {acc:.8f}\")\n",
+ " print(f\" Precision Score: {prec:.8f}\")\n",
+ " print(f\" Recall Score : {rec:.8f}\")\n",
+ " print(f\" F1 Score : {f1:.8f}\\n\")\n",
+ " print(\"Classification Report:\")\n",
+ " print(classification_report(y_true, y_pred))"
+ ],
+ "metadata": {
+ "id": "6mY9Dv0nUdUP"
+ },
+ "execution_count": 4,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def load_data():\n",
+ " path = kagglehub.dataset_download(\"mlg-ulb/creditcardfraud\")\n",
+ " data = pd.read_csv(f\"{path}/creditcard.csv\")\n",
+ " data['Class'] = data['Class'].astype(int)\n",
+ " data.drop(['Time'], axis=1, inplace=True)\n",
+ " data['Amount'] = StandardScaler().fit_transform(\n",
+ " data['Amount'].values.reshape(-1, 1))\n",
+ " return data\n",
+ "\n",
+ "# Load and split\n",
+ "RANDOM_SEED = 42\n",
+ "TEST_SIZE = 0.3\n",
+ "\n",
+ "data = load_data()\n",
+ "X = data.drop(columns=['Class']).values\n",
+ "y = data['Class'].values\n",
+ "x_train, x_test, y_train, y_test = train_test_split(\n",
+ " X, y, test_size=TEST_SIZE, random_state=RANDOM_SEED, stratify=y)"
+ ],
+ "metadata": {
+ "id": "QggrMsSNUdbf"
+ },
+ "execution_count": 5,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "iso = IsolationForest(contamination=0.002, random_state=RANDOM_SEED)\n",
+ "iso.fit(x_train[y_train == 0])\n",
+ "score_train = iso.decision_function(x_train)\n",
+ "score_test = iso.decision_function(x_test)\n",
+ "\n",
+ "x_train_feat = np.hstack([x_train, score_train.reshape(-1,1)])\n",
+ "x_test_feat = np.hstack([x_test, score_test.reshape(-1,1)])"
+ ],
+ "metadata": {
+ "id": "rmGHK9f8UdeX"
+ },
+ "execution_count": 7,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "param_grid = {\n",
+ " 'n_estimators': [200, 400],\n",
+ " 'max_depth': [5, 8],\n",
+ " 'learning_rate': [0.05, 0.1],\n",
+ " 'scale_pos_weight': [10, 20]\n",
+ "}\n",
+ "\n",
+ "\n",
+ "xgb = XGBClassifier(\n",
+ " tree_method='hist', # <-- changed here: use CPU 'hist' instead of 'gpu_hist'\n",
+ " eval_metric='logloss',\n",
+ " scale_pos_weight=0.172,\n",
+ " random_state=RANDOM_SEED\n",
+ ")\n",
+ "grid = GridSearchCV(\n",
+ " xgb, param_grid, scoring='f1', cv=3, n_jobs=-1, verbose=1\n",
+ ")\n",
+ "\n",
+ "grid.fit(x_train_feat, y_train)\n",
+ "best_model = grid.best_estimator_\n",
+ "print(\"Best parameters:\", grid.best_params_)\n",
+ "\n",
+ "# Default threshold (0.5)\n",
+ "y_pred_default = best_model.predict(x_test_feat)\n",
+ "evaluation(y_test, y_pred_default, model_name=\"DefaultThreshold\")"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "collapsed": true,
+ "id": "GS6juA2iUmMH",
+ "outputId": "052864db-5851-416e-ce5e-58a503729210"
+ },
+ "execution_count": 16,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Fitting 3 folds for each of 16 candidates, totalling 48 fits\n",
+ "Best parameters: {'learning_rate': 0.1, 'max_depth': 8, 'n_estimators': 400, 'scale_pos_weight': 10}\n",
+ "\n",
+ "DefaultThreshold Evaluation:\n",
+ "========================================\n",
+ " Accuracy : 0.99952015\n",
+ " Precision Score: 0.92125984\n",
+ " Recall Score : 0.79054054\n",
+ " F1 Score : 0.85090909\n",
+ "\n",
+ "Classification Report:\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 1.00 1.00 1.00 85295\n",
+ " 1 0.92 0.79 0.85 148\n",
+ "\n",
+ " accuracy 1.00 85443\n",
+ " macro avg 0.96 0.90 0.93 85443\n",
+ "weighted avg 1.00 1.00 1.00 85443\n",
+ "\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "y_proba = best_model.predict_proba(x_test_feat)[:,1]\n",
+ "\n",
+ "# 門檻掃描\n",
+ "best_f1, best_thresh = 0, 0.5\n",
+ "for t in np.arange(0.1, 0.9, 0.01):\n",
+ " preds = (y_proba > t).astype(int)\n",
+ " f1 = f1_score(y_test, preds)\n",
+ " if f1 > best_f1:\n",
+ " best_f1, best_thresh = f1, t\n",
+ "\n",
+ "print(f\"Best F1: {best_f1:.5f} at threshold: {best_thresh:.2f}\")\n",
+ "\n",
+ "# 使用最佳門檻\n",
+ "y_pred_tuned = (y_proba > best_thresh).astype(int)\n",
+ "evaluation(y_test, y_pred_tuned, model_name=f\"Threshold {best_thresh:.2f}\")"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "FxsNZgP_UmPP",
+ "outputId": "dc613e99-4cc1-472c-f21b-7985c15e3bc1"
+ },
+ "execution_count": 17,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Best F1: 0.85507 at threshold: 0.42\n",
+ "\n",
+ "Threshold 0.42 Evaluation:\n",
+ "========================================\n",
+ " Accuracy : 0.99953185\n",
+ " Precision Score: 0.92187500\n",
+ " Recall Score : 0.79729730\n",
+ " F1 Score : 0.85507246\n",
+ "\n",
+ "Classification Report:\n",
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 1.00 1.00 1.00 85295\n",
+ " 1 0.92 0.80 0.86 148\n",
+ "\n",
+ " accuracy 1.00 85443\n",
+ " macro avg 0.96 0.90 0.93 85443\n",
+ "weighted avg 1.00 1.00 1.00 85443\n",
+ "\n"
+ ]
+ }
+ ]
+ }
+ ]
+}
diff --git a/ACS111150_ex2/ex2.md b/ACS111150_ex2/ex2.md
new file mode 100644
index 0000000..fbf9c03
--- /dev/null
+++ b/ACS111150_ex2/ex2.md
@@ -0,0 +1,97 @@
+# 挑戰二:融合異常檢測與監督式學習於信用卡詐欺偵測
+
+---
+
+## Isolation Forest
+
+1. 建立 Isolation Forest,僅以正常樣本訓練:
+ ```python
+ iso = IsolationForest(contamination=0.002, random_state=42)
+ iso.fit(x_train[y_train==0])
+ ```
+2. 計算 anomaly score(連續值):
+ ```python
+ anomaly_train = iso.decision_function(x_train).reshape(-1,1)
+ anomaly_test = iso.decision_function(x_test).reshape(-1,1)
+ ```
+3. 合併至特徵矩陣:
+ ```python
+ x_train_feat = np.hstack([x_train, anomaly_train])
+ x_test_feat = np.hstack([x_test, anomaly_test])
+ ```
+
+---
+
+## XGBoost
+
+- **模型**:`XGBClassifier(tree_method='hist', eval_metric='logloss', random_state=42)`
+- **參數網格**:
+ ```yaml
+ n_estimators: [200, 400]
+ max_depth: [5, 8]
+  learning_rate: [0.05, 0.1]
+ scale_pos_weight: [10, 20]
+ ```
+- **交叉驗證**:3 折 (cv=3),以 F1 score 作為搜尋目標
+
+```python
+grid = GridSearchCV(
+ estimator=xgb,
+ param_grid=param_grid,
+ scoring='f1',
+ cv=3,
+ n_jobs=-1,
+ verbose=1
+)
+grid.fit(x_train_feat, y_train)
+best_model = grid.best_estimator_
+print("Best parameters:", grid.best_params_)
+```
+
+### 預設閾值 (0.5) 評估
+
+```python
+y_pred_default = best_model.predict(x_test_feat)
+evaluation(y_test, y_pred_default, model_name="DefaultThreshold")
+```
+
+```
+DefaultThreshold Evaluation:
+========================================
+Accuracy : 0.99952015
+Precision Score: 0.92125984
+Recall Score : 0.79054054
+F1 Score : 0.85090909
+...
+```
+
+---
+
+## 門檻調整 (Threshold Tuning)
+
+- 掃描閾值範圍 [0.1, 0.9),每 0.01 為一步,選出最佳 F1 分數對應之閾值:
+
+```python
+y_proba = best_model.predict_proba(x_test_feat)[:,1]
+best_f1, best_thresh = 0, 0.5
+for t in np.arange(0.1, 0.9, 0.01):
+ preds = (y_proba > t).astype(int)
+ f1 = f1_score(y_test, preds)
+ if f1 > best_f1:
+ best_f1, best_thresh = f1, t
+print(f"Best F1={best_f1:.5f} at threshold={best_thresh:.2f}")
+y_pred_tuned = (y_proba > best_thresh).astype(int)
+evaluation(y_test, y_pred_tuned, model_name=f"Threshold {best_thresh:.2f}")
+```
+
+```
+Threshold 0.42 Evaluation:
+========================================
+Accuracy : 0.99953185
+Precision Score: 0.92187500
+Recall Score : 0.79729730
+F1 Score : 0.85507246
+...
+```
+
+---