From a63ba2dbc06fd1488d74a34df69cab821c756244 Mon Sep 17 00:00:00 2001 From: Luis <56727115+luistatera@users.noreply.github.com> Date: Tue, 20 May 2025 21:31:05 +0200 Subject: [PATCH 1/4] Refactor code structure for improved readability and maintainability --- .DS_Store | Bin 0 -> 6148 bytes jn-fake-news.ipynb | 383 ++++ predictions.csv | 4957 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 5340 insertions(+) create mode 100644 .DS_Store create mode 100644 jn-fake-news.ipynb create mode 100644 predictions.csv diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 GIT binary patch literal 6148 zcmeH~Jr2S!425mzP>H1@V-^m;4Wg<&0T*E43hX&L&p$$qDprKhvt+--jT7}7np#A3 zem<@ulZcFPQ@L2!n>{z**++&mCkOWA81W14cNZlEfg7;MkzE(HCqgga^y>{tEnwC%0;vJ&^%eQ zLs35+`xjp>T0=1.22.4 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from pandas) (1.26.3)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from pandas) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from pandas) (2025.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from pandas) (2025.2)\n", + "Requirement already satisfied: six>=1.5 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.1.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49m/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install 
--upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "Defaulting to user installation because normal site-packages is not writeable\n", + "Requirement already satisfied: matplotlib in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (3.9.4)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib) (1.3.0)\n", + "Requirement already satisfied: cycler>=0.10 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib) (4.58.0)\n", + "Requirement already satisfied: kiwisolver>=1.3.1 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib) (1.4.7)\n", + "Requirement already satisfied: numpy>=1.23 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib) (1.26.3)\n", + "Requirement already satisfied: packaging>=20.0 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib) (24.2)\n", + "Requirement already satisfied: pillow>=8 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib) (11.2.1)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib) (3.2.3)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib) (2.9.0.post0)\n", + "Requirement already satisfied: importlib-resources>=3.2.0 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib) (6.5.2)\n", + "Requirement already satisfied: zipp>=3.1.0 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from importlib-resources>=3.2.0->matplotlib) (3.21.0)\n", + 
"Requirement already satisfied: six>=1.5 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from python-dateutil>=2.7->matplotlib) (1.17.0)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.1.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49m/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "Defaulting to user installation because normal site-packages is not writeable\n", + "Requirement already satisfied: seaborn in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (0.13.2)\n", + "Requirement already satisfied: numpy!=1.24.0,>=1.20 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from seaborn) (1.26.3)\n", + "Requirement already satisfied: pandas>=1.2 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from seaborn) (2.2.3)\n", + "Requirement already satisfied: matplotlib!=3.6.1,>=3.4 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from seaborn) (3.9.4)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (1.3.0)\n", + "Requirement already satisfied: cycler>=0.10 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (4.58.0)\n", + "Requirement already satisfied: kiwisolver>=1.3.1 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from 
matplotlib!=3.6.1,>=3.4->seaborn) (1.4.7)\n", + "Requirement already satisfied: packaging>=20.0 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (24.2)\n", + "Requirement already satisfied: pillow>=8 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (11.2.1)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (3.2.3)\n", + "Requirement already satisfied: python-dateutil>=2.7 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (2.9.0.post0)\n", + "Requirement already satisfied: importlib-resources>=3.2.0 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (6.5.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from pandas>=1.2->seaborn) (2025.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from pandas>=1.2->seaborn) (2025.2)\n", + "Requirement already satisfied: zipp>=3.1.0 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from importlib-resources>=3.2.0->matplotlib!=3.6.1,>=3.4->seaborn) (3.21.0)\n", + "Requirement already satisfied: six>=1.5 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from python-dateutil>=2.7->matplotlib!=3.6.1,>=3.4->seaborn) (1.17.0)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.1.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: 
\u001b[0m\u001b[32;49m/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "Defaulting to user installation because normal site-packages is not writeable\n", + "Requirement already satisfied: scikit-learn in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (1.6.1)\n", + "Requirement already satisfied: numpy>=1.19.5 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from scikit-learn) (1.26.3)\n", + "Requirement already satisfied: scipy>=1.6.0 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from scikit-learn) (1.13.1)\n", + "Requirement already satisfied: joblib>=1.2.0 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from scikit-learn) (1.5.0)\n", + "Requirement already satisfied: threadpoolctl>=3.1.0 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from scikit-learn) (3.6.0)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.1.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49m/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip install pandas\n", + "import pandas as pd\n", + "import numpy as np\n", + "%pip install matplotlib\n", + "import matplotlib.pyplot as plt\n", + "%pip install seaborn\n", + "import seaborn as sns\n", + "import re\n", + "import string\n", + "\n", + "%pip install scikit-learn\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.feature_extraction.text import TfidfVectorizer\n", + "from sklearn.linear_model 
import LogisticRegression\n", + "from sklearn.metrics import classification_report, confusion_matrix\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "9b5d0a83", + "metadata": {}, + "source": [ + "# 3. Load Data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "88a6ab38", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2]\n" + ] + } + ], + "source": [ + "df = pd.read_csv(\"dataset/data.csv\")\n", + "val_df = pd.read_csv(\"dataset/validation_data.csv\")\n", + "df.head()\n", + "val_df.head()\n", + "# print(val_df['label'].unique())\n" + ] + }, + { + "cell_type": "markdown", + "id": "6937c16d", + "metadata": {}, + "source": [ + "# Cleaning up the data" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "b441f8e6", + "metadata": {}, + "outputs": [], + "source": [ + "# remove duplicates\n", + "df = df.drop_duplicates(subset=['text'])\n", + "# remove empty rows\n", + "df = df.dropna(subset=['text'])\n", + "# remove rows with empty labels\n", + "df = df.dropna(subset=['label'])\n", + "# remove rows with empty text\n", + "df = df[df['text'].str.strip() != '']\n", + "# remove rows with empty labels\n", + "df['label'] = df['label'].astype(str)\n", + "df = df[df['label'].str.strip() != '']" + ] + }, + { + "cell_type": "markdown", + "id": "879ec064", + "metadata": {}, + "source": [ + "# 4. 
Preprocessing" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "9a740ccc", + "metadata": {}, + "outputs": [], + "source": [ + "def clean_text(text):\n", + " text = text.lower()\n", + " text = re.sub(r'\\[.*?\\]', '', text)\n", + " text = re.sub(r'http\\S+|www\\S+|https\\S+', '', text)\n", + " text = re.sub(r'<.*?>+', '', text)\n", + " text = re.sub(r'[%s]' % re.escape(string.punctuation), '', text)\n", + " text = re.sub(r'\\n', '', text)\n", + " text = re.sub(r'\\w*\\d\\w*', '', text)\n", + " return text\n", + "\n", + "df['text_clean'] = df['title'] + \" \" + df['text']\n", + "df['text_clean'] = df['text_clean'].apply(clean_text)\n" + ] + }, + { + "cell_type": "markdown", + "id": "b88121d1", + "metadata": {}, + "source": [ + "# 5. Data Exploration" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "55e16def", + "metadata": {}, + "outputs": [], + "source": [ + "df.info()\n", + "df['label'].value_counts().plot(kind='bar', title='Class Balance')\n" + ] + }, + { + "cell_type": "markdown", + "id": "6f814b30", + "metadata": {}, + "source": [ + "# 6. Train/Test Split" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4679c0d5", + "metadata": {}, + "outputs": [], + "source": [ + "X = df['title'] + \" \" + df['text']\n", + "y = df['label']\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n" + ] + }, + { + "cell_type": "markdown", + "id": "e3c79fc6", + "metadata": {}, + "source": [ + "# 7. 
Vectorization + Model Training" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "5741a809", + "metadata": {}, + "outputs": [], + "source": [ + "vectorizer = TfidfVectorizer(max_features=5000)\n", + "X_train_vec = vectorizer.fit_transform(X_train)\n", + "X_test_vec = vectorizer.transform(X_test)\n", + "\n", + "model = LogisticRegression()\n", + "model.fit(X_train_vec, y_train)\n", + "\n", + "y_pred = model.predict(X_test_vec)\n" + ] + }, + { + "cell_type": "markdown", + "id": "9706ead4", + "metadata": {}, + "source": [ + "# 8. Evaluation" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "e5b52270", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.99 0.99 0.99 3996\n", + " 1 0.99 0.99 0.99 3993\n", + "\n", + " accuracy 0.99 7989\n", + " macro avg 0.99 0.99 0.99 7989\n", + "weighted avg 0.99 0.99 0.99 7989\n", + "\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "print(classification_report(y_test, y_pred))\n", + "sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt='d')\n" + ] + }, + { + "cell_type": "markdown", + "id": "e050b32c", + "metadata": {}, + "source": [ + "# 9. Predict Validation Set" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "d4f99cf7", + "metadata": {}, + "outputs": [], + "source": [ + "val_df['text_clean'] = (val_df['title'] + \" \" + val_df['text']).apply(clean_text)\n", + "X_val_vec = vectorizer.transform(val_df['text_clean'])\n", + "val_df['label'] = model.predict(X_val_vec)\n", + "\n", + "# Save predictions\n", + "val_df.reset_index(inplace=True)\n", + "val_df[['index', 'label']].to_csv(\"predictions.csv\", index=False)\n" + ] + }, + { + "cell_type": "markdown", + "id": "a78dd8c6", + "metadata": {}, + "source": [ + "# 10. Final Notes" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "e50e33ea", + "metadata": {}, + "outputs": [], + "source": [ + "# Estimated model accuracy: ~XX%\n", + "# Next steps: Improve with different models, grid search, or embeddings (if time allows).\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/predictions.csv b/predictions.csv new file mode 100644 index 0000000..9328673 --- /dev/null +++ b/predictions.csv @@ -0,0 +1,4957 @@ +index,label +0,1 +1,1 +2,1 +3,1 +4,1 +5,1 +6,1 +7,1 +8,1 +9,1 +10,1 +11,1 +12,1 +13,1 +14,1 +15,1 +16,1 +17,1 +18,1 +19,1 +20,1 +21,1 +22,1 +23,1 +24,1 +25,1 +26,1 +27,1 +28,1 +29,1 +30,1 +31,1 +32,1 +33,1 +34,1 +35,1 +36,1 +37,1 
+38,1 +39,1 +40,1 +41,1 +42,1 +43,1 +44,1 +45,1 +46,1 +47,1 +48,1 +49,1 +50,1 +51,1 +52,1 +53,1 +54,1 +55,1 +56,1 +57,1 +58,1 +59,1 +60,1 +61,1 +62,1 +63,1 +64,1 +65,1 +66,1 +67,1 +68,1 +69,1 +70,1 +71,1 +72,1 +73,1 +74,1 +75,1 +76,1 +77,1 +78,1 +79,1 +80,1 +81,1 +82,1 +83,1 +84,1 +85,1 +86,1 +87,1 +88,1 +89,1 +90,1 +91,1 +92,1 +93,1 +94,1 +95,1 +96,1 +97,1 +98,1 +99,1 +100,1 +101,1 +102,1 +103,1 +104,1 +105,1 +106,1 +107,1 +108,1 +109,1 +110,1 +111,1 +112,1 +113,1 +114,1 +115,1 +116,1 +117,1 +118,1 +119,1 +120,1 +121,1 +122,1 +123,1 +124,1 +125,1 +126,1 +127,1 +128,1 +129,1 +130,1 +131,1 +132,1 +133,1 +134,1 +135,1 +136,1 +137,1 +138,1 +139,1 +140,1 +141,1 +142,1 +143,1 +144,1 +145,1 +146,1 +147,1 +148,1 +149,1 +150,1 +151,1 +152,1 +153,1 +154,1 +155,1 +156,1 +157,1 +158,1 +159,1 +160,1 +161,1 +162,1 +163,1 +164,1 +165,1 +166,1 +167,1 +168,1 +169,1 +170,1 +171,1 +172,1 +173,1 +174,1 +175,1 +176,1 +177,1 +178,1 +179,1 +180,1 +181,1 +182,1 +183,1 +184,1 +185,1 +186,1 +187,1 +188,1 +189,1 +190,1 +191,1 +192,1 +193,1 +194,1 +195,1 +196,1 +197,1 +198,1 +199,1 +200,1 +201,1 +202,1 +203,1 +204,1 +205,1 +206,1 +207,1 +208,1 +209,1 +210,1 +211,1 +212,1 +213,1 +214,1 +215,1 +216,1 +217,1 +218,1 +219,1 +220,1 +221,1 +222,1 +223,1 +224,1 +225,1 +226,1 +227,1 +228,1 +229,1 +230,1 +231,1 +232,1 +233,1 +234,1 +235,1 +236,1 +237,1 +238,1 +239,1 +240,0 +241,1 +242,1 +243,1 +244,1 +245,1 +246,1 +247,1 +248,1 +249,1 +250,1 +251,1 +252,1 +253,1 +254,1 +255,1 +256,1 +257,1 +258,1 +259,1 +260,1 +261,1 +262,1 +263,1 +264,1 +265,1 +266,1 +267,1 +268,1 +269,1 +270,1 +271,1 +272,1 +273,1 +274,1 +275,1 +276,1 +277,1 +278,1 +279,1 +280,1 +281,1 +282,1 +283,1 +284,1 +285,1 +286,1 +287,1 +288,1 +289,1 +290,1 +291,1 +292,1 +293,1 +294,1 +295,1 +296,1 +297,1 +298,1 +299,1 +300,1 +301,1 +302,1 +303,1 +304,1 +305,1 +306,1 +307,1 +308,1 +309,1 +310,1 +311,1 +312,1 +313,1 +314,1 +315,1 +316,1 +317,1 +318,1 +319,1 +320,1 +321,1 +322,1 +323,1 +324,1 +325,1 +326,1 +327,1 +328,1 +329,1 +330,1 +331,1 
+332,1 +333,1 +334,1 +335,1 +336,1 +337,1 +338,1 +339,1 +340,1 +341,1 +342,1 +343,1 +344,1 +345,1 +346,1 +347,1 +348,1 +349,1 +350,1 +351,1 +352,1 +353,1 +354,1 +355,1 +356,1 +357,1 +358,1 +359,1 +360,1 +361,1 +362,1 +363,1 +364,1 +365,1 +366,1 +367,1 +368,1 +369,1 +370,1 +371,1 +372,1 +373,1 +374,1 +375,1 +376,1 +377,1 +378,1 +379,1 +380,1 +381,1 +382,1 +383,1 +384,1 +385,1 +386,1 +387,1 +388,1 +389,1 +390,1 +391,1 +392,1 +393,1 +394,1 +395,1 +396,1 +397,1 +398,1 +399,1 +400,1 +401,1 +402,1 +403,1 +404,1 +405,1 +406,1 +407,1 +408,1 +409,1 +410,1 +411,1 +412,1 +413,1 +414,1 +415,1 +416,1 +417,1 +418,1 +419,1 +420,1 +421,1 +422,1 +423,1 +424,1 +425,1 +426,1 +427,1 +428,1 +429,1 +430,1 +431,1 +432,0 +433,1 +434,1 +435,1 +436,1 +437,1 +438,1 +439,1 +440,1 +441,1 +442,1 +443,1 +444,1 +445,1 +446,1 +447,1 +448,1 +449,1 +450,1 +451,1 +452,1 +453,1 +454,1 +455,1 +456,1 +457,1 +458,1 +459,1 +460,1 +461,1 +462,1 +463,1 +464,1 +465,1 +466,1 +467,1 +468,1 +469,1 +470,1 +471,1 +472,1 +473,1 +474,1 +475,1 +476,1 +477,1 +478,1 +479,1 +480,1 +481,1 +482,1 +483,1 +484,1 +485,1 +486,1 +487,1 +488,1 +489,1 +490,1 +491,1 +492,1 +493,1 +494,1 +495,1 +496,1 +497,1 +498,1 +499,1 +500,1 +501,1 +502,1 +503,1 +504,1 +505,1 +506,1 +507,1 +508,1 +509,1 +510,1 +511,1 +512,1 +513,1 +514,1 +515,1 +516,1 +517,0 +518,1 +519,1 +520,1 +521,1 +522,1 +523,1 +524,1 +525,0 +526,1 +527,1 +528,1 +529,1 +530,1 +531,1 +532,1 +533,1 +534,1 +535,1 +536,1 +537,1 +538,1 +539,1 +540,1 +541,1 +542,1 +543,1 +544,1 +545,1 +546,1 +547,1 +548,1 +549,1 +550,1 +551,1 +552,1 +553,1 +554,1 +555,1 +556,1 +557,1 +558,1 +559,1 +560,1 +561,1 +562,1 +563,1 +564,1 +565,1 +566,1 +567,1 +568,1 +569,1 +570,1 +571,1 +572,1 +573,1 +574,1 +575,1 +576,1 +577,1 +578,1 +579,1 +580,1 +581,1 +582,1 +583,1 +584,1 +585,1 +586,1 +587,0 +588,1 +589,1 +590,1 +591,1 +592,1 +593,1 +594,1 +595,1 +596,1 +597,1 +598,1 +599,1 +600,1 +601,1 +602,1 +603,1 +604,1 +605,1 +606,1 +607,1 +608,1 +609,1 +610,1 +611,1 +612,1 +613,1 +614,1 +615,1 +616,1 
+617,1 +618,1 +619,1 +620,1 +621,1 +622,1 +623,1 +624,1 +625,1 +626,1 +627,1 +628,1 +629,1 +630,1 +631,1 +632,1 +633,1 +634,1 +635,1 +636,1 +637,1 +638,1 +639,1 +640,1 +641,1 +642,1 +643,1 +644,1 +645,1 +646,1 +647,1 +648,1 +649,1 +650,1 +651,1 +652,1 +653,1 +654,1 +655,1 +656,1 +657,1 +658,1 +659,1 +660,1 +661,1 +662,1 +663,1 +664,1 +665,1 +666,1 +667,1 +668,1 +669,1 +670,1 +671,1 +672,1 +673,1 +674,1 +675,1 +676,1 +677,1 +678,1 +679,1 +680,1 +681,1 +682,1 +683,1 +684,1 +685,1 +686,1 +687,1 +688,1 +689,1 +690,1 +691,1 +692,1 +693,1 +694,1 +695,1 +696,1 +697,1 +698,1 +699,1 +700,1 +701,1 +702,1 +703,1 +704,1 +705,1 +706,1 +707,1 +708,1 +709,1 +710,1 +711,1 +712,1 +713,1 +714,1 +715,1 +716,1 +717,1 +718,1 +719,1 +720,1 +721,1 +722,1 +723,1 +724,1 +725,1 +726,1 +727,1 +728,1 +729,1 +730,1 +731,1 +732,1 +733,1 +734,1 +735,1 +736,1 +737,1 +738,1 +739,1 +740,1 +741,1 +742,1 +743,1 +744,1 +745,1 +746,1 +747,1 +748,1 +749,1 +750,1 +751,1 +752,1 +753,1 +754,1 +755,1 +756,1 +757,1 +758,1 +759,1 +760,1 +761,1 +762,1 +763,1 +764,1 +765,1 +766,1 +767,1 +768,1 +769,1 +770,1 +771,1 +772,1 +773,1 +774,1 +775,1 +776,1 +777,1 +778,1 +779,1 +780,1 +781,1 +782,1 +783,1 +784,1 +785,1 +786,1 +787,1 +788,1 +789,1 +790,1 +791,1 +792,1 +793,1 +794,1 +795,1 +796,1 +797,1 +798,1 +799,1 +800,1 +801,1 +802,1 +803,1 +804,1 +805,1 +806,1 +807,1 +808,1 +809,1 +810,1 +811,1 +812,1 +813,1 +814,1 +815,1 +816,1 +817,1 +818,1 +819,1 +820,1 +821,1 +822,1 +823,1 +824,1 +825,1 +826,1 +827,1 +828,1 +829,1 +830,1 +831,1 +832,1 +833,1 +834,1 +835,1 +836,1 +837,1 +838,1 +839,1 +840,1 +841,1 +842,1 +843,1 +844,1 +845,1 +846,1 +847,1 +848,1 +849,1 +850,1 +851,1 +852,1 +853,1 +854,1 +855,1 +856,1 +857,1 +858,1 +859,1 +860,1 +861,1 +862,1 +863,1 +864,1 +865,1 +866,1 +867,1 +868,1 +869,1 +870,1 +871,1 +872,1 +873,1 +874,1 +875,1 +876,1 +877,1 +878,1 +879,1 +880,1 +881,1 +882,1 +883,1 +884,1 +885,1 +886,1 +887,1 +888,1 +889,1 +890,1 +891,1 +892,1 +893,1 +894,1 +895,1 +896,1 +897,1 +898,1 +899,1 +900,1 +901,1 
+902,1 +903,1 +904,1 +905,1 +906,1 +907,1 +908,1 +909,1 +910,1 +911,1 +912,1 +913,1 +914,1 +915,1 +916,1 +917,1 +918,1 +919,1 +920,1 +921,1 +922,1 +923,1 +924,1 +925,1 +926,1 +927,1 +928,1 +929,1 +930,1 +931,1 +932,1 +933,1 +934,1 +935,1 +936,1 +937,1 +938,1 +939,1 +940,1 +941,1 +942,1 +943,1 +944,1 +945,1 +946,1 +947,1 +948,1 +949,1 +950,1 +951,1 +952,1 +953,1 +954,1 +955,1 +956,1 +957,1 +958,1 +959,1 +960,1 +961,1 +962,1 +963,1 +964,1 +965,1 +966,1 +967,1 +968,1 +969,1 +970,1 +971,1 +972,1 +973,1 +974,1 +975,1 +976,1 +977,1 +978,1 +979,1 +980,1 +981,1 +982,1 +983,1 +984,1 +985,1 +986,1 +987,1 +988,1 +989,1 +990,1 +991,1 +992,1 +993,1 +994,1 +995,1 +996,1 +997,1 +998,1 +999,1 +1000,1 +1001,1 +1002,1 +1003,1 +1004,1 +1005,1 +1006,1 +1007,1 +1008,1 +1009,1 +1010,1 +1011,1 +1012,1 +1013,1 +1014,1 +1015,1 +1016,1 +1017,1 +1018,1 +1019,1 +1020,1 +1021,1 +1022,1 +1023,1 +1024,1 +1025,1 +1026,1 +1027,1 +1028,1 +1029,1 +1030,1 +1031,1 +1032,1 +1033,1 +1034,1 +1035,1 +1036,1 +1037,1 +1038,1 +1039,1 +1040,1 +1041,1 +1042,1 +1043,1 +1044,1 +1045,1 +1046,1 +1047,1 +1048,1 +1049,1 +1050,1 +1051,1 +1052,1 +1053,1 +1054,1 +1055,1 +1056,1 +1057,1 +1058,1 +1059,1 +1060,1 +1061,1 +1062,1 +1063,1 +1064,1 +1065,1 +1066,1 +1067,1 +1068,1 +1069,1 +1070,1 +1071,1 +1072,1 +1073,1 +1074,1 +1075,1 +1076,1 +1077,0 +1078,1 +1079,1 +1080,1 +1081,1 +1082,1 +1083,1 +1084,1 +1085,1 +1086,1 +1087,1 +1088,1 +1089,1 +1090,1 +1091,1 +1092,1 +1093,1 +1094,1 +1095,1 +1096,1 +1097,1 +1098,1 +1099,1 +1100,1 +1101,1 +1102,1 +1103,1 +1104,1 +1105,1 +1106,1 +1107,1 +1108,1 +1109,1 +1110,1 +1111,1 +1112,1 +1113,1 +1114,1 +1115,1 +1116,1 +1117,1 +1118,1 +1119,1 +1120,1 +1121,1 +1122,1 +1123,1 +1124,1 +1125,1 +1126,1 +1127,1 +1128,1 +1129,1 +1130,1 +1131,1 +1132,1 +1133,1 +1134,1 +1135,1 +1136,1 +1137,1 +1138,1 +1139,1 +1140,1 +1141,1 +1142,1 +1143,0 +1144,1 +1145,1 +1146,1 +1147,1 +1148,1 +1149,1 +1150,1 +1151,1 +1152,1 +1153,1 +1154,1 +1155,1 +1156,1 +1157,1 +1158,1 +1159,1 +1160,1 +1161,1 +1162,1 +1163,1 
+1164,1 +1165,1 +1166,1 +1167,1 +1168,1 +1169,1 +1170,1 +1171,1 +1172,1 +1173,1 +1174,1 +1175,1 +1176,1 +1177,1 +1178,1 +1179,1 +1180,1 +1181,1 +1182,1 +1183,1 +1184,1 +1185,1 +1186,1 +1187,1 +1188,1 +1189,1 +1190,1 +1191,1 +1192,1 +1193,1 +1194,1 +1195,1 +1196,1 +1197,1 +1198,1 +1199,1 +1200,1 +1201,1 +1202,1 +1203,1 +1204,1 +1205,1 +1206,1 +1207,1 +1208,1 +1209,1 +1210,1 +1211,1 +1212,1 +1213,1 +1214,1 +1215,1 +1216,1 +1217,1 +1218,1 +1219,1 +1220,1 +1221,1 +1222,1 +1223,1 +1224,1 +1225,1 +1226,1 +1227,1 +1228,1 +1229,1 +1230,1 +1231,1 +1232,1 +1233,1 +1234,1 +1235,1 +1236,1 +1237,1 +1238,1 +1239,1 +1240,1 +1241,1 +1242,1 +1243,1 +1244,1 +1245,1 +1246,1 +1247,1 +1248,1 +1249,1 +1250,1 +1251,1 +1252,1 +1253,1 +1254,1 +1255,1 +1256,1 +1257,1 +1258,1 +1259,1 +1260,1 +1261,1 +1262,1 +1263,1 +1264,1 +1265,1 +1266,1 +1267,1 +1268,1 +1269,1 +1270,1 +1271,1 +1272,1 +1273,1 +1274,1 +1275,1 +1276,1 +1277,1 +1278,1 +1279,1 +1280,1 +1281,1 +1282,1 +1283,1 +1284,1 +1285,1 +1286,1 +1287,1 +1288,1 +1289,1 +1290,1 +1291,1 +1292,1 +1293,1 +1294,1 +1295,1 +1296,1 +1297,1 +1298,1 +1299,1 +1300,1 +1301,1 +1302,1 +1303,1 +1304,1 +1305,1 +1306,1 +1307,1 +1308,1 +1309,1 +1310,1 +1311,1 +1312,1 +1313,1 +1314,1 +1315,1 +1316,1 +1317,1 +1318,1 +1319,1 +1320,1 +1321,1 +1322,1 +1323,1 +1324,1 +1325,1 +1326,1 +1327,1 +1328,1 +1329,1 +1330,1 +1331,1 +1332,1 +1333,1 +1334,1 +1335,1 +1336,1 +1337,1 +1338,1 +1339,1 +1340,1 +1341,1 +1342,1 +1343,0 +1344,1 +1345,1 +1346,1 +1347,1 +1348,1 +1349,1 +1350,1 +1351,1 +1352,1 +1353,1 +1354,1 +1355,1 +1356,1 +1357,1 +1358,1 +1359,1 +1360,1 +1361,1 +1362,1 +1363,1 +1364,1 +1365,1 +1366,1 +1367,1 +1368,1 +1369,1 +1370,1 +1371,1 +1372,1 +1373,1 +1374,1 +1375,1 +1376,1 +1377,1 +1378,1 +1379,1 +1380,1 +1381,1 +1382,1 +1383,1 +1384,1 +1385,1 +1386,1 +1387,1 +1388,1 +1389,1 +1390,1 +1391,1 +1392,1 +1393,1 +1394,1 +1395,1 +1396,1 +1397,1 +1398,1 +1399,1 +1400,1 +1401,1 +1402,1 +1403,1 +1404,1 +1405,1 +1406,1 +1407,1 +1408,1 +1409,1 +1410,1 +1411,1 +1412,1 +1413,1 
+1414,1 +1415,1 +1416,1 +1417,1 +1418,0 +1419,0 +1420,0 +1421,0 +1422,0 +1423,0 +1424,0 +1425,0 +1426,0 +1427,0 +1428,0 +1429,0 +1430,0 +1431,0 +1432,0 +1433,0 +1434,0 +1435,0 +1436,0 +1437,0 +1438,0 +1439,0 +1440,0 +1441,0 +1442,0 +1443,0 +1444,0 +1445,0 +1446,0 +1447,0 +1448,0 +1449,0 +1450,0 +1451,0 +1452,0 +1453,0 +1454,0 +1455,0 +1456,0 +1457,0 +1458,0 +1459,0 +1460,0 +1461,0 +1462,0 +1463,0 +1464,0 +1465,0 +1466,0 +1467,0 +1468,0 +1469,0 +1470,0 +1471,0 +1472,0 +1473,0 +1474,0 +1475,0 +1476,0 +1477,0 +1478,0 +1479,0 +1480,0 +1481,0 +1482,0 +1483,0 +1484,0 +1485,0 +1486,0 +1487,0 +1488,0 +1489,0 +1490,0 +1491,0 +1492,0 +1493,0 +1494,0 +1495,0 +1496,0 +1497,0 +1498,0 +1499,0 +1500,0 +1501,0 +1502,0 +1503,0 +1504,0 +1505,0 +1506,0 +1507,0 +1508,0 +1509,0 +1510,0 +1511,0 +1512,0 +1513,0 +1514,0 +1515,0 +1516,0 +1517,0 +1518,0 +1519,0 +1520,0 +1521,0 +1522,0 +1523,1 +1524,0 +1525,0 +1526,0 +1527,0 +1528,0 +1529,0 +1530,0 +1531,0 +1532,0 +1533,0 +1534,0 +1535,0 +1536,0 +1537,0 +1538,0 +1539,0 +1540,0 +1541,0 +1542,0 +1543,0 +1544,0 +1545,0 +1546,0 +1547,0 +1548,0 +1549,0 +1550,0 +1551,0 +1552,0 +1553,0 +1554,0 +1555,0 +1556,0 +1557,0 +1558,0 +1559,0 +1560,0 +1561,0 +1562,0 +1563,0 +1564,0 +1565,0 +1566,0 +1567,0 +1568,0 +1569,0 +1570,0 +1571,0 +1572,0 +1573,0 +1574,0 +1575,0 +1576,0 +1577,0 +1578,1 +1579,0 +1580,0 +1581,0 +1582,0 +1583,0 +1584,0 +1585,0 +1586,0 +1587,0 +1588,0 +1589,0 +1590,0 +1591,0 +1592,0 +1593,0 +1594,0 +1595,0 +1596,0 +1597,0 +1598,0 +1599,0 +1600,0 +1601,0 +1602,0 +1603,0 +1604,0 +1605,0 +1606,0 +1607,0 +1608,0 +1609,0 +1610,0 +1611,0 +1612,0 +1613,0 +1614,0 +1615,0 +1616,0 +1617,0 +1618,0 +1619,0 +1620,0 +1621,0 +1622,0 +1623,0 +1624,0 +1625,0 +1626,0 +1627,0 +1628,0 +1629,0 +1630,0 +1631,0 +1632,0 +1633,0 +1634,0 +1635,0 +1636,0 +1637,0 +1638,0 +1639,0 +1640,0 +1641,0 +1642,0 +1643,0 +1644,0 +1645,0 +1646,0 +1647,0 +1648,0 +1649,0 +1650,0 +1651,0 +1652,0 +1653,0 +1654,0 +1655,0 +1656,0 +1657,0 +1658,0 +1659,0 +1660,0 +1661,0 +1662,0 +1663,0 
+1664,0 +1665,0 +1666,0 +1667,0 +1668,0 +1669,0 +1670,0 +1671,0 +1672,0 +1673,0 +1674,0 +1675,0 +1676,0 +1677,0 +1678,0 +1679,0 +1680,0 +1681,0 +1682,0 +1683,0 +1684,0 +1685,0 +1686,0 +1687,0 +1688,0 +1689,0 +1690,0 +1691,0 +1692,0 +1693,0 +1694,0 +1695,0 +1696,0 +1697,0 +1698,0 +1699,0 +1700,0 +1701,0 +1702,0 +1703,0 +1704,0 +1705,0 +1706,0 +1707,0 +1708,0 +1709,1 +1710,0 +1711,0 +1712,0 +1713,0 +1714,0 +1715,0 +1716,0 +1717,0 +1718,0 +1719,0 +1720,0 +1721,0 +1722,0 +1723,0 +1724,0 +1725,0 +1726,0 +1727,0 +1728,0 +1729,0 +1730,0 +1731,0 +1732,0 +1733,0 +1734,0 +1735,0 +1736,0 +1737,0 +1738,0 +1739,0 +1740,0 +1741,0 +1742,0 +1743,0 +1744,0 +1745,0 +1746,0 +1747,0 +1748,0 +1749,0 +1750,0 +1751,0 +1752,0 +1753,0 +1754,0 +1755,0 +1756,0 +1757,0 +1758,0 +1759,0 +1760,0 +1761,0 +1762,0 +1763,0 +1764,0 +1765,0 +1766,0 +1767,0 +1768,0 +1769,0 +1770,0 +1771,0 +1772,0 +1773,0 +1774,0 +1775,0 +1776,0 +1777,0 +1778,0 +1779,0 +1780,0 +1781,0 +1782,0 +1783,0 +1784,0 +1785,0 +1786,0 +1787,0 +1788,0 +1789,0 +1790,0 +1791,0 +1792,0 +1793,0 +1794,0 +1795,0 +1796,0 +1797,0 +1798,0 +1799,0 +1800,0 +1801,0 +1802,0 +1803,0 +1804,0 +1805,0 +1806,0 +1807,0 +1808,0 +1809,0 +1810,0 +1811,0 +1812,0 +1813,0 +1814,0 +1815,0 +1816,0 +1817,0 +1818,0 +1819,0 +1820,0 +1821,0 +1822,0 +1823,0 +1824,0 +1825,0 +1826,0 +1827,0 +1828,0 +1829,0 +1830,0 +1831,0 +1832,0 +1833,0 +1834,0 +1835,0 +1836,0 +1837,0 +1838,0 +1839,0 +1840,0 +1841,0 +1842,0 +1843,1 +1844,1 +1845,0 +1846,0 +1847,0 +1848,0 +1849,0 +1850,0 +1851,0 +1852,0 +1853,0 +1854,0 +1855,0 +1856,0 +1857,0 +1858,0 +1859,0 +1860,0 +1861,0 +1862,0 +1863,0 +1864,0 +1865,0 +1866,0 +1867,0 +1868,0 +1869,0 +1870,0 +1871,0 +1872,0 +1873,0 +1874,0 +1875,0 +1876,0 +1877,0 +1878,0 +1879,1 +1880,0 +1881,0 +1882,0 +1883,0 +1884,0 +1885,0 +1886,0 +1887,0 +1888,0 +1889,0 +1890,0 +1891,0 +1892,0 +1893,0 +1894,0 +1895,0 +1896,0 +1897,0 +1898,0 +1899,0 +1900,0 +1901,0 +1902,0 +1903,0 +1904,0 +1905,1 +1906,0 +1907,0 +1908,1 +1909,0 +1910,0 +1911,0 +1912,0 +1913,0 
+1914,0 +1915,0 +1916,0 +1917,0 +1918,0 +1919,0 +1920,0 +1921,0 +1922,0 +1923,0 +1924,0 +1925,0 +1926,0 +1927,0 +1928,0 +1929,0 +1930,0 +1931,0 +1932,0 +1933,0 +1934,0 +1935,0 +1936,0 +1937,0 +1938,0 +1939,0 +1940,0 +1941,0 +1942,0 +1943,0 +1944,0 +1945,0 +1946,0 +1947,0 +1948,0 +1949,0 +1950,0 +1951,0 +1952,0 +1953,0 +1954,0 +1955,0 +1956,0 +1957,0 +1958,0 +1959,0 +1960,0 +1961,0 +1962,0 +1963,0 +1964,0 +1965,0 +1966,0 +1967,0 +1968,0 +1969,0 +1970,0 +1971,0 +1972,0 +1973,0 +1974,0 +1975,0 +1976,0 +1977,0 +1978,0 +1979,0 +1980,0 +1981,0 +1982,0 +1983,0 +1984,0 +1985,0 +1986,0 +1987,0 +1988,0 +1989,0 +1990,0 +1991,0 +1992,0 +1993,0 +1994,0 +1995,0 +1996,0 +1997,0 +1998,0 +1999,0 +2000,0 +2001,0 +2002,0 +2003,0 +2004,0 +2005,0 +2006,0 +2007,0 +2008,0 +2009,0 +2010,0 +2011,0 +2012,0 +2013,0 +2014,0 +2015,0 +2016,0 +2017,0 +2018,0 +2019,0 +2020,0 +2021,0 +2022,0 +2023,0 +2024,0 +2025,0 +2026,0 +2027,0 +2028,0 +2029,0 +2030,0 +2031,0 +2032,0 +2033,0 +2034,0 +2035,0 +2036,0 +2037,0 +2038,0 +2039,0 +2040,0 +2041,0 +2042,0 +2043,0 +2044,0 +2045,0 +2046,0 +2047,0 +2048,0 +2049,0 +2050,0 +2051,0 +2052,0 +2053,0 +2054,0 +2055,0 +2056,0 +2057,0 +2058,0 +2059,0 +2060,0 +2061,0 +2062,0 +2063,0 +2064,0 +2065,0 +2066,0 +2067,0 +2068,0 +2069,0 +2070,0 +2071,0 +2072,0 +2073,0 +2074,0 +2075,0 +2076,0 +2077,0 +2078,0 +2079,0 +2080,0 +2081,0 +2082,0 +2083,0 +2084,0 +2085,0 +2086,0 +2087,0 +2088,0 +2089,0 +2090,0 +2091,0 +2092,0 +2093,0 +2094,0 +2095,0 +2096,0 +2097,0 +2098,0 +2099,0 +2100,0 +2101,0 +2102,0 +2103,0 +2104,0 +2105,0 +2106,0 +2107,0 +2108,0 +2109,0 +2110,0 +2111,0 +2112,0 +2113,0 +2114,0 +2115,0 +2116,0 +2117,0 +2118,0 +2119,0 +2120,0 +2121,0 +2122,0 +2123,0 +2124,0 +2125,0 +2126,0 +2127,0 +2128,0 +2129,0 +2130,0 +2131,0 +2132,0 +2133,0 +2134,0 +2135,0 +2136,0 +2137,0 +2138,0 +2139,0 +2140,0 +2141,0 +2142,0 +2143,0 +2144,0 +2145,0 +2146,0 +2147,0 +2148,0 +2149,0 +2150,0 +2151,0 +2152,0 +2153,0 +2154,0 +2155,0 +2156,0 +2157,0 +2158,0 +2159,0 +2160,0 +2161,0 +2162,0 +2163,0 
+2164,0 +2165,1 +2166,0 +2167,0 +2168,0 +2169,0 +2170,0 +2171,0 +2172,0 +2173,0 +2174,0 +2175,0 +2176,0 +2177,0 +2178,0 +2179,0 +2180,0 +2181,0 +2182,0 +2183,0 +2184,0 +2185,0 +2186,0 +2187,0 +2188,0 +2189,0 +2190,0 +2191,0 +2192,0 +2193,0 +2194,0 +2195,0 +2196,0 +2197,0 +2198,0 +2199,0 +2200,0 +2201,0 +2202,0 +2203,0 +2204,0 +2205,1 +2206,0 +2207,0 +2208,0 +2209,0 +2210,0 +2211,0 +2212,0 +2213,0 +2214,0 +2215,0 +2216,0 +2217,0 +2218,0 +2219,0 +2220,0 +2221,0 +2222,0 +2223,0 +2224,0 +2225,0 +2226,0 +2227,0 +2228,0 +2229,0 +2230,0 +2231,0 +2232,0 +2233,0 +2234,0 +2235,0 +2236,0 +2237,0 +2238,0 +2239,0 +2240,0 +2241,0 +2242,0 +2243,0 +2244,0 +2245,0 +2246,0 +2247,0 +2248,0 +2249,0 +2250,0 +2251,1 +2252,0 +2253,0 +2254,0 +2255,0 +2256,0 +2257,0 +2258,0 +2259,0 +2260,0 +2261,0 +2262,0 +2263,0 +2264,0 +2265,0 +2266,0 +2267,0 +2268,0 +2269,0 +2270,0 +2271,0 +2272,0 +2273,0 +2274,0 +2275,0 +2276,0 +2277,0 +2278,0 +2279,0 +2280,0 +2281,0 +2282,0 +2283,0 +2284,0 +2285,0 +2286,0 +2287,0 +2288,0 +2289,0 +2290,0 +2291,0 +2292,0 +2293,0 +2294,0 +2295,0 +2296,0 +2297,0 +2298,0 +2299,0 +2300,0 +2301,0 +2302,0 +2303,0 +2304,0 +2305,0 +2306,0 +2307,0 +2308,0 +2309,0 +2310,0 +2311,0 +2312,0 +2313,0 +2314,0 +2315,0 +2316,0 +2317,0 +2318,0 +2319,0 +2320,0 +2321,0 +2322,0 +2323,0 +2324,0 +2325,0 +2326,0 +2327,0 +2328,0 +2329,0 +2330,0 +2331,0 +2332,0 +2333,0 +2334,1 +2335,0 +2336,0 +2337,0 +2338,0 +2339,0 +2340,0 +2341,0 +2342,0 +2343,0 +2344,0 +2345,0 +2346,0 +2347,0 +2348,0 +2349,0 +2350,0 +2351,0 +2352,0 +2353,0 +2354,0 +2355,1 +2356,0 +2357,0 +2358,0 +2359,0 +2360,0 +2361,0 +2362,0 +2363,0 +2364,0 +2365,0 +2366,0 +2367,0 +2368,0 +2369,0 +2370,0 +2371,0 +2372,0 +2373,0 +2374,0 +2375,0 +2376,0 +2377,0 +2378,0 +2379,0 +2380,0 +2381,0 +2382,0 +2383,0 +2384,0 +2385,0 +2386,0 +2387,0 +2388,0 +2389,0 +2390,0 +2391,0 +2392,0 +2393,0 +2394,0 +2395,0 +2396,0 +2397,0 +2398,0 +2399,0 +2400,0 +2401,0 +2402,0 +2403,0 +2404,0 +2405,0 +2406,0 +2407,0 +2408,0 +2409,0 +2410,0 +2411,0 +2412,0 +2413,0 
+2414,0 +2415,0 +2416,0 +2417,0 +2418,0 +2419,0 +2420,0 +2421,0 +2422,0 +2423,1 +2424,0 +2425,0 +2426,0 +2427,0 +2428,0 +2429,0 +2430,0 +2431,0 +2432,0 +2433,0 +2434,0 +2435,0 +2436,0 +2437,1 +2438,0 +2439,0 +2440,0 +2441,0 +2442,0 +2443,0 +2444,0 +2445,0 +2446,0 +2447,0 +2448,0 +2449,0 +2450,0 +2451,0 +2452,0 +2453,0 +2454,0 +2455,0 +2456,0 +2457,0 +2458,0 +2459,0 +2460,0 +2461,0 +2462,0 +2463,0 +2464,0 +2465,0 +2466,0 +2467,0 +2468,0 +2469,0 +2470,0 +2471,0 +2472,0 +2473,0 +2474,0 +2475,0 +2476,0 +2477,1 +2478,0 +2479,0 +2480,0 +2481,0 +2482,0 +2483,0 +2484,0 +2485,0 +2486,0 +2487,0 +2488,0 +2489,0 +2490,0 +2491,0 +2492,0 +2493,0 +2494,0 +2495,0 +2496,0 +2497,0 +2498,0 +2499,0 +2500,0 +2501,0 +2502,0 +2503,0 +2504,0 +2505,0 +2506,0 +2507,0 +2508,1 +2509,0 +2510,0 +2511,0 +2512,0 +2513,0 +2514,0 +2515,0 +2516,0 +2517,0 +2518,0 +2519,0 +2520,0 +2521,0 +2522,0 +2523,0 +2524,0 +2525,0 +2526,0 +2527,0 +2528,0 +2529,0 +2530,0 +2531,0 +2532,0 +2533,0 +2534,0 +2535,0 +2536,0 +2537,0 +2538,0 +2539,0 +2540,0 +2541,0 +2542,0 +2543,0 +2544,0 +2545,0 +2546,0 +2547,0 +2548,0 +2549,0 +2550,0 +2551,0 +2552,0 +2553,0 +2554,0 +2555,0 +2556,0 +2557,0 +2558,0 +2559,0 +2560,0 +2561,0 +2562,0 +2563,0 +2564,0 +2565,0 +2566,0 +2567,0 +2568,0 +2569,0 +2570,0 +2571,0 +2572,0 +2573,0 +2574,0 +2575,0 +2576,0 +2577,0 +2578,0 +2579,0 +2580,0 +2581,0 +2582,0 +2583,0 +2584,0 +2585,0 +2586,0 +2587,0 +2588,0 +2589,0 +2590,0 +2591,0 +2592,0 +2593,0 +2594,0 +2595,1 +2596,0 +2597,0 +2598,1 +2599,0 +2600,0 +2601,0 +2602,0 +2603,0 +2604,0 +2605,0 +2606,0 +2607,0 +2608,0 +2609,0 +2610,0 +2611,0 +2612,0 +2613,0 +2614,0 +2615,0 +2616,0 +2617,0 +2618,0 +2619,0 +2620,0 +2621,0 +2622,0 +2623,0 +2624,0 +2625,0 +2626,0 +2627,0 +2628,0 +2629,0 +2630,0 +2631,1 +2632,0 +2633,0 +2634,0 +2635,0 +2636,1 +2637,0 +2638,0 +2639,0 +2640,0 +2641,0 +2642,0 +2643,0 +2644,0 +2645,0 +2646,0 +2647,0 +2648,0 +2649,1 +2650,0 +2651,0 +2652,0 +2653,0 +2654,0 +2655,0 +2656,0 +2657,0 +2658,0 +2659,0 +2660,0 +2661,0 +2662,0 +2663,1 
+2664,0 +2665,0 +2666,0 +2667,0 +2668,0 +2669,0 +2670,0 +2671,0 +2672,0 +2673,0 +2674,0 +2675,0 +2676,0 +2677,0 +2678,0 +2679,0 +2680,0 +2681,0 +2682,0 +2683,0 +2684,0 +2685,0 +2686,0 +2687,0 +2688,0 +2689,0 +2690,0 +2691,0 +2692,0 +2693,0 +2694,0 +2695,0 +2696,0 +2697,0 +2698,0 +2699,0 +2700,0 +2701,0 +2702,0 +2703,0 +2704,0 +2705,0 +2706,0 +2707,0 +2708,0 +2709,0 +2710,0 +2711,0 +2712,0 +2713,0 +2714,0 +2715,0 +2716,0 +2717,0 +2718,0 +2719,0 +2720,0 +2721,0 +2722,0 +2723,0 +2724,0 +2725,0 +2726,0 +2727,0 +2728,0 +2729,0 +2730,0 +2731,0 +2732,0 +2733,0 +2734,0 +2735,0 +2736,0 +2737,0 +2738,0 +2739,0 +2740,0 +2741,0 +2742,0 +2743,0 +2744,0 +2745,0 +2746,0 +2747,0 +2748,0 +2749,0 +2750,0 +2751,0 +2752,0 +2753,0 +2754,0 +2755,0 +2756,0 +2757,0 +2758,0 +2759,0 +2760,0 +2761,0 +2762,0 +2763,0 +2764,0 +2765,0 +2766,0 +2767,0 +2768,0 +2769,0 +2770,0 +2771,0 +2772,0 +2773,0 +2774,0 +2775,0 +2776,0 +2777,0 +2778,0 +2779,0 +2780,0 +2781,0 +2782,1 +2783,0 +2784,0 +2785,0 +2786,1 +2787,0 +2788,0 +2789,0 +2790,0 +2791,0 +2792,0 +2793,1 +2794,0 +2795,0 +2796,0 +2797,0 +2798,0 +2799,0 +2800,0 +2801,0 +2802,0 +2803,0 +2804,0 +2805,0 +2806,0 +2807,0 +2808,0 +2809,0 +2810,0 +2811,0 +2812,0 +2813,0 +2814,0 +2815,0 +2816,0 +2817,0 +2818,0 +2819,0 +2820,0 +2821,0 +2822,0 +2823,0 +2824,0 +2825,0 +2826,1 +2827,0 +2828,0 +2829,0 +2830,0 +2831,0 +2832,1 +2833,0 +2834,0 +2835,0 +2836,0 +2837,0 +2838,0 +2839,0 +2840,0 +2841,0 +2842,0 +2843,0 +2844,0 +2845,0 +2846,0 +2847,0 +2848,0 +2849,0 +2850,0 +2851,0 +2852,0 +2853,0 +2854,0 +2855,0 +2856,0 +2857,0 +2858,0 +2859,0 +2860,0 +2861,0 +2862,0 +2863,0 +2864,0 +2865,0 +2866,0 +2867,0 +2868,0 +2869,0 +2870,0 +2871,1 +2872,0 +2873,0 +2874,0 +2875,0 +2876,0 +2877,0 +2878,0 +2879,0 +2880,0 +2881,0 +2882,0 +2883,0 +2884,0 +2885,0 +2886,0 +2887,0 +2888,0 +2889,0 +2890,0 +2891,0 +2892,0 +2893,0 +2894,0 +2895,0 +2896,0 +2897,0 +2898,0 +2899,0 +2900,0 +2901,0 +2902,0 +2903,0 +2904,0 +2905,0 +2906,0 +2907,0 +2908,0 +2909,0 +2910,0 +2911,0 +2912,0 +2913,0 
+2914,0 +2915,0 +2916,0 +2917,0 +2918,0 +2919,0 +2920,1 +2921,0 +2922,0 +2923,0 +2924,0 +2925,0 +2926,0 +2927,0 +2928,0 +2929,0 +2930,0 +2931,0 +2932,0 +2933,0 +2934,0 +2935,0 +2936,0 +2937,0 +2938,0 +2939,0 +2940,0 +2941,0 +2942,0 +2943,0 +2944,0 +2945,0 +2946,0 +2947,0 +2948,0 +2949,0 +2950,0 +2951,0 +2952,0 +2953,0 +2954,0 +2955,0 +2956,0 +2957,0 +2958,0 +2959,0 +2960,0 +2961,0 +2962,0 +2963,0 +2964,0 +2965,0 +2966,0 +2967,0 +2968,0 +2969,0 +2970,0 +2971,0 +2972,0 +2973,0 +2974,0 +2975,0 +2976,0 +2977,0 +2978,0 +2979,0 +2980,0 +2981,0 +2982,0 +2983,0 +2984,0 +2985,0 +2986,0 +2987,0 +2988,0 +2989,0 +2990,0 +2991,0 +2992,0 +2993,0 +2994,0 +2995,0 +2996,0 +2997,0 +2998,0 +2999,0 +3000,0 +3001,0 +3002,0 +3003,0 +3004,0 +3005,0 +3006,0 +3007,0 +3008,0 +3009,0 +3010,0 +3011,0 +3012,0 +3013,1 +3014,0 +3015,0 +3016,0 +3017,0 +3018,0 +3019,0 +3020,0 +3021,0 +3022,0 +3023,0 +3024,0 +3025,0 +3026,0 +3027,0 +3028,0 +3029,0 +3030,0 +3031,0 +3032,0 +3033,0 +3034,0 +3035,0 +3036,0 +3037,0 +3038,0 +3039,0 +3040,0 +3041,0 +3042,0 +3043,0 +3044,0 +3045,0 +3046,0 +3047,0 +3048,0 +3049,0 +3050,0 +3051,0 +3052,0 +3053,0 +3054,0 +3055,0 +3056,0 +3057,0 +3058,0 +3059,0 +3060,1 +3061,1 +3062,0 +3063,0 +3064,0 +3065,0 +3066,0 +3067,0 +3068,0 +3069,0 +3070,0 +3071,0 +3072,0 +3073,0 +3074,0 +3075,0 +3076,0 +3077,0 +3078,0 +3079,0 +3080,0 +3081,0 +3082,0 +3083,0 +3084,0 +3085,0 +3086,0 +3087,0 +3088,0 +3089,0 +3090,0 +3091,0 +3092,0 +3093,0 +3094,0 +3095,0 +3096,0 +3097,0 +3098,0 +3099,0 +3100,0 +3101,0 +3102,0 +3103,0 +3104,0 +3105,0 +3106,0 +3107,0 +3108,0 +3109,0 +3110,0 +3111,0 +3112,0 +3113,0 +3114,0 +3115,0 +3116,0 +3117,0 +3118,0 +3119,0 +3120,0 +3121,0 +3122,0 +3123,0 +3124,0 +3125,0 +3126,0 +3127,1 +3128,0 +3129,0 +3130,0 +3131,0 +3132,0 +3133,0 +3134,0 +3135,0 +3136,0 +3137,0 +3138,0 +3139,0 +3140,0 +3141,0 +3142,0 +3143,0 +3144,0 +3145,0 +3146,1 +3147,0 +3148,0 +3149,0 +3150,0 +3151,0 +3152,0 +3153,0 +3154,0 +3155,0 +3156,0 +3157,0 +3158,0 +3159,0 +3160,0 +3161,0 +3162,0 +3163,0 
+3164,0 +3165,0 +3166,0 +3167,0 +3168,0 +3169,0 +3170,0 +3171,0 +3172,0 +3173,0 +3174,0 +3175,0 +3176,0 +3177,0 +3178,0 +3179,0 +3180,0 +3181,0 +3182,0 +3183,0 +3184,0 +3185,0 +3186,0 +3187,0 +3188,0 +3189,0 +3190,0 +3191,0 +3192,0 +3193,0 +3194,0 +3195,0 +3196,0 +3197,0 +3198,0 +3199,0 +3200,0 +3201,0 +3202,0 +3203,0 +3204,0 +3205,0 +3206,0 +3207,0 +3208,0 +3209,0 +3210,0 +3211,0 +3212,0 +3213,0 +3214,0 +3215,0 +3216,0 +3217,0 +3218,0 +3219,0 +3220,0 +3221,0 +3222,0 +3223,0 +3224,0 +3225,1 +3226,0 +3227,0 +3228,0 +3229,0 +3230,0 +3231,0 +3232,0 +3233,0 +3234,0 +3235,0 +3236,1 +3237,0 +3238,0 +3239,0 +3240,0 +3241,0 +3242,0 +3243,0 +3244,0 +3245,0 +3246,0 +3247,0 +3248,0 +3249,0 +3250,0 +3251,0 +3252,0 +3253,0 +3254,0 +3255,0 +3256,0 +3257,0 +3258,1 +3259,0 +3260,0 +3261,0 +3262,0 +3263,0 +3264,0 +3265,0 +3266,0 +3267,0 +3268,0 +3269,0 +3270,0 +3271,0 +3272,0 +3273,0 +3274,0 +3275,0 +3276,0 +3277,0 +3278,0 +3279,0 +3280,0 +3281,0 +3282,0 +3283,0 +3284,0 +3285,0 +3286,0 +3287,0 +3288,0 +3289,0 +3290,0 +3291,0 +3292,0 +3293,0 +3294,1 +3295,0 +3296,0 +3297,0 +3298,0 +3299,0 +3300,1 +3301,0 +3302,0 +3303,0 +3304,0 +3305,0 +3306,0 +3307,0 +3308,0 +3309,0 +3310,0 +3311,0 +3312,0 +3313,0 +3314,0 +3315,0 +3316,0 +3317,0 +3318,0 +3319,0 +3320,0 +3321,0 +3322,0 +3323,0 +3324,0 +3325,0 +3326,0 +3327,0 +3328,0 +3329,0 +3330,0 +3331,0 +3332,0 +3333,0 +3334,0 +3335,0 +3336,0 +3337,0 +3338,0 +3339,1 +3340,0 +3341,0 +3342,0 +3343,0 +3344,0 +3345,0 +3346,0 +3347,0 +3348,0 +3349,0 +3350,0 +3351,1 +3352,0 +3353,0 +3354,0 +3355,0 +3356,0 +3357,0 +3358,0 +3359,0 +3360,0 +3361,0 +3362,0 +3363,0 +3364,0 +3365,0 +3366,0 +3367,0 +3368,0 +3369,0 +3370,0 +3371,0 +3372,0 +3373,0 +3374,0 +3375,0 +3376,0 +3377,0 +3378,0 +3379,0 +3380,0 +3381,0 +3382,0 +3383,0 +3384,0 +3385,0 +3386,0 +3387,0 +3388,0 +3389,0 +3390,0 +3391,0 +3392,0 +3393,0 +3394,0 +3395,0 +3396,0 +3397,1 +3398,0 +3399,0 +3400,0 +3401,0 +3402,0 +3403,0 +3404,0 +3405,0 +3406,0 +3407,1 +3408,0 +3409,0 +3410,0 +3411,0 +3412,0 +3413,0 
+3414,0 +3415,0 +3416,0 +3417,0 +3418,0 +3419,0 +3420,0 +3421,0 +3422,0 +3423,0 +3424,0 +3425,0 +3426,0 +3427,0 +3428,0 +3429,0 +3430,0 +3431,0 +3432,0 +3433,0 +3434,0 +3435,0 +3436,0 +3437,1 +3438,0 +3439,0 +3440,0 +3441,0 +3442,0 +3443,0 +3444,0 +3445,0 +3446,0 +3447,0 +3448,0 +3449,0 +3450,0 +3451,0 +3452,0 +3453,0 +3454,0 +3455,0 +3456,0 +3457,1 +3458,0 +3459,0 +3460,0 +3461,0 +3462,0 +3463,0 +3464,0 +3465,0 +3466,0 +3467,0 +3468,0 +3469,1 +3470,0 +3471,0 +3472,0 +3473,0 +3474,0 +3475,0 +3476,0 +3477,0 +3478,0 +3479,0 +3480,0 +3481,0 +3482,0 +3483,0 +3484,1 +3485,0 +3486,0 +3487,0 +3488,0 +3489,0 +3490,0 +3491,0 +3492,0 +3493,0 +3494,0 +3495,1 +3496,0 +3497,0 +3498,0 +3499,0 +3500,0 +3501,0 +3502,0 +3503,0 +3504,0 +3505,0 +3506,0 +3507,1 +3508,0 +3509,0 +3510,0 +3511,0 +3512,0 +3513,0 +3514,0 +3515,1 +3516,0 +3517,0 +3518,0 +3519,0 +3520,0 +3521,0 +3522,0 +3523,0 +3524,0 +3525,0 +3526,0 +3527,1 +3528,0 +3529,0 +3530,0 +3531,0 +3532,0 +3533,0 +3534,0 +3535,0 +3536,0 +3537,0 +3538,0 +3539,0 +3540,0 +3541,0 +3542,0 +3543,0 +3544,0 +3545,0 +3546,0 +3547,0 +3548,0 +3549,0 +3550,0 +3551,0 +3552,0 +3553,0 +3554,0 +3555,0 +3556,0 +3557,0 +3558,0 +3559,0 +3560,0 +3561,1 +3562,0 +3563,1 +3564,0 +3565,1 +3566,0 +3567,1 +3568,0 +3569,0 +3570,0 +3571,0 +3572,1 +3573,0 +3574,0 +3575,0 +3576,0 +3577,0 +3578,1 +3579,1 +3580,0 +3581,0 +3582,0 +3583,0 +3584,0 +3585,0 +3586,0 +3587,0 +3588,0 +3589,0 +3590,0 +3591,0 +3592,0 +3593,0 +3594,0 +3595,0 +3596,0 +3597,0 +3598,0 +3599,0 +3600,0 +3601,0 +3602,0 +3603,0 +3604,1 +3605,0 +3606,1 +3607,0 +3608,0 +3609,0 +3610,0 +3611,0 +3612,0 +3613,0 +3614,0 +3615,0 +3616,0 +3617,0 +3618,0 +3619,0 +3620,0 +3621,0 +3622,0 +3623,0 +3624,0 +3625,0 +3626,0 +3627,0 +3628,0 +3629,0 +3630,0 +3631,0 +3632,1 +3633,0 +3634,0 +3635,0 +3636,0 +3637,0 +3638,0 +3639,1 +3640,0 +3641,0 +3642,0 +3643,0 +3644,0 +3645,0 +3646,0 +3647,0 +3648,0 +3649,0 +3650,0 +3651,1 +3652,0 +3653,0 +3654,0 +3655,1 +3656,0 +3657,0 +3658,0 +3659,0 +3660,0 +3661,0 +3662,0 +3663,0 
+3664,1 +3665,0 +3666,0 +3667,0 +3668,0 +3669,0 +3670,0 +3671,0 +3672,0 +3673,0 +3674,0 +3675,0 +3676,0 +3677,0 +3678,1 +3679,0 +3680,0 +3681,0 +3682,0 +3683,0 +3684,0 +3685,0 +3686,0 +3687,0 +3688,0 +3689,0 +3690,0 +3691,0 +3692,0 +3693,0 +3694,1 +3695,0 +3696,0 +3697,0 +3698,0 +3699,0 +3700,0 +3701,0 +3702,0 +3703,0 +3704,0 +3705,0 +3706,1 +3707,0 +3708,1 +3709,1 +3710,0 +3711,1 +3712,0 +3713,0 +3714,0 +3715,0 +3716,0 +3717,0 +3718,0 +3719,0 +3720,0 +3721,0 +3722,0 +3723,1 +3724,0 +3725,0 +3726,1 +3727,0 +3728,0 +3729,0 +3730,0 +3731,0 +3732,0 +3733,0 +3734,0 +3735,0 +3736,0 +3737,0 +3738,0 +3739,0 +3740,0 +3741,0 +3742,1 +3743,0 +3744,0 +3745,0 +3746,0 +3747,0 +3748,0 +3749,0 +3750,1 +3751,0 +3752,0 +3753,0 +3754,0 +3755,0 +3756,0 +3757,0 +3758,0 +3759,0 +3760,0 +3761,0 +3762,0 +3763,0 +3764,0 +3765,0 +3766,0 +3767,1 +3768,0 +3769,0 +3770,0 +3771,0 +3772,0 +3773,0 +3774,0 +3775,0 +3776,0 +3777,0 +3778,0 +3779,0 +3780,0 +3781,0 +3782,0 +3783,0 +3784,0 +3785,0 +3786,0 +3787,0 +3788,0 +3789,0 +3790,0 +3791,0 +3792,0 +3793,0 +3794,1 +3795,0 +3796,0 +3797,0 +3798,0 +3799,0 +3800,0 +3801,1 +3802,0 +3803,0 +3804,1 +3805,0 +3806,0 +3807,0 +3808,0 +3809,0 +3810,0 +3811,0 +3812,0 +3813,1 +3814,0 +3815,0 +3816,0 +3817,0 +3818,0 +3819,0 +3820,0 +3821,0 +3822,0 +3823,0 +3824,0 +3825,0 +3826,0 +3827,0 +3828,0 +3829,0 +3830,0 +3831,0 +3832,0 +3833,1 +3834,0 +3835,0 +3836,0 +3837,0 +3838,0 +3839,0 +3840,0 +3841,0 +3842,0 +3843,0 +3844,0 +3845,0 +3846,0 +3847,1 +3848,0 +3849,0 +3850,0 +3851,0 +3852,0 +3853,0 +3854,0 +3855,0 +3856,0 +3857,0 +3858,0 +3859,0 +3860,0 +3861,0 +3862,0 +3863,0 +3864,0 +3865,0 +3866,0 +3867,1 +3868,0 +3869,0 +3870,0 +3871,0 +3872,0 +3873,0 +3874,0 +3875,0 +3876,0 +3877,0 +3878,0 +3879,0 +3880,0 +3881,0 +3882,0 +3883,0 +3884,0 +3885,0 +3886,0 +3887,0 +3888,0 +3889,0 +3890,0 +3891,0 +3892,0 +3893,0 +3894,0 +3895,0 +3896,0 +3897,0 +3898,0 +3899,0 +3900,0 +3901,0 +3902,0 +3903,0 +3904,0 +3905,0 +3906,0 +3907,0 +3908,0 +3909,0 +3910,0 +3911,0 +3912,0 +3913,0 
+3914,1 +3915,0 +3916,0 +3917,0 +3918,0 +3919,0 +3920,0 +3921,0 +3922,0 +3923,0 +3924,0 +3925,0 +3926,0 +3927,0 +3928,0 +3929,0 +3930,0 +3931,1 +3932,0 +3933,0 +3934,0 +3935,0 +3936,0 +3937,0 +3938,0 +3939,0 +3940,0 +3941,0 +3942,1 +3943,0 +3944,1 +3945,0 +3946,0 +3947,0 +3948,0 +3949,0 +3950,1 +3951,0 +3952,0 +3953,0 +3954,0 +3955,0 +3956,0 +3957,0 +3958,0 +3959,0 +3960,0 +3961,0 +3962,1 +3963,0 +3964,0 +3965,0 +3966,0 +3967,0 +3968,0 +3969,0 +3970,0 +3971,0 +3972,0 +3973,0 +3974,0 +3975,0 +3976,0 +3977,0 +3978,0 +3979,0 +3980,0 +3981,0 +3982,0 +3983,0 +3984,0 +3985,0 +3986,0 +3987,0 +3988,0 +3989,0 +3990,0 +3991,0 +3992,0 +3993,0 +3994,1 +3995,0 +3996,0 +3997,0 +3998,0 +3999,0 +4000,0 +4001,0 +4002,0 +4003,0 +4004,0 +4005,1 +4006,0 +4007,0 +4008,0 +4009,0 +4010,0 +4011,0 +4012,0 +4013,0 +4014,0 +4015,0 +4016,0 +4017,0 +4018,0 +4019,0 +4020,0 +4021,0 +4022,0 +4023,0 +4024,0 +4025,0 +4026,0 +4027,0 +4028,1 +4029,0 +4030,0 +4031,0 +4032,0 +4033,0 +4034,0 +4035,0 +4036,0 +4037,0 +4038,0 +4039,0 +4040,0 +4041,0 +4042,0 +4043,0 +4044,0 +4045,0 +4046,0 +4047,1 +4048,0 +4049,0 +4050,0 +4051,0 +4052,0 +4053,0 +4054,0 +4055,0 +4056,0 +4057,0 +4058,0 +4059,0 +4060,0 +4061,0 +4062,0 +4063,0 +4064,0 +4065,0 +4066,0 +4067,0 +4068,0 +4069,0 +4070,0 +4071,0 +4072,1 +4073,0 +4074,0 +4075,0 +4076,0 +4077,0 +4078,0 +4079,0 +4080,0 +4081,0 +4082,0 +4083,0 +4084,0 +4085,0 +4086,0 +4087,0 +4088,0 +4089,0 +4090,0 +4091,0 +4092,0 +4093,0 +4094,0 +4095,0 +4096,0 +4097,0 +4098,0 +4099,0 +4100,0 +4101,0 +4102,0 +4103,0 +4104,0 +4105,1 +4106,0 +4107,0 +4108,0 +4109,0 +4110,0 +4111,0 +4112,0 +4113,0 +4114,0 +4115,0 +4116,0 +4117,0 +4118,0 +4119,0 +4120,0 +4121,0 +4122,0 +4123,1 +4124,0 +4125,0 +4126,0 +4127,0 +4128,0 +4129,0 +4130,0 +4131,0 +4132,0 +4133,0 +4134,0 +4135,0 +4136,0 +4137,0 +4138,0 +4139,0 +4140,0 +4141,0 +4142,0 +4143,0 +4144,0 +4145,0 +4146,0 +4147,0 +4148,0 +4149,0 +4150,0 +4151,0 +4152,0 +4153,0 +4154,0 +4155,0 +4156,0 +4157,1 +4158,0 +4159,0 +4160,0 +4161,0 +4162,0 +4163,1 
+4164,0 +4165,0 +4166,0 +4167,0 +4168,0 +4169,0 +4170,0 +4171,0 +4172,0 +4173,0 +4174,0 +4175,0 +4176,0 +4177,0 +4178,0 +4179,0 +4180,1 +4181,0 +4182,0 +4183,0 +4184,0 +4185,0 +4186,0 +4187,0 +4188,0 +4189,0 +4190,1 +4191,0 +4192,0 +4193,0 +4194,0 +4195,0 +4196,0 +4197,0 +4198,0 +4199,0 +4200,0 +4201,0 +4202,0 +4203,0 +4204,0 +4205,0 +4206,0 +4207,0 +4208,0 +4209,0 +4210,0 +4211,0 +4212,0 +4213,0 +4214,0 +4215,0 +4216,0 +4217,0 +4218,0 +4219,0 +4220,1 +4221,0 +4222,0 +4223,0 +4224,0 +4225,0 +4226,0 +4227,0 +4228,0 +4229,0 +4230,0 +4231,0 +4232,0 +4233,0 +4234,0 +4235,0 +4236,0 +4237,0 +4238,0 +4239,0 +4240,1 +4241,0 +4242,0 +4243,0 +4244,0 +4245,0 +4246,0 +4247,0 +4248,0 +4249,0 +4250,0 +4251,0 +4252,1 +4253,0 +4254,0 +4255,0 +4256,0 +4257,0 +4258,0 +4259,0 +4260,0 +4261,0 +4262,0 +4263,0 +4264,0 +4265,0 +4266,0 +4267,1 +4268,0 +4269,0 +4270,0 +4271,0 +4272,0 +4273,0 +4274,0 +4275,0 +4276,0 +4277,0 +4278,1 +4279,0 +4280,0 +4281,0 +4282,0 +4283,0 +4284,0 +4285,0 +4286,0 +4287,0 +4288,0 +4289,0 +4290,1 +4291,0 +4292,0 +4293,0 +4294,0 +4295,0 +4296,0 +4297,0 +4298,1 +4299,0 +4300,0 +4301,0 +4302,0 +4303,0 +4304,0 +4305,0 +4306,0 +4307,0 +4308,0 +4309,0 +4310,1 +4311,0 +4312,0 +4313,0 +4314,0 +4315,0 +4316,0 +4317,0 +4318,0 +4319,0 +4320,0 +4321,0 +4322,0 +4323,0 +4324,0 +4325,0 +4326,0 +4327,0 +4328,0 +4329,0 +4330,0 +4331,0 +4332,0 +4333,0 +4334,0 +4335,0 +4336,0 +4337,0 +4338,0 +4339,0 +4340,0 +4341,0 +4342,0 +4343,0 +4344,1 +4345,0 +4346,1 +4347,0 +4348,1 +4349,0 +4350,1 +4351,0 +4352,0 +4353,0 +4354,0 +4355,1 +4356,0 +4357,0 +4358,0 +4359,0 +4360,0 +4361,1 +4362,1 +4363,0 +4364,0 +4365,0 +4366,0 +4367,0 +4368,0 +4369,0 +4370,0 +4371,0 +4372,0 +4373,0 +4374,0 +4375,0 +4376,0 +4377,0 +4378,0 +4379,0 +4380,0 +4381,0 +4382,0 +4383,0 +4384,0 +4385,0 +4386,0 +4387,1 +4388,0 +4389,1 +4390,0 +4391,0 +4392,0 +4393,0 +4394,0 +4395,0 +4396,0 +4397,0 +4398,0 +4399,0 +4400,0 +4401,0 +4402,0 +4403,0 +4404,0 +4405,0 +4406,0 +4407,0 +4408,0 +4409,0 +4410,0 +4411,0 +4412,0 +4413,0 
+4414,0 +4415,1 +4416,0 +4417,0 +4418,0 +4419,0 +4420,0 +4421,0 +4422,1 +4423,0 +4424,0 +4425,0 +4426,0 +4427,0 +4428,0 +4429,0 +4430,0 +4431,0 +4432,0 +4433,0 +4434,1 +4435,0 +4436,0 +4437,0 +4438,1 +4439,0 +4440,0 +4441,0 +4442,0 +4443,0 +4444,0 +4445,0 +4446,0 +4447,1 +4448,0 +4449,0 +4450,0 +4451,0 +4452,0 +4453,0 +4454,0 +4455,0 +4456,0 +4457,0 +4458,0 +4459,0 +4460,0 +4461,1 +4462,0 +4463,0 +4464,0 +4465,0 +4466,0 +4467,0 +4468,0 +4469,0 +4470,0 +4471,0 +4472,0 +4473,0 +4474,0 +4475,0 +4476,0 +4477,1 +4478,0 +4479,0 +4480,0 +4481,0 +4482,0 +4483,0 +4484,0 +4485,0 +4486,0 +4487,0 +4488,0 +4489,1 +4490,0 +4491,1 +4492,1 +4493,0 +4494,1 +4495,0 +4496,0 +4497,0 +4498,0 +4499,0 +4500,0 +4501,0 +4502,0 +4503,0 +4504,0 +4505,0 +4506,1 +4507,0 +4508,0 +4509,1 +4510,0 +4511,0 +4512,0 +4513,0 +4514,0 +4515,0 +4516,0 +4517,0 +4518,0 +4519,0 +4520,0 +4521,0 +4522,0 +4523,0 +4524,0 +4525,1 +4526,0 +4527,0 +4528,0 +4529,0 +4530,0 +4531,0 +4532,0 +4533,1 +4534,0 +4535,0 +4536,0 +4537,0 +4538,0 +4539,0 +4540,0 +4541,0 +4542,0 +4543,0 +4544,0 +4545,0 +4546,0 +4547,0 +4548,0 +4549,0 +4550,1 +4551,0 +4552,0 +4553,0 +4554,0 +4555,0 +4556,0 +4557,0 +4558,0 +4559,0 +4560,0 +4561,0 +4562,0 +4563,0 +4564,0 +4565,0 +4566,0 +4567,0 +4568,0 +4569,0 +4570,0 +4571,0 +4572,0 +4573,0 +4574,0 +4575,0 +4576,0 +4577,1 +4578,0 +4579,0 +4580,0 +4581,0 +4582,0 +4583,0 +4584,1 +4585,0 +4586,0 +4587,1 +4588,0 +4589,0 +4590,0 +4591,0 +4592,0 +4593,0 +4594,0 +4595,0 +4596,1 +4597,0 +4598,0 +4599,0 +4600,0 +4601,0 +4602,0 +4603,0 +4604,0 +4605,0 +4606,0 +4607,0 +4608,0 +4609,0 +4610,0 +4611,0 +4612,0 +4613,0 +4614,0 +4615,0 +4616,1 +4617,0 +4618,0 +4619,0 +4620,0 +4621,0 +4622,0 +4623,0 +4624,0 +4625,0 +4626,0 +4627,0 +4628,0 +4629,0 +4630,1 +4631,0 +4632,0 +4633,0 +4634,0 +4635,0 +4636,0 +4637,0 +4638,0 +4639,0 +4640,0 +4641,0 +4642,0 +4643,0 +4644,0 +4645,0 +4646,0 +4647,0 +4648,0 +4649,0 +4650,1 +4651,0 +4652,0 +4653,0 +4654,0 +4655,0 +4656,0 +4657,0 +4658,0 +4659,0 +4660,0 +4661,0 +4662,0 +4663,0 
+4664,0 +4665,0 +4666,0 +4667,0 +4668,0 +4669,0 +4670,0 +4671,0 +4672,0 +4673,0 +4674,0 +4675,0 +4676,0 +4677,0 +4678,0 +4679,0 +4680,0 +4681,0 +4682,0 +4683,0 +4684,0 +4685,0 +4686,0 +4687,0 +4688,0 +4689,0 +4690,0 +4691,0 +4692,0 +4693,0 +4694,0 +4695,0 +4696,0 +4697,1 +4698,0 +4699,0 +4700,0 +4701,0 +4702,0 +4703,0 +4704,0 +4705,0 +4706,0 +4707,0 +4708,0 +4709,0 +4710,0 +4711,0 +4712,0 +4713,0 +4714,1 +4715,0 +4716,0 +4717,0 +4718,0 +4719,0 +4720,0 +4721,0 +4722,0 +4723,0 +4724,0 +4725,1 +4726,0 +4727,1 +4728,0 +4729,0 +4730,0 +4731,0 +4732,0 +4733,1 +4734,0 +4735,0 +4736,0 +4737,0 +4738,0 +4739,0 +4740,0 +4741,0 +4742,0 +4743,0 +4744,0 +4745,1 +4746,0 +4747,0 +4748,0 +4749,0 +4750,0 +4751,0 +4752,0 +4753,0 +4754,0 +4755,0 +4756,0 +4757,0 +4758,0 +4759,0 +4760,0 +4761,0 +4762,0 +4763,0 +4764,0 +4765,0 +4766,0 +4767,0 +4768,0 +4769,0 +4770,0 +4771,0 +4772,0 +4773,0 +4774,0 +4775,0 +4776,0 +4777,1 +4778,0 +4779,0 +4780,0 +4781,0 +4782,0 +4783,0 +4784,0 +4785,0 +4786,0 +4787,0 +4788,1 +4789,0 +4790,0 +4791,0 +4792,0 +4793,0 +4794,0 +4795,0 +4796,0 +4797,0 +4798,0 +4799,0 +4800,0 +4801,0 +4802,0 +4803,0 +4804,0 +4805,0 +4806,0 +4807,0 +4808,0 +4809,0 +4810,0 +4811,1 +4812,0 +4813,0 +4814,0 +4815,0 +4816,0 +4817,0 +4818,0 +4819,0 +4820,0 +4821,0 +4822,0 +4823,0 +4824,0 +4825,0 +4826,0 +4827,0 +4828,0 +4829,0 +4830,1 +4831,0 +4832,0 +4833,0 +4834,0 +4835,0 +4836,0 +4837,0 +4838,0 +4839,0 +4840,0 +4841,0 +4842,0 +4843,0 +4844,0 +4845,0 +4846,0 +4847,0 +4848,0 +4849,0 +4850,0 +4851,0 +4852,0 +4853,0 +4854,0 +4855,1 +4856,0 +4857,0 +4858,0 +4859,0 +4860,0 +4861,0 +4862,0 +4863,0 +4864,0 +4865,0 +4866,0 +4867,0 +4868,0 +4869,0 +4870,0 +4871,0 +4872,0 +4873,0 +4874,0 +4875,0 +4876,0 +4877,0 +4878,0 +4879,0 +4880,0 +4881,0 +4882,0 +4883,0 +4884,0 +4885,0 +4886,0 +4887,0 +4888,1 +4889,0 +4890,0 +4891,0 +4892,0 +4893,0 +4894,0 +4895,0 +4896,0 +4897,0 +4898,0 +4899,0 +4900,0 +4901,0 +4902,0 +4903,0 +4904,0 +4905,0 +4906,1 +4907,0 +4908,0 +4909,0 +4910,0 +4911,0 +4912,0 +4913,0 
+4914,0 +4915,0 +4916,0 +4917,0 +4918,0 +4919,0 +4920,0 +4921,0 +4922,0 +4923,0 +4924,0 +4925,0 +4926,0 +4927,0 +4928,0 +4929,0 +4930,0 +4931,0 +4932,0 +4933,0 +4934,0 +4935,0 +4936,0 +4937,0 +4938,0 +4939,0 +4940,1 +4941,0 +4942,0 +4943,0 +4944,0 +4945,0 +4946,1 +4947,0 +4948,0 +4949,0 +4950,0 +4951,0 +4952,0 +4953,0 +4954,0 +4955,0 From 5efd96a44f42409858b21b2d6caa3b9ed5923623 Mon Sep 17 00:00:00 2001 From: Luis <56727115+luistatera@users.noreply.github.com> Date: Thu, 22 May 2025 20:34:21 +0200 Subject: [PATCH 2/4] 1st version --- jn-fake-news.ipynb | 175 ++++++--------------------------------------- 1 file changed, 22 insertions(+), 153 deletions(-) diff --git a/jn-fake-news.ipynb b/jn-fake-news.ipynb index 3c810bd..d2d3d24 100644 --- a/jn-fake-news.ipynb +++ b/jn-fake-news.ipynb @@ -18,78 +18,10 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "b24551f5", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Defaulting to user installation because normal site-packages is not writeable\n", - "Requirement already satisfied: pandas in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (2.2.3)\n", - "Requirement already satisfied: numpy>=1.22.4 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from pandas) (1.26.3)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from pandas) (2.9.0.post0)\n", - "Requirement already satisfied: pytz>=2020.1 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from pandas) (2025.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from pandas) (2025.2)\n", - "Requirement already satisfied: six>=1.5 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n", - "\n", - 
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.1.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49m/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Defaulting to user installation because normal site-packages is not writeable\n", - "Requirement already satisfied: matplotlib in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (3.9.4)\n", - "Requirement already satisfied: contourpy>=1.0.1 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib) (1.3.0)\n", - "Requirement already satisfied: cycler>=0.10 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib) (0.12.1)\n", - "Requirement already satisfied: fonttools>=4.22.0 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib) (4.58.0)\n", - "Requirement already satisfied: kiwisolver>=1.3.1 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib) (1.4.7)\n", - "Requirement already satisfied: numpy>=1.23 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib) (1.26.3)\n", - "Requirement already satisfied: packaging>=20.0 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib) (24.2)\n", - "Requirement already satisfied: pillow>=8 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib) (11.2.1)\n", - "Requirement already satisfied: pyparsing>=2.3.1 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib) (3.2.3)\n", - "Requirement already satisfied: python-dateutil>=2.7 in 
/Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib) (2.9.0.post0)\n", - "Requirement already satisfied: importlib-resources>=3.2.0 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib) (6.5.2)\n", - "Requirement already satisfied: zipp>=3.1.0 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from importlib-resources>=3.2.0->matplotlib) (3.21.0)\n", - "Requirement already satisfied: six>=1.5 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from python-dateutil>=2.7->matplotlib) (1.17.0)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.1.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49m/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Defaulting to user installation because normal site-packages is not writeable\n", - "Requirement already satisfied: seaborn in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (0.13.2)\n", - "Requirement already satisfied: numpy!=1.24.0,>=1.20 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from seaborn) (1.26.3)\n", - "Requirement already satisfied: pandas>=1.2 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from seaborn) (2.2.3)\n", - "Requirement already satisfied: matplotlib!=3.6.1,>=3.4 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from seaborn) (3.9.4)\n", - "Requirement already satisfied: contourpy>=1.0.1 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (1.3.0)\n", - "Requirement already satisfied: cycler>=0.10 in 
/Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (0.12.1)\n", - "Requirement already satisfied: fonttools>=4.22.0 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (4.58.0)\n", - "Requirement already satisfied: kiwisolver>=1.3.1 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (1.4.7)\n", - "Requirement already satisfied: packaging>=20.0 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (24.2)\n", - "Requirement already satisfied: pillow>=8 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (11.2.1)\n", - "Requirement already satisfied: pyparsing>=2.3.1 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (3.2.3)\n", - "Requirement already satisfied: python-dateutil>=2.7 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (2.9.0.post0)\n", - "Requirement already satisfied: importlib-resources>=3.2.0 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from matplotlib!=3.6.1,>=3.4->seaborn) (6.5.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from pandas>=1.2->seaborn) (2025.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from pandas>=1.2->seaborn) (2025.2)\n", - "Requirement already satisfied: zipp>=3.1.0 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from importlib-resources>=3.2.0->matplotlib!=3.6.1,>=3.4->seaborn) (3.21.0)\n", - "Requirement already satisfied: six>=1.5 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from 
python-dateutil>=2.7->matplotlib!=3.6.1,>=3.4->seaborn) (1.17.0)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.1.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49m/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Defaulting to user installation because normal site-packages is not writeable\n", - "Requirement already satisfied: scikit-learn in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (1.6.1)\n", - "Requirement already satisfied: numpy>=1.19.5 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from scikit-learn) (1.26.3)\n", - "Requirement already satisfied: scipy>=1.6.0 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from scikit-learn) (1.13.1)\n", - "Requirement already satisfied: joblib>=1.2.0 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from scikit-learn) (1.5.0)\n", - "Requirement already satisfied: threadpoolctl>=3.1.0 in /Users/luis.guimaraes/Library/Python/3.9/lib/python/site-packages (from scikit-learn) (3.6.0)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.1.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49m/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - } - ], + "outputs": [], "source": [ "%pip install pandas\n", "import 
pandas as pd\n", @@ -114,7 +46,7 @@ "id": "9b5d0a83", "metadata": {}, "source": [ - "# 3. Load Data" + "# 2. Load Data" ] }, { @@ -122,15 +54,7 @@ "execution_count": null, "id": "88a6ab38", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[2]\n" - ] - } - ], + "outputs": [], "source": [ "df = pd.read_csv(\"dataset/data.csv\")\n", "val_df = pd.read_csv(\"dataset/validation_data.csv\")\n", @@ -144,27 +68,27 @@ "id": "6937c16d", "metadata": {}, "source": [ - "# Clening up the data" + "# 3. Clening Up Data" ] }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 23, "id": "b441f8e6", "metadata": {}, "outputs": [], "source": [ - "# remove duplicates\n", - "df = df.drop_duplicates(subset=['text'])\n", - "# remove empty rows\n", - "df = df.dropna(subset=['text'])\n", - "# remove rows with empty labels\n", - "df = df.dropna(subset=['label'])\n", - "# remove rows with empty text\n", - "df = df[df['text'].str.strip() != '']\n", - "# remove rows with empty labels\n", - "df['label'] = df['label'].astype(str)\n", - "df = df[df['label'].str.strip() != '']" + "# Remove duplicate rows based on the 'text' column\n", + "df = df.drop_duplicates(subset=['text']) \n", + "\n", + "# Remove rows with 'text' is NaN\n", + "df = df.dropna(subset=['text']) \n", + "\n", + "# Remove rows with 'label' is NaN\n", + "df = df.dropna(subset=['label']) \n", + "\n", + "# Remove rows with 'text' empty or only with whitespace\n", + "df = df[df['text'].str.strip() != ''] " ] }, { @@ -193,7 +117,7 @@ " return text\n", "\n", "df['text_clean'] = df['title'] + \" \" + df['text']\n", - "df['text_clean'] = df['text_clean'].apply(clean_text)\n" + "# df['text_clean'] = df['text_clean'].apply(clean_text)\n" ] }, { @@ -225,12 +149,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "id": "4679c0d5", "metadata": {}, "outputs": [], "source": [ - "X = df['title'] + \" \" + df['text']\n", + "X = df['title'] + \" \" + 
df['text_clean']\n", "y = df['label']\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n" ] @@ -270,46 +194,10 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "id": "e5b52270", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " precision recall f1-score support\n", - "\n", - " 0 0.99 0.99 0.99 3996\n", - " 1 0.99 0.99 0.99 3993\n", - "\n", - " accuracy 0.99 7989\n", - " macro avg 0.99 0.99 0.99 7989\n", - "weighted avg 0.99 0.99 0.99 7989\n", - "\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "print(classification_report(y_test, y_pred))\n", "sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt='d')\n" @@ -338,25 +226,6 @@ "val_df.reset_index(inplace=True)\n", "val_df[['index', 'label']].to_csv(\"predictions.csv\", index=False)\n" ] - }, - { - "cell_type": "markdown", - "id": "a78dd8c6", - "metadata": {}, - "source": [ - "# 10. Final Notes" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "e50e33ea", - "metadata": {}, - "outputs": [], - "source": [ - "# Estimated model accuracy: ~XX%\n", - "# Next steps: Improve with different models, grid search, or embeddings (if time allows).\n" - ] } ], "metadata": { From 94e2a94a191ebf359e252bc8a5a4a6259add8c2d Mon Sep 17 00:00:00 2001 From: Luis <56727115+luistatera@users.noreply.github.com> Date: Thu, 22 May 2025 22:54:44 +0200 Subject: [PATCH 3/4] Add initial notebook file fn.ipynb --- 2nd-jn-fake-news.ipynb | 637 +++++++++++++++++++++++++++++++++++++++++ fn.ipynb | 0 predictions.csv | 190 ++++++------ 3 files changed, 732 insertions(+), 95 deletions(-) create mode 100644 2nd-jn-fake-news.ipynb create mode 100644 fn.ipynb diff --git a/2nd-jn-fake-news.ipynb b/2nd-jn-fake-news.ipynb new file mode 100644 index 0000000..436832a --- /dev/null +++ b/2nd-jn-fake-news.ipynb @@ -0,0 +1,637 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "cb329d99", + "metadata": {}, + "source": [ + "# Project: Fake News Classification" + ] + }, + { + "cell_type": "markdown", + "id": "24995232", + "metadata": {}, + "source": [ + "# 1. 
Imports & Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b24551f5", + "metadata": {}, + "outputs": [], + "source": [ + "%pip install pandas\n", + "import pandas as pd\n", + "import numpy as np\n", + "%pip install matplotlib\n", + "import matplotlib.pyplot as plt\n", + "%pip install seaborn\n", + "import seaborn as sns\n", + "import re\n", + "import string\n", + "%pip install scikit-learn\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.feature_extraction.text import TfidfVectorizer\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.metrics import classification_report, confusion_matrix" + ] + }, + { + "cell_type": "markdown", + "id": "9b5d0a83", + "metadata": {}, + "source": [ + "# 2. Load Data" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "88a6ab38", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "int64", + "type": "integer" + }, + { + "name": "label", + "rawType": "int64", + "type": "integer" + }, + { + "name": "title", + "rawType": "object", + "type": "string" + }, + { + "name": "text", + "rawType": "object", + "type": "string" + }, + { + "name": "subject", + "rawType": "object", + "type": "string" + }, + { + "name": "date", + "rawType": "object", + "type": "string" + } + ], + "ref": "eb22746b-4811-43a1-be70-df9c2f720498", + "rows": [ + [ + "0", + "1", + "As U.S. budget fight looms, Republicans flip their fiscal script", + "WASHINGTON (Reuters) - The head of a conservative Republican faction in the U.S. Congress, who voted this month for a huge expansion of the national debt to pay for tax cuts, called himself a “fiscal conservative” on Sunday and urged budget restraint in 2018. In keeping with a sharp pivot under way among Republicans, U.S. 
Representative Mark Meadows, speaking on CBS’ “Face the Nation,” drew a hard line on federal spending, which lawmakers are bracing to do battle over in January. When they return from the holidays on Wednesday, lawmakers will begin trying to pass a federal budget in a fight likely to be linked to other issues, such as immigration policy, even as the November congressional election campaigns approach in which Republicans will seek to keep control of Congress. President Donald Trump and his Republicans want a big budget increase in military spending, while Democrats also want proportional increases for non-defense “discretionary” spending on programs that support education, scientific research, infrastructure, public health and environmental protection. “The (Trump) administration has already been willing to say: ‘We’re going to increase non-defense discretionary spending ... by about 7 percent,’” Meadows, chairman of the small but influential House Freedom Caucus, said on the program. “Now, Democrats are saying that’s not enough, we need to give the government a pay raise of 10 to 11 percent. For a fiscal conservative, I don’t see where the rationale is. ... Eventually you run out of other people’s money,” he said. Meadows was among Republicans who voted in late December for their party’s debt-financed tax overhaul, which is expected to balloon the federal budget deficit and add about $1.5 trillion over 10 years to the $20 trillion national debt. “It’s interesting to hear Mark talk about fiscal responsibility,” Democratic U.S. Representative Joseph Crowley said on CBS. Crowley said the Republican tax bill would require the United States to borrow $1.5 trillion, to be paid off by future generations, to finance tax cuts for corporations and the rich. “This is one of the least ... fiscally responsible bills we’ve ever seen passed in the history of the House of Representatives. I think we’re going to be paying for this for many, many years to come,” Crowley said. 
Republicans insist the tax package, the biggest U.S. tax overhaul in more than 30 years, will boost the economy and job growth. House Speaker Paul Ryan, who also supported the tax bill, recently went further than Meadows, making clear in a radio interview that welfare or “entitlement reform,” as the party often calls it, would be a top Republican priority in 2018. In Republican parlance, “entitlement” programs mean food stamps, housing assistance, Medicare and Medicaid health insurance for the elderly, poor and disabled, as well as other programs created by Washington to assist the needy. Democrats seized on Ryan’s early December remarks, saying they showed Republicans would try to pay for their tax overhaul by seeking spending cuts for social programs. But the goals of House Republicans may have to take a back seat to the Senate, where the votes of some Democrats will be needed to approve a budget and prevent a government shutdown. Democrats will use their leverage in the Senate, which Republicans narrowly control, to defend both discretionary non-defense programs and social spending, while tackling the issue of the “Dreamers,” people brought illegally to the country as children. Trump in September put a March 2018 expiration date on the Deferred Action for Childhood Arrivals, or DACA, program, which protects the young immigrants from deportation and provides them with work permits. The president has said in recent Twitter messages he wants funding for his proposed Mexican border wall and other immigration law changes in exchange for agreeing to help the Dreamers. Representative Debbie Dingell told CBS she did not favor linking that issue to other policy objectives, such as wall funding. “We need to do DACA clean,” she said. On Wednesday, Trump aides will meet with congressional leaders to discuss those issues. That will be followed by a weekend of strategy sessions for Trump and Republican leaders on Jan. 6 and 7, the White House said. 
Trump was also scheduled to meet on Sunday with Florida Republican Governor Rick Scott, who wants more emergency aid. The House has passed an $81 billion aid package after hurricanes in Florida, Texas and Puerto Rico, and wildfires in California. The package far exceeded the $44 billion requested by the Trump administration. The Senate has not yet voted on the aid. ", + "politicsNews", + "December 31, 2017 " + ], + [ + "1", + "1", + "U.S. military to accept transgender recruits on Monday: Pentagon", + "WASHINGTON (Reuters) - Transgender people will be allowed for the first time to enlist in the U.S. military starting on Monday as ordered by federal courts, the Pentagon said on Friday, after President Donald Trump’s administration decided not to appeal rulings that blocked his transgender ban. Two federal appeals courts, one in Washington and one in Virginia, last week rejected the administration’s request to put on hold orders by lower court judges requiring the military to begin accepting transgender recruits on Jan. 1. A Justice Department official said the administration will not challenge those rulings. “The Department of Defense has announced that it will be releasing an independent study of these issues in the coming weeks. So rather than litigate this interim appeal before that occurs, the administration has decided to wait for DOD’s study and will continue to defend the president’s lawful authority in District Court in the meantime,” the official said, speaking on condition of anonymity. In September, the Pentagon said it had created a panel of senior officials to study how to implement a directive by Trump to prohibit transgender individuals from serving. The Defense Department has until Feb. 21 to submit a plan to Trump. Lawyers representing currently-serving transgender service members and aspiring recruits said they had expected the administration to appeal the rulings to the conservative-majority Supreme Court, but were hoping that would not happen. 
Pentagon spokeswoman Heather Babb said in a statement: “As mandated by court order, the Department of Defense is prepared to begin accessing transgender applicants for military service Jan. 1. All applicants must meet all accession standards.” Jennifer Levi, a lawyer with gay, lesbian and transgender advocacy group GLAD, called the decision not to appeal “great news.” “I’m hoping it means the government has come to see that there is no way to justify a ban and that it’s not good for the military or our country,” Levi said. Both GLAD and the American Civil Liberties Union represent plaintiffs in the lawsuits filed against the administration. In a move that appealed to his hard-line conservative supporters, Trump announced in July that he would prohibit transgender people from serving in the military, reversing Democratic President Barack Obama’s policy of accepting them. Trump said on Twitter at the time that the military “cannot be burdened with the tremendous medical costs and disruption that transgender in the military would entail.” Four federal judges - in Baltimore, Washington, D.C., Seattle and Riverside, California - have issued rulings blocking Trump’s ban while legal challenges to the Republican president’s policy proceed. The judges said the ban would likely violate the right under the U.S. Constitution to equal protection under the law. The Pentagon on Dec. 8 issued guidelines to recruitment personnel in order to enlist transgender applicants by Jan. 1. The memo outlined medical requirements and specified how the applicants’ sex would be identified and even which undergarments they would wear. 
The Trump administration previously said in legal papers that the armed forces were not prepared to train thousands of personnel on the medical standards needed to process transgender applicants and might have to accept “some individuals who are not medically fit for service.” The Obama administration had set a deadline of July 1, 2017, to begin accepting transgender recruits. But Trump’s defense secretary, James Mattis, postponed that date to Jan. 1, 2018, which the president’s ban then put off indefinitely. Trump has taken other steps aimed at rolling back transgender rights. In October, his administration said a federal law banning gender-based workplace discrimination does not protect transgender employees, reversing another Obama-era position. In February, Trump rescinded guidance issued by the Obama administration saying that public schools should allow transgender students to use the restroom that corresponds to their gender identity. ", + "politicsNews", + "December 29, 2017 " + ], + [ + "2", + "1", + "Senior U.S. Republican senator: 'Let Mr. Mueller do his job'", + "WASHINGTON (Reuters) - The special counsel investigation of links between Russia and President Trump’s 2016 election campaign should continue without interference in 2018, despite calls from some Trump administration allies and Republican lawmakers to shut it down, a prominent Republican senator said on Sunday. Lindsey Graham, who serves on the Senate armed forces and judiciary committees, said Department of Justice Special Counsel Robert Mueller needs to carry on with his Russia investigation without political interference. “This investigation will go forward. It will be an investigation conducted without political influence,” Graham said on CBS’s Face the Nation news program. “And we all need to let Mr. Mueller do his job. 
I think he’s the right guy at the right time.” The question of how Russia may have interfered in the election, and how Trump’s campaign may have had links with or co-ordinated any such effort, has loomed over the White House since Trump took office in January. It shows no sign of receding as Trump prepares for his second year in power, despite intensified rhetoric from some Trump allies in recent weeks accusing Mueller’s team of bias against the Republican president. Trump himself seemed to undercut his supporters in an interview last week with the New York Times in which he said he expected Mueller was “going to be fair.” Russia’s role in the election and the question of possible links to the Trump campaign are the focus of multiple inquiries in Washington. Three committees of the Senate and the House of Representatives are investigating, as well as Mueller, whose team in May took over an earlier probe launched by the U.S. Federal Bureau of Investigation (FBI). Several members of the Trump campaign and administration have been convicted or indicted in the investigation. Trump and his allies deny any collusion with Russia during the campaign, and the Kremlin has denied meddling in the election. Graham said he still wants an examination of the FBI’s use of a dossier on links between Trump and Russia that was compiled by a former British spy, Christopher Steele, which prompted Trump allies and some Republicans to question Mueller’s inquiry. On Saturday, the New York Times reported that it was not that dossier that triggered an early FBI probe, but a tip from former Trump campaign foreign policy adviser George Papadopoulos to an Australian diplomat that Russia had damaging information about former Trump rival Hillary Clinton. “I want somebody to look at the way the Department of Justice used this dossier. It bothers me greatly the way they used it, and I want somebody to look at it,” Graham said. But he said the Russia investigation must continue. 
“As a matter of fact, it would hurt us if we ignored it,” he said. ", + "politicsNews", + "December 31, 2017 " + ], + [ + "3", + "1", + "FBI Russia probe helped by Australian diplomat tip-off: NYT", + "WASHINGTON (Reuters) - Trump campaign adviser George Papadopoulos told an Australian diplomat in May 2016 that Russia had political dirt on Democratic presidential candidate Hillary Clinton, the New York Times reported on Saturday. The conversation between Papadopoulos and the diplomat, Alexander Downer, in London was a driving factor behind the FBI’s decision to open a counter-intelligence investigation of Moscow’s contacts with the Trump campaign, the Times reported. Two months after the meeting, Australian officials passed the information that came from Papadopoulos to their American counterparts when leaked Democratic emails began appearing online, according to the newspaper, which cited four current and former U.S. and foreign officials. Besides the information from the Australians, the probe by the Federal Bureau of Investigation was also propelled by intelligence from other friendly governments, including the British and Dutch, the Times said. Papadopoulos, a Chicago-based international energy lawyer, pleaded guilty on Oct. 30 to lying to FBI agents about contacts with people who claimed to have ties to top Russian officials. It was the first criminal charge alleging links between the Trump campaign and Russia. The White House has played down the former aide’s campaign role, saying it was “extremely limited” and that any actions he took would have been on his own. The New York Times, however, reported that Papadopoulos helped set up a meeting between then-candidate Donald Trump and Egyptian President Abdel Fattah al-Sisi and edited the outline of Trump’s first major foreign policy speech in April 2016. The federal investigation, which is now being led by Special Counsel Robert Mueller, has hung over Trump’s White House since he took office almost a year ago. 
Some Trump allies have recently accused Mueller’s team of being biased against the Republican president. Lawyers for Papadopoulos did not immediately respond to requests by Reuters for comment. Mueller’s office declined to comment. Trump’s White House attorney, Ty Cobb, declined to comment on the New York Times report. “Out of respect for the special counsel and his process, we are not commenting on matters such as this,” he said in a statement. Mueller has charged four Trump associates, including Papadopoulos, in his investigation. Russia has denied interfering in the U.S. election and Trump has said there was no collusion between his campaign and Moscow. ", + "politicsNews", + "December 30, 2017 " + ], + [ + "4", + "1", + "Trump wants Postal Service to charge 'much more' for Amazon shipments", + "SEATTLE/WASHINGTON (Reuters) - President Donald Trump called on the U.S. Postal Service on Friday to charge “much more” to ship packages for Amazon (AMZN.O), picking another fight with an online retail giant he has criticized in the past. “Why is the United States Post Office, which is losing many billions of dollars a year, while charging Amazon and others so little to deliver their packages, making Amazon richer and the Post Office dumber and poorer? Should be charging MUCH MORE!” Trump wrote on Twitter. The president’s tweet drew fresh attention to the fragile finances of the Postal Service at a time when tens of millions of parcels have just been shipped all over the country for the holiday season. The U.S. Postal Service, which runs at a big loss, is an independent agency within the federal government and does not receive tax dollars for operating expenses, according to its website. Package delivery has become an increasingly important part of its business as the Internet has led to a sharp decline in the amount of first-class letters. The president does not determine postal rates. 
They are set by the Postal Regulatory Commission, an independent government agency with commissioners selected by the president from both political parties. That panel raised prices on packages by almost 2 percent in November. Amazon was founded by Jeff Bezos, who remains the chief executive officer of the retail company and is the richest person in the world, according to Bloomberg News. Bezos also owns The Washington Post, a newspaper Trump has repeatedly railed against in his criticisms of the news media. In tweets over the past year, Trump has said the “Amazon Washington Post” fabricated stories. He has said Amazon does not pay sales tax, which is not true, and so hurts other retailers, part of a pattern by the former businessman and reality television host of periodically turning his ire on big American companies since he took office in January. Daniel Ives, a research analyst at GBH Insights, said Trump’s comment could be taken as a warning to the retail giant. However, he said he was not concerned for Amazon. “We do not see any price hikes in the future. However, that is a risk that Amazon is clearly aware of and (it) is building out its distribution (system) aggressively,” he said. Amazon has shown interest in the past in shifting into its own delivery service, including testing drones for deliveries. In 2015, the company spent $11.5 billion on shipping, 46 percent of its total operating expenses that year. Amazon shares were down 0.86 percent to $1,175.90 by early afternoon. Overall, U.S. stock prices were down slightly on Friday. Satish Jindel, president of ShipMatrix Inc, which analyzes shipping data, disputed the idea that the Postal Service charges less than United Parcel Service Inc (UPS.N) and FedEx Corp (FDX.N), the other biggest players in the parcel delivery business in the United States. Many customers get lower rates from UPS and FedEx than they would get from the post office for comparable services, he said. 
The Postal Service delivers about 62 percent of Amazon packages, for about 3.5 to 4 million a day during the current peak year-end holiday shipping season, Jindel said. The Seattle-based company and the post office have an agreement in which mail carriers take Amazon packages on the last leg of their journeys, from post offices to customers’ doorsteps. Amazon’s No. 2 carrier is UPS, at 21 percent, and FedEx is third, with 8 percent or so, according to Jindel. Trump’s comment tapped into a debate over whether Postal Service pricing has kept pace with the rise of e-commerce, which has flooded the mail with small packages.Private companies like UPS have long claimed the current system unfairly undercuts their business. Steve Gaut, a spokesman for UPS, noted that the company values its “productive relationship” with the postal service, but that it has filed with the Postal Regulatory Commission its concerns about the postal service’s methods for covering costs. Representatives for Amazon, the White House, the U.S. Postal Service and FedEx declined comment or were not immediately available for comment on Trump’s tweet. According to its annual report, the Postal Service lost $2.74 billion this year, and its deficit has ballooned to $61.86 billion. While the Postal Service’s revenue for first class mail, marketing mail and periodicals is flat or declining, revenue from package delivery is up 44 percent since 2014 to $19.5 billion in the fiscal year ended Sept. 30, 2017. But it also lost about $2 billion in revenue when a temporary surcharge expired in April 2016. According to a Government Accountability Office report in February, the service is facing growing personnel expenses, particularly $73.4 billion in unfunded pension and benefits liabilities. The Postal Service has not announced any plans to cut costs. By law, the Postal Service has to set prices for package delivery to cover the costs attributable to that service. 
But the postal service allocates only 5.5 percent of its total costs to its business of shipping packages even though that line of business is 28 percent of its total revenue. ", + "politicsNews", + "December 29, 2017 " + ] + ], + "shape": { + "columns": 5, + "rows": 5 + } + }, + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
labeltitletextsubjectdate
01As U.S. budget fight looms, Republicans flip t...WASHINGTON (Reuters) - The head of a conservat...politicsNewsDecember 31, 2017
11U.S. military to accept transgender recruits o...WASHINGTON (Reuters) - Transgender people will...politicsNewsDecember 29, 2017
21Senior U.S. Republican senator: 'Let Mr. Muell...WASHINGTON (Reuters) - The special counsel inv...politicsNewsDecember 31, 2017
31FBI Russia probe helped by Australian diplomat...WASHINGTON (Reuters) - Trump campaign adviser ...politicsNewsDecember 30, 2017
41Trump wants Postal Service to charge 'much mor...SEATTLE/WASHINGTON (Reuters) - President Donal...politicsNewsDecember 29, 2017
\n", + "
" + ], + "text/plain": [ + " label title \\\n", + "0 1 As U.S. budget fight looms, Republicans flip t... \n", + "1 1 U.S. military to accept transgender recruits o... \n", + "2 1 Senior U.S. Republican senator: 'Let Mr. Muell... \n", + "3 1 FBI Russia probe helped by Australian diplomat... \n", + "4 1 Trump wants Postal Service to charge 'much mor... \n", + "\n", + " text subject \\\n", + "0 WASHINGTON (Reuters) - The head of a conservat... politicsNews \n", + "1 WASHINGTON (Reuters) - Transgender people will... politicsNews \n", + "2 WASHINGTON (Reuters) - The special counsel inv... politicsNews \n", + "3 WASHINGTON (Reuters) - Trump campaign adviser ... politicsNews \n", + "4 SEATTLE/WASHINGTON (Reuters) - President Donal... politicsNews \n", + "\n", + " date \n", + "0 December 31, 2017 \n", + "1 December 29, 2017 \n", + "2 December 31, 2017 \n", + "3 December 30, 2017 \n", + "4 December 29, 2017 " + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv(\"dataset/data.csv\")\n", + "df.head()\n", + "#print(df['label'].unique())" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "4679c0d5", + "metadata": {}, + "outputs": [], + "source": [ + "X = df['title'] + \" \" + df['text']\n", + "y = df['label']\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "9a740ccc", + "metadata": {}, + "outputs": [], + "source": [ + "def clean_text(text):\n", + " text = text.lower()\n", + " text = re.sub(r'\\[.*?\\]', '', text)\n", + " text = re.sub(r'http\\S+|www\\S+|https\\S+', '', text)\n", + " text = re.sub(r'<.*?>+', '', text)\n", + " text = re.sub(r'[%s]' % re.escape(string.punctuation), '', text)\n", + " text = re.sub(r'\\n', '', text)\n", + " text = re.sub(r'\\w*\\d\\w*', '', text)\n", + " return text\n", + "\n", + "df['text_clean'] = df['title'] + \" \" + df['text']\n", + 
"df['text_clean'] = df['text_clean'].apply(clean_text)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "b441f8e6", + "metadata": {}, + "outputs": [], + "source": [ + "# Remove duplicate rows based on the 'text' column\n", + "df = df.drop_duplicates(subset=['text_clean']) \n", + "\n", + "# Remove rows with 'text' is NaN\n", + "df = df.dropna(subset=['text_clean']) \n", + "\n", + "# Remove rows with 'label' is NaN\n", + "df = df.dropna(subset=['label']) \n", + "\n", + "# Remove rows with 'text' empty or only with whitespace\n", + "df = df[df['text_clean'].str.strip() != ''] " + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "dccc6d3a", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "label", + "rawType": "int64", + "type": "integer" + }, + { + "name": "proportion", + "rawType": "float64", + "type": "float" + } + ], + "ref": "36239f7f-979d-4840-b3d4-7941796addad", + "rows": [ + [ + "1", + "0.543295629255436" + ], + [ + "0", + "0.45670437074456405" + ] + ], + "shape": { + "columns": 1, + "rows": 2 + } + }, + "text/plain": [ + "label\n", + "1 0.543296\n", + "0 0.456704\n", + "Name: proportion, dtype: float64" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['label'].value_counts(normalize=True)\n" + ] + }, + { + "cell_type": "markdown", + "id": "b88121d1", + "metadata": {}, + "source": [ + "# 5. 
Data Exploration" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "55e16def", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Index: 36424 entries, 0 to 39939\n", + "Data columns (total 6 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 label 36424 non-null int64 \n", + " 1 title 36424 non-null object\n", + " 2 text 36424 non-null object\n", + " 3 subject 36424 non-null object\n", + " 4 date 36424 non-null object\n", + " 5 text_clean 36424 non-null object\n", + "dtypes: int64(1), object(5)\n", + "memory usage: 1.9+ MB\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjkAAAHCCAYAAAANVtgqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8ekN5oAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAx6klEQVR4nO3dB3gVdb7/8W8oCUV6CxGkL53QBILSlkhAFmVlV6QIKEW8oEAQEC9LvXfDBWkrbVlpd4UVcBVpl97EBCnS21LCglcCFkiWlkA4/+f7e/4z9xxIgGBCyO+8X88zezIz3zNn5rgxH39lJsDj8XgEAADAMtky+wQAAAAyAiEHAABYiZADAACsRMgBAABWIuQAAAArEXIAAICVCDkAAMBKhBwAAGAlQg4AALASIQfAQylbtqz06NFD/IE/XStgM0IO4OdOnz4tb731lpQvX15y5col+fPnl+eee06mTZsmN27ckCfZggULJCAgwGcpXry4tGjRQv7nf/4ns08PQCbLkdknACDzrF69Wn7/+99LUFCQdOvWTWrUqCFJSUmyY8cOGTJkiBw5ckTmzJkjT7qxY8dKuXLlRB/Fd/HiRRN+XnzxRVm5cqX85je/yezTA5BJCDmAn4qNjZXXXntNypQpI5s3b5aSJUu6+/r16yenTp0yISgraNOmjdSvX99d79mzp5QoUUL+9re/EXIAP0Z3FeCnJkyYIFevXpW5c+f6BBxHxYoVZcCAAam+/+eff5b33ntPatasKU899ZTp5tKwceDAgXtqP/roI6levbrkyZNHChUqZALJ4sWL3f3/+te/ZODAgWYsjLYqaZfTCy+8IN9+++0jXVvBggUld+7ckiOH73/Hffjhh9K4cWMpUqSI2V+vXj357LPPHni8h73WrVu3mi6zpUuXyn/+539KqVKlTBdgy5YtTWi82zfffGNanPQ7yZs3r9SqVct0E3o7fvy4/O53v5PChQubY+l3t2LFikf6XgB/Q0sO4Ke0K0fH4egf/Udx5swZWb58uenu0q4i7Sb685//LM2aNZOjR49KSEiIqfvLX/4i7777rvlDraHp5s2bcvDgQfMHvnPnzqamb9++Jmz0799fqlWrJj/99JPpMjt27JjUrVv3gecSHx8vP/74o+muunTpkglVGuC6du3qU6cB4q
WXXpIuXbqYbrlPP/3UnP+qVaukbdu2v/haHePHj5ds2bKZYKTnpoFSP1Ov2bFhwwbTyqQBU7+X4OBgc716Lk641O5CHR/19NNPy/vvv2+CkAao9u3by9///nf57W9/m8Z/aoCf8QDwO/Hx8R799X/55Zcf+j1lypTxdO/e3V2/efOmJzk52acmNjbWExQU5Bk7dqy7TT+jevXq9z12gQIFPP369fOk1fz588113L3oOSxYsOCe+uvXr/usJyUleWrUqOH59a9/nS7XumXLFvP5VatW9SQmJrrbp02bZrYfOnTIrN++fdtTrlw58zmXL1/2Oe6dO3fcn1u2bOmpWbOm+Xzv/Y0bN/ZUqlQpTd8V4I/orgL8UEJCgnnNly/fIx9Du5W0tUIlJyeb1hftyqlcubJPN5N2HX333Xeye/fuVI+lNdrK8f333z/SucyYMcO0jOjyySefmNlVvXr1ks8//9ynTruoHJcvXzatLE2aNHlgt9jDXqvjjTfekMDAQHddP8NpEVL79u0zY6K0i06v3Zt2dzldZDpW6tVXXzXdedpSpYt+dkREhJw8eVL+93//9xG+LcB/EHIAP6RjSpT+8XxUd+7ckSlTpkilSpVMCChatKgUK1bMdEVpeHAMGzbMBIIGDRqYWh3U/PXXX/scS7tzDh8+LKVLlzZ1o0ePdgPBw9D3hIeHm0W7hXTAtHZ7afeXdks5tCuoUaNGZmyLjnHR8501a5bP+f6Sa3U888wzPus65sYJVs60faWz2VKjY3i0++0Pf/iD+SzvZdSoUaZGu+YApI6QA/hpyNFxJBosHtUf//hHiYyMlKZNm5rWk3Xr1pmWFB1grKHAUbVqVTlx4oQZ//L888+bsST66vyhVtpaoaFGx9LoeU2cONEc51HvdaOtLtqac+HCBdPiob766iszHkcDzsyZM2XNmjXmfHVckIaJ9LhWR/bs2VM8zoM+x5tzXB3X47RS3b3o4HAAqWPgMeCndNCr3gMnJiZGwsLC0vx+HSisQUJnZ3m7cuWKaenwpgNmO3bsaBZtWXnllVfM7KPhw4eb0KF0AO6//du/mUVbKHTAsdboLKZHcfv2bfOqA5CVhiv9LA0o2hrjmD9/frpe68OoUKGCedWQqa1PKdFB4Spnzpyp1gC4P1pyAD81dOhQEz507IrOFrqbdqncPZ357taKu1smli1bds84ER1D4k3HqmhXkr731q1bZozL3V0+OoVcW3QSExMf6dr0uOvXrzefpS1JzvnqeBf9PMfZs2fNrKkHedhrfVga4HSW1tSpU01Q8uZ8jn4HzZs3N7O4tEXqbj/88MMjfTbgT2jJAfyUtibovWq0dUWDgPcdj6Ojo80f8fs9v0lbgvROwzrIVqehHzp0SBYtWuS2QDhatWplpkfrVGi9QZ9Ok54+fbqZsq0Dn/WPvN5PRqeYh4aGmvE7GzduNAOVJ02a9FDXot1aej8Zpa1Ael3aTaXTrp3xR/p5kydPltatW5suKq3TAcva5aNja+7nYa81Ld1pOhaoXbt2Urt2bXNcbcnSa9Bp49rapPT8tGtP78/Tu3dv83kaSLX1TQdzp3RPIgBeMnt6F4DM9Y9//MPTu3dvT9myZT2BgYGefPnyeZ577jnPRx995DN1OaVp1YMHD/aULFnSkzt3bvOemJgYT7Nmzczi+POf/+xp2rSpp0iRImbKdYUKFTxDhgwx09iVTrXW9dDQUPPZefPmNT/PnDnzkaaQ58qVy1O7dm3PrFmzfKZjq7lz55qp13oeVapUMe8fNWqUeZ+3R71WZwr5smXL7plurtv187zt2LHD88ILL7jXXatWLfO9ezt9+rSnW7dunuDgYE/OnDk9Tz/9tOc3v/mN57PPPnvg9wP4uwD9H+/QAwAAYAPG5AAAACsRcgAAgJUIOQAAwEqEHAAAYCVCDgAAsBIhBwAAWMmvbwaoz4bRpx7rDcmcJ/8CAIAnm979Rh8wrHdG15trpsavQ44GHH3qMQ
AAyHrOnz9v7pieGr8OOdqC43xJzq3fAQDAky0hIcE0Ujh/x1Pj1yHH6aLSgEPIAQAga3nQUBMGHgMAACsRcgAAgJUIOQAAwEqEHAAAYCVCDgAAsBIhBwAAWImQAwAArJSmkBMVFSXPPvusuflO8eLFpX379nLixAmfmps3b0q/fv2kSJEi8tRTT0mHDh3k4sWLPjXnzp2Ttm3bSp48ecxxhgwZIrdv3/ap2bp1q9StW1eCgoKkYsWKsmDBgnvOZ8aMGVK2bFnJlSuXNGzYUHbt2pW2qwcAANZKU8jZtm2bCTA7d+6UDRs2yK1bt6RVq1Zy7do1t2bQoEGycuVKWbZsmanXRye88sor7v7k5GQTcJKSkiQ6OloWLlxoAszIkSPdmtjYWFPTokUL2b9/vwwcOFB69eol69atc2uWLFkikZGRMmrUKPn2228lNDRUIiIi5NKlS7/8WwEAAFmf5xe4dOmSRw+xbds2s37lyhVPzpw5PcuWLXNrjh07ZmpiYmLM+po1azzZsmXzxMXFuTWzZs3y5M+f35OYmGjWhw4d6qlevbrPZ3Xs2NETERHhrjdo0MDTr18/dz05OdkTEhLiiYqKeujzj4+PN+emrwAAIGt42L/fv2hMTnx8vHktXLiwed27d69p3QkPD3drqlSpIs8884zExMSYdX2tWbOmlChRwq3RFhh9DsWRI0fcGu9jODXOMbQVSD/Lu0afQqrrTk1KEhMTzed4LwAAwE6PHHLu3LljupGee+45qVGjhtkWFxcngYGBUrBgQZ9aDTS6z6nxDjjOfmff/Wo0lNy4cUN+/PFH0+2VUo1zjNTGFBUoUMBdeAI5AAD2euSQo2NzDh8+LJ9++qlkFcOHDzetT86iTx8HAAB2eqSnkPfv319WrVol27dvl1KlSrnbg4ODTVfSlStXfFpzdHaV7nNq7p4F5cy+8q65e0aWruuTwnPnzi3Zs2c3S0o1zjFSojO1dAEAAPZLU0uOx+MxAeeLL76QzZs3S7ly5Xz216tXT3LmzCmbNm1yt+kUc50yHhYWZtb19dChQz6zoHSmlgaYatWquTXex3BqnGNol5h+lneNdp/pulMDAAD8W460dlEtXrxYvvzyS3OvHGf8i45v0RYWfe3Zs6eZ2q2DkTW4vPPOOyZ4NGrUyNTqlHMNM6+//rpMmDDBHGPEiBHm2E4rS9++fWX69OkydOhQefPNN02gWrp0qaxevdo9F/2M7t27S/369aVBgwYydepUM5X9jTfeSN9vyFJl3/+/7xL2Ozu+bWafAgA82SFn1qxZ5rV58+Y+2+fPny89evQwP0+ZMsXMdNKbAOpsJp0VNXPmTLdWu5m0q+vtt9824Sdv3rwmrIwdO9at0RYiDTR6z51p06aZLrGPP/7YHMvRsWNH+eGHH8z9dTQo1a5dW9auXXvPYGQAAOCfAnQeufgpna2lrU86CFlbnfwJLTn+hZYcAP7495tnVwEAACsRcgAAgJUIOQAAwEqEHAAAYCVCDgAAsBIhBwAAWImQAwAArETIAQAAViLkAAAAKxFyAACAlQg5AADASoQcAABgJUIOAACwEiEHAABYiZADAACsRMgBAABWIuQAAAArEXIAAICVCDkAAMBKhBwAAGAlQg4AALASIQcAAFiJkAMAAKxEyAEAAFYi5AAAACsRcgAAgJUIOQAAwEqEHAAAYCVCDgAAsBIhBwAAWImQAwAArETIAQAAViLkAAAAK6U55Gzfvl3atWsnISEhEhAQIMuXL/fZr9tSWiZOnOjWlC1b9p7948eP9znOwYMHpUmTJpIrVy4pXbq0TJgw4Z5zWbZsmVSpUsXU1KxZU9asWZPWywEAAJZKc8i5du2ahIaGyowZM1Lcf+HCBZ9l3rx5JsR06NDBp27s2LE+de+88467LyEhQVq1aiVlypSRvXv3moA0evRomTNnjlsTHR0tnTp1kp49e8q+ffukffv2Zjl8+HBaLwkAAFgoR1rf0KZNG7OkJj
g42Gf9yy+/lBYtWkj58uV9tufLl++eWseiRYskKSnJBKTAwECpXr267N+/XyZPnix9+vQxNdOmTZPWrVvLkCFDzPq4ceNkw4YNMn36dJk9e3ZaLwsAAFgmQ8fkXLx4UVavXm1aW+6m3VNFihSROnXqmJaa27dvu/tiYmKkadOmJuA4IiIi5MSJE3L58mW3Jjw83OeYWqPbAQAA0tySkxYLFy40LTavvPKKz/Z3331X6tatK4ULFzbdTsOHDzddVtpSo+Li4qRcuXI+7ylRooS7r1ChQubV2eZdo9tTk5iYaBbvbjEAAGCnDA052t3UpUsXMzDYW2RkpPtzrVq1TIvNW2+9JVFRURIUFJRh56PHHzNmTIYdHwAA+EF31VdffWW6l3r16vXA2oYNG5ruqrNnz5p1HaujXV3enHVnHE9qNamN81HaYhQfH+8u58+ff6RrAwAAfhxy5s6dK/Xq1TMzsR5EBxVny5ZNihcvbtbDwsLMVPVbt265NTqouHLlyqaryqnZtGmTz3G0RrenRluJ8ufP77MAAAA7pTnkXL161YQSXVRsbKz5+dy5cz5jXfQeNim14ujA4KlTp8qBAwfkzJkzZibVoEGDpGvXrm6A6dy5s+nC0gHLR44ckSVLlpjZVN7dXAMGDJC1a9fKpEmT5Pjx42aK+Z49e6R///6P+l0AAAB/HpOjQUKnhDuc4NG9e3dZsGCB+fnTTz8Vj8dj7mOTUmuK7tdQooOAdYCxhhzvAFOgQAFZv3699OvXz7QGFS1aVEaOHOlOH1eNGzeWxYsXy4gRI+SDDz6QSpUqmRsT1qhRI+3fAgAAsE6AR9OIn9IWJw1UOj7H37quyr6/OrNPAY/R2fFtM/sUAOCx//3m2VUAAMBKhBwAAGAlQg4AALASIQcAAFiJkAMAAKxEyAEAAFYi5AAAACsRcgAAgJUIOQAAwEppfqwDAODJxh3N/Qt3NE8dLTkAAMBKhBwAAGAlQg4AALASIQcAAFiJkAMAAKxEyAEAAFYi5AAAACsRcgAAgJUIOQAAwEqEHAAAYCVCDgAAsBIhBwAAWImQAwAArETIAQAAViLkAAAAKxFyAACAlQg5AADASoQcAABgJUIOAACwEiEHAABYiZADAACsRMgBAABWIuQAAAArEXIAAICV0hxytm/fLu3atZOQkBAJCAiQ5cuX++zv0aOH2e69tG7d2qfm559/li5dukj+/PmlYMGC0rNnT7l69apPzcGDB6VJkyaSK1cuKV26tEyYMOGec1m2bJlUqVLF1NSsWVPWrFmT1ssBAACWSnPIuXbtmoSGhsqMGTNSrdFQc+HCBXf529/+5rNfA86RI0dkw4YNsmrVKhOc+vTp4+5PSEiQVq1aSZkyZWTv3r0yceJEGT16tMyZM8etiY6Olk6dOpmAtG/fPmnfvr1ZDh8+nNZLAgAAFsqR1je0adPGLPcTFBQkwcHBKe47duyYrF27Vnbv3i3169c32z766CN58cUX5cMPPzQtRIsWLZKkpCSZN2+eBAYGSvXq1WX//v0yefJkNwxNmzbNhKkhQ4aY9XHjxpnQNH36dJk9e3ZaLwsAAFgmQ8bkbN26VYoXLy6VK1eWt99+W3766Sd3X0xMjOmicgKOCg8Pl2zZssk333zj1jRt2tQEHEdERIScOHFCLl++7Nbo+7xpjW5PTWJiomkl8l4AAICd0j3kaOvKf//3f8umTZvkv/7rv2Tbtm2m5Sc5Odnsj4uLMwHIW44cOaRw4cJmn1NTokQJnxpn/UE1zv6UREVFSYECBdxFx/oAAAA7pbm76kFee+0192cdDFyrVi2pUKGCad1p2bKlZKbhw4dLZGSku64tOQQdAADslOFTyMuXLy9FixaVU6dOmXUdq3Pp0iWfmtu3b5sZV844Hn29ePGiT42z/qCa1MYCOWOFdEaX9wIAAOyU4SHnu+++M2NySpYsadbDwsLkypUrZtaUY/PmzXLnzh1p2LChW6Mzrm7duuXW6K
BiHeNTqFAht0a7xLxpjW4HAABIc8jR+9noTCddVGxsrPn53LlzZp/Odtq5c6ecPXvWhJCXX35ZKlasaAYFq6pVq5pxO71795Zdu3bJ119/Lf379zfdXDqzSnXu3NkMOtbp4TrVfMmSJWY2lXdX04ABA8wsrUmTJsnx48fNFPM9e/aYYwEAAKQ55GiQqFOnjlmUBg/9eeTIkZI9e3ZzE7+XXnpJfvWrX5mQUq9ePfnqq69MV5FDp4jrTfx0jI5OHX/++ed97oGjg4LXr19vApS+f/Dgweb43vfSady4sSxevNi8T+/b89lnn5kbE9aoUeOXfysAACDLC/B4PB7xUzrwWANVfHy8343PKfv+6sw+BTxGZ8e3zexTwGPE77d/8cff74SH/PvNs6sAAICVCDkAAMBKhBwAAGAlQg4AALASIQcAAFiJkAMAAKxEyAEAAFYi5AAAACsRcgAAgJUIOQAAwEqEHAAAYCVCDgAAsBIhBwAAWImQAwAArETIAQAAViLkAAAAKxFyAACAlQg5AADASoQcAABgJUIOAACwEiEHAABYiZADAACsRMgBAABWIuQAAAArEXIAAICVCDkAAMBKhBwAAGAlQg4AALASIQcAAFiJkAMAAKxEyAEAAFYi5AAAACsRcgAAgJXSHHK2b98u7dq1k5CQEAkICJDly5e7+27duiXDhg2TmjVrSt68eU1Nt27d5Pvvv/c5RtmyZc17vZfx48f71Bw8eFCaNGkiuXLlktKlS8uECRPuOZdly5ZJlSpVTI1+5po1a9J6OQAAwFJpDjnXrl2T0NBQmTFjxj37rl+/Lt9++6384Q9/MK+ff/65nDhxQl566aV7aseOHSsXLlxwl3feecfdl5CQIK1atZIyZcrI3r17ZeLEiTJ69GiZM2eOWxMdHS2dOnWSnj17yr59+6R9+/ZmOXz4cFovCQAAWChHWt/Qpk0bs6SkQIECsmHDBp9t06dPlwYNGsi5c+fkmWeecbfny5dPgoODUzzOokWLJCkpSebNmyeBgYFSvXp12b9/v0yePFn69OljaqZNmyatW7eWIUOGmPVx48aZz9bPmz17dlovCwAAWCbDx+TEx8eb7qiCBQv6bNfuqSJFikidOnVMS83t27fdfTExMdK0aVMTcBwRERGmVejy5ctuTXh4uM8xtUa3AwAApLklJy1u3rxpxuhot1L+/Pnd7e+++67UrVtXChcubLqdhg8fbrqstKVGxcXFSbly5XyOVaJECXdfoUKFzKuzzbtGt6cmMTHRLN7dYgAAwE4ZFnJ0EPKrr74qHo9HZs2a5bMvMjLS/blWrVqmxeatt96SqKgoCQoKyqhTMscfM2ZMhh0fAABY3l3lBJx//vOfZpyMdytOSho2bGi6q86ePWvWdazOxYsXfWqcdWccT2o1qY3zUdpipN1nznL+/PlHvkYAAOBnIccJOCdPnpSNGzeacTcPooOKs2XLJsWLFzfrYWFhZqq6HsuhYaly5cqmq8qp2bRpk89xtEa3p0ZbiTRweS8AAMBOae6uunr1qpw6dcpdj42NNSFFx9eULFlSfve735np46tWrZLk5GR3jIzu124pHRj8zTffSIsWLcwMK10fNGiQdO3a1Q0wnTt3Nt1KOj1cx/TotHCdTTVlyhT3cwcMGCDNmjWTSZMmSdu2beXTTz+VPXv2+EwzBwAA/ivNIUeDhAaUu8fXdO/e3dzLZsWKFWa9du3aPu/bsmWLNG/e3LSmaCDRWh0ErAOMNeR4j9PRqejr16+Xfv36Sb169aRo0aIycuRId/q4aty4sSxevFhGjBghH3zwgVSqVMncmLBGjRqP9k0AAACrBHh0ZLCf0tlVGqh0fI6/dV2VfX91Zp8CHqOz49tm9ingMeL327/44+93wkP+/ebZVQAAwEqEHAAAYCVCDgAAsBIhBwAAWImQAwAArETIAQAAViLkAAAAKxFyAACAlQg5AADASoQcAABgJUIOAACwEiEHAABYiZADAA
CsRMgBAABWIuQAAAArEXIAAICVCDkAAMBKhBwAAGAlQg4AALASIQcAAFiJkAMAAKxEyAEAAFYi5AAAACsRcgAAgJUIOQAAwEqEHAAAYCVCDgAAsBIhBwAAWImQAwAArETIAQAAViLkAAAAKxFyAACAlQg5AADASoQcAABgpTSHnO3bt0u7du0kJCREAgICZPny5T77PR6PjBw5UkqWLCm5c+eW8PBwOXnypE/Nzz//LF26dJH8+fNLwYIFpWfPnnL16lWfmoMHD0qTJk0kV65cUrp0aZkwYcI957Js2TKpUqWKqalZs6asWbMmrZcDAAAsleaQc+3aNQkNDZUZM2akuF/DyJ/+9CeZPXu2fPPNN5I3b16JiIiQmzdvujUacI4cOSIbNmyQVatWmeDUp08fd39CQoK0atVKypQpI3v37pWJEyfK6NGjZc6cOW5NdHS0dOrUyQSkffv2Sfv27c1y+PDhtH8LAADAOgEebXp51DcHBMgXX3xhwoXSQ2kLz+DBg+W9994z2+Lj46VEiRKyYMECee211+TYsWNSrVo12b17t9SvX9/UrF27Vl588UX57rvvzPtnzZol//7v/y5xcXESGBhoat5//33TanT8+HGz3rFjRxO4NCQ5GjVqJLVr1zYB62FomCpQoIA5R21V8idl31+d2aeAx+js+LaZfQp4jPj99i/++Pud8JB/v9N1TE5sbKwJJtpF5dCTaNiwocTExJh1fdUuKifgKK3Pli2baflxapo2beoGHKWtQSdOnJDLly+7Nd6f49Q4n5OSxMRE88V4LwAAwE7pGnI04ChtufGm684+fS1evLjP/hw5ckjhwoV9alI6hvdnpFbj7E9JVFSUCV3OomN9AACAnfxqdtXw4cNN05aznD9/PrNPCQAAZIWQExwcbF4vXrzos13XnX36eunSJZ/9t2/fNjOuvGtSOob3Z6RW4+xPSVBQkOm7814AAICd0jXklCtXzoSMTZs2udt03IuOtQkLCzPr+nrlyhUza8qxefNmuXPnjhm749TojKtbt265NToTq3LlylKoUCG3xvtznBrncwAAgH9Lc8jR+9ns37/fLM5gY/353LlzZrbVwIED5T/+4z9kxYoVcujQIenWrZuZMeXMwKpataq0bt1aevfuLbt27ZKvv/5a+vfvb2ZeaZ3q3LmzGXSs08N1qvmSJUtk2rRpEhkZ6Z7HgAEDzKysSZMmmRlXOsV8z5495lgAAAA50voGDRItWrRw153g0b17dzNNfOjQoWZqt973Rltsnn/+eRNG9IZ9jkWLFpkw0rJlSzOrqkOHDubeOg4dFLx+/Xrp16+f1KtXT4oWLWpuMOh9L53GjRvL4sWLZcSIEfLBBx9IpUqVzBTzGjVq/JLvAwAAWOIX3Scnq+M+OfAX/ngfDX/G77d/8cff74TMuE8OAADAk4KQAwAArETIAQAAViLkAAAAKxFyAACAlQg5AADASoQcAABgJUIOAACwEiEHAABYiZADAACsRMgBAABWIuQAAAArEXIAAICVCDkAAMBKhBwAAGAlQg4AALASIQcAAFiJkAMAAKxEyAEAAFYi5AAAACsRcgAAgJUIOQAAwEqEHAAAYCVCDgAAsBIhBwAAWImQAwAArETIAQAAViLkAAAAKxFyAACAlQg5AADASoQcAABgJUIOAACwEiEHAABYKd1DTtmyZSUgIOCepV+/fmZ/8+bN79nXt29fn2OcO3dO2rZtK3ny5JHixYvLkCFD5Pbt2z41W7dulbp160pQUJBUrFhRFixYkN6XAgAAsrAc6X3A3bt3S3Jysrt++PBheeGFF+T3v/+9u613794yduxYd13DjEPfqwEnODhYoqOj5cKFC9KtWzfJmTOn/PGPfzQ1sbGxpkbD0aJFi2TTpk3Sq1cvKVmypERERKT3JQEAgCwo3UNOsWLFfNbHjx8vFSpUkGbNmvmEGg0xKVm/fr0cPXpUNm7cKCVKlJDatWvLuHHjZNiwYTJ69G
gJDAyU2bNnS7ly5WTSpEnmPVWrVpUdO3bIlClTCDkAACDjx+QkJSXJJ598Im+++abplnJo60vRokWlRo0aMnz4cLl+/bq7LyYmRmrWrGkCjkODS0JCghw5csStCQ8P9/ksrdHt95OYmGiO470AAAA7pXtLjrfly5fLlStXpEePHu62zp07S5kyZSQkJEQOHjxoWmhOnDghn3/+udkfFxfnE3CUs6777lejoeXGjRuSO3fuFM8nKipKxowZk+7XCQAA/CzkzJ07V9q0aWMCjaNPnz7uz9pio+NoWrZsKadPnzbdWhlJW40iIyPddQ1FpUuXztDPBAAAloWcf/7zn2ZcjdNCk5qGDRua11OnTpmQo2N1du3a5VNz8eJF8+qM49FXZ5t3Tf78+VNtxVE6E0sXAABgvwwbkzN//nwz/VtnQd3P/v37zau26KiwsDA5dOiQXLp0ya3ZsGGDCTDVqlVza3RGlTet0e0AAAAZFnLu3LljQk737t0lR47/ayzSLimdKbV37145e/asrFixwkwPb9q0qdSqVcvUtGrVyoSZ119/XQ4cOCDr1q2TESNGmPvsOK0wOnX8zJkzMnToUDl+/LjMnDlTli5dKoMGDeKfKgAAyLiQo91UekM/nVXlTad/6z4NMlWqVJHBgwdLhw4dZOXKlW5N9uzZZdWqVeZVW2a6du1qgpD3fXV0+vjq1atN601oaKiZSv7xxx8zfRwAAGTsmBwNMR6P557tOsh327ZtD3y/zr5as2bNfWv0zsn79u37RecJAADsxbOrAACAlQg5AADASoQcAABgJUIOAACwEiEHAABYiZADAACsRMgBAABWIuQAAAArEXIAAICVCDkAAMBKhBwAAGAlQg4AALASIQcAAFiJkAMAAKxEyAEAAFYi5AAAACsRcgAAgJUIOQAAwEqEHAAAYCVCDgAAsBIhBwAAWImQAwAArETIAQAAViLkAAAAKxFyAACAlQg5AADASoQcAABgJUIOAACwEiEHAABYiZADAACsRMgBAABWIuQAAAArEXIAAICV0j3kjB49WgICAnyWKlWquPtv3rwp/fr1kyJFishTTz0lHTp0kIsXL/oc49y5c9K2bVvJkyePFC9eXIYMGSK3b9/2qdm6davUrVtXgoKCpGLFirJgwYL0vhQAAJCFZUhLTvXq1eXChQvusmPHDnffoEGDZOXKlbJs2TLZtm2bfP/99/LKK6+4+5OTk03ASUpKkujoaFm4cKEJMCNHjnRrYmNjTU2LFi1k//79MnDgQOnVq5esW7cuIy4HAABkQTky5KA5ckhwcPA92+Pj42Xu3LmyePFi+fWvf222zZ8/X6pWrSo7d+6URo0ayfr16+Xo0aOyceNGKVGihNSuXVvGjRsnw4YNM61EgYGBMnv2bClXrpxMmjTJHEPfr0FqypQpEhERkRGXBAAAspgMack5efKkhISESPny5aVLly6m+0nt3btXbt26JeHh4W6tdmU988wzEhMTY9b1tWbNmibgODS4JCQkyJEjR9wa72M4Nc4xAAAA0r0lp2HDhqZ7qXLlyqarasyYMdKkSRM5fPiwxMXFmZaYggUL+rxHA43uU/rqHXCc/c6++9VoELpx44bkzp07xXNLTEw0i0PrAQCAndI95LRp08b9uVatWib0lClTRpYuXZpq+HhcoqKiTOgCAAD2y/Ap5Npq86tf/UpOnTplxunogOIrV6741OjsKmcMj77ePdvKWX9QTf78+e8bpIYPH27GBTnL+fPn0+06AQCAn4Wcq1evyunTp6VkyZJSr149yZkzp2zatMndf+LECTNmJywszKzr66FDh+TSpUtuzYYNG0yAqVatmlvjfQynxjlGanS6uR7HewEAAHZK95Dz3nvvmanhZ8+eNVPAf/vb30r27NmlU6dOUqBAAenZs6dERkbKli1bzEDkN954w4QTnVmlWrVqZcLM66+/LgcOHDDTwkeMGGHuraMhRfXt21fOnDkjQ4cOlePHj8
vMmTNNd5hOTwcAAMiQMTnfffedCTQ//fSTFCtWTJ5//nkzPVx/VjrNO1u2bOYmgDoIWGdFaUhxaCBatWqVvP322yb85M2bV7p37y5jx451a3T6+OrVq02omTZtmpQqVUo+/vhjpo8DAABXgMfj8Yif0tlV2rqk43P8reuq7PurM/sU8BidHd82s08BjxG/3/7FH3+/Ex7y7zfPrgIAAFYi5AAAACsRcgAAgJUIOQAAwEqEHAAAYCVCDgAAsBIhBwAAWImQAwAArETIAQAAViLkAAAAKxFyAACAlQg5AADASoQcAABgJUIOAACwEiEHAABYiZADAACsRMgBAABWIuQAAAArEXIAAICVCDkAAMBKhBwAAGAlQg4AALASIQcAAFiJkAMAAKxEyAEAAFYi5AAAACsRcgAAgJUIOQAAwEqEHAAAYCVCDgAAsBIhBwAAWImQAwAArETIAQAAViLkAAAAK6V7yImKipJnn31W8uXLJ8WLF5f27dvLiRMnfGqaN28uAQEBPkvfvn19as6dOydt27aVPHnymOMMGTJEbt++7VOzdetWqVu3rgQFBUnFihVlwYIF6X05AAAgi0r3kLNt2zbp16+f7Ny5UzZs2CC3bt2SVq1aybVr13zqevfuLRcuXHCXCRMmuPuSk5NNwElKSpLo6GhZuHChCTAjR450a2JjY01NixYtZP/+/TJw4EDp1auXrFu3Lr0vCQAAZEE50vuAa9eu9VnXcKItMXv37pWmTZu627WFJjg4OMVjrF+/Xo4ePSobN26UEiVKSO3atWXcuHEybNgwGT16tAQGBsrs2bOlXLlyMmnSJPOeqlWryo4dO2TKlCkSERGR3pcFAACymAwfkxMfH29eCxcu7LN90aJFUrRoUalRo4YMHz5crl+/7u6LiYmRmjVrmoDj0OCSkJAgR44ccWvCw8N9jqk1uj01iYmJ5hjeCwAAsFO6t+R4u3PnjulGeu6550yYcXTu3FnKlCkjISEhcvDgQdNCo+N2Pv/8c7M/Li7OJ+AoZ1333a9Gg8uNGzckd+7cKY4XGjNmTIZcKwAA8KOQo2NzDh8+bLqRvPXp08f9WVtsSpYsKS1btpTTp09LhQoVMux8tMUoMjLSXddAVLp06Qz7PAAAYGF3Vf/+/WXVqlWyZcsWKVWq1H1rGzZsaF5PnTplXnWszsWLF31qnHVnHE9qNfnz50+xFUfpLCzd770AAAA7pXvI8Xg8JuB88cUXsnnzZjM4+EF0dpTSFh0VFhYmhw4dkkuXLrk1OlNLQ0m1atXcmk2bNvkcR2t0OwAAQLaM6KL65JNPZPHixeZeOTp2RhcdJ6O0S0pnSulsq7Nnz8qKFSukW7duZuZVrVq1TI1OOdcw8/rrr8uBAwfMtPARI0aYY2trjNL76pw5c0aGDh0qx48fl5kzZ8rSpUtl0KBB6X1JAAAgC0r3kDNr1iwzo0pv+KctM86yZMkSs1+nf+vUcA0yVapUkcGDB0uHDh1k5cqV7jGyZ89uurr0VVtmunbtaoLQ2LFj3RptIVq9erVpvQkNDTVTyT/++GOmjwMAgIwZeKzdVfejA331hoEPorOv1qxZc98aDVL79u1L8zkCAAD78ewqAABgJUIOAACwEiEHAABYiZADAACsRMgBAABWIuQAAAArEXIAAICVCDkAAMBKhBwAAGAlQg4AALASIQcAAFiJkAMAAKxEyAEAAFYi5AAAACsRcgAAgJUIOQAAwEqEHAAAYCVCDgAAsBIhBwAAWImQAwAArETIAQAAViLkAAAAKxFyAACAlQg5AADASoQcAABgJUIOAACwEiEHAABYiZADAACsRMgBAABWIuQAAAArEXIAAICVCDkAAMBKhBwAAGClLB9yZsyYIWXLlpVcuXJJw4YNZdeuXZl9SgAA4AmQpUPOkiVLJDIyUkaNGiXffvuthIaGSkREhFy6dCmzTw0AAGSyLB1yJk+eLL1795Y33nhDqlWrJrNnz5Y8efLIvHnzMv
vUAABAJsuyIScpKUn27t0r4eHh7rZs2bKZ9ZiYmEw9NwAAkPlySBb1448/SnJyspQoUcJnu64fP348xfckJiaaxREfH29eExISxN/cSbye2aeAx8gf/z/uz/j99i/++Pud8P+v2ePx2BlyHkVUVJSMGTPmnu2lS5fOlPMBHpcCUzP7DABkFH/+/f7Xv/4lBQoUsC/kFC1aVLJnzy4XL1702a7rwcHBKb5n+PDhZqCy486dO/Lzzz9LkSJFJCAgIMPPGZmf/DXQnj9/XvLnz5/ZpwMgHfH77V88Ho8JOCEhIfety7IhJzAwUOrVqyebNm2S9u3bu6FF1/v375/ie4KCgszirWDBgo/lfPHk0H8B8i9BwE78fvuPAvdpwcnyIUdpq0z37t2lfv360qBBA5k6dapcu3bNzLYCAAD+LUuHnI4dO8oPP/wgI0eOlLi4OKldu7asXbv2nsHIAADA/2TpkKO0ayq17inAm3ZV6o0j7+6yBJD18fuNlAR4HjT/CgAAIAvKsjcDBAAAuB9CDgAAsBIhBwAAWImQAwAArJTlZ1cBAPyLPrtw3rx55mHMevsQpXe6b9y4sfTo0UOKFSuW2aeIJwSzqwAAWcbu3bslIiJC8uTJI+Hh4e590fSRPnrH++vXr8u6devMTWIBQg78kj7fRu+pof81CCDraNSokYSGhsrs2bPveeag/jnr27evHDx40LTyAIQc+KUDBw5I3bp1JTk5ObNPBUAa5M6dW/bt2ydVqlRJcf/x48elTp06cuPGjcd+bnjyMCYHVlqxYsV99585c+axnQuA9KNjb3bt2pVqyNF9PNoHDkIOrKRPptem7Ps1VN7d1A3gyffee+9Jnz59ZO/evdKyZct7xuT85S9/kQ8//DCzTxNPCLqrYKWnn35aZs6cKS+//HKK+/fv3y/16tWjuwrIgpYsWSJTpkwxQcf5Hc6ePbv5nY6MjJRXX301s08RTwhCDqz00ksvmafSjx07NtUxOdpvf+fOncd+bgDSx61bt8x0clW0aFHJmTNnZp8SnjB0V8FKQ4YMkWvXrqW6v2LFirJly5bHek4A0peGmpIlS2b2aeAJRksOAACwEo91AAAAViLkAAAAKxFyAACAlQg5AJ5YzZs3l4EDBz5U7datW829j65cufKLPrNs2bIyderUX3QMAE8GQg4AALASIQcAAFiJkAMgS/jrX/8q9evXl3z58pnnF3Xu3FkuXbp0T93XX38ttWrVkly5cpknVh8+fNhn/44dO6RJkybmQY+lS5eWd9999773VAKQdRFyAGSZu9uOGzfO3K16+fLlcvbsWenRo0eKN4KcNGmS7N69W4oVKybt2rUz71WnT5+W1q1bS4cOHeTgwYPm8QAaevr3758JVwQgo3HHYwBZwptvvun+XL58efnTn/4kzz77rFy9elWeeuopd9+oUaPkhRdeMD8vXLhQSpUqJV988YV5nlFUVJR06dLFHcxcqVIlc5xmzZrJrFmzTOsPAHvQkgMgS9CHMWqrzDPPPGO6rDSYqHPnzvnUhYWFuT8XLlxYKleuLMeOHTPr2gq0YMECE4qcJSIiwjzDLDY29jFfEYCMRksOgCeejpnRMKLLokWLTDeUhhtdT0pKeujjaKvPW2+9Zcbh3E3DEwC7EHIAPPGOHz8uP/30k4wfP94MFlZ79uxJsXbnzp1uYLl8+bL84x//kKpVq5r1unXrytGjR80DWgHYj+4qAE88DS2BgYHy0UcfyZkzZ2TFihVmEHJKxo4dK5s2bTKzqnRgctGiRaV9+/Zm37BhwyQ6OtoMNN6/f7+cPHlSvvzySwYeA5Yi5AB44mn3lI6lWbZsmVSrVs206Hz44Ycp1uq+AQMGSL169SQuLk5WrlxpApLSqeXbtm0zrTs6jbxOnToycuRICQkJecxXBOBxCPB4PJ7H8kkAAACPES05AADASoQcAABgJUIOAACwEiEHAABYiZADAACsRMgBAABWIuQAAAArEXIAAI
CVCDkAAMBKhBwAAGAlQg4AALASIQcAAIiN/h/1p9nWmA5w/gAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df.info()\n", + "df['label'].value_counts().plot(kind='bar', title='Class Balance')\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "e3c79fc6", + "metadata": {}, + "source": [ + "# 7. Vectorization + Model Training" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "5741a809", + "metadata": {}, + "outputs": [], + "source": [ + "# vectorizer = TfidfVectorizer(max_features=5000)\n", + "vectorizer = TfidfVectorizer()\n", + "\n", + "X_train_vec = vectorizer.fit_transform(X_train) # transform the training data\n", + "\n", + "X_test_vec = vectorizer.transform(X_test) # transform only\n", + "\n", + "model = LogisticRegression()\n", + "\n", + "model.fit(X_train_vec, y_train) # train the model using the training data\n", + "\n", + "y_pred = model.predict(X_test_vec)\n" + ] + }, + { + "cell_type": "markdown", + "id": "9706ead4", + "metadata": {}, + "source": [ + "# 8. Evaluation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e5b52270", + "metadata": {}, + "outputs": [], + "source": [ + "print(classification_report(y_test, y_pred))\n", + "sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt='d')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "0555f1bf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9896106398535254" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.metrics import f1_score\n", + "f1_score(y_test, y_pred, average='weighted')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "c49ac4a6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "✅ Correct Predictions Sample:\n", + "11181 White House confident Supreme Court will uphol...\n", + "17721 Illinois man charged with kidnapping, death of...\n", + "34887 NOT KIDDING: 
Students Are Given Counseling Aft...\n", + "26367 WATCH: Robert De Niro Makes Sure To Perfectly...\n", + "11339 Venezuelans scramble to survive as merchants d...\n", + "dtype: object\n", + "❌ Incorrect Predictions Sample:\n", + "36134 N. KOREA WARNS It Will Strike U.S. Bases In S....\n", + "35791 GOTCHA! CLINTON CHARITY “MISFILED” MILLIONS IN...\n", + "2133 Instant View: Reaction to disbanding of Trump ...\n", + "10261 Trump's 'obsession' with anchorwoman Kelly is ...\n", + "37012 CANADA’S OBAMA? WATCH New Prime Minister Call ...\n", + "dtype: object\n" + ] + } + ], + "source": [ + "# Show correctly predicted examples\n", + "correct = X_test[y_test == y_pred]\n", + "print(\"✅ Correct Predictions Sample:\")\n", + "print(correct.sample(5))\n", + "\n", + "# Show incorrect predictions\n", + "incorrect = X_test[y_test != y_pred]\n", + "print(\"❌ Incorrect Predictions Sample:\")\n", + "print(incorrect.sample(5))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "033beb70", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "🔍 Top predictive words: [(19.84848710357449, 'reuters'), (18.659089790349725, 'said'), (8.91313089066927, 'on'), (6.749188523164079, 'washington'), (5.338842565654789, 'in'), (4.882292853983793, 'republican'), (4.379473297016656, 'wednesday'), (4.0831862881709515, 'thursday'), (4.007826342292402, 'tuesday'), (3.8843498360894237, 'presidential'), (3.4691150007402456, 'had'), (3.4679091837900216, 'friday'), (3.446936293565944, 'minister'), (3.424262556326454, 'edt'), (3.3858258917952204, 'nov'), (3.307878382165057, 'reporters'), (3.24999287779387, 'told'), (3.2313675932622723, 'democratic'), (3.213999961333914, 'its'), (3.144683560067598, 'monday')]\n" + ] + } + ], + "source": [ + "feature_names = vectorizer.get_feature_names_out()\n", + "coefficients = model.coef_[0]\n", + "top_features = sorted(zip(coefficients, feature_names), reverse=True)[:20]\n", + "print(\"🔍 Top predictive 
words:\", top_features)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "d141d50c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "F1-score: 0.9896106398535254\n" + ] + } + ], + "source": [ + "from sklearn.metrics import f1_score\n", + "print(\"F1-score:\", f1_score(y_test, y_pred, average='weighted'))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "84c99dc5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "🔝 Words pushing prediction to 1: ['presidential' 'tuesday' 'thursday' 'wednesday' 'republican' 'in'\n", + " 'washington' 'on' 'said' 'reuters']\n", + "🔻 Words pushing prediction to 0: ['video' 'via' 'this' 'just' 'hillary' 'gop' 'is' 'that' 'com' 'image']\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "\n", + "feature_names = vectorizer.get_feature_names_out()\n", + "coefficients = model.coef_[0]\n", + "top_positive = np.argsort(coefficients)[-10:]\n", + "top_negative = np.argsort(coefficients)[:10]\n", + "\n", + "print(\"🔝 Words pushing prediction to 1:\", feature_names[top_positive])\n", + "print(\"🔻 Words pushing prediction to 0:\", feature_names[top_negative])\n" + ] + }, + { + "cell_type": "markdown", + "id": "e050b32c", + "metadata": {}, + "source": [ + "# 9. 
Predict Validation Set" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "d4f99cf7", + "metadata": {}, + "outputs": [], + "source": [ + "val_df = pd.read_csv(\"dataset/validation_data.csv\")\n", + "val_df.head()\n", + "# print(val_df['label'].unique())\n", + "\n", + "val_df['text_clean'] = (val_df['title'] + \" \" + val_df['text']).apply(clean_text)\n", + "X_val_vec = vectorizer.transform(val_df['text_clean'])\n", + "val_df['label'] = model.predict(X_val_vec)\n", + "\n", + "# Save predictions\n", + "val_df.reset_index(inplace=True)\n", + "val_df[['index', 'label']].to_csv(\"predictions.csv\", index=False)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/fn.ipynb b/fn.ipynb new file mode 100644 index 0000000..e69de29 diff --git a/predictions.csv b/predictions.csv index 9328673..6861553 100644 --- a/predictions.csv +++ b/predictions.csv @@ -495,7 +495,7 @@ index,label 493,1 494,1 495,1 -496,1 +496,0 497,1 498,1 499,1 @@ -524,7 +524,7 @@ index,label 522,1 523,1 524,1 -525,0 +525,1 526,1 527,1 528,1 @@ -533,7 +533,7 @@ index,label 531,1 532,1 533,1 -534,1 +534,0 535,1 536,1 537,1 @@ -586,7 +586,7 @@ index,label 584,1 585,1 586,1 -587,0 +587,1 588,1 589,1 590,1 @@ -1343,7 +1343,7 @@ index,label 1341,1 1342,1 1343,0 -1344,1 +1344,0 1345,1 1346,1 1347,1 @@ -1400,7 +1400,7 @@ index,label 1398,1 1399,1 1400,1 -1401,1 +1401,0 1402,1 1403,1 1404,1 @@ -1802,7 +1802,7 @@ index,label 1800,0 1801,0 1802,0 -1803,0 +1803,1 1804,0 1805,0 1806,0 @@ -1843,7 +1843,7 @@ index,label 1841,0 1842,0 1843,1 -1844,1 +1844,0 1845,0 1846,0 1847,0 @@ -1904,10 +1904,10 @@ index,label 1902,0 
1903,0 1904,0 -1905,1 +1905,0 1906,0 1907,0 -1908,1 +1908,0 1909,0 1910,0 1911,0 @@ -1929,7 +1929,7 @@ index,label 1927,0 1928,0 1929,0 -1930,0 +1930,1 1931,0 1932,0 1933,0 @@ -1943,7 +1943,7 @@ index,label 1941,0 1942,0 1943,0 -1944,0 +1944,1 1945,0 1946,0 1947,0 @@ -1963,7 +1963,7 @@ index,label 1961,0 1962,0 1963,0 -1964,0 +1964,1 1965,0 1966,0 1967,0 @@ -2355,7 +2355,7 @@ index,label 2353,0 2354,0 2355,1 -2356,0 +2356,1 2357,0 2358,0 2359,0 @@ -2406,7 +2406,7 @@ index,label 2404,0 2405,0 2406,0 -2407,0 +2407,1 2408,0 2409,0 2410,0 @@ -2539,7 +2539,7 @@ index,label 2537,0 2538,0 2539,0 -2540,0 +2540,1 2541,0 2542,0 2543,0 @@ -2597,7 +2597,7 @@ index,label 2595,1 2596,0 2597,0 -2598,1 +2598,0 2599,0 2600,0 2601,0 @@ -2630,12 +2630,12 @@ index,label 2628,0 2629,0 2630,0 -2631,1 +2631,0 2632,0 2633,0 2634,0 2635,0 -2636,1 +2636,0 2637,0 2638,0 2639,0 @@ -2785,7 +2785,7 @@ index,label 2783,0 2784,0 2785,0 -2786,1 +2786,0 2787,0 2788,0 2789,0 @@ -3012,7 +3012,7 @@ index,label 3010,0 3011,0 3012,0 -3013,1 +3013,0 3014,0 3015,0 3016,0 @@ -3028,7 +3028,7 @@ index,label 3026,0 3027,0 3028,0 -3029,0 +3029,1 3030,0 3031,0 3032,0 @@ -3041,9 +3041,9 @@ index,label 3039,0 3040,0 3041,0 -3042,0 +3042,1 3043,0 -3044,0 +3044,1 3045,0 3046,0 3047,0 @@ -3145,7 +3145,7 @@ index,label 3143,0 3144,0 3145,0 -3146,1 +3146,0 3147,0 3148,0 3149,0 @@ -3224,7 +3224,7 @@ index,label 3222,0 3223,0 3224,0 -3225,1 +3225,0 3226,0 3227,0 3228,0 @@ -3235,13 +3235,13 @@ index,label 3233,0 3234,0 3235,0 -3236,1 +3236,0 3237,0 3238,0 3239,0 3240,0 3241,0 -3242,0 +3242,1 3243,0 3244,0 3245,0 @@ -3293,7 +3293,7 @@ index,label 3291,0 3292,0 3293,0 -3294,1 +3294,0 3295,0 3296,0 3297,0 @@ -3468,7 +3468,7 @@ index,label 3466,0 3467,0 3468,0 -3469,1 +3469,0 3470,0 3471,0 3472,0 @@ -3514,7 +3514,7 @@ index,label 3512,0 3513,0 3514,0 -3515,1 +3515,0 3516,0 3517,0 3518,0 @@ -3562,16 +3562,16 @@ index,label 3560,0 3561,1 3562,0 -3563,1 +3563,0 3564,0 -3565,1 +3565,0 3566,0 3567,1 3568,0 3569,0 3570,0 3571,0 
-3572,1 +3572,0 3573,0 3574,0 3575,0 @@ -3592,7 +3592,7 @@ index,label 3590,0 3591,0 3592,0 -3593,0 +3593,1 3594,0 3595,0 3596,0 @@ -3603,7 +3603,7 @@ index,label 3601,0 3602,0 3603,0 -3604,1 +3604,0 3605,0 3606,1 3607,0 @@ -3638,7 +3638,7 @@ index,label 3636,0 3637,0 3638,0 -3639,1 +3639,0 3640,0 3641,0 3642,0 @@ -3650,11 +3650,11 @@ index,label 3648,0 3649,0 3650,0 -3651,1 +3651,0 3652,0 3653,0 3654,0 -3655,1 +3655,0 3656,0 3657,0 3658,0 @@ -3688,7 +3688,7 @@ index,label 3686,0 3687,0 3688,0 -3689,0 +3689,1 3690,0 3691,0 3692,0 @@ -3705,7 +3705,7 @@ index,label 3703,0 3704,0 3705,0 -3706,1 +3706,0 3707,0 3708,1 3709,1 @@ -3722,10 +3722,10 @@ index,label 3720,0 3721,0 3722,0 -3723,1 +3723,0 3724,0 3725,0 -3726,1 +3726,0 3727,0 3728,0 3729,0 @@ -3741,7 +3741,7 @@ index,label 3739,0 3740,0 3741,0 -3742,1 +3742,0 3743,0 3744,0 3745,0 @@ -3779,7 +3779,7 @@ index,label 3777,0 3778,0 3779,0 -3780,0 +3780,1 3781,0 3782,0 3783,0 @@ -3793,7 +3793,7 @@ index,label 3791,0 3792,0 3793,0 -3794,1 +3794,0 3795,0 3796,0 3797,0 @@ -3803,7 +3803,7 @@ index,label 3801,1 3802,0 3803,0 -3804,1 +3804,0 3805,0 3806,0 3807,0 @@ -3832,7 +3832,7 @@ index,label 3830,0 3831,0 3832,0 -3833,1 +3833,0 3834,0 3835,0 3836,0 @@ -3866,7 +3866,7 @@ index,label 3864,0 3865,0 3866,0 -3867,1 +3867,0 3868,0 3869,0 3870,0 @@ -3881,7 +3881,7 @@ index,label 3879,0 3880,0 3881,0 -3882,0 +3882,1 3883,0 3884,0 3885,0 @@ -3901,7 +3901,7 @@ index,label 3899,0 3900,0 3901,0 -3902,0 +3902,1 3903,0 3904,0 3905,0 @@ -3930,7 +3930,7 @@ index,label 3928,0 3929,0 3930,0 -3931,1 +3931,0 3932,0 3933,0 3934,0 @@ -3941,7 +3941,7 @@ index,label 3939,0 3940,0 3941,0 -3942,1 +3942,0 3943,0 3944,1 3945,0 @@ -3949,7 +3949,7 @@ index,label 3947,0 3948,0 3949,0 -3950,1 +3950,0 3951,0 3952,0 3953,0 @@ -3961,7 +3961,7 @@ index,label 3959,0 3960,0 3961,0 -3962,1 +3962,0 3963,0 3964,0 3965,0 @@ -3993,7 +3993,7 @@ index,label 3991,0 3992,0 3993,0 -3994,1 +3994,0 3995,0 3996,0 3997,0 @@ -4004,7 +4004,7 @@ index,label 4002,0 4003,0 
4004,0 -4005,1 +4005,0 4006,0 4007,0 4008,0 @@ -4071,7 +4071,7 @@ index,label 4069,0 4070,0 4071,0 -4072,1 +4072,0 4073,0 4074,0 4075,0 @@ -4122,7 +4122,7 @@ index,label 4120,0 4121,0 4122,0 -4123,1 +4123,0 4124,0 4125,0 4126,0 @@ -4130,7 +4130,7 @@ index,label 4128,0 4129,0 4130,0 -4131,0 +4131,1 4132,0 4133,0 4134,0 @@ -4156,13 +4156,13 @@ index,label 4154,0 4155,0 4156,0 -4157,1 +4157,0 4158,0 4159,0 4160,0 4161,0 4162,0 -4163,1 +4163,0 4164,0 4165,0 4166,0 @@ -4251,7 +4251,7 @@ index,label 4249,0 4250,0 4251,0 -4252,1 +4252,0 4253,0 4254,0 4255,0 @@ -4297,7 +4297,7 @@ index,label 4295,0 4296,0 4297,0 -4298,1 +4298,0 4299,0 4300,0 4301,0 @@ -4345,16 +4345,16 @@ index,label 4343,0 4344,1 4345,0 -4346,1 +4346,0 4347,0 -4348,1 +4348,0 4349,0 4350,1 4351,0 4352,0 4353,0 4354,0 -4355,1 +4355,0 4356,0 4357,0 4358,0 @@ -4375,7 +4375,7 @@ index,label 4373,0 4374,0 4375,0 -4376,0 +4376,1 4377,0 4378,0 4379,0 @@ -4386,7 +4386,7 @@ index,label 4384,0 4385,0 4386,0 -4387,1 +4387,0 4388,0 4389,1 4390,0 @@ -4421,7 +4421,7 @@ index,label 4419,0 4420,0 4421,0 -4422,1 +4422,0 4423,0 4424,0 4425,0 @@ -4433,11 +4433,11 @@ index,label 4431,0 4432,0 4433,0 -4434,1 +4434,0 4435,0 4436,0 4437,0 -4438,1 +4438,0 4439,0 4440,0 4441,0 @@ -4471,7 +4471,7 @@ index,label 4469,0 4470,0 4471,0 -4472,0 +4472,1 4473,0 4474,0 4475,0 @@ -4488,7 +4488,7 @@ index,label 4486,0 4487,0 4488,0 -4489,1 +4489,0 4490,0 4491,1 4492,1 @@ -4505,10 +4505,10 @@ index,label 4503,0 4504,0 4505,0 -4506,1 +4506,0 4507,0 4508,0 -4509,1 +4509,0 4510,0 4511,0 4512,0 @@ -4524,7 +4524,7 @@ index,label 4522,0 4523,0 4524,0 -4525,1 +4525,0 4526,0 4527,0 4528,0 @@ -4562,7 +4562,7 @@ index,label 4560,0 4561,0 4562,0 -4563,0 +4563,1 4564,0 4565,0 4566,0 @@ -4576,7 +4576,7 @@ index,label 4574,0 4575,0 4576,0 -4577,1 +4577,0 4578,0 4579,0 4580,0 @@ -4586,7 +4586,7 @@ index,label 4584,1 4585,0 4586,0 -4587,1 +4587,0 4588,0 4589,0 4590,0 @@ -4615,7 +4615,7 @@ index,label 4613,0 4614,0 4615,0 -4616,1 +4616,0 4617,0 4618,0 4619,0 
@@ -4649,7 +4649,7 @@ index,label 4647,0 4648,0 4649,0 -4650,1 +4650,0 4651,0 4652,0 4653,0 @@ -4664,7 +4664,7 @@ index,label 4662,0 4663,0 4664,0 -4665,0 +4665,1 4666,0 4667,0 4668,0 @@ -4684,7 +4684,7 @@ index,label 4682,0 4683,0 4684,0 -4685,0 +4685,1 4686,0 4687,0 4688,0 @@ -4713,7 +4713,7 @@ index,label 4711,0 4712,0 4713,0 -4714,1 +4714,0 4715,0 4716,0 4717,0 @@ -4724,7 +4724,7 @@ index,label 4722,0 4723,0 4724,0 -4725,1 +4725,0 4726,0 4727,1 4728,0 @@ -4732,7 +4732,7 @@ index,label 4730,0 4731,0 4732,0 -4733,1 +4733,0 4734,0 4735,0 4736,0 @@ -4744,7 +4744,7 @@ index,label 4742,0 4743,0 4744,0 -4745,1 +4745,0 4746,0 4747,0 4748,0 @@ -4776,7 +4776,7 @@ index,label 4774,0 4775,0 4776,0 -4777,1 +4777,0 4778,0 4779,0 4780,0 @@ -4787,7 +4787,7 @@ index,label 4785,0 4786,0 4787,0 -4788,1 +4788,0 4789,0 4790,0 4791,0 @@ -4854,7 +4854,7 @@ index,label 4852,0 4853,0 4854,0 -4855,1 +4855,0 4856,0 4857,0 4858,0 @@ -4905,7 +4905,7 @@ index,label 4903,0 4904,0 4905,0 -4906,1 +4906,0 4907,0 4908,0 4909,0 @@ -4913,7 +4913,7 @@ index,label 4911,0 4912,0 4913,0 -4914,0 +4914,1 4915,0 4916,0 4917,0 @@ -4939,13 +4939,13 @@ index,label 4937,0 4938,0 4939,0 -4940,1 +4940,0 4941,0 4942,0 4943,0 4944,0 4945,0 -4946,1 +4946,0 4947,0 4948,0 4949,0 From 00bf177fa81d6446c9045471ab84fc4f9bd39fd2 Mon Sep 17 00:00:00 2001 From: Luis <56727115+luistatera@users.noreply.github.com> Date: Sun, 25 May 2025 22:54:11 +0200 Subject: [PATCH 4/4] Implement text classification pipeline with data preprocessing, model training, and evaluation - Added TextPreprocessor class for text cleaning and lemmatization - Integrated TfidfVectorizer and LogisticRegression into a scikit-learn Pipeline - Implemented caching mechanism for cleaned training and test data - Evaluated model performance on training and test sets with classification reports and confusion matrices - Added baseline evaluation using DummyClassifier - Processed validation data and generated predictions - Saved validation predictions to CSV and 
entire pipeline to a pickle file --- 1.fake-news-LogisticRegression.ipynb | 863 +++++ 1.fake-news-RandonForest copy.ipynb | 861 +++++ 1.fake-news-XGBoost.ipynb | 859 +++++ 2nd-jn-fake-news.ipynb | 637 ---- fn.ipynb | 0 jn-fake-news.ipynb | 252 -- main.py | 182 + predictions.csv | 4957 -------------------------- 8 files changed, 2765 insertions(+), 5846 deletions(-) create mode 100644 1.fake-news-LogisticRegression.ipynb create mode 100644 1.fake-news-RandonForest copy.ipynb create mode 100644 1.fake-news-XGBoost.ipynb delete mode 100644 2nd-jn-fake-news.ipynb delete mode 100644 fn.ipynb delete mode 100644 jn-fake-news.ipynb create mode 100644 main.py delete mode 100644 predictions.csv diff --git a/1.fake-news-LogisticRegression.ipynb b/1.fake-news-LogisticRegression.ipynb new file mode 100644 index 0000000..d2326aa --- /dev/null +++ b/1.fake-news-LogisticRegression.ipynb @@ -0,0 +1,863 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 15, + "id": "4dc82578", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.feature_extraction.text import TfidfVectorizer\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.metrics import classification_report, confusion_matrix\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "1ff89aef", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(\"dataset/data.csv\")\n", + "\n", + "# remove empty rows\n", + "df = df[df['title'] != '']\n", + "df = df[df['text'] != '']" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "4763a7f6", + "metadata": {}, + "outputs": [], + "source": [ + "# Remove duplicate rows based on the 'text' column\n", + "df = df.drop_duplicates(subset=['text']) \n", + "\n", + "# Remove rows with 'text' is NaN\n", + "df = df.dropna(subset=['text']) \n", + 
"\n", + "# Remove rows with 'label' is NaN\n", + "df = df.dropna(subset=['label']) \n", + "\n", + "# Remove rows with 'text' empty or only with whitespace\n", + "df = df[df['text'].str.strip() != ''] " + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "41340b02", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package wordnet to\n", + "[nltk_data] /Users/luis.guimaraes/nltk_data...\n", + "[nltk_data] Package wordnet is already up-to-date!\n" + ] + } + ], + "source": [ + "import re\n", + "import string\n", + "import nltk\n", + "from nltk.tokenize import word_tokenize\n", + "from nltk.stem import WordNetLemmatizer\n", + "\n", + "# Download required NLTK data if not already downloaded\n", + "nltk.download('wordnet')\n", + "\n", + "def clean_text(text):\n", + " text = text.lower()\n", + " text = re.sub(r'\\[.*?\\]', '', text)\n", + " text = re.sub(r'http\\S+|www\\S+|https\\S+', '', text)\n", + " text = re.sub(r'<.*?>+', '', text)\n", + " text = re.sub(r'[%s]' % re.escape(string.punctuation), '', text)\n", + " text = re.sub(r'\\n', '', text)\n", + " text = re.sub(r'\\w*\\d\\w*', '', text)\n", + " \n", + " # Tokenize the text\n", + " tokens = word_tokenize(text)\n", + " \n", + " # Initialize Lemmatizer\n", + " lemmatizer = WordNetLemmatizer()\n", + " \n", + " # Lemmatize each token\n", + " lemmatized_tokens = [lemmatizer.lemmatize(token) for token in tokens]\n", + " \n", + " # Join tokens back into a string\n", + " text = ' '.join(lemmatized_tokens)\n", + " return text\n", + "\n", + "df['text_clean'] = df['title'] + \" \" + df['text']\n", + "df['text_clean'] = df['text_clean'].apply(clean_text)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "296fe39d", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# Split the data into training and testing sets\n", + "X = df['text_clean']\n", + "y = df['label']\n", + "\n", + "# train\n", + "X_train, X_test, 
y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "a64b0b16", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "vectorizer = TfidfVectorizer(\n", + " max_features=8000, # limit the number of features\n", + " stop_words='english',\n", + " min_df=5, # ignore rare words\n", + " max_df=0.8 # ignore overly common words\n", + ")\n", + "\n", + "# Fit only on training, transform both\n", + "X_train_vec = vectorizer.fit_transform(X_train)\n", + "X_test_vec = vectorizer.transform(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "c381617c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
LogisticRegression(C=0.5, class_weight='balanced', max_iter=1000, penalty='l1',\n",
+       "                   random_state=42, solver='saga')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "LogisticRegression(C=0.5, class_weight='balanced', max_iter=1000, penalty='l1',\n", + " random_state=42, solver='saga')" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = LogisticRegression(\n", + " class_weight='balanced',\n", + " solver='saga', # Algorithm to use in the optimization problem\n", + " penalty='l1', # Specify the norm of the penalty\n", + " C=0.5, # Inverse of regularization strength; smaller values specify stronger regularization\n", + " max_iter=1000, # Maximum number of iterations taken for the solvers to converge\n", + " random_state=42 # For reproducibility\n", + ")\n", + "\n", + "model.fit(X_train_vec, y_train)\n", + "\n", + "\n", + "#model = LogisticRegression(class_weight='balanced')\n", + "#model.fit(X_train_vec, y_train)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "484d6523", + "metadata": {}, + "outputs": [], + "source": [ + "# Predict on the test set\n", + "y_pred = model.predict(X_test_vec)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "5f194883", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['tfidf_vectorizer.pkl']" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import joblib\n", + "\n", + "# Save model\n", + "joblib.dump(model, 'logistic_model.pkl')\n", + "\n", + "# Save TF-IDF vectorizer\n", + "joblib.dump(vectorizer, 'tfidf_vectorizer.pkl')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "2ef43d5f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.99 0.99 0.99 3241\n", + " 1 0.99 0.99 0.99 3954\n", + "\n", + " accuracy 0.99 7195\n", + " macro avg 0.99 0.99 0.99 7195\n", + "weighted avg 0.99 0.99 0.99 7195\n", + "\n" + ] + }, + { + "data": { + "image/png": "", + 
"text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "# Classification report and confusion matrix\n", + "print(classification_report(y_test, y_pred))\n", + "sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt='d')\n", + "plt.title(\"Confusion Matrix\")\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "ab90e014", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Baseline accuracy: 0.54954829742877\n" + ] + } + ], + "source": [ + "from sklearn.dummy import DummyClassifier\n", + "dummy = DummyClassifier(strategy=\"most_frequent\")\n", + "dummy.fit(X_train_vec, y_train)\n", + "print(\"Baseline accuracy:\", dummy.score(X_test_vec, y_test))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "a787659a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Probabilities: [[0.99015316 0.00984684]]\n" + ] + } + ], + "source": [ + "sample_text = [\"This is a sample news article.\"] # Replace with your sample text\n", + "sample_vec = vectorizer.transform(sample_text)\n", + "\n", + "probs = model.predict_proba(sample_vec)\n", + "print(\"Probabilities:\", probs)\n" + ] + }, + { + "cell_type": "markdown", + "id": "37fa4daf", + "metadata": {}, + "source": [ + "# Load validation_data.csv" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "e984f239", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package punkt to\n", + "[nltk_data] /Users/luis.guimaraes/nltk_data...\n", + "[nltk_data] Package punkt is already up-to-date!\n" + ] + } + ], + "source": [ + "\n", + "from nltk.stem import PorterStemmer\n", + "from nltk.tokenize import word_tokenize\n", + "nltk.download('punkt')\n", + "\n", + "# Define stemmer\n", + "stemmer = PorterStemmer()\n", + "\n", + "# Define stem_text function\n", + 
"def stem_text(text):\n", + "\tif isinstance(text, str):\n", + "\t\t# Tokenize the text\n", + "\t\ttokens = word_tokenize(text.lower())\n", + "\t\t# Apply stemming\n", + "\t\tstemmed_tokens = [stemmer.stem(token) for token in tokens]\n", + "\t\t# Join tokens back into a string\n", + "\t\treturn ' '.join(stemmed_tokens)\n", + "\treturn ''\n", + "\n", + "# Load validation data and prepare it for prediction\n", + "validation_df = pd.read_csv(\"dataset/validation_data.csv\")\n", + "\n", + "# Clean NaNs before applying\n", + "validation_df['title'] = validation_df['title'].fillna('')\n", + "validation_df['text'] = validation_df['text'].fillna('')\n", + "\n", + "# Apply stemming\n", + "validation_df['title'] = validation_df['title'].apply(stem_text)\n", + "validation_df['text'] = validation_df['text'].apply(stem_text)\n", + "\n", + "# Combine title and text\n", + "validation_df['text_clean'] = (validation_df['title'] + ' ' + validation_df['text']).str.strip()\n", + "validation_df = validation_df[validation_df['text_clean'] != '']\n", + "\n", + "# Prepare features (ignore label column as instructed)\n", + "X_val = validation_df['text_clean']\n", + "\n", + "# Transform using the same vectorizer used for training\n", + "X_val_vec = vectorizer.transform(X_val)\n", + "\n", + "# Get predictions (0 or 1)\n", + "predictions = model.predict(X_val_vec)\n", + "\n", + "# Add predictions to the validation dataframe\n", + "validation_df['predicted_label'] = predictions\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "217a1684", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "First few predictions:\n", + " title predicted_label\n", + "0 uk 's may 'receiv regular updat ' on london tu... 0\n", + "1 uk transport polic lead investig of london inc... 0\n", + "2 pacif nation crack down on north korean ship a... 0\n", + "3 three suspect al qaeda milit kill in yemen dro... 
0\n", + "4 chines academ prod beij to consid north korea ... 1\n", + "\n", + "Prediction counts:\n", + "predicted_label\n", + "0 4668\n", + "1 288\n", + "Name: count, dtype: int64\n" + ] + } + ], + "source": [ + "# Display the first few predictions\n", + "print(\"First few predictions:\")\n", + "print(validation_df[['title', 'predicted_label']].head())\n", + "\n", + "validation_df[['title', 'predicted_label']].to_csv('validation_predictions.csv', index=False)\n", + "\n", + "# Create a copy with index as id\n", + "#result_df = pd.DataFrame({\n", + "# 'id': validation_df.index,\n", + "# 'predicted_label': validation_df['predicted_label']\n", + "#})\n", + "\n", + "validation_df.to_csv('validation_predictions.csv', index=False)\n", + "\n", + "# Count of each prediction class\n", + "print(\"\\nPrediction counts:\")\n", + "print(validation_df['predicted_label'].value_counts())\n", + "\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "3.10.12", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/1.fake-news-RandonForest copy.ipynb b/1.fake-news-RandonForest copy.ipynb new file mode 100644 index 0000000..da31f09 --- /dev/null +++ b/1.fake-news-RandonForest copy.ipynb @@ -0,0 +1,861 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "4dc82578", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.feature_extraction.text import TfidfVectorizer\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.metrics import classification_report, confusion_matrix\n", + "import seaborn as sns\n", + "import 
matplotlib.pyplot as plt\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "1ff89aef", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(\"dataset/data.csv\")\n", + "\n", + "# remove empty rows\n", + "df = df[df['title'] != '']\n", + "df = df[df['text'] != '']" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "4763a7f6", + "metadata": {}, + "outputs": [], + "source": [ + "# Remove duplicate rows based on the 'text' column\n", + "df = df.drop_duplicates(subset=['text']) \n", + "\n", + "# Remove rows with 'text' is NaN\n", + "df = df.dropna(subset=['text']) \n", + "\n", + "# Remove rows with 'label' is NaN\n", + "df = df.dropna(subset=['label']) \n", + "\n", + "# Remove rows with 'text' empty or only with whitespace\n", + "df = df[df['text'].str.strip() != ''] " + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "41340b02", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package wordnet to\n", + "[nltk_data] /Users/luis.guimaraes/nltk_data...\n", + "[nltk_data] Package wordnet is already up-to-date!\n", + "[nltk_data] Downloading package punkt to\n", + "[nltk_data] /Users/luis.guimaraes/nltk_data...\n", + "[nltk_data] Package punkt is already up-to-date!\n" + ] + } + ], + "source": [ + "import re\n", + "import string\n", + "import nltk\n", + "from nltk.tokenize import word_tokenize\n", + "from nltk.stem import WordNetLemmatizer\n", + "\n", + "# Download required NLTK data if not already downloaded\n", + "nltk.download('wordnet')\n", + "nltk.download('punkt')\n", + "\n", + "def clean_text(text):\n", + " text = text.lower()\n", + " text = re.sub(r'\\[.*?\\]', '', text)\n", + " text = re.sub(r'http\\S+|www\\S+|https\\S+', '', text)\n", + " text = re.sub(r'<.*?>+', '', text)\n", + " text = re.sub(r'[%s]' % re.escape(string.punctuation), '', text)\n", + " text = re.sub(r'\\n', '', text)\n", + " text = 
re.sub(r'\\w*\\d\\w*', '', text)\n", + " \n", + " # Tokenize the text\n", + " tokens = word_tokenize(text)\n", + " \n", + " # Initialize Lemmatizer\n", + " lemmatizer = WordNetLemmatizer()\n", + " \n", + " # Lemmatize each token\n", + " lemmatized_tokens = [lemmatizer.lemmatize(token) for token in tokens]\n", + " \n", + " # Join tokens back into a string\n", + " text = ' '.join(lemmatized_tokens)\n", + " return text\n", + "\n", + "df['text_clean'] = df['title'] + \" \" + df['text']\n", + "df['text_clean'] = df['text_clean'].apply(clean_text)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "296fe39d", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# Split the data into training and testing sets\n", + "X = df['text_clean']\n", + "y = df['label']\n", + "\n", + "# train\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "a64b0b16", + "metadata": {}, + "outputs": [], + "source": [ + "#vectorizer = TfidfVectorizer(max_features=5000, stop_words='english')\n", + "vectorizer = TfidfVectorizer(\n", + " max_features=8000, # limit the number of features\n", + " stop_words='english',\n", + " min_df=5, # ignore rare words\n", + " max_df=0.8 # ignore overly common words\n", + ")\n", + "\n", + "# Fit only on training, transform both\n", + "X_train_vec = vectorizer.fit_transform(X_train)\n", + "X_test_vec = vectorizer.transform(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "c381617c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
RandomForestClassifier(class_weight='balanced', n_jobs=-1, random_state=42)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "RandomForestClassifier(class_weight='balanced', n_jobs=-1, random_state=42)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "model = RandomForestClassifier(\n", + " n_estimators=100, # Number of trees\n", + " max_depth=None, # Maximum depth of trees (None means unlimited)\n", + " min_samples_split=2,\n", + " min_samples_leaf=1,\n", + " class_weight='balanced', # Same as your Logistic Regression\n", + " random_state=42, # For reproducibility\n", + " n_jobs=-1 # Use all available cores\n", + ")\n", + "\n", + "# Train the model (this stays the same)\n", + "model.fit(X_train_vec, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "484d6523", + "metadata": {}, + "outputs": [], + "source": [ + "# Predict on the test set\n", + "y_pred = model.predict(X_test_vec)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "5f194883", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['tfidf_vectorizer_rf.pkl']" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import joblib\n", + "\n", + "# Save model\n", + "joblib.dump(model, 'random_forest_model.pkl')\n", + "\n", + "# Save TF-IDF vectorizer (this stays the same)\n", + "joblib.dump(vectorizer, 'tfidf_vectorizer_rf.pkl')" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "2ef43d5f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 1.00 0.99 0.99 3241\n", + " 1 0.99 1.00 1.00 3954\n", + "\n", + " accuracy 1.00 7195\n", + " macro avg 1.00 1.00 1.00 7195\n", + "weighted avg 1.00 1.00 1.00 7195\n", + "\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "# Classification report and confusion matrix\n", + "print(classification_report(y_test, y_pred))\n", + "sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt='d')\n", + "plt.title(\"Confusion Matrix\")\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "ab90e014", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Baseline accuracy: 0.54954829742877\n" + ] + } + ], + "source": [ + "from sklearn.dummy import DummyClassifier\n", + "dummy = DummyClassifier(strategy=\"most_frequent\")\n", + "dummy.fit(X_train_vec, y_train)\n", + "print(\"Baseline accuracy:\", dummy.score(X_test_vec, y_test))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "a787659a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Probabilities: [[0.99 0.01]]\n" + ] + } + ], + "source": [ + "sample_text = [\"This is a sample news article.\"] # Replace with your sample text\n", + "sample_vec = vectorizer.transform(sample_text)\n", + "\n", + "probs = model.predict_proba(sample_vec)\n", + "print(\"Probabilities:\", probs)\n" + ] + }, + { + "cell_type": "markdown", + "id": "37fa4daf", + "metadata": {}, + "source": [ + "# Load validation_data.csv" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "e984f239", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package punkt to\n", + "[nltk_data] /Users/luis.guimaraes/nltk_data...\n", + "[nltk_data] Package punkt is already up-to-date!\n" + ] + } + ], + "source": [ + "\n", + "# Import stemmer from nltk\n", + "from nltk.stem import PorterStemmer\n", + "nltk.download('punkt') # Need this for word_tokenize\n", + "\n", + "# Initialize stemmer\n", + "stemmer = PorterStemmer()\n", + "\n", + "# Define stem_text 
function\n", + "def stem_text(text):\n", + "\tif isinstance(text, str):\n", + "\t\t# Tokenize the text\n", + "\t\ttokens = word_tokenize(text.lower())\n", + "\t\t# Apply stemming\n", + "\t\tstemmed_tokens = [stemmer.stem(token) for token in tokens]\n", + "\t\t# Join tokens back into a string\n", + "\t\treturn ' '.join(stemmed_tokens)\n", + "\treturn ''\n", + "\n", + "# Load validation data and prepare it for prediction\n", + "validation_df = pd.read_csv(\"dataset/validation_data.csv\")\n", + "\n", + "# Clean NaNs before applying\n", + "validation_df['title'] = validation_df['title'].fillna('')\n", + "validation_df['text'] = validation_df['text'].fillna('')\n", + "\n", + "# Apply stemming\n", + "validation_df['title'] = validation_df['title'].apply(stem_text)\n", + "validation_df['text'] = validation_df['text'].apply(stem_text)\n", + "\n", + "# Combine title and text\n", + "validation_df['text_clean'] = (validation_df['title'] + ' ' + validation_df['text']).str.strip()\n", + "validation_df = validation_df[validation_df['text_clean'] != '']\n", + "\n", + "# Prepare features (ignore label column as instructed)\n", + "X_val = validation_df['text_clean']\n", + "\n", + "# Transform using the same vectorizer used for training\n", + "X_val_vec = vectorizer.transform(X_val)\n", + "\n", + "# Get predictions (0 or 1)\n", + "predictions = model.predict(X_val_vec)\n", + "\n", + "# Add predictions to the validation dataframe\n", + "validation_df['predicted_label'] = predictions\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "217a1684", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "First few predictions:\n", + " title predicted_label\n", + "0 uk 's may 'receiv regular updat ' on london tu... 1\n", + "1 uk transport polic lead investig of london inc... 0\n", + "2 pacif nation crack down on north korean ship a... 1\n", + "3 three suspect al qaeda milit kill in yemen dro... 
1\n", + "4 chines academ prod beij to consid north korea ... 1\n", + "\n", + "Prediction counts:\n", + "predicted_label\n", + "0 4207\n", + "1 749\n", + "Name: count, dtype: int64\n" + ] + } + ], + "source": [ + "# Display the first few predictions\n", + "print(\"First few predictions:\")\n", + "print(validation_df[['title', 'predicted_label']].head())\n", + "\n", + "validation_df[['title', 'predicted_label']].to_csv('validation_predictions.csv', index=False)\n", + "\n", + "# Create a copy with index as id\n", + "#result_df = pd.DataFrame({\n", + "# 'id': validation_df.index,\n", + "# 'predicted_label': validation_df['predicted_label']\n", + "#})\n", + "\n", + "validation_df.to_csv('validation_predictions-rf.csv', index=False)\n", + "\n", + "# Count of each prediction class\n", + "print(\"\\nPrediction counts:\")\n", + "print(validation_df['predicted_label'].value_counts())\n", + "\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "3.10.12", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/1.fake-news-XGBoost.ipynb b/1.fake-news-XGBoost.ipynb new file mode 100644 index 0000000..6781eda --- /dev/null +++ b/1.fake-news-XGBoost.ipynb @@ -0,0 +1,859 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "4dc82578", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.feature_extraction.text import TfidfVectorizer\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.metrics import classification_report, confusion_matrix\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n" + ] + 
}, + { + "cell_type": "code", + "execution_count": 2, + "id": "1ff89aef", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(\"dataset/data.csv\")\n", + "\n", + "# remove empty rows\n", + "df = df[df['title'] != '']\n", + "df = df[df['text'] != '']" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "4763a7f6", + "metadata": {}, + "outputs": [], + "source": [ + "# Remove duplicate rows based on the 'text' column\n", + "df = df.drop_duplicates(subset=['text']) \n", + "\n", + "# Remove rows with 'text' is NaN\n", + "df = df.dropna(subset=['text']) \n", + "\n", + "# Remove rows with 'label' is NaN\n", + "df = df.dropna(subset=['label']) \n", + "\n", + "# Remove rows with 'text' empty or only with whitespace\n", + "df = df[df['text'].str.strip() != ''] " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "41340b02", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package wordnet to\n", + "[nltk_data] /Users/luis.guimaraes/nltk_data...\n", + "[nltk_data] Package wordnet is already up-to-date!\n" + ] + } + ], + "source": [ + "import re\n", + "import string\n", + "import nltk\n", + "from nltk.tokenize import word_tokenize\n", + "from nltk.stem import WordNetLemmatizer\n", + "\n", + "# Download required NLTK data if not already downloaded\n", + "nltk.download('wordnet')\n", + "\n", + "def clean_text(text):\n", + " text = text.lower()\n", + " text = re.sub(r'\\[.*?\\]', '', text)\n", + " text = re.sub(r'http\\S+|www\\S+|https\\S+', '', text)\n", + " text = re.sub(r'<.*?>+', '', text)\n", + " text = re.sub(r'[%s]' % re.escape(string.punctuation), '', text)\n", + " text = re.sub(r'\\n', '', text)\n", + " text = re.sub(r'\\w*\\d\\w*', '', text)\n", + " \n", + " # Tokenize the text\n", + " tokens = word_tokenize(text)\n", + " \n", + " # Initialize Lemmatizer\n", + " lemmatizer = WordNetLemmatizer()\n", + " \n", + " # Lemmatize each token\n", + " 
lemmatized_tokens = [lemmatizer.lemmatize(token) for token in tokens]\n", + " \n", + " # Join tokens back into a string\n", + " text = ' '.join(lemmatized_tokens)\n", + " return text\n", + "\n", + "df['text_clean'] = df['title'] + \" \" + df['text']\n", + "df['text_clean'] = df['text_clean'].apply(clean_text)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "296fe39d", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# Split the data into training and testing sets\n", + "X = df['text_clean']\n", + "y = df['label']\n", + "\n", + "# train\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "a64b0b16", + "metadata": {}, + "outputs": [], + "source": [ + "#vectorizer = TfidfVectorizer(max_features=5000, stop_words='english')\n", + "vectorizer = TfidfVectorizer(\n", + " max_features=8000, # limit the number of features\n", + " stop_words='english',\n", + " min_df=5, # ignore rare words\n", + " max_df=0.8 # ignore overly common words\n", + ")\n", + "\n", + "# Fit only on training, transform both\n", + "X_train_vec = vectorizer.fit_transform(X_train)\n", + "X_test_vec = vectorizer.transform(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c381617c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
RandomForestClassifier(class_weight='balanced', n_jobs=-1, random_state=42)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "RandomForestClassifier(class_weight='balanced', n_jobs=-1, random_state=42)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Install XGBoost if needed\n", + "!pip install xgboost\n", + "\n", + "model = RandomForestClassifier(\n", + " n_estimators=100, # Number of trees\n", + " max_depth=None, # Maximum depth of trees (None means unlimited)\n", + " min_samples_split=2,\n", + " min_samples_leaf=1,\n", + " class_weight='balanced', # Same as your Logistic Regression\n", + " random_state=42, # For reproducibility\n", + " n_jobs=-1 # Use all available cores\n", + ")\n", + "\n", + "# Train the model (this stays the same)\n", + "model.fit(X_train_vec, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "484d6523", + "metadata": {}, + "outputs": [], + "source": [ + "# Predict on the test set\n", + "y_pred = model.predict(X_test_vec)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "5f194883", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['tfidf_vectorizer_rf.pkl']" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import joblib\n", + "\n", + "# Save model\n", + "joblib.dump(model, 'random_forest_model.pkl')\n", + "\n", + "# Save TF-IDF vectorizer (this stays the same)\n", + "joblib.dump(vectorizer, 'tfidf_vectorizer_rf.pkl')" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "2ef43d5f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 1.00 0.99 0.99 3241\n", + " 1 0.99 1.00 1.00 3954\n", + "\n", + " accuracy 1.00 7195\n", + " macro avg 1.00 1.00 1.00 7195\n", + "weighted avg 1.00 1.00 1.00 7195\n", + "\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "# Classification report and confusion matrix\n", + "print(classification_report(y_test, y_pred))\n", + "sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt='d')\n", + "plt.title(\"Confusion Matrix\")\n", + "plt.show()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "ab90e014", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Baseline accuracy: 0.54954829742877\n" + ] + } + ], + "source": [ + "from sklearn.dummy import DummyClassifier\n", + "dummy = DummyClassifier(strategy=\"most_frequent\")\n", + "dummy.fit(X_train_vec, y_train)\n", + "print(\"Baseline accuracy:\", dummy.score(X_test_vec, y_test))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "a787659a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Probabilities: [[0.99 0.01]]\n" + ] + } + ], + "source": [ + "sample_text = [\"This is a sample news article.\"] # Replace with your sample text\n", + "sample_vec = vectorizer.transform(sample_text)\n", + "\n", + "probs = model.predict_proba(sample_vec)\n", + "print(\"Probabilities:\", probs)\n" + ] + }, + { + "cell_type": "markdown", + "id": "37fa4daf", + "metadata": {}, + "source": [ + "# Load validation_data.csv" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e984f239", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package punkt to\n", + "[nltk_data] /Users/luis.guimaraes/nltk_data...\n", + "[nltk_data] Package punkt is already up-to-date!\n" + ] + } + ], + "source": [ + "\n", + "# Import stemmer from nltk\n", + "from nltk.stem import PorterStemmer\n", + "nltk.download('punkt') # Need this for word_tokenize\n", + "\n", + "# Initialize stemmer\n", + "stemmer = PorterStemmer()\n", + "\n", + "# Define stem_text 
function\n", + "def stem_text(text):\n", + "\tif isinstance(text, str):\n", + "\t\t# Tokenize the text\n", + "\t\ttokens = word_tokenize(text.lower())\n", + "\t\t# Apply stemming\n", + "\t\tstemmed_tokens = [stemmer.stem(token) for token in tokens]\n", + "\t\t# Join tokens back into a string\n", + "\t\treturn ' '.join(stemmed_tokens)\n", + "\treturn ''\n", + "\n", + "# Load validation data and prepare it for prediction\n", + "validation_df = pd.read_csv(\"dataset/validation_data.csv\")\n", + "\n", + "# Clean NaNs before applying\n", + "validation_df['title'] = validation_df['title'].fillna('')\n", + "validation_df['text'] = validation_df['text'].fillna('')\n", + "\n", + "# Apply stemming\n", + "validation_df['title'] = validation_df['title'].apply(stem_text)\n", + "validation_df['text'] = validation_df['text'].apply(stem_text)\n", + "\n", + "# Combine title and text\n", + "validation_df['text_clean'] = (validation_df['title'] + ' ' + validation_df['text']).str.strip()\n", + "validation_df = validation_df[validation_df['text_clean'] != '']\n", + "\n", + "# Prepare features (ignore label column as instructed)\n", + "X_val = validation_df['text_clean']\n", + "\n", + "# Transform using the same vectorizer used for training\n", + "X_val_vec = vectorizer.transform(X_val)\n", + "\n", + "# Get predictions (0 or 1)\n", + "predictions = model.predict(X_val_vec)\n", + "\n", + "# Add predictions to the validation dataframe\n", + "validation_df['predicted_label'] = predictions\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "217a1684", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "First few predictions:\n", + " title predicted_label\n", + "0 uk 's may 'receiv regular updat ' on london tu... 1\n", + "1 uk transport polic lead investig of london inc... 1\n", + "2 pacif nation crack down on north korean ship a... 1\n", + "3 three suspect al qaeda milit kill in yemen dro... 
1\n", + "4 chines academ prod beij to consid north korea ... 1\n", + "\n", + "Prediction counts:\n", + "predicted_label\n", + "0 3688\n", + "1 1268\n", + "Name: count, dtype: int64\n" + ] + } + ], + "source": [ + "# Display the first few predictions\n", + "print(\"First few predictions:\")\n", + "print(validation_df[['title', 'predicted_label']].head())\n", + "\n", + "validation_df[['title', 'predicted_label']].to_csv('validation_predictions.csv', index=False)\n", + "\n", + "# Create a copy with index as id\n", + "#result_df = pd.DataFrame({\n", + "# 'id': validation_df.index,\n", + "# 'predicted_label': validation_df['predicted_label']\n", + "#})\n", + "\n", + "validation_df.to_csv('validation_predictions-rf.csv', index=False)\n", + "\n", + "# Count of each prediction class\n", + "print(\"\\nPrediction counts:\")\n", + "print(validation_df['predicted_label'].value_counts())\n", + "\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "3.10.12", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/2nd-jn-fake-news.ipynb b/2nd-jn-fake-news.ipynb deleted file mode 100644 index 436832a..0000000 --- a/2nd-jn-fake-news.ipynb +++ /dev/null @@ -1,637 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "cb329d99", - "metadata": {}, - "source": [ - "# Project: Fake News Classification" - ] - }, - { - "cell_type": "markdown", - "id": "24995232", - "metadata": {}, - "source": [ - "# 1. 
Imports & Setup" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b24551f5", - "metadata": {}, - "outputs": [], - "source": [ - "%pip install pandas\n", - "import pandas as pd\n", - "import numpy as np\n", - "%pip install matplotlib\n", - "import matplotlib.pyplot as plt\n", - "%pip install seaborn\n", - "import seaborn as sns\n", - "import re\n", - "import string\n", - "%pip install scikit-learn\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.feature_extraction.text import TfidfVectorizer\n", - "from sklearn.linear_model import LogisticRegression\n", - "from sklearn.metrics import classification_report, confusion_matrix" - ] - }, - { - "cell_type": "markdown", - "id": "9b5d0a83", - "metadata": {}, - "source": [ - "# 2. Load Data" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "id": "88a6ab38", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.microsoft.datawrangler.viewer.v0+json": { - "columns": [ - { - "name": "index", - "rawType": "int64", - "type": "integer" - }, - { - "name": "label", - "rawType": "int64", - "type": "integer" - }, - { - "name": "title", - "rawType": "object", - "type": "string" - }, - { - "name": "text", - "rawType": "object", - "type": "string" - }, - { - "name": "subject", - "rawType": "object", - "type": "string" - }, - { - "name": "date", - "rawType": "object", - "type": "string" - } - ], - "ref": "eb22746b-4811-43a1-be70-df9c2f720498", - "rows": [ - [ - "0", - "1", - "As U.S. budget fight looms, Republicans flip their fiscal script", - "WASHINGTON (Reuters) - The head of a conservative Republican faction in the U.S. Congress, who voted this month for a huge expansion of the national debt to pay for tax cuts, called himself a “fiscal conservative” on Sunday and urged budget restraint in 2018. In keeping with a sharp pivot under way among Republicans, U.S. 
Representative Mark Meadows, speaking on CBS’ “Face the Nation,” drew a hard line on federal spending, which lawmakers are bracing to do battle over in January. When they return from the holidays on Wednesday, lawmakers will begin trying to pass a federal budget in a fight likely to be linked to other issues, such as immigration policy, even as the November congressional election campaigns approach in which Republicans will seek to keep control of Congress. President Donald Trump and his Republicans want a big budget increase in military spending, while Democrats also want proportional increases for non-defense “discretionary” spending on programs that support education, scientific research, infrastructure, public health and environmental protection. “The (Trump) administration has already been willing to say: ‘We’re going to increase non-defense discretionary spending ... by about 7 percent,’” Meadows, chairman of the small but influential House Freedom Caucus, said on the program. “Now, Democrats are saying that’s not enough, we need to give the government a pay raise of 10 to 11 percent. For a fiscal conservative, I don’t see where the rationale is. ... Eventually you run out of other people’s money,” he said. Meadows was among Republicans who voted in late December for their party’s debt-financed tax overhaul, which is expected to balloon the federal budget deficit and add about $1.5 trillion over 10 years to the $20 trillion national debt. “It’s interesting to hear Mark talk about fiscal responsibility,” Democratic U.S. Representative Joseph Crowley said on CBS. Crowley said the Republican tax bill would require the United States to borrow $1.5 trillion, to be paid off by future generations, to finance tax cuts for corporations and the rich. “This is one of the least ... fiscally responsible bills we’ve ever seen passed in the history of the House of Representatives. I think we’re going to be paying for this for many, many years to come,” Crowley said. 
Republicans insist the tax package, the biggest U.S. tax overhaul in more than 30 years, will boost the economy and job growth. House Speaker Paul Ryan, who also supported the tax bill, recently went further than Meadows, making clear in a radio interview that welfare or “entitlement reform,” as the party often calls it, would be a top Republican priority in 2018. In Republican parlance, “entitlement” programs mean food stamps, housing assistance, Medicare and Medicaid health insurance for the elderly, poor and disabled, as well as other programs created by Washington to assist the needy. Democrats seized on Ryan’s early December remarks, saying they showed Republicans would try to pay for their tax overhaul by seeking spending cuts for social programs. But the goals of House Republicans may have to take a back seat to the Senate, where the votes of some Democrats will be needed to approve a budget and prevent a government shutdown. Democrats will use their leverage in the Senate, which Republicans narrowly control, to defend both discretionary non-defense programs and social spending, while tackling the issue of the “Dreamers,” people brought illegally to the country as children. Trump in September put a March 2018 expiration date on the Deferred Action for Childhood Arrivals, or DACA, program, which protects the young immigrants from deportation and provides them with work permits. The president has said in recent Twitter messages he wants funding for his proposed Mexican border wall and other immigration law changes in exchange for agreeing to help the Dreamers. Representative Debbie Dingell told CBS she did not favor linking that issue to other policy objectives, such as wall funding. “We need to do DACA clean,” she said. On Wednesday, Trump aides will meet with congressional leaders to discuss those issues. That will be followed by a weekend of strategy sessions for Trump and Republican leaders on Jan. 6 and 7, the White House said. 
Trump was also scheduled to meet on Sunday with Florida Republican Governor Rick Scott, who wants more emergency aid. The House has passed an $81 billion aid package after hurricanes in Florida, Texas and Puerto Rico, and wildfires in California. The package far exceeded the $44 billion requested by the Trump administration. The Senate has not yet voted on the aid. ", - "politicsNews", - "December 31, 2017 " - ], - [ - "1", - "1", - "U.S. military to accept transgender recruits on Monday: Pentagon", - "WASHINGTON (Reuters) - Transgender people will be allowed for the first time to enlist in the U.S. military starting on Monday as ordered by federal courts, the Pentagon said on Friday, after President Donald Trump’s administration decided not to appeal rulings that blocked his transgender ban. Two federal appeals courts, one in Washington and one in Virginia, last week rejected the administration’s request to put on hold orders by lower court judges requiring the military to begin accepting transgender recruits on Jan. 1. A Justice Department official said the administration will not challenge those rulings. “The Department of Defense has announced that it will be releasing an independent study of these issues in the coming weeks. So rather than litigate this interim appeal before that occurs, the administration has decided to wait for DOD’s study and will continue to defend the president’s lawful authority in District Court in the meantime,” the official said, speaking on condition of anonymity. In September, the Pentagon said it had created a panel of senior officials to study how to implement a directive by Trump to prohibit transgender individuals from serving. The Defense Department has until Feb. 21 to submit a plan to Trump. Lawyers representing currently-serving transgender service members and aspiring recruits said they had expected the administration to appeal the rulings to the conservative-majority Supreme Court, but were hoping that would not happen. 
Pentagon spokeswoman Heather Babb said in a statement: “As mandated by court order, the Department of Defense is prepared to begin accessing transgender applicants for military service Jan. 1. All applicants must meet all accession standards.” Jennifer Levi, a lawyer with gay, lesbian and transgender advocacy group GLAD, called the decision not to appeal “great news.” “I’m hoping it means the government has come to see that there is no way to justify a ban and that it’s not good for the military or our country,” Levi said. Both GLAD and the American Civil Liberties Union represent plaintiffs in the lawsuits filed against the administration. In a move that appealed to his hard-line conservative supporters, Trump announced in July that he would prohibit transgender people from serving in the military, reversing Democratic President Barack Obama’s policy of accepting them. Trump said on Twitter at the time that the military “cannot be burdened with the tremendous medical costs and disruption that transgender in the military would entail.” Four federal judges - in Baltimore, Washington, D.C., Seattle and Riverside, California - have issued rulings blocking Trump’s ban while legal challenges to the Republican president’s policy proceed. The judges said the ban would likely violate the right under the U.S. Constitution to equal protection under the law. The Pentagon on Dec. 8 issued guidelines to recruitment personnel in order to enlist transgender applicants by Jan. 1. The memo outlined medical requirements and specified how the applicants’ sex would be identified and even which undergarments they would wear. 
The Trump administration previously said in legal papers that the armed forces were not prepared to train thousands of personnel on the medical standards needed to process transgender applicants and might have to accept “some individuals who are not medically fit for service.” The Obama administration had set a deadline of July 1, 2017, to begin accepting transgender recruits. But Trump’s defense secretary, James Mattis, postponed that date to Jan. 1, 2018, which the president’s ban then put off indefinitely. Trump has taken other steps aimed at rolling back transgender rights. In October, his administration said a federal law banning gender-based workplace discrimination does not protect transgender employees, reversing another Obama-era position. In February, Trump rescinded guidance issued by the Obama administration saying that public schools should allow transgender students to use the restroom that corresponds to their gender identity. ", - "politicsNews", - "December 29, 2017 " - ], - [ - "2", - "1", - "Senior U.S. Republican senator: 'Let Mr. Mueller do his job'", - "WASHINGTON (Reuters) - The special counsel investigation of links between Russia and President Trump’s 2016 election campaign should continue without interference in 2018, despite calls from some Trump administration allies and Republican lawmakers to shut it down, a prominent Republican senator said on Sunday. Lindsey Graham, who serves on the Senate armed forces and judiciary committees, said Department of Justice Special Counsel Robert Mueller needs to carry on with his Russia investigation without political interference. “This investigation will go forward. It will be an investigation conducted without political influence,” Graham said on CBS’s Face the Nation news program. “And we all need to let Mr. Mueller do his job. 
I think he’s the right guy at the right time.” The question of how Russia may have interfered in the election, and how Trump’s campaign may have had links with or co-ordinated any such effort, has loomed over the White House since Trump took office in January. It shows no sign of receding as Trump prepares for his second year in power, despite intensified rhetoric from some Trump allies in recent weeks accusing Mueller’s team of bias against the Republican president. Trump himself seemed to undercut his supporters in an interview last week with the New York Times in which he said he expected Mueller was “going to be fair.” Russia’s role in the election and the question of possible links to the Trump campaign are the focus of multiple inquiries in Washington. Three committees of the Senate and the House of Representatives are investigating, as well as Mueller, whose team in May took over an earlier probe launched by the U.S. Federal Bureau of Investigation (FBI). Several members of the Trump campaign and administration have been convicted or indicted in the investigation. Trump and his allies deny any collusion with Russia during the campaign, and the Kremlin has denied meddling in the election. Graham said he still wants an examination of the FBI’s use of a dossier on links between Trump and Russia that was compiled by a former British spy, Christopher Steele, which prompted Trump allies and some Republicans to question Mueller’s inquiry. On Saturday, the New York Times reported that it was not that dossier that triggered an early FBI probe, but a tip from former Trump campaign foreign policy adviser George Papadopoulos to an Australian diplomat that Russia had damaging information about former Trump rival Hillary Clinton. “I want somebody to look at the way the Department of Justice used this dossier. It bothers me greatly the way they used it, and I want somebody to look at it,” Graham said. But he said the Russia investigation must continue. 
“As a matter of fact, it would hurt us if we ignored it,” he said. ", - "politicsNews", - "December 31, 2017 " - ], - [ - "3", - "1", - "FBI Russia probe helped by Australian diplomat tip-off: NYT", - "WASHINGTON (Reuters) - Trump campaign adviser George Papadopoulos told an Australian diplomat in May 2016 that Russia had political dirt on Democratic presidential candidate Hillary Clinton, the New York Times reported on Saturday. The conversation between Papadopoulos and the diplomat, Alexander Downer, in London was a driving factor behind the FBI’s decision to open a counter-intelligence investigation of Moscow’s contacts with the Trump campaign, the Times reported. Two months after the meeting, Australian officials passed the information that came from Papadopoulos to their American counterparts when leaked Democratic emails began appearing online, according to the newspaper, which cited four current and former U.S. and foreign officials. Besides the information from the Australians, the probe by the Federal Bureau of Investigation was also propelled by intelligence from other friendly governments, including the British and Dutch, the Times said. Papadopoulos, a Chicago-based international energy lawyer, pleaded guilty on Oct. 30 to lying to FBI agents about contacts with people who claimed to have ties to top Russian officials. It was the first criminal charge alleging links between the Trump campaign and Russia. The White House has played down the former aide’s campaign role, saying it was “extremely limited” and that any actions he took would have been on his own. The New York Times, however, reported that Papadopoulos helped set up a meeting between then-candidate Donald Trump and Egyptian President Abdel Fattah al-Sisi and edited the outline of Trump’s first major foreign policy speech in April 2016. The federal investigation, which is now being led by Special Counsel Robert Mueller, has hung over Trump’s White House since he took office almost a year ago. 
Some Trump allies have recently accused Mueller’s team of being biased against the Republican president. Lawyers for Papadopoulos did not immediately respond to requests by Reuters for comment. Mueller’s office declined to comment. Trump’s White House attorney, Ty Cobb, declined to comment on the New York Times report. “Out of respect for the special counsel and his process, we are not commenting on matters such as this,” he said in a statement. Mueller has charged four Trump associates, including Papadopoulos, in his investigation. Russia has denied interfering in the U.S. election and Trump has said there was no collusion between his campaign and Moscow. ", - "politicsNews", - "December 30, 2017 " - ], - [ - "4", - "1", - "Trump wants Postal Service to charge 'much more' for Amazon shipments", - "SEATTLE/WASHINGTON (Reuters) - President Donald Trump called on the U.S. Postal Service on Friday to charge “much more” to ship packages for Amazon (AMZN.O), picking another fight with an online retail giant he has criticized in the past. “Why is the United States Post Office, which is losing many billions of dollars a year, while charging Amazon and others so little to deliver their packages, making Amazon richer and the Post Office dumber and poorer? Should be charging MUCH MORE!” Trump wrote on Twitter. The president’s tweet drew fresh attention to the fragile finances of the Postal Service at a time when tens of millions of parcels have just been shipped all over the country for the holiday season. The U.S. Postal Service, which runs at a big loss, is an independent agency within the federal government and does not receive tax dollars for operating expenses, according to its website. Package delivery has become an increasingly important part of its business as the Internet has led to a sharp decline in the amount of first-class letters. The president does not determine postal rates. 
They are set by the Postal Regulatory Commission, an independent government agency with commissioners selected by the president from both political parties. That panel raised prices on packages by almost 2 percent in November. Amazon was founded by Jeff Bezos, who remains the chief executive officer of the retail company and is the richest person in the world, according to Bloomberg News. Bezos also owns The Washington Post, a newspaper Trump has repeatedly railed against in his criticisms of the news media. In tweets over the past year, Trump has said the “Amazon Washington Post” fabricated stories. He has said Amazon does not pay sales tax, which is not true, and so hurts other retailers, part of a pattern by the former businessman and reality television host of periodically turning his ire on big American companies since he took office in January. Daniel Ives, a research analyst at GBH Insights, said Trump’s comment could be taken as a warning to the retail giant. However, he said he was not concerned for Amazon. “We do not see any price hikes in the future. However, that is a risk that Amazon is clearly aware of and (it) is building out its distribution (system) aggressively,” he said. Amazon has shown interest in the past in shifting into its own delivery service, including testing drones for deliveries. In 2015, the company spent $11.5 billion on shipping, 46 percent of its total operating expenses that year. Amazon shares were down 0.86 percent to $1,175.90 by early afternoon. Overall, U.S. stock prices were down slightly on Friday. Satish Jindel, president of ShipMatrix Inc, which analyzes shipping data, disputed the idea that the Postal Service charges less than United Parcel Service Inc (UPS.N) and FedEx Corp (FDX.N), the other biggest players in the parcel delivery business in the United States. Many customers get lower rates from UPS and FedEx than they would get from the post office for comparable services, he said. 
The Postal Service delivers about 62 percent of Amazon packages, for about 3.5 to 4 million a day during the current peak year-end holiday shipping season, Jindel said. The Seattle-based company and the post office have an agreement in which mail carriers take Amazon packages on the last leg of their journeys, from post offices to customers’ doorsteps. Amazon’s No. 2 carrier is UPS, at 21 percent, and FedEx is third, with 8 percent or so, according to Jindel. Trump’s comment tapped into a debate over whether Postal Service pricing has kept pace with the rise of e-commerce, which has flooded the mail with small packages.Private companies like UPS have long claimed the current system unfairly undercuts their business. Steve Gaut, a spokesman for UPS, noted that the company values its “productive relationship” with the postal service, but that it has filed with the Postal Regulatory Commission its concerns about the postal service’s methods for covering costs. Representatives for Amazon, the White House, the U.S. Postal Service and FedEx declined comment or were not immediately available for comment on Trump’s tweet. According to its annual report, the Postal Service lost $2.74 billion this year, and its deficit has ballooned to $61.86 billion. While the Postal Service’s revenue for first class mail, marketing mail and periodicals is flat or declining, revenue from package delivery is up 44 percent since 2014 to $19.5 billion in the fiscal year ended Sept. 30, 2017. But it also lost about $2 billion in revenue when a temporary surcharge expired in April 2016. According to a Government Accountability Office report in February, the service is facing growing personnel expenses, particularly $73.4 billion in unfunded pension and benefits liabilities. The Postal Service has not announced any plans to cut costs. By law, the Postal Service has to set prices for package delivery to cover the costs attributable to that service. 
But the postal service allocates only 5.5 percent of its total costs to its business of shipping packages even though that line of business is 28 percent of its total revenue. ", - "politicsNews", - "December 29, 2017 " - ] - ], - "shape": { - "columns": 5, - "rows": 5 - } - }, - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
labeltitletextsubjectdate
01As U.S. budget fight looms, Republicans flip t...WASHINGTON (Reuters) - The head of a conservat...politicsNewsDecember 31, 2017
11U.S. military to accept transgender recruits o...WASHINGTON (Reuters) - Transgender people will...politicsNewsDecember 29, 2017
21Senior U.S. Republican senator: 'Let Mr. Muell...WASHINGTON (Reuters) - The special counsel inv...politicsNewsDecember 31, 2017
31FBI Russia probe helped by Australian diplomat...WASHINGTON (Reuters) - Trump campaign adviser ...politicsNewsDecember 30, 2017
41Trump wants Postal Service to charge 'much mor...SEATTLE/WASHINGTON (Reuters) - President Donal...politicsNewsDecember 29, 2017
\n", - "
" - ], - "text/plain": [ - " label title \\\n", - "0 1 As U.S. budget fight looms, Republicans flip t... \n", - "1 1 U.S. military to accept transgender recruits o... \n", - "2 1 Senior U.S. Republican senator: 'Let Mr. Muell... \n", - "3 1 FBI Russia probe helped by Australian diplomat... \n", - "4 1 Trump wants Postal Service to charge 'much mor... \n", - "\n", - " text subject \\\n", - "0 WASHINGTON (Reuters) - The head of a conservat... politicsNews \n", - "1 WASHINGTON (Reuters) - Transgender people will... politicsNews \n", - "2 WASHINGTON (Reuters) - The special counsel inv... politicsNews \n", - "3 WASHINGTON (Reuters) - Trump campaign adviser ... politicsNews \n", - "4 SEATTLE/WASHINGTON (Reuters) - President Donal... politicsNews \n", - "\n", - " date \n", - "0 December 31, 2017 \n", - "1 December 29, 2017 \n", - "2 December 31, 2017 \n", - "3 December 30, 2017 \n", - "4 December 29, 2017 " - ] - }, - "execution_count": 45, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = pd.read_csv(\"dataset/data.csv\")\n", - "df.head()\n", - "#print(df['label'].unique())" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "id": "4679c0d5", - "metadata": {}, - "outputs": [], - "source": [ - "X = df['title'] + \" \" + df['text']\n", - "y = df['label']\n", - "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "id": "9a740ccc", - "metadata": {}, - "outputs": [], - "source": [ - "def clean_text(text):\n", - " text = text.lower()\n", - " text = re.sub(r'\\[.*?\\]', '', text)\n", - " text = re.sub(r'http\\S+|www\\S+|https\\S+', '', text)\n", - " text = re.sub(r'<.*?>+', '', text)\n", - " text = re.sub(r'[%s]' % re.escape(string.punctuation), '', text)\n", - " text = re.sub(r'\\n', '', text)\n", - " text = re.sub(r'\\w*\\d\\w*', '', text)\n", - " return text\n", - "\n", - "df['text_clean'] = df['title'] + \" \" + df['text']\n", - 
"df['text_clean'] = df['text_clean'].apply(clean_text)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "id": "b441f8e6", - "metadata": {}, - "outputs": [], - "source": [ - "# Remove duplicate rows based on the 'text' column\n", - "df = df.drop_duplicates(subset=['text_clean']) \n", - "\n", - "# Remove rows with 'text' is NaN\n", - "df = df.dropna(subset=['text_clean']) \n", - "\n", - "# Remove rows with 'label' is NaN\n", - "df = df.dropna(subset=['label']) \n", - "\n", - "# Remove rows with 'text' empty or only with whitespace\n", - "df = df[df['text_clean'].str.strip() != ''] " - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "id": "dccc6d3a", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.microsoft.datawrangler.viewer.v0+json": { - "columns": [ - { - "name": "label", - "rawType": "int64", - "type": "integer" - }, - { - "name": "proportion", - "rawType": "float64", - "type": "float" - } - ], - "ref": "36239f7f-979d-4840-b3d4-7941796addad", - "rows": [ - [ - "1", - "0.543295629255436" - ], - [ - "0", - "0.45670437074456405" - ] - ], - "shape": { - "columns": 1, - "rows": 2 - } - }, - "text/plain": [ - "label\n", - "1 0.543296\n", - "0 0.456704\n", - "Name: proportion, dtype: float64" - ] - }, - "execution_count": 54, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df['label'].value_counts(normalize=True)\n" - ] - }, - { - "cell_type": "markdown", - "id": "b88121d1", - "metadata": {}, - "source": [ - "# 5. 
Data Exploration" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "55e16def", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Index: 36424 entries, 0 to 39939\n", - "Data columns (total 6 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 label 36424 non-null int64 \n", - " 1 title 36424 non-null object\n", - " 2 text 36424 non-null object\n", - " 3 subject 36424 non-null object\n", - " 4 date 36424 non-null object\n", - " 5 text_clean 36424 non-null object\n", - "dtypes: int64(1), object(5)\n", - "memory usage: 1.9+ MB\n" - ] - }, - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 53, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjkAAAHCCAYAAAANVtgqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8ekN5oAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAx6klEQVR4nO3dB3gVdb7/8W8oCUV6CxGkL53QBILSlkhAFmVlV6QIKEW8oEAQEC9LvXfDBWkrbVlpd4UVcBVpl97EBCnS21LCglcCFkiWlkA4/+f7e/4z9xxIgGBCyO+8X88zezIz3zNn5rgxH39lJsDj8XgEAADAMtky+wQAAAAyAiEHAABYiZADAACsRMgBAABWIuQAAAArEXIAAICVCDkAAMBKhBwAAGAlQg4AALASIQfAQylbtqz06NFD/IE/XStgM0IO4OdOnz4tb731lpQvX15y5col+fPnl+eee06mTZsmN27ckCfZggULJCAgwGcpXry4tGjRQv7nf/4ns08PQCbLkdknACDzrF69Wn7/+99LUFCQdOvWTWrUqCFJSUmyY8cOGTJkiBw5ckTmzJkjT7qxY8dKuXLlRB/Fd/HiRRN+XnzxRVm5cqX85je/yezTA5BJCDmAn4qNjZXXXntNypQpI5s3b5aSJUu6+/r16yenTp0yISgraNOmjdSvX99d79mzp5QoUUL+9re/EXIAP0Z3FeCnJkyYIFevXpW5c+f6BBxHxYoVZcCAAam+/+eff5b33ntPatasKU899ZTp5tKwceDAgXtqP/roI6levbrkyZNHChUqZALJ4sWL3f3/+te/ZODAgWYsjLYqaZfTCy+8IN9+++0jXVvBggUld+7ckiOH73/Hffjhh9K4cWMpUqSI2V+vXj357LPPHni8h73WrVu3mi6zpUuXyn/+539KqVKlTBdgy5YtTWi82zfffGNanPQ7yZs3r9SqVct0E3o7fvy4/O53v5PChQubY+l3t2LFikf6XgB/Q0sO4Ke0K0fH4egf/Udx5swZWb58uenu0q4i7Sb685//LM2aNZOjR49KSEiIqfvLX/4i7777rvlDraHp5s2bcvDgQfMHvnPnzqamb9++Jmz0799fqlWrJj/99JPpMjt27JjUrVv3gecSHx8vP/74o+muunTpkglVGuC6du3qU6cB4q
WXXpIuXbqYbrlPP/3UnP+qVaukbdu2v/haHePHj5ds2bKZYKTnpoFSP1Ov2bFhwwbTyqQBU7+X4OBgc716Lk641O5CHR/19NNPy/vvv2+CkAao9u3by9///nf57W9/m8Z/aoCf8QDwO/Hx8R799X/55Zcf+j1lypTxdO/e3V2/efOmJzk52acmNjbWExQU5Bk7dqy7TT+jevXq9z12gQIFPP369fOk1fz588113L3oOSxYsOCe+uvXr/usJyUleWrUqOH59a9/nS7XumXLFvP5VatW9SQmJrrbp02bZrYfOnTIrN++fdtTrlw58zmXL1/2Oe6dO3fcn1u2bOmpWbOm+Xzv/Y0bN/ZUqlQpTd8V4I/orgL8UEJCgnnNly/fIx9Du5W0tUIlJyeb1hftyqlcubJPN5N2HX333Xeye/fuVI+lNdrK8f333z/SucyYMcO0jOjyySefmNlVvXr1ks8//9ynTruoHJcvXzatLE2aNHlgt9jDXqvjjTfekMDAQHddP8NpEVL79u0zY6K0i06v3Zt2dzldZDpW6tVXXzXdedpSpYt+dkREhJw8eVL+93//9xG+LcB/EHIAP6RjSpT+8XxUd+7ckSlTpkilSpVMCChatKgUK1bMdEVpeHAMGzbMBIIGDRqYWh3U/PXXX/scS7tzDh8+LKVLlzZ1o0ePdgPBw9D3hIeHm0W7hXTAtHZ7afeXdks5tCuoUaNGZmyLjnHR8501a5bP+f6Sa3U888wzPus65sYJVs60faWz2VKjY3i0++0Pf/iD+SzvZdSoUaZGu+YApI6QA/hpyNFxJBosHtUf//hHiYyMlKZNm5rWk3Xr1pmWFB1grKHAUbVqVTlx4oQZ//L888+bsST66vyhVtpaoaFGx9LoeU2cONEc51HvdaOtLtqac+HCBdPiob766iszHkcDzsyZM2XNmjXmfHVckIaJ9LhWR/bs2VM8zoM+x5tzXB3X47RS3b3o4HAAqWPgMeCndNCr3gMnJiZGwsLC0vx+HSisQUJnZ3m7cuWKaenwpgNmO3bsaBZtWXnllVfM7KPhw4eb0KF0AO6//du/mUVbKHTAsdboLKZHcfv2bfOqA5CVhiv9LA0o2hrjmD9/frpe68OoUKGCedWQqa1PKdFB4Spnzpyp1gC4P1pyAD81dOhQEz507IrOFrqbdqncPZ357taKu1smli1bds84ER1D4k3HqmhXkr731q1bZozL3V0+OoVcW3QSExMf6dr0uOvXrzefpS1JzvnqeBf9PMfZs2fNrKkHedhrfVga4HSW1tSpU01Q8uZ8jn4HzZs3N7O4tEXqbj/88MMjfTbgT2jJAfyUtibovWq0dUWDgPcdj6Ojo80f8fs9v0lbgvROwzrIVqehHzp0SBYtWuS2QDhatWplpkfrVGi9QZ9Ok54+fbqZsq0Dn/WPvN5PRqeYh4aGmvE7GzduNAOVJ02a9FDXot1aej8Zpa1Ael3aTaXTrp3xR/p5kydPltatW5suKq3TAcva5aNja+7nYa81Ld1pOhaoXbt2Urt2bXNcbcnSa9Bp49rapPT8tGtP78/Tu3dv83kaSLX1TQdzp3RPIgBeMnt6F4DM9Y9//MPTu3dvT9myZT2BgYGefPnyeZ577jnPRx995DN1OaVp1YMHD/aULFnSkzt3bvOemJgYT7Nmzczi+POf/+xp2rSpp0iRImbKdYUKFTxDhgwx09iVTrXW9dDQUPPZefPmNT/PnDnzkaaQ58qVy1O7dm3PrFmzfKZjq7lz55qp13oeVapUMe8fNWqUeZ+3R71WZwr5smXL7plurtv187zt2LHD88ILL7jXXatWLfO9ezt9+rSnW7dunuDgYE/OnDk9Tz/9tOc3v/mN57PPPnvg9wP4uwD9H+/QAwAAYAPG5AAAACsRcgAAgJUIOQAAwEqEHAAAYCVCDgAAsBIhBwAAWMmvbwaoz4bRpx7rDcmcJ/8CAIAnm979Rh8wrHdG15trpsavQ44GHH3qMQ
AAyHrOnz9v7pieGr8OOdqC43xJzq3fAQDAky0hIcE0Ujh/x1Pj1yHH6aLSgEPIAQAga3nQUBMGHgMAACsRcgAAgJUIOQAAwEqEHAAAYCVCDgAAsBIhBwAAWImQAwAArJSmkBMVFSXPPvusuflO8eLFpX379nLixAmfmps3b0q/fv2kSJEi8tRTT0mHDh3k4sWLPjXnzp2Ttm3bSp48ecxxhgwZIrdv3/ap2bp1q9StW1eCgoKkYsWKsmDBgnvOZ8aMGVK2bFnJlSuXNGzYUHbt2pW2qwcAANZKU8jZtm2bCTA7d+6UDRs2yK1bt6RVq1Zy7do1t2bQoEGycuVKWbZsmanXRye88sor7v7k5GQTcJKSkiQ6OloWLlxoAszIkSPdmtjYWFPTokUL2b9/vwwcOFB69eol69atc2uWLFkikZGRMmrUKPn2228lNDRUIiIi5NKlS7/8WwEAAFmf5xe4dOmSRw+xbds2s37lyhVPzpw5PcuWLXNrjh07ZmpiYmLM+po1azzZsmXzxMXFuTWzZs3y5M+f35OYmGjWhw4d6qlevbrPZ3Xs2NETERHhrjdo0MDTr18/dz05OdkTEhLiiYqKeujzj4+PN+emrwAAIGt42L/fv2hMTnx8vHktXLiwed27d69p3QkPD3drqlSpIs8884zExMSYdX2tWbOmlChRwq3RFhh9DsWRI0fcGu9jODXOMbQVSD/Lu0afQqrrTk1KEhMTzed4LwAAwE6PHHLu3LljupGee+45qVGjhtkWFxcngYGBUrBgQZ9aDTS6z6nxDjjOfmff/Wo0lNy4cUN+/PFH0+2VUo1zjNTGFBUoUMBdeAI5AAD2euSQo2NzDh8+LJ9++qlkFcOHDzetT86iTx8HAAB2eqSnkPfv319WrVol27dvl1KlSrnbg4ODTVfSlStXfFpzdHaV7nNq7p4F5cy+8q65e0aWruuTwnPnzi3Zs2c3S0o1zjFSojO1dAEAAPZLU0uOx+MxAeeLL76QzZs3S7ly5Xz216tXT3LmzCmbNm1yt+kUc50yHhYWZtb19dChQz6zoHSmlgaYatWquTXex3BqnGNol5h+lneNdp/pulMDAAD8W460dlEtXrxYvvzyS3OvHGf8i45v0RYWfe3Zs6eZ2q2DkTW4vPPOOyZ4NGrUyNTqlHMNM6+//rpMmDDBHGPEiBHm2E4rS9++fWX69OkydOhQefPNN02gWrp0qaxevdo9F/2M7t27S/369aVBgwYydepUM5X9jTfeSN9vyFJl3/+/7xL2Ozu+bWafAgA82SFn1qxZ5rV58+Y+2+fPny89evQwP0+ZMsXMdNKbAOpsJp0VNXPmTLdWu5m0q+vtt9824Sdv3rwmrIwdO9at0RYiDTR6z51p06aZLrGPP/7YHMvRsWNH+eGHH8z9dTQo1a5dW9auXXvPYGQAAOCfAnQeufgpna2lrU86CFlbnfwJLTn+hZYcAP7495tnVwEAACsRcgAAgJUIOQAAwEqEHAAAYCVCDgAAsBIhBwAAWImQAwAArETIAQAAViLkAAAAKxFyAACAlQg5AADASoQcAABgJUIOAACwEiEHAABYiZADAACsRMgBAABWIuQAAAArEXIAAICVCDkAAMBKhBwAAGAlQg4AALASIQcAAFiJkAMAAKxEyAEAAFYi5AAAACsRcgAAgJUIOQAAwEqEHAAAYCVCDgAAsBIhBwAAWImQAwAArETIAQAAViLkAAAAK6U55Gzfvl3atWsnISEhEhAQIMuXL/fZr9tSWiZOnOjWlC1b9p7948eP9znOwYMHpUmTJpIrVy4pXbq0TJgw4Z5zWbZsmVSpUsXU1KxZU9asWZPWywEAAJZKc8i5du2ahIaGyowZM1Lcf+HCBZ9l3rx5JsR06NDBp27s2LE+de+88467LyEhQVq1aiVlypSRvXv3moA0evRomTNnjlsTHR0tnTp1kp49e8q+ffukffv2Zjl8+HBaLwkAAFgoR1rf0KZNG7OkJj
g42Gf9yy+/lBYtWkj58uV9tufLl++eWseiRYskKSnJBKTAwECpXr267N+/XyZPnix9+vQxNdOmTZPWrVvLkCFDzPq4ceNkw4YNMn36dJk9e3ZaLwsAAFgmQ8fkXLx4UVavXm1aW+6m3VNFihSROnXqmJaa27dvu/tiYmKkadOmJuA4IiIi5MSJE3L58mW3Jjw83OeYWqPbAQAA0tySkxYLFy40LTavvPKKz/Z3331X6tatK4ULFzbdTsOHDzddVtpSo+Li4qRcuXI+7ylRooS7r1ChQubV2eZdo9tTk5iYaBbvbjEAAGCnDA052t3UpUsXMzDYW2RkpPtzrVq1TIvNW2+9JVFRURIUFJRh56PHHzNmTIYdHwAA+EF31VdffWW6l3r16vXA2oYNG5ruqrNnz5p1HaujXV3enHVnHE9qNamN81HaYhQfH+8u58+ff6RrAwAAfhxy5s6dK/Xq1TMzsR5EBxVny5ZNihcvbtbDwsLMVPVbt265NTqouHLlyqaryqnZtGmTz3G0RrenRluJ8ufP77MAAAA7pTnkXL161YQSXVRsbKz5+dy5cz5jXfQeNim14ujA4KlTp8qBAwfkzJkzZibVoEGDpGvXrm6A6dy5s+nC0gHLR44ckSVLlpjZVN7dXAMGDJC1a9fKpEmT5Pjx42aK+Z49e6R///6P+l0AAAB/HpOjQUKnhDuc4NG9e3dZsGCB+fnTTz8Vj8dj7mOTUmuK7tdQooOAdYCxhhzvAFOgQAFZv3699OvXz7QGFS1aVEaOHOlOH1eNGzeWxYsXy4gRI+SDDz6QSpUqmRsT1qhRI+3fAgAAsE6AR9OIn9IWJw1UOj7H37quyr6/OrNPAY/R2fFtM/sUAOCx//3m2VUAAMBKhBwAAGAlQg4AALASIQcAAFiJkAMAAKxEyAEAAFYi5AAAACsRcgAAgJUIOQAAwEppfqwDAODJxh3N/Qt3NE8dLTkAAMBKhBwAAGAlQg4AALASIQcAAFiJkAMAAKxEyAEAAFYi5AAAACsRcgAAgJUIOQAAwEqEHAAAYCVCDgAAsBIhBwAAWImQAwAArETIAQAAViLkAAAAKxFyAACAlQg5AADASoQcAABgJUIOAACwEiEHAABYiZADAACsRMgBAABWIuQAAAArEXIAAICV0hxytm/fLu3atZOQkBAJCAiQ5cuX++zv0aOH2e69tG7d2qfm559/li5dukj+/PmlYMGC0rNnT7l69apPzcGDB6VJkyaSK1cuKV26tEyYMOGec1m2bJlUqVLF1NSsWVPWrFmT1ssBAACWSnPIuXbtmoSGhsqMGTNSrdFQc+HCBXf529/+5rNfA86RI0dkw4YNsmrVKhOc+vTp4+5PSEiQVq1aSZkyZWTv3r0yceJEGT16tMyZM8etiY6Olk6dOpmAtG/fPmnfvr1ZDh8+nNZLAgAAFsqR1je0adPGLPcTFBQkwcHBKe47duyYrF27Vnbv3i3169c32z766CN58cUX5cMPPzQtRIsWLZKkpCSZN2+eBAYGSvXq1WX//v0yefJkNwxNmzbNhKkhQ4aY9XHjxpnQNH36dJk9e3ZaLwsAAFgmQ8bkbN26VYoXLy6VK1eWt99+W3766Sd3X0xMjOmicgKOCg8Pl2zZssk333zj1jRt2tQEHEdERIScOHFCLl++7Nbo+7xpjW5PTWJiomkl8l4AAICd0j3kaOvKf//3f8umTZvkv/7rv2Tbtm2m5Sc5Odnsj4uLMwHIW44cOaRw4cJmn1NTokQJnxpn/UE1zv6UREVFSYECBdxFx/oAAAA7pbm76kFee+0192cdDFyrVi2pUKGCad1p2bKlZKbhw4dLZGSku64tOQQdAADslOFTyMuXLy9FixaVU6dOmXUdq3Pp0iWfmtu3b5sZV844Hn29ePGiT42z/qCa1MYCOWOFdEaX9wIAAOyU4SHnu+++M2NySpYsadbDwsLkypUrZtaUY/PmzXLnzh1p2LChW6Mzrm7duuXW6K
BiHeNTqFAht0a7xLxpjW4HAABIc8jR+9noTCddVGxsrPn53LlzZp/Odtq5c6ecPXvWhJCXX35ZKlasaAYFq6pVq5pxO71795Zdu3bJ119/Lf379zfdXDqzSnXu3NkMOtbp4TrVfMmSJWY2lXdX04ABA8wsrUmTJsnx48fNFPM9e/aYYwEAAKQ55GiQqFOnjlmUBg/9eeTIkZI9e3ZzE7+XXnpJfvWrX5mQUq9ePfnqq69MV5FDp4jrTfx0jI5OHX/++ed97oGjg4LXr19vApS+f/Dgweb43vfSady4sSxevNi8T+/b89lnn5kbE9aoUeOXfysAACDLC/B4PB7xUzrwWANVfHy8343PKfv+6sw+BTxGZ8e3zexTwGPE77d/8cff74SH/PvNs6sAAICVCDkAAMBKhBwAAGAlQg4AALASIQcAAFiJkAMAAKxEyAEAAFYi5AAAACsRcgAAgJUIOQAAwEqEHAAAYCVCDgAAsBIhBwAAWImQAwAArETIAQAAViLkAAAAKxFyAACAlQg5AADASoQcAABgJUIOAACwEiEHAABYiZADAACsRMgBAABWIuQAAAArEXIAAICVCDkAAMBKhBwAAGAlQg4AALASIQcAAFiJkAMAAKxEyAEAAFYi5AAAACsRcgAAgJXSHHK2b98u7dq1k5CQEAkICJDly5e7+27duiXDhg2TmjVrSt68eU1Nt27d5Pvvv/c5RtmyZc17vZfx48f71Bw8eFCaNGkiuXLlktKlS8uECRPuOZdly5ZJlSpVTI1+5po1a9J6OQAAwFJpDjnXrl2T0NBQmTFjxj37rl+/Lt9++6384Q9/MK+ff/65nDhxQl566aV7aseOHSsXLlxwl3feecfdl5CQIK1atZIyZcrI3r17ZeLEiTJ69GiZM2eOWxMdHS2dOnWSnj17yr59+6R9+/ZmOXz4cFovCQAAWChHWt/Qpk0bs6SkQIECsmHDBp9t06dPlwYNGsi5c+fkmWeecbfny5dPgoODUzzOokWLJCkpSebNmyeBgYFSvXp12b9/v0yePFn69OljaqZNmyatW7eWIUOGmPVx48aZz9bPmz17dlovCwAAWCbDx+TEx8eb7qiCBQv6bNfuqSJFikidOnVMS83t27fdfTExMdK0aVMTcBwRERGmVejy5ctuTXh4uM8xtUa3AwAApLklJy1u3rxpxuhot1L+/Pnd7e+++67UrVtXChcubLqdhg8fbrqstKVGxcXFSbly5XyOVaJECXdfoUKFzKuzzbtGt6cmMTHRLN7dYgAAwE4ZFnJ0EPKrr74qHo9HZs2a5bMvMjLS/blWrVqmxeatt96SqKgoCQoKyqhTMscfM2ZMhh0fAABY3l3lBJx//vOfZpyMdytOSho2bGi6q86ePWvWdazOxYsXfWqcdWccT2o1qY3zUdpipN1nznL+/PlHvkYAAOBnIccJOCdPnpSNGzeacTcPooOKs2XLJsWLFzfrYWFhZqq6HsuhYaly5cqmq8qp2bRpk89xtEa3p0ZbiTRweS8AAMBOae6uunr1qpw6dcpdj42NNSFFx9eULFlSfve735np46tWrZLk5GR3jIzu124pHRj8zTffSIsWLcwMK10fNGiQdO3a1Q0wnTt3Nt1KOj1cx/TotHCdTTVlyhT3cwcMGCDNmjWTSZMmSdu2beXTTz+VPXv2+EwzBwAA/ivNIUeDhAaUu8fXdO/e3dzLZsWKFWa9du3aPu/bsmWLNG/e3LSmaCDRWh0ErAOMNeR4j9PRqejr16+Xfv36Sb169aRo0aIycuRId/q4aty4sSxevFhGjBghH3zwgVSqVMncmLBGjRqP9k0AAACrBHh0ZLCf0tlVGqh0fI6/dV2VfX91Zp8CHqOz49tm9ingMeL327/44+93wkP+/ebZVQAAwEqEHAAAYCVCDgAAsBIhBwAAWImQAwAArETIAQAAViLkAAAAKxFyAACAlQg5AADASoQcAABgJUIOAACwEiEHAABYiZADAA
CsRMgBAABWIuQAAAArEXIAAICVCDkAAMBKhBwAAGAlQg4AALASIQcAAFiJkAMAAKxEyAEAAFYi5AAAACsRcgAAgJUIOQAAwEqEHAAAYCVCDgAAsBIhBwAAWImQAwAArETIAQAAViLkAAAAKxFyAACAlQg5AADASoQcAABgpTSHnO3bt0u7du0kJCREAgICZPny5T77PR6PjBw5UkqWLCm5c+eW8PBwOXnypE/Nzz//LF26dJH8+fNLwYIFpWfPnnL16lWfmoMHD0qTJk0kV65cUrp0aZkwYcI957Js2TKpUqWKqalZs6asWbMmrZcDAAAsleaQc+3aNQkNDZUZM2akuF/DyJ/+9CeZPXu2fPPNN5I3b16JiIiQmzdvujUacI4cOSIbNmyQVatWmeDUp08fd39CQoK0atVKypQpI3v37pWJEyfK6NGjZc6cOW5NdHS0dOrUyQSkffv2Sfv27c1y+PDhtH8LAADAOgEebXp51DcHBMgXX3xhwoXSQ2kLz+DBg+W9994z2+Lj46VEiRKyYMECee211+TYsWNSrVo12b17t9SvX9/UrF27Vl588UX57rvvzPtnzZol//7v/y5xcXESGBhoat5//33TanT8+HGz3rFjRxO4NCQ5GjVqJLVr1zYB62FomCpQoIA5R21V8idl31+d2aeAx+js+LaZfQp4jPj99i/++Pud8JB/v9N1TE5sbKwJJtpF5dCTaNiwocTExJh1fdUuKifgKK3Pli2baflxapo2beoGHKWtQSdOnJDLly+7Nd6f49Q4n5OSxMRE88V4LwAAwE7pGnI04ChtufGm684+fS1evLjP/hw5ckjhwoV9alI6hvdnpFbj7E9JVFSUCV3OomN9AACAnfxqdtXw4cNN05aznD9/PrNPCQAAZIWQExwcbF4vXrzos13XnX36eunSJZ/9t2/fNjOuvGtSOob3Z6RW4+xPSVBQkOm7814AAICd0jXklCtXzoSMTZs2udt03IuOtQkLCzPr+nrlyhUza8qxefNmuXPnjhm749TojKtbt265NToTq3LlylKoUCG3xvtznBrncwAAgH9Lc8jR+9ns37/fLM5gY/353LlzZrbVwIED5T/+4z9kxYoVcujQIenWrZuZMeXMwKpataq0bt1aevfuLbt27ZKvv/5a+vfvb2ZeaZ3q3LmzGXSs08N1qvmSJUtk2rRpEhkZ6Z7HgAEDzKysSZMmmRlXOsV8z5495lgAAAA50voGDRItWrRw153g0b17dzNNfOjQoWZqt973Rltsnn/+eRNG9IZ9jkWLFpkw0rJlSzOrqkOHDubeOg4dFLx+/Xrp16+f1KtXT4oWLWpuMOh9L53GjRvL4sWLZcSIEfLBBx9IpUqVzBTzGjVq/JLvAwAAWOIX3Scnq+M+OfAX/ngfDX/G77d/8cff74TMuE8OAADAk4KQAwAArETIAQAAViLkAAAAKxFyAACAlQg5AADASoQcAABgJUIOAACwEiEHAABYiZADAACsRMgBAABWIuQAAAArEXIAAICVCDkAAMBKhBwAAGAlQg4AALASIQcAAFiJkAMAAKxEyAEAAFYi5AAAACsRcgAAgJUIOQAAwEqEHAAAYCVCDgAAsBIhBwAAWImQAwAArETIAQAAViLkAAAAKxFyAACAlQg5AADASoQcAABgJUIOAACwEiEHAABYKd1DTtmyZSUgIOCepV+/fmZ/8+bN79nXt29fn2OcO3dO2rZtK3ny5JHixYvLkCFD5Pbt2z41W7dulbp160pQUJBUrFhRFixYkN6XAgAAsrAc6X3A3bt3S3Jysrt++PBheeGFF+T3v/+9u613794yduxYd13DjEPfqwEnODhYoqOj5cKFC9KtWzfJmTOn/PGPfzQ1sbGxpkbD0aJFi2TTpk3Sq1cvKVmypERERKT3JQEAgCwo3UNOsWLFfNbHjx8vFSpUkGbNmvmEGg0xKVm/fr0cPXpUNm7cKCVKlJDatWvLuHHjZNiwYTJ69G
gJDAyU2bNnS7ly5WTSpEnmPVWrVpUdO3bIlClTCDkAACDjx+QkJSXJJ598Im+++abplnJo60vRokWlRo0aMnz4cLl+/bq7LyYmRmrWrGkCjkODS0JCghw5csStCQ8P9/ksrdHt95OYmGiO470AAAA7pXtLjrfly5fLlStXpEePHu62zp07S5kyZSQkJEQOHjxoWmhOnDghn3/+udkfFxfnE3CUs6777lejoeXGjRuSO3fuFM8nKipKxowZk+7XCQAA/CzkzJ07V9q0aWMCjaNPnz7uz9pio+NoWrZsKadPnzbdWhlJW40iIyPddQ1FpUuXztDPBAAAloWcf/7zn2ZcjdNCk5qGDRua11OnTpmQo2N1du3a5VNz8eJF8+qM49FXZ5t3Tf78+VNtxVE6E0sXAABgvwwbkzN//nwz/VtnQd3P/v37zau26KiwsDA5dOiQXLp0ya3ZsGGDCTDVqlVza3RGlTet0e0AAAAZFnLu3LljQk737t0lR47/ayzSLimdKbV37145e/asrFixwkwPb9q0qdSqVcvUtGrVyoSZ119/XQ4cOCDr1q2TESNGmPvsOK0wOnX8zJkzMnToUDl+/LjMnDlTli5dKoMGDeKfKgAAyLiQo91UekM/nVXlTad/6z4NMlWqVJHBgwdLhw4dZOXKlW5N9uzZZdWqVeZVW2a6du1qgpD3fXV0+vjq1atN601oaKiZSv7xxx8zfRwAAGTsmBwNMR6P557tOsh327ZtD3y/zr5as2bNfWv0zsn79u37RecJAADsxbOrAACAlQg5AADASoQcAABgJUIOAACwEiEHAABYiZADAACsRMgBAABWIuQAAAArEXIAAICVCDkAAMBKhBwAAGAlQg4AALASIQcAAFiJkAMAAKxEyAEAAFYi5AAAACsRcgAAgJUIOQAAwEqEHAAAYCVCDgAAsBIhBwAAWImQAwAArETIAQAAViLkAAAAKxFyAACAlQg5AADASoQcAABgJUIOAACwEiEHAABYiZADAACsRMgBAABWIuQAAAArEXIAAICV0j3kjB49WgICAnyWKlWquPtv3rwp/fr1kyJFishTTz0lHTp0kIsXL/oc49y5c9K2bVvJkyePFC9eXIYMGSK3b9/2qdm6davUrVtXgoKCpGLFirJgwYL0vhQAAJCFZUhLTvXq1eXChQvusmPHDnffoEGDZOXKlbJs2TLZtm2bfP/99/LKK6+4+5OTk03ASUpKkujoaFm4cKEJMCNHjnRrYmNjTU2LFi1k//79MnDgQOnVq5esW7cuIy4HAABkQTky5KA5ckhwcPA92+Pj42Xu3LmyePFi+fWvf222zZ8/X6pWrSo7d+6URo0ayfr16+Xo0aOyceNGKVGihNSuXVvGjRsnw4YNM61EgYGBMnv2bClXrpxMmjTJHEPfr0FqypQpEhERkRGXBAAAspgMack5efKkhISESPny5aVLly6m+0nt3btXbt26JeHh4W6tdmU988wzEhMTY9b1tWbNmibgODS4JCQkyJEjR9wa72M4Nc4xAAAA0r0lp2HDhqZ7qXLlyqarasyYMdKkSRM5fPiwxMXFmZaYggUL+rxHA43uU/rqHXCc/c6++9VoELpx44bkzp07xXNLTEw0i0PrAQCAndI95LRp08b9uVatWib0lClTRpYuXZpq+HhcoqKiTOgCAAD2y/Ap5Npq86tf/UpOnTplxunogOIrV6741OjsKmcMj77ePdvKWX9QTf78+e8bpIYPH27GBTnL+fPn0+06AQCAn4Wcq1evyunTp6VkyZJSr149yZkzp2zatMndf+LECTNmJywszKzr66FDh+TSpUtuzYYNG0yAqVatmlvjfQynxjlGanS6uR7HewEAAHZK95Dz3nvvmanhZ8+eNVPAf/vb30r27NmlU6dOUqBAAenZs6dERkbKli1bzEDkN954w4QTnVmlWrVqZcLM66+/LgcOHDDTwkeMGGHuraMhRfXt21fOnDkjQ4cOlePHj8
vMmTNNd5hOTwcAAMiQMTnfffedCTQ//fSTFCtWTJ5//nkzPVx/VjrNO1u2bOYmgDoIWGdFaUhxaCBatWqVvP322yb85M2bV7p37y5jx451a3T6+OrVq02omTZtmpQqVUo+/vhjpo8DAABXgMfj8Yif0tlV2rqk43P8reuq7PurM/sU8BidHd82s08BjxG/3/7FH3+/Ex7y7zfPrgIAAFYi5AAAACsRcgAAgJUIOQAAwEqEHAAAYCVCDgAAsBIhBwAAWImQAwAArETIAQAAViLkAAAAKxFyAACAlQg5AADASoQcAABgJUIOAACwEiEHAABYiZADAACsRMgBAABWIuQAAAArEXIAAICVCDkAAMBKhBwAAGAlQg4AALASIQcAAFiJkAMAAKxEyAEAAFYi5AAAACsRcgAAgJUIOQAAwEqEHAAAYCVCDgAAsBIhBwAAWImQAwAArETIAQAAViLkAAAAK6V7yImKipJnn31W8uXLJ8WLF5f27dvLiRMnfGqaN28uAQEBPkvfvn19as6dOydt27aVPHnymOMMGTJEbt++7VOzdetWqVu3rgQFBUnFihVlwYIF6X05AAAgi0r3kLNt2zbp16+f7Ny5UzZs2CC3bt2SVq1aybVr13zqevfuLRcuXHCXCRMmuPuSk5NNwElKSpLo6GhZuHChCTAjR450a2JjY01NixYtZP/+/TJw4EDp1auXrFu3Lr0vCQAAZEE50vuAa9eu9VnXcKItMXv37pWmTZu627WFJjg4OMVjrF+/Xo4ePSobN26UEiVKSO3atWXcuHEybNgwGT16tAQGBsrs2bOlXLlyMmnSJPOeqlWryo4dO2TKlCkSERGR3pcFAACymAwfkxMfH29eCxcu7LN90aJFUrRoUalRo4YMHz5crl+/7u6LiYmRmjVrmoDj0OCSkJAgR44ccWvCw8N9jqk1uj01iYmJ5hjeCwAAsFO6t+R4u3PnjulGeu6550yYcXTu3FnKlCkjISEhcvDgQdNCo+N2Pv/8c7M/Li7OJ+AoZ1333a9Gg8uNGzckd+7cKY4XGjNmTIZcKwAA8KOQo2NzDh8+bLqRvPXp08f9WVtsSpYsKS1btpTTp09LhQoVMux8tMUoMjLSXddAVLp06Qz7PAAAYGF3Vf/+/WXVqlWyZcsWKVWq1H1rGzZsaF5PnTplXnWszsWLF31qnHVnHE9qNfnz50+xFUfpLCzd770AAAA7pXvI8Xg8JuB88cUXsnnzZjM4+EF0dpTSFh0VFhYmhw4dkkuXLrk1OlNLQ0m1atXcmk2bNvkcR2t0OwAAQLaM6KL65JNPZPHixeZeOTp2RhcdJ6O0S0pnSulsq7Nnz8qKFSukW7duZuZVrVq1TI1OOdcw8/rrr8uBAwfMtPARI0aYY2trjNL76pw5c0aGDh0qx48fl5kzZ8rSpUtl0KBB6X1JAAAgC0r3kDNr1iwzo0pv+KctM86yZMkSs1+nf+vUcA0yVapUkcGDB0uHDh1k5cqV7jGyZ89uurr0VVtmunbtaoLQ2LFj3RptIVq9erVpvQkNDTVTyT/++GOmjwMAgIwZeKzdVfejA331hoEPorOv1qxZc98aDVL79u1L8zkCAAD78ewqAABgJUIOAACwEiEHAABYiZADAACsRMgBAABWIuQAAAArEXIAAICVCDkAAMBKhBwAAGAlQg4AALASIQcAAFiJkAMAAKxEyAEAAFYi5AAAACsRcgAAgJUIOQAAwEqEHAAAYCVCDgAAsBIhBwAAWImQAwAArETIAQAAViLkAAAAKxFyAACAlQg5AADASoQcAABgJUIOAACwEiEHAABYiZADAACsRMgBAABWIuQAAAArEXIAAICVCDkAAMBKhBwAAGClLB9yZsyYIWXLlpVcuXJJw4YNZdeuXZl9SgAA4AmQpUPOkiVLJDIyUkaNGiXffvuthIaGSkREhFy6dCmzTw0AAGSyLB1yJk+eLL1795Y33nhDqlWrJrNnz5Y8efLIvHnzMv
vUAABAJsuyIScpKUn27t0r4eHh7rZs2bKZ9ZiYmEw9NwAAkPlySBb1448/SnJyspQoUcJnu64fP348xfckJiaaxREfH29eExISxN/cSbye2aeAx8gf/z/uz/j99i/++Pud8P+v2ePx2BlyHkVUVJSMGTPmnu2lS5fOlPMBHpcCUzP7DABkFH/+/f7Xv/4lBQoUsC/kFC1aVLJnzy4XL1702a7rwcHBKb5n+PDhZqCy486dO/Lzzz9LkSJFJCAgIMPPGZmf/DXQnj9/XvLnz5/ZpwMgHfH77V88Ho8JOCEhIfety7IhJzAwUOrVqyebNm2S9u3bu6FF1/v375/ie4KCgszirWDBgo/lfPHk0H8B8i9BwE78fvuPAvdpwcnyIUdpq0z37t2lfv360qBBA5k6dapcu3bNzLYCAAD+LUuHnI4dO8oPP/wgI0eOlLi4OKldu7asXbv2nsHIAADA/2TpkKO0ayq17inAm3ZV6o0j7+6yBJD18fuNlAR4HjT/CgAAIAvKsjcDBAAAuB9CDgAAsBIhBwAAWImQAwAArJTlZ1cBAPyLPrtw3rx55mHMevsQpXe6b9y4sfTo0UOKFSuW2aeIJwSzqwAAWcbu3bslIiJC8uTJI+Hh4e590fSRPnrH++vXr8u6devMTWIBQg78kj7fRu+pof81CCDraNSokYSGhsrs2bPveeag/jnr27evHDx40LTyAIQc+KUDBw5I3bp1JTk5ObNPBUAa5M6dW/bt2ydVqlRJcf/x48elTp06cuPGjcd+bnjyMCYHVlqxYsV99585c+axnQuA9KNjb3bt2pVqyNF9PNoHDkIOrKRPptem7Ps1VN7d1A3gyffee+9Jnz59ZO/evdKyZct7xuT85S9/kQ8//DCzTxNPCLqrYKWnn35aZs6cKS+//HKK+/fv3y/16tWjuwrIgpYsWSJTpkwxQcf5Hc6ePbv5nY6MjJRXX301s08RTwhCDqz00ksvmafSjx07NtUxOdpvf+fOncd+bgDSx61bt8x0clW0aFHJmTNnZp8SnjB0V8FKQ4YMkWvXrqW6v2LFirJly5bHek4A0peGmpIlS2b2aeAJRksOAACwEo91AAAAViLkAAAAKxFyAACAlQg5AJ5YzZs3l4EDBz5U7datW829j65cufKLPrNs2bIyderUX3QMAE8GQg4AALASIQcAAFiJkAMgS/jrX/8q9evXl3z58pnnF3Xu3FkuXbp0T93XX38ttWrVkly5cpknVh8+fNhn/44dO6RJkybmQY+lS5eWd9999773VAKQdRFyAGSZu9uOGzfO3K16+fLlcvbsWenRo0eKN4KcNGmS7N69W4oVKybt2rUz71WnT5+W1q1bS4cOHeTgwYPm8QAaevr3758JVwQgo3HHYwBZwptvvun+XL58efnTn/4kzz77rFy9elWeeuopd9+oUaPkhRdeMD8vXLhQSpUqJV988YV5nlFUVJR06dLFHcxcqVIlc5xmzZrJrFmzTOsPAHvQkgMgS9CHMWqrzDPPPGO6rDSYqHPnzvnUhYWFuT8XLlxYKleuLMeOHTPr2gq0YMECE4qcJSIiwjzDLDY29jFfEYCMRksOgCeejpnRMKLLokWLTDeUhhtdT0pKeujjaKvPW2+9Zcbh3E3DEwC7EHIAPPGOHz8uP/30k4wfP94MFlZ79uxJsXbnzp1uYLl8+bL84x//kKpVq5r1unXrytGjR80DWgHYj+4qAE88DS2BgYHy0UcfyZkzZ2TFihVmEHJKxo4dK5s2bTKzqnRgctGiRaV9+/Zm37BhwyQ6OtoMNN6/f7+cPHlSvvzySwYeA5Yi5AB44mn3lI6lWbZsmVSrVs206Hz44Ycp1uq+AQMGSL169SQuLk5WrlxpApLSqeXbtm0zrTs6jbxOnToycuRICQkJecxXBOBxCPB4PJ7H8kkAAACPES05AADASoQcAABgJUIOAACwEiEHAABYiZADAACsRMgBAABWIuQAAAArEXIAAI
CVCDkAAMBKhBwAAGAlQg4AALASIQcAAIiN/h/1p9nWmA5w/gAAAABJRU5ErkJggg==", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "df.info()\n", - "df['label'].value_counts().plot(kind='bar', title='Class Balance')\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "id": "e3c79fc6", - "metadata": {}, - "source": [ - "# 7. Vectorization + Model Training" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "id": "5741a809", - "metadata": {}, - "outputs": [], - "source": [ - "# vectorizer = TfidfVectorizer(max_features=5000)\n", - "vectorizer = TfidfVectorizer()\n", - "\n", - "X_train_vec = vectorizer.fit_transform(X_train) # transform the training data\n", - "\n", - "X_test_vec = vectorizer.transform(X_test) # transform only\n", - "\n", - "model = LogisticRegression()\n", - "\n", - "model.fit(X_train_vec, y_train) # train the model using the training data\n", - "\n", - "y_pred = model.predict(X_test_vec)\n" - ] - }, - { - "cell_type": "markdown", - "id": "9706ead4", - "metadata": {}, - "source": [ - "# 8. Evaluation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e5b52270", - "metadata": {}, - "outputs": [], - "source": [ - "print(classification_report(y_test, y_pred))\n", - "sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt='d')\n" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "id": "0555f1bf", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.9896106398535254" - ] - }, - "execution_count": 55, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from sklearn.metrics import f1_score\n", - "f1_score(y_test, y_pred, average='weighted')\n" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "id": "c49ac4a6", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "✅ Correct Predictions Sample:\n", - "11181 White House confident Supreme Court will uphol...\n", - "17721 Illinois man charged with kidnapping, death of...\n", - "34887 NOT KIDDING: 
Students Are Given Counseling Aft...\n", - "26367 WATCH: Robert De Niro Makes Sure To Perfectly...\n", - "11339 Venezuelans scramble to survive as merchants d...\n", - "dtype: object\n", - "❌ Incorrect Predictions Sample:\n", - "36134 N. KOREA WARNS It Will Strike U.S. Bases In S....\n", - "35791 GOTCHA! CLINTON CHARITY “MISFILED” MILLIONS IN...\n", - "2133 Instant View: Reaction to disbanding of Trump ...\n", - "10261 Trump's 'obsession' with anchorwoman Kelly is ...\n", - "37012 CANADA’S OBAMA? WATCH New Prime Minister Call ...\n", - "dtype: object\n" - ] - } - ], - "source": [ - "# Show correctly predicted examples\n", - "correct = X_test[y_test == y_pred]\n", - "print(\"✅ Correct Predictions Sample:\")\n", - "print(correct.sample(5))\n", - "\n", - "# Show incorrect predictions\n", - "incorrect = X_test[y_test != y_pred]\n", - "print(\"❌ Incorrect Predictions Sample:\")\n", - "print(incorrect.sample(5))\n" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "id": "033beb70", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "🔍 Top predictive words: [(19.84848710357449, 'reuters'), (18.659089790349725, 'said'), (8.91313089066927, 'on'), (6.749188523164079, 'washington'), (5.338842565654789, 'in'), (4.882292853983793, 'republican'), (4.379473297016656, 'wednesday'), (4.0831862881709515, 'thursday'), (4.007826342292402, 'tuesday'), (3.8843498360894237, 'presidential'), (3.4691150007402456, 'had'), (3.4679091837900216, 'friday'), (3.446936293565944, 'minister'), (3.424262556326454, 'edt'), (3.3858258917952204, 'nov'), (3.307878382165057, 'reporters'), (3.24999287779387, 'told'), (3.2313675932622723, 'democratic'), (3.213999961333914, 'its'), (3.144683560067598, 'monday')]\n" - ] - } - ], - "source": [ - "feature_names = vectorizer.get_feature_names_out()\n", - "coefficients = model.coef_[0]\n", - "top_features = sorted(zip(coefficients, feature_names), reverse=True)[:20]\n", - "print(\"🔍 Top predictive 
words:\", top_features)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 58, - "id": "d141d50c", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "F1-score: 0.9896106398535254\n" - ] - } - ], - "source": [ - "from sklearn.metrics import f1_score\n", - "print(\"F1-score:\", f1_score(y_test, y_pred, average='weighted'))\n" - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "id": "84c99dc5", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "🔝 Words pushing prediction to 1: ['presidential' 'tuesday' 'thursday' 'wednesday' 'republican' 'in'\n", - " 'washington' 'on' 'said' 'reuters']\n", - "🔻 Words pushing prediction to 0: ['video' 'via' 'this' 'just' 'hillary' 'gop' 'is' 'that' 'com' 'image']\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "\n", - "feature_names = vectorizer.get_feature_names_out()\n", - "coefficients = model.coef_[0]\n", - "top_positive = np.argsort(coefficients)[-10:]\n", - "top_negative = np.argsort(coefficients)[:10]\n", - "\n", - "print(\"🔝 Words pushing prediction to 1:\", feature_names[top_positive])\n", - "print(\"🔻 Words pushing prediction to 0:\", feature_names[top_negative])\n" - ] - }, - { - "cell_type": "markdown", - "id": "e050b32c", - "metadata": {}, - "source": [ - "# 9. 
Predict Validation Set" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "id": "d4f99cf7", - "metadata": {}, - "outputs": [], - "source": [ - "val_df = pd.read_csv(\"dataset/validation_data.csv\")\n", - "val_df.head()\n", - "# print(val_df['label'].unique())\n", - "\n", - "val_df['text_clean'] = (val_df['title'] + \" \" + val_df['text']).apply(clean_text)\n", - "X_val_vec = vectorizer.transform(val_df['text_clean'])\n", - "val_df['label'] = model.predict(X_val_vec)\n", - "\n", - "# Save predictions\n", - "val_df.reset_index(inplace=True)\n", - "val_df[['index', 'label']].to_csv(\"predictions.csv\", index=False)\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/fn.ipynb b/fn.ipynb deleted file mode 100644 index e69de29..0000000 diff --git a/jn-fake-news.ipynb b/jn-fake-news.ipynb deleted file mode 100644 index d2d3d24..0000000 --- a/jn-fake-news.ipynb +++ /dev/null @@ -1,252 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "cb329d99", - "metadata": {}, - "source": [ - "# Project: Fake News Classification" - ] - }, - { - "cell_type": "markdown", - "id": "24995232", - "metadata": {}, - "source": [ - "# 1. 
Imports & Setup" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b24551f5", - "metadata": {}, - "outputs": [], - "source": [ - "%pip install pandas\n", - "import pandas as pd\n", - "import numpy as np\n", - "%pip install matplotlib\n", - "import matplotlib.pyplot as plt\n", - "%pip install seaborn\n", - "import seaborn as sns\n", - "import re\n", - "import string\n", - "\n", - "%pip install scikit-learn\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.feature_extraction.text import TfidfVectorizer\n", - "from sklearn.linear_model import LogisticRegression\n", - "from sklearn.metrics import classification_report, confusion_matrix\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "id": "9b5d0a83", - "metadata": {}, - "source": [ - "# 2. Load Data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "88a6ab38", - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.read_csv(\"dataset/data.csv\")\n", - "val_df = pd.read_csv(\"dataset/validation_data.csv\")\n", - "df.head()\n", - "val_df.head()\n", - "# print(val_df['label'].unique())\n" - ] - }, - { - "cell_type": "markdown", - "id": "6937c16d", - "metadata": {}, - "source": [ - "# 3. Clening Up Data" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "b441f8e6", - "metadata": {}, - "outputs": [], - "source": [ - "# Remove duplicate rows based on the 'text' column\n", - "df = df.drop_duplicates(subset=['text']) \n", - "\n", - "# Remove rows with 'text' is NaN\n", - "df = df.dropna(subset=['text']) \n", - "\n", - "# Remove rows with 'label' is NaN\n", - "df = df.dropna(subset=['label']) \n", - "\n", - "# Remove rows with 'text' empty or only with whitespace\n", - "df = df[df['text'].str.strip() != ''] " - ] - }, - { - "cell_type": "markdown", - "id": "879ec064", - "metadata": {}, - "source": [ - "# 4. 
Preprocessing" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "9a740ccc", - "metadata": {}, - "outputs": [], - "source": [ - "def clean_text(text):\n", - " text = text.lower()\n", - " text = re.sub(r'\\[.*?\\]', '', text)\n", - " text = re.sub(r'http\\S+|www\\S+|https\\S+', '', text)\n", - " text = re.sub(r'<.*?>+', '', text)\n", - " text = re.sub(r'[%s]' % re.escape(string.punctuation), '', text)\n", - " text = re.sub(r'\\n', '', text)\n", - " text = re.sub(r'\\w*\\d\\w*', '', text)\n", - " return text\n", - "\n", - "df['text_clean'] = df['title'] + \" \" + df['text']\n", - "# df['text_clean'] = df['text_clean'].apply(clean_text)\n" - ] - }, - { - "cell_type": "markdown", - "id": "b88121d1", - "metadata": {}, - "source": [ - "# 5. Data Exploration" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "55e16def", - "metadata": {}, - "outputs": [], - "source": [ - "df.info()\n", - "df['label'].value_counts().plot(kind='bar', title='Class Balance')\n" - ] - }, - { - "cell_type": "markdown", - "id": "6f814b30", - "metadata": {}, - "source": [ - "# 6. Train/Test Split" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "4679c0d5", - "metadata": {}, - "outputs": [], - "source": [ - "X = df['title'] + \" \" + df['text_clean']\n", - "y = df['label']\n", - "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n" - ] - }, - { - "cell_type": "markdown", - "id": "e3c79fc6", - "metadata": {}, - "source": [ - "# 7. 
Vectorization + Model Training" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "5741a809", - "metadata": {}, - "outputs": [], - "source": [ - "vectorizer = TfidfVectorizer(max_features=5000)\n", - "X_train_vec = vectorizer.fit_transform(X_train)\n", - "X_test_vec = vectorizer.transform(X_test)\n", - "\n", - "model = LogisticRegression()\n", - "model.fit(X_train_vec, y_train)\n", - "\n", - "y_pred = model.predict(X_test_vec)\n" - ] - }, - { - "cell_type": "markdown", - "id": "9706ead4", - "metadata": {}, - "source": [ - "# 8. Evaluation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e5b52270", - "metadata": {}, - "outputs": [], - "source": [ - "print(classification_report(y_test, y_pred))\n", - "sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt='d')\n" - ] - }, - { - "cell_type": "markdown", - "id": "e050b32c", - "metadata": {}, - "source": [ - "# 9. Predict Validation Set" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "d4f99cf7", - "metadata": {}, - "outputs": [], - "source": [ - "val_df['text_clean'] = (val_df['title'] + \" \" + val_df['text']).apply(clean_text)\n", - "X_val_vec = vectorizer.transform(val_df['text_clean'])\n", - "val_df['label'] = model.predict(X_val_vec)\n", - "\n", - "# Save predictions\n", - "val_df.reset_index(inplace=True)\n", - "val_df[['index', 'label']].to_csv(\"predictions.csv\", index=False)\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/main.py b/main.py new file mode 100644 index 0000000..980e6d2 --- /dev/null +++ b/main.py @@ -0,0 +1,182 @@ +import pandas as pd 
+from sklearn.model_selection import train_test_split +from sklearn.feature_extraction.text import TfidfVectorizer +from sklearn.linear_model import LogisticRegression +from sklearn.metrics import classification_report, confusion_matrix +from sklearn.base import BaseEstimator, TransformerMixin +from sklearn.pipeline import Pipeline +from sklearn.dummy import DummyClassifier +import seaborn as sns +import matplotlib.pyplot as plt +import re +import string +import nltk +from nltk.tokenize import word_tokenize +from nltk.stem import WordNetLemmatizer +import pandas as pd +from sklearn.model_selection import train_test_split +from sklearn.feature_extraction.text import TfidfVectorizer +from sklearn.linear_model import LogisticRegression +from sklearn.metrics import classification_report, confusion_matrix +from sklearn.base import BaseEstimator, TransformerMixin +from sklearn.pipeline import Pipeline +from sklearn.dummy import DummyClassifier +import seaborn as sns +import matplotlib.pyplot as plt +import re +import string +import nltk +from nltk.tokenize import word_tokenize +from nltk.stem import WordNetLemmatizer +import joblib +import os + +# Download required NLTK data +nltk.download('wordnet') +nltk.download('punkt') + +# Custom text preprocessor class +class TextPreprocessor(BaseEstimator, TransformerMixin): + def __init__(self): + self.lemmatizer = WordNetLemmatizer() + + def clean_text(self, text): + text = text.lower() + text = re.sub(r'\[.*?\]', '', text) + text = re.sub(r'http\S+|www\S+|https\S+', '', text) + text = re.sub(r'<.*?>+', '', text) + text = re.sub(r'[%s]' % re.escape(string.punctuation), '', text) + text = re.sub(r'\n', '', text) + text = re.sub(r'\w*\d\w*', '', text) + + tokens = word_tokenize(text) + lemmatized_tokens = [self.lemmatizer.lemmatize(token) for token in tokens] + return ' '.join(lemmatized_tokens) + + def fit(self, X, y=None): + return self + + def transform(self, X): + # Check if 'cleaned_text' column exists, if so, return it 
directly + if isinstance(X, pd.DataFrame) and 'cleaned_text' in X.columns: + return X['cleaned_text'] + + # Otherwise, perform cleaning + if isinstance(X, pd.DataFrame): + # Combine title and text for DataFrame input + return (X['title'] + " " + X['text']).apply(self.clean_text) + return X.apply(self.clean_text) + +# Define cache file paths +TRAIN_CACHE_PATH = 'dataset/train_cleaned.parquet' +TEST_CACHE_PATH = 'dataset/test_cleaned.parquet' + +# Load and prepare base data +df = pd.read_csv("dataset/data.csv") + +# Data cleaning +df = df[ + (df['title'] != '') & + (df['text'] != '') & + (df['text'].str.strip() != '') +].drop_duplicates(subset=['text']).dropna(subset=['text', 'label']) + +# Split data FIRST to prevent leakage +train_df, test_df = train_test_split( + df[['title', 'text', 'label']], # Keep raw text for pipeline processing + test_size=0.2, + random_state=42, + stratify=df['label'] +) + +print("\nTraining set class distribution:") +print(train_df['label'].value_counts()) +print("\nTest set class distribution:") +print(test_df['label'].value_counts()) + +# Check for cached cleaned data +if os.path.exists(TRAIN_CACHE_PATH) and os.path.exists(TEST_CACHE_PATH): + print("Loading cleaned data from cache...") + train_df = pd.read_parquet(TRAIN_CACHE_PATH) + test_df = pd.read_parquet(TEST_CACHE_PATH) + print("Cleaned data loaded.") +else: + print("Cleaning data and saving to cache...") + # Apply cleaning and add 'cleaned_text' column + preprocessor = TextPreprocessor() + train_df['cleaned_text'] = preprocessor.transform(train_df) + test_df['cleaned_text'] = preprocessor.transform(test_df) + + # Save cleaned data to parquet + train_df.to_parquet(TRAIN_CACHE_PATH, index=False) + test_df.to_parquet(TEST_CACHE_PATH, index=False) + print("Cleaned data saved to cache.") + + +# Create preprocessing pipeline +# The TextPreprocessor will now use the 'cleaned_text' column if it exists +pipeline = Pipeline([ + ('preprocessor', TextPreprocessor()), + ('vectorizer', 
TfidfVectorizer( + max_features=8000, + stop_words='english', + min_df=5, + max_df=0.8 + )), + ('classifier', LogisticRegression( + class_weight='balanced', + solver='saga', + penalty='l1', + C=0.5, + max_iter=1000, + random_state=42 + )) +]) + +# Train model +# The pipeline will now use the 'cleaned_text' column from the loaded dataframes +pipeline.fit(train_df, train_df['label']) + +# Evaluate +y_pred = pipeline.predict(test_df) +print("Evaluation on Test Set:") +print(classification_report(test_df['label'], y_pred)) +sns.heatmap(confusion_matrix(test_df['label'], y_pred), annot=True, fmt='d') +plt.title("Confusion Matrix (Test Set)") +plt.show() + +# Evaluate on Training Set +print("\nEvaluation on Training Set:") +y_train_pred = pipeline.predict(train_df) +print(classification_report(train_df['label'], y_train_pred)) +sns.heatmap(confusion_matrix(train_df['label'], y_train_pred), annot=True, fmt='d') +plt.title("Confusion Matrix (Training Set)") +plt.show() + +# Save entire pipeline +joblib.dump(pipeline, 'text_classification_pipeline.pkl') + +# Baseline evaluation +dummy = DummyClassifier(strategy="most_frequent") +# Use the original text columns for baseline evaluation +dummy.fit(train_df[['title', 'text']], train_df['label']) +print("Baseline accuracy:", dummy.score(test_df[['title', 'text']], test_df['label'])) + +# Validation processing (using same lemmatization) +validation_df = pd.read_csv("dataset/validation_data.csv").fillna({'title': '', 'text': ''}) +# The TextPreprocessor will clean the validation data as it's not cached +validation_df['cleaned_text'] = TextPreprocessor().transform(validation_df) +print("Validation data processed.") + +print("Generating predictions for validation data...") +# Predict using the cleaned text column +predictions = pipeline.predict(validation_df) +validation_df['predicted_label'] = predictions +print("Predictions generated.") + +# Save results +print("Saving validation predictions...") +validation_df[['title', 
'predicted_label']].to_csv('validation_predictions.csv', index=False) +print("Validation predictions saved.") +print("\nPrediction counts:") +print(validation_df['predicted_label'].value_counts()) diff --git a/predictions.csv b/predictions.csv deleted file mode 100644 index 6861553..0000000 --- a/predictions.csv +++ /dev/null @@ -1,4957 +0,0 @@ -index,label -0,1 -1,1 -2,1 -3,1 -4,1 -5,1 -6,1 -7,1 -8,1 -9,1 -10,1 -11,1 -12,1 -13,1 -14,1 -15,1 -16,1 -17,1 -18,1 -19,1 -20,1 -21,1 -22,1 -23,1 -24,1 -25,1 -26,1 -27,1 -28,1 -29,1 -30,1 -31,1 -32,1 -33,1 -34,1 -35,1 -36,1 -37,1 -38,1 -39,1 -40,1 -41,1 -42,1 -43,1 -44,1 -45,1 -46,1 -47,1 -48,1 -49,1 -50,1 -51,1 -52,1 -53,1 -54,1 -55,1 -56,1 -57,1 -58,1 -59,1 -60,1 -61,1 -62,1 -63,1 -64,1 -65,1 -66,1 -67,1 -68,1 -69,1 -70,1 -71,1 -72,1 -73,1 -74,1 -75,1 -76,1 -77,1 -78,1 -79,1 -80,1 -81,1 -82,1 -83,1 -84,1 -85,1 -86,1 -87,1 -88,1 -89,1 -90,1 -91,1 -92,1 -93,1 -94,1 -95,1 -96,1 -97,1 -98,1 -99,1 -100,1 -101,1 -102,1 -103,1 -104,1 -105,1 -106,1 -107,1 -108,1 -109,1 -110,1 -111,1 -112,1 -113,1 -114,1 -115,1 -116,1 -117,1 -118,1 -119,1 -120,1 -121,1 -122,1 -123,1 -124,1 -125,1 -126,1 -127,1 -128,1 -129,1 -130,1 -131,1 -132,1 -133,1 -134,1 -135,1 -136,1 -137,1 -138,1 -139,1 -140,1 -141,1 -142,1 -143,1 -144,1 -145,1 -146,1 -147,1 -148,1 -149,1 -150,1 -151,1 -152,1 -153,1 -154,1 -155,1 -156,1 -157,1 -158,1 -159,1 -160,1 -161,1 -162,1 -163,1 -164,1 -165,1 -166,1 -167,1 -168,1 -169,1 -170,1 -171,1 -172,1 -173,1 -174,1 -175,1 -176,1 -177,1 -178,1 -179,1 -180,1 -181,1 -182,1 -183,1 -184,1 -185,1 -186,1 -187,1 -188,1 -189,1 -190,1 -191,1 -192,1 -193,1 -194,1 -195,1 -196,1 -197,1 -198,1 -199,1 -200,1 -201,1 -202,1 -203,1 -204,1 -205,1 -206,1 -207,1 -208,1 -209,1 -210,1 -211,1 -212,1 -213,1 -214,1 -215,1 -216,1 -217,1 -218,1 -219,1 -220,1 -221,1 -222,1 -223,1 -224,1 -225,1 -226,1 -227,1 -228,1 -229,1 -230,1 -231,1 -232,1 -233,1 -234,1 -235,1 -236,1 -237,1 -238,1 -239,1 -240,0 -241,1 -242,1 -243,1 -244,1 -245,1 -246,1 -247,1 -248,1 -249,1 
-250,1 -251,1 -252,1 -253,1 -254,1 -255,1 -256,1 -257,1 -258,1 -259,1 -260,1 -261,1 -262,1 -263,1 -264,1 -265,1 -266,1 -267,1 -268,1 -269,1 -270,1 -271,1 -272,1 -273,1 -274,1 -275,1 -276,1 -277,1 -278,1 -279,1 -280,1 -281,1 -282,1 -283,1 -284,1 -285,1 -286,1 -287,1 -288,1 -289,1 -290,1 -291,1 -292,1 -293,1 -294,1 -295,1 -296,1 -297,1 -298,1 -299,1 -300,1 -301,1 -302,1 -303,1 -304,1 -305,1 -306,1 -307,1 -308,1 -309,1 -310,1 -311,1 -312,1 -313,1 -314,1 -315,1 -316,1 -317,1 -318,1 -319,1 -320,1 -321,1 -322,1 -323,1 -324,1 -325,1 -326,1 -327,1 -328,1 -329,1 -330,1 -331,1 -332,1 -333,1 -334,1 -335,1 -336,1 -337,1 -338,1 -339,1 -340,1 -341,1 -342,1 -343,1 -344,1 -345,1 -346,1 -347,1 -348,1 -349,1 -350,1 -351,1 -352,1 -353,1 -354,1 -355,1 -356,1 -357,1 -358,1 -359,1 -360,1 -361,1 -362,1 -363,1 -364,1 -365,1 -366,1 -367,1 -368,1 -369,1 -370,1 -371,1 -372,1 -373,1 -374,1 -375,1 -376,1 -377,1 -378,1 -379,1 -380,1 -381,1 -382,1 -383,1 -384,1 -385,1 -386,1 -387,1 -388,1 -389,1 -390,1 -391,1 -392,1 -393,1 -394,1 -395,1 -396,1 -397,1 -398,1 -399,1 -400,1 -401,1 -402,1 -403,1 -404,1 -405,1 -406,1 -407,1 -408,1 -409,1 -410,1 -411,1 -412,1 -413,1 -414,1 -415,1 -416,1 -417,1 -418,1 -419,1 -420,1 -421,1 -422,1 -423,1 -424,1 -425,1 -426,1 -427,1 -428,1 -429,1 -430,1 -431,1 -432,0 -433,1 -434,1 -435,1 -436,1 -437,1 -438,1 -439,1 -440,1 -441,1 -442,1 -443,1 -444,1 -445,1 -446,1 -447,1 -448,1 -449,1 -450,1 -451,1 -452,1 -453,1 -454,1 -455,1 -456,1 -457,1 -458,1 -459,1 -460,1 -461,1 -462,1 -463,1 -464,1 -465,1 -466,1 -467,1 -468,1 -469,1 -470,1 -471,1 -472,1 -473,1 -474,1 -475,1 -476,1 -477,1 -478,1 -479,1 -480,1 -481,1 -482,1 -483,1 -484,1 -485,1 -486,1 -487,1 -488,1 -489,1 -490,1 -491,1 -492,1 -493,1 -494,1 -495,1 -496,0 -497,1 -498,1 -499,1 -500,1 -501,1 -502,1 -503,1 -504,1 -505,1 -506,1 -507,1 -508,1 -509,1 -510,1 -511,1 -512,1 -513,1 -514,1 -515,1 -516,1 -517,0 -518,1 -519,1 -520,1 -521,1 -522,1 -523,1 -524,1 -525,1 -526,1 -527,1 -528,1 -529,1 -530,1 -531,1 -532,1 -533,1 -534,0 
-535,1 -536,1 -537,1 -538,1 -539,1 -540,1 -541,1 -542,1 -543,1 -544,1 -545,1 -546,1 -547,1 -548,1 -549,1 -550,1 -551,1 -552,1 -553,1 -554,1 -555,1 -556,1 -557,1 -558,1 -559,1 -560,1 -561,1 -562,1 -563,1 -564,1 -565,1 -566,1 -567,1 -568,1 -569,1 -570,1 -571,1 -572,1 -573,1 -574,1 -575,1 -576,1 -577,1 -578,1 -579,1 -580,1 -581,1 -582,1 -583,1 -584,1 -585,1 -586,1 -587,1 -588,1 -589,1 -590,1 -591,1 -592,1 -593,1 -594,1 -595,1 -596,1 -597,1 -598,1 -599,1 -600,1 -601,1 -602,1 -603,1 -604,1 -605,1 -606,1 -607,1 -608,1 -609,1 -610,1 -611,1 -612,1 -613,1 -614,1 -615,1 -616,1 -617,1 -618,1 -619,1 -620,1 -621,1 -622,1 -623,1 -624,1 -625,1 -626,1 -627,1 -628,1 -629,1 -630,1 -631,1 -632,1 -633,1 -634,1 -635,1 -636,1 -637,1 -638,1 -639,1 -640,1 -641,1 -642,1 -643,1 -644,1 -645,1 -646,1 -647,1 -648,1 -649,1 -650,1 -651,1 -652,1 -653,1 -654,1 -655,1 -656,1 -657,1 -658,1 -659,1 -660,1 -661,1 -662,1 -663,1 -664,1 -665,1 -666,1 -667,1 -668,1 -669,1 -670,1 -671,1 -672,1 -673,1 -674,1 -675,1 -676,1 -677,1 -678,1 -679,1 -680,1 -681,1 -682,1 -683,1 -684,1 -685,1 -686,1 -687,1 -688,1 -689,1 -690,1 -691,1 -692,1 -693,1 -694,1 -695,1 -696,1 -697,1 -698,1 -699,1 -700,1 -701,1 -702,1 -703,1 -704,1 -705,1 -706,1 -707,1 -708,1 -709,1 -710,1 -711,1 -712,1 -713,1 -714,1 -715,1 -716,1 -717,1 -718,1 -719,1 -720,1 -721,1 -722,1 -723,1 -724,1 -725,1 -726,1 -727,1 -728,1 -729,1 -730,1 -731,1 -732,1 -733,1 -734,1 -735,1 -736,1 -737,1 -738,1 -739,1 -740,1 -741,1 -742,1 -743,1 -744,1 -745,1 -746,1 -747,1 -748,1 -749,1 -750,1 -751,1 -752,1 -753,1 -754,1 -755,1 -756,1 -757,1 -758,1 -759,1 -760,1 -761,1 -762,1 -763,1 -764,1 -765,1 -766,1 -767,1 -768,1 -769,1 -770,1 -771,1 -772,1 -773,1 -774,1 -775,1 -776,1 -777,1 -778,1 -779,1 -780,1 -781,1 -782,1 -783,1 -784,1 -785,1 -786,1 -787,1 -788,1 -789,1 -790,1 -791,1 -792,1 -793,1 -794,1 -795,1 -796,1 -797,1 -798,1 -799,1 -800,1 -801,1 -802,1 -803,1 -804,1 -805,1 -806,1 -807,1 -808,1 -809,1 -810,1 -811,1 -812,1 -813,1 -814,1 -815,1 -816,1 -817,1 -818,1 -819,1 
-820,1 -821,1 -822,1 -823,1 -824,1 -825,1 -826,1 -827,1 -828,1 -829,1 -830,1 -831,1 -832,1 -833,1 -834,1 -835,1 -836,1 -837,1 -838,1 -839,1 -840,1 -841,1 -842,1 -843,1 -844,1 -845,1 -846,1 -847,1 -848,1 -849,1 -850,1 -851,1 -852,1 -853,1 -854,1 -855,1 -856,1 -857,1 -858,1 -859,1 -860,1 -861,1 -862,1 -863,1 -864,1 -865,1 -866,1 -867,1 -868,1 -869,1 -870,1 -871,1 -872,1 -873,1 -874,1 -875,1 -876,1 -877,1 -878,1 -879,1 -880,1 -881,1 -882,1 -883,1 -884,1 -885,1 -886,1 -887,1 -888,1 -889,1 -890,1 -891,1 -892,1 -893,1 -894,1 -895,1 -896,1 -897,1 -898,1 -899,1 -900,1 -901,1 -902,1 -903,1 -904,1 -905,1 -906,1 -907,1 -908,1 -909,1 -910,1 -911,1 -912,1 -913,1 -914,1 -915,1 -916,1 -917,1 -918,1 -919,1 -920,1 -921,1 -922,1 -923,1 -924,1 -925,1 -926,1 -927,1 -928,1 -929,1 -930,1 -931,1 -932,1 -933,1 -934,1 -935,1 -936,1 -937,1 -938,1 -939,1 -940,1 -941,1 -942,1 -943,1 -944,1 -945,1 -946,1 -947,1 -948,1 -949,1 -950,1 -951,1 -952,1 -953,1 -954,1 -955,1 -956,1 -957,1 -958,1 -959,1 -960,1 -961,1 -962,1 -963,1 -964,1 -965,1 -966,1 -967,1 -968,1 -969,1 -970,1 -971,1 -972,1 -973,1 -974,1 -975,1 -976,1 -977,1 -978,1 -979,1 -980,1 -981,1 -982,1 -983,1 -984,1 -985,1 -986,1 -987,1 -988,1 -989,1 -990,1 -991,1 -992,1 -993,1 -994,1 -995,1 -996,1 -997,1 -998,1 -999,1 -1000,1 -1001,1 -1002,1 -1003,1 -1004,1 -1005,1 -1006,1 -1007,1 -1008,1 -1009,1 -1010,1 -1011,1 -1012,1 -1013,1 -1014,1 -1015,1 -1016,1 -1017,1 -1018,1 -1019,1 -1020,1 -1021,1 -1022,1 -1023,1 -1024,1 -1025,1 -1026,1 -1027,1 -1028,1 -1029,1 -1030,1 -1031,1 -1032,1 -1033,1 -1034,1 -1035,1 -1036,1 -1037,1 -1038,1 -1039,1 -1040,1 -1041,1 -1042,1 -1043,1 -1044,1 -1045,1 -1046,1 -1047,1 -1048,1 -1049,1 -1050,1 -1051,1 -1052,1 -1053,1 -1054,1 -1055,1 -1056,1 -1057,1 -1058,1 -1059,1 -1060,1 -1061,1 -1062,1 -1063,1 -1064,1 -1065,1 -1066,1 -1067,1 -1068,1 -1069,1 -1070,1 -1071,1 -1072,1 -1073,1 -1074,1 -1075,1 -1076,1 -1077,0 -1078,1 -1079,1 -1080,1 -1081,1 -1082,1 -1083,1 -1084,1 -1085,1 -1086,1 -1087,1 -1088,1 -1089,1 -1090,1 -1091,1 
-1092,1 -1093,1 -1094,1 -1095,1 -1096,1 -1097,1 -1098,1 -1099,1 -1100,1 -1101,1 -1102,1 -1103,1 -1104,1 -1105,1 -1106,1 -1107,1 -1108,1 -1109,1 -1110,1 -1111,1 -1112,1 -1113,1 -1114,1 -1115,1 -1116,1 -1117,1 -1118,1 -1119,1 -1120,1 -1121,1 -1122,1 -1123,1 -1124,1 -1125,1 -1126,1 -1127,1 -1128,1 -1129,1 -1130,1 -1131,1 -1132,1 -1133,1 -1134,1 -1135,1 -1136,1 -1137,1 -1138,1 -1139,1 -1140,1 -1141,1 -1142,1 -1143,0 -1144,1 -1145,1 -1146,1 -1147,1 -1148,1 -1149,1 -1150,1 -1151,1 -1152,1 -1153,1 -1154,1 -1155,1 -1156,1 -1157,1 -1158,1 -1159,1 -1160,1 -1161,1 -1162,1 -1163,1 -1164,1 -1165,1 -1166,1 -1167,1 -1168,1 -1169,1 -1170,1 -1171,1 -1172,1 -1173,1 -1174,1 -1175,1 -1176,1 -1177,1 -1178,1 -1179,1 -1180,1 -1181,1 -1182,1 -1183,1 -1184,1 -1185,1 -1186,1 -1187,1 -1188,1 -1189,1 -1190,1 -1191,1 -1192,1 -1193,1 -1194,1 -1195,1 -1196,1 -1197,1 -1198,1 -1199,1 -1200,1 -1201,1 -1202,1 -1203,1 -1204,1 -1205,1 -1206,1 -1207,1 -1208,1 -1209,1 -1210,1 -1211,1 -1212,1 -1213,1 -1214,1 -1215,1 -1216,1 -1217,1 -1218,1 -1219,1 -1220,1 -1221,1 -1222,1 -1223,1 -1224,1 -1225,1 -1226,1 -1227,1 -1228,1 -1229,1 -1230,1 -1231,1 -1232,1 -1233,1 -1234,1 -1235,1 -1236,1 -1237,1 -1238,1 -1239,1 -1240,1 -1241,1 -1242,1 -1243,1 -1244,1 -1245,1 -1246,1 -1247,1 -1248,1 -1249,1 -1250,1 -1251,1 -1252,1 -1253,1 -1254,1 -1255,1 -1256,1 -1257,1 -1258,1 -1259,1 -1260,1 -1261,1 -1262,1 -1263,1 -1264,1 -1265,1 -1266,1 -1267,1 -1268,1 -1269,1 -1270,1 -1271,1 -1272,1 -1273,1 -1274,1 -1275,1 -1276,1 -1277,1 -1278,1 -1279,1 -1280,1 -1281,1 -1282,1 -1283,1 -1284,1 -1285,1 -1286,1 -1287,1 -1288,1 -1289,1 -1290,1 -1291,1 -1292,1 -1293,1 -1294,1 -1295,1 -1296,1 -1297,1 -1298,1 -1299,1 -1300,1 -1301,1 -1302,1 -1303,1 -1304,1 -1305,1 -1306,1 -1307,1 -1308,1 -1309,1 -1310,1 -1311,1 -1312,1 -1313,1 -1314,1 -1315,1 -1316,1 -1317,1 -1318,1 -1319,1 -1320,1 -1321,1 -1322,1 -1323,1 -1324,1 -1325,1 -1326,1 -1327,1 -1328,1 -1329,1 -1330,1 -1331,1 -1332,1 -1333,1 -1334,1 -1335,1 -1336,1 -1337,1 -1338,1 -1339,1 -1340,1 -1341,1 
-1342,1 -1343,0 -1344,0 -1345,1 -1346,1 -1347,1 -1348,1 -1349,1 -1350,1 -1351,1 -1352,1 -1353,1 -1354,1 -1355,1 -1356,1 -1357,1 -1358,1 -1359,1 -1360,1 -1361,1 -1362,1 -1363,1 -1364,1 -1365,1 -1366,1 -1367,1 -1368,1 -1369,1 -1370,1 -1371,1 -1372,1 -1373,1 -1374,1 -1375,1 -1376,1 -1377,1 -1378,1 -1379,1 -1380,1 -1381,1 -1382,1 -1383,1 -1384,1 -1385,1 -1386,1 -1387,1 -1388,1 -1389,1 -1390,1 -1391,1 -1392,1 -1393,1 -1394,1 -1395,1 -1396,1 -1397,1 -1398,1 -1399,1 -1400,1 -1401,0 -1402,1 -1403,1 -1404,1 -1405,1 -1406,1 -1407,1 -1408,1 -1409,1 -1410,1 -1411,1 -1412,1 -1413,1 -1414,1 -1415,1 -1416,1 -1417,1 -1418,0 -1419,0 -1420,0 -1421,0 -1422,0 -1423,0 -1424,0 -1425,0 -1426,0 -1427,0 -1428,0 -1429,0 -1430,0 -1431,0 -1432,0 -1433,0 -1434,0 -1435,0 -1436,0 -1437,0 -1438,0 -1439,0 -1440,0 -1441,0 -1442,0 -1443,0 -1444,0 -1445,0 -1446,0 -1447,0 -1448,0 -1449,0 -1450,0 -1451,0 -1452,0 -1453,0 -1454,0 -1455,0 -1456,0 -1457,0 -1458,0 -1459,0 -1460,0 -1461,0 -1462,0 -1463,0 -1464,0 -1465,0 -1466,0 -1467,0 -1468,0 -1469,0 -1470,0 -1471,0 -1472,0 -1473,0 -1474,0 -1475,0 -1476,0 -1477,0 -1478,0 -1479,0 -1480,0 -1481,0 -1482,0 -1483,0 -1484,0 -1485,0 -1486,0 -1487,0 -1488,0 -1489,0 -1490,0 -1491,0 -1492,0 -1493,0 -1494,0 -1495,0 -1496,0 -1497,0 -1498,0 -1499,0 -1500,0 -1501,0 -1502,0 -1503,0 -1504,0 -1505,0 -1506,0 -1507,0 -1508,0 -1509,0 -1510,0 -1511,0 -1512,0 -1513,0 -1514,0 -1515,0 -1516,0 -1517,0 -1518,0 -1519,0 -1520,0 -1521,0 -1522,0 -1523,1 -1524,0 -1525,0 -1526,0 -1527,0 -1528,0 -1529,0 -1530,0 -1531,0 -1532,0 -1533,0 -1534,0 -1535,0 -1536,0 -1537,0 -1538,0 -1539,0 -1540,0 -1541,0 -1542,0 -1543,0 -1544,0 -1545,0 -1546,0 -1547,0 -1548,0 -1549,0 -1550,0 -1551,0 -1552,0 -1553,0 -1554,0 -1555,0 -1556,0 -1557,0 -1558,0 -1559,0 -1560,0 -1561,0 -1562,0 -1563,0 -1564,0 -1565,0 -1566,0 -1567,0 -1568,0 -1569,0 -1570,0 -1571,0 -1572,0 -1573,0 -1574,0 -1575,0 -1576,0 -1577,0 -1578,1 -1579,0 -1580,0 -1581,0 -1582,0 -1583,0 -1584,0 -1585,0 -1586,0 -1587,0 -1588,0 -1589,0 -1590,0 -1591,0 
-1592,0 -1593,0 -1594,0 -1595,0 -1596,0 -1597,0 -1598,0 -1599,0 -1600,0 -1601,0 -1602,0 -1603,0 -1604,0 -1605,0 -1606,0 -1607,0 -1608,0 -1609,0 -1610,0 -1611,0 -1612,0 -1613,0 -1614,0 -1615,0 -1616,0 -1617,0 -1618,0 -1619,0 -1620,0 -1621,0 -1622,0 -1623,0 -1624,0 -1625,0 -1626,0 -1627,0 -1628,0 -1629,0 -1630,0 -1631,0 -1632,0 -1633,0 -1634,0 -1635,0 -1636,0 -1637,0 -1638,0 -1639,0 -1640,0 -1641,0 -1642,0 -1643,0 -1644,0 -1645,0 -1646,0 -1647,0 -1648,0 -1649,0 -1650,0 -1651,0 -1652,0 -1653,0 -1654,0 -1655,0 -1656,0 -1657,0 -1658,0 -1659,0 -1660,0 -1661,0 -1662,0 -1663,0 -1664,0 -1665,0 -1666,0 -1667,0 -1668,0 -1669,0 -1670,0 -1671,0 -1672,0 -1673,0 -1674,0 -1675,0 -1676,0 -1677,0 -1678,0 -1679,0 -1680,0 -1681,0 -1682,0 -1683,0 -1684,0 -1685,0 -1686,0 -1687,0 -1688,0 -1689,0 -1690,0 -1691,0 -1692,0 -1693,0 -1694,0 -1695,0 -1696,0 -1697,0 -1698,0 -1699,0 -1700,0 -1701,0 -1702,0 -1703,0 -1704,0 -1705,0 -1706,0 -1707,0 -1708,0 -1709,1 -1710,0 -1711,0 -1712,0 -1713,0 -1714,0 -1715,0 -1716,0 -1717,0 -1718,0 -1719,0 -1720,0 -1721,0 -1722,0 -1723,0 -1724,0 -1725,0 -1726,0 -1727,0 -1728,0 -1729,0 -1730,0 -1731,0 -1732,0 -1733,0 -1734,0 -1735,0 -1736,0 -1737,0 -1738,0 -1739,0 -1740,0 -1741,0 -1742,0 -1743,0 -1744,0 -1745,0 -1746,0 -1747,0 -1748,0 -1749,0 -1750,0 -1751,0 -1752,0 -1753,0 -1754,0 -1755,0 -1756,0 -1757,0 -1758,0 -1759,0 -1760,0 -1761,0 -1762,0 -1763,0 -1764,0 -1765,0 -1766,0 -1767,0 -1768,0 -1769,0 -1770,0 -1771,0 -1772,0 -1773,0 -1774,0 -1775,0 -1776,0 -1777,0 -1778,0 -1779,0 -1780,0 -1781,0 -1782,0 -1783,0 -1784,0 -1785,0 -1786,0 -1787,0 -1788,0 -1789,0 -1790,0 -1791,0 -1792,0 -1793,0 -1794,0 -1795,0 -1796,0 -1797,0 -1798,0 -1799,0 -1800,0 -1801,0 -1802,0 -1803,1 -1804,0 -1805,0 -1806,0 -1807,0 -1808,0 -1809,0 -1810,0 -1811,0 -1812,0 -1813,0 -1814,0 -1815,0 -1816,0 -1817,0 -1818,0 -1819,0 -1820,0 -1821,0 -1822,0 -1823,0 -1824,0 -1825,0 -1826,0 -1827,0 -1828,0 -1829,0 -1830,0 -1831,0 -1832,0 -1833,0 -1834,0 -1835,0 -1836,0 -1837,0 -1838,0 -1839,0 -1840,0 -1841,0 
-1842,0 -1843,1 -1844,0 -1845,0 -1846,0 -1847,0 -1848,0 -1849,0 -1850,0 -1851,0 -1852,0 -1853,0 -1854,0 -1855,0 -1856,0 -1857,0 -1858,0 -1859,0 -1860,0 -1861,0 -1862,0 -1863,0 -1864,0 -1865,0 -1866,0 -1867,0 -1868,0 -1869,0 -1870,0 -1871,0 -1872,0 -1873,0 -1874,0 -1875,0 -1876,0 -1877,0 -1878,0 -1879,1 -1880,0 -1881,0 -1882,0 -1883,0 -1884,0 -1885,0 -1886,0 -1887,0 -1888,0 -1889,0 -1890,0 -1891,0 -1892,0 -1893,0 -1894,0 -1895,0 -1896,0 -1897,0 -1898,0 -1899,0 -1900,0 -1901,0 -1902,0 -1903,0 -1904,0 -1905,0 -1906,0 -1907,0 -1908,0 -1909,0 -1910,0 -1911,0 -1912,0 -1913,0 -1914,0 -1915,0 -1916,0 -1917,0 -1918,0 -1919,0 -1920,0 -1921,0 -1922,0 -1923,0 -1924,0 -1925,0 -1926,0 -1927,0 -1928,0 -1929,0 -1930,1 -1931,0 -1932,0 -1933,0 -1934,0 -1935,0 -1936,0 -1937,0 -1938,0 -1939,0 -1940,0 -1941,0 -1942,0 -1943,0 -1944,1 -1945,0 -1946,0 -1947,0 -1948,0 -1949,0 -1950,0 -1951,0 -1952,0 -1953,0 -1954,0 -1955,0 -1956,0 -1957,0 -1958,0 -1959,0 -1960,0 -1961,0 -1962,0 -1963,0 -1964,1 -1965,0 -1966,0 -1967,0 -1968,0 -1969,0 -1970,0 -1971,0 -1972,0 -1973,0 -1974,0 -1975,0 -1976,0 -1977,0 -1978,0 -1979,0 -1980,0 -1981,0 -1982,0 -1983,0 -1984,0 -1985,0 -1986,0 -1987,0 -1988,0 -1989,0 -1990,0 -1991,0 -1992,0 -1993,0 -1994,0 -1995,0 -1996,0 -1997,0 -1998,0 -1999,0 -2000,0 -2001,0 -2002,0 -2003,0 -2004,0 -2005,0 -2006,0 -2007,0 -2008,0 -2009,0 -2010,0 -2011,0 -2012,0 -2013,0 -2014,0 -2015,0 -2016,0 -2017,0 -2018,0 -2019,0 -2020,0 -2021,0 -2022,0 -2023,0 -2024,0 -2025,0 -2026,0 -2027,0 -2028,0 -2029,0 -2030,0 -2031,0 -2032,0 -2033,0 -2034,0 -2035,0 -2036,0 -2037,0 -2038,0 -2039,0 -2040,0 -2041,0 -2042,0 -2043,0 -2044,0 -2045,0 -2046,0 -2047,0 -2048,0 -2049,0 -2050,0 -2051,0 -2052,0 -2053,0 -2054,0 -2055,0 -2056,0 -2057,0 -2058,0 -2059,0 -2060,0 -2061,0 -2062,0 -2063,0 -2064,0 -2065,0 -2066,0 -2067,0 -2068,0 -2069,0 -2070,0 -2071,0 -2072,0 -2073,0 -2074,0 -2075,0 -2076,0 -2077,0 -2078,0 -2079,0 -2080,0 -2081,0 -2082,0 -2083,0 -2084,0 -2085,0 -2086,0 -2087,0 -2088,0 -2089,0 -2090,0 -2091,0 
-2092,0 -2093,0 -2094,0 -2095,0 -2096,0 -2097,0 -2098,0 -2099,0 -2100,0 -2101,0 -2102,0 -2103,0 -2104,0 -2105,0 -2106,0 -2107,0 -2108,0 -2109,0 -2110,0 -2111,0 -2112,0 -2113,0 -2114,0 -2115,0 -2116,0 -2117,0 -2118,0 -2119,0 -2120,0 -2121,0 -2122,0 -2123,0 -2124,0 -2125,0 -2126,0 -2127,0 -2128,0 -2129,0 -2130,0 -2131,0 -2132,0 -2133,0 -2134,0 -2135,0 -2136,0 -2137,0 -2138,0 -2139,0 -2140,0 -2141,0 -2142,0 -2143,0 -2144,0 -2145,0 -2146,0 -2147,0 -2148,0 -2149,0 -2150,0 -2151,0 -2152,0 -2153,0 -2154,0 -2155,0 -2156,0 -2157,0 -2158,0 -2159,0 -2160,0 -2161,0 -2162,0 -2163,0 -2164,0 -2165,1 -2166,0 -2167,0 -2168,0 -2169,0 -2170,0 -2171,0 -2172,0 -2173,0 -2174,0 -2175,0 -2176,0 -2177,0 -2178,0 -2179,0 -2180,0 -2181,0 -2182,0 -2183,0 -2184,0 -2185,0 -2186,0 -2187,0 -2188,0 -2189,0 -2190,0 -2191,0 -2192,0 -2193,0 -2194,0 -2195,0 -2196,0 -2197,0 -2198,0 -2199,0 -2200,0 -2201,0 -2202,0 -2203,0 -2204,0 -2205,1 -2206,0 -2207,0 -2208,0 -2209,0 -2210,0 -2211,0 -2212,0 -2213,0 -2214,0 -2215,0 -2216,0 -2217,0 -2218,0 -2219,0 -2220,0 -2221,0 -2222,0 -2223,0 -2224,0 -2225,0 -2226,0 -2227,0 -2228,0 -2229,0 -2230,0 -2231,0 -2232,0 -2233,0 -2234,0 -2235,0 -2236,0 -2237,0 -2238,0 -2239,0 -2240,0 -2241,0 -2242,0 -2243,0 -2244,0 -2245,0 -2246,0 -2247,0 -2248,0 -2249,0 -2250,0 -2251,1 -2252,0 -2253,0 -2254,0 -2255,0 -2256,0 -2257,0 -2258,0 -2259,0 -2260,0 -2261,0 -2262,0 -2263,0 -2264,0 -2265,0 -2266,0 -2267,0 -2268,0 -2269,0 -2270,0 -2271,0 -2272,0 -2273,0 -2274,0 -2275,0 -2276,0 -2277,0 -2278,0 -2279,0 -2280,0 -2281,0 -2282,0 -2283,0 -2284,0 -2285,0 -2286,0 -2287,0 -2288,0 -2289,0 -2290,0 -2291,0 -2292,0 -2293,0 -2294,0 -2295,0 -2296,0 -2297,0 -2298,0 -2299,0 -2300,0 -2301,0 -2302,0 -2303,0 -2304,0 -2305,0 -2306,0 -2307,0 -2308,0 -2309,0 -2310,0 -2311,0 -2312,0 -2313,0 -2314,0 -2315,0 -2316,0 -2317,0 -2318,0 -2319,0 -2320,0 -2321,0 -2322,0 -2323,0 -2324,0 -2325,0 -2326,0 -2327,0 -2328,0 -2329,0 -2330,0 -2331,0 -2332,0 -2333,0 -2334,1 -2335,0 -2336,0 -2337,0 -2338,0 -2339,0 -2340,0 -2341,0 
-2342,0 -2343,0 -2344,0 -2345,0 -2346,0 -2347,0 -2348,0 -2349,0 -2350,0 -2351,0 -2352,0 -2353,0 -2354,0 -2355,1 -2356,1 -2357,0 -2358,0 -2359,0 -2360,0 -2361,0 -2362,0 -2363,0 -2364,0 -2365,0 -2366,0 -2367,0 -2368,0 -2369,0 -2370,0 -2371,0 -2372,0 -2373,0 -2374,0 -2375,0 -2376,0 -2377,0 -2378,0 -2379,0 -2380,0 -2381,0 -2382,0 -2383,0 -2384,0 -2385,0 -2386,0 -2387,0 -2388,0 -2389,0 -2390,0 -2391,0 -2392,0 -2393,0 -2394,0 -2395,0 -2396,0 -2397,0 -2398,0 -2399,0 -2400,0 -2401,0 -2402,0 -2403,0 -2404,0 -2405,0 -2406,0 -2407,1 -2408,0 -2409,0 -2410,0 -2411,0 -2412,0 -2413,0 -2414,0 -2415,0 -2416,0 -2417,0 -2418,0 -2419,0 -2420,0 -2421,0 -2422,0 -2423,1 -2424,0 -2425,0 -2426,0 -2427,0 -2428,0 -2429,0 -2430,0 -2431,0 -2432,0 -2433,0 -2434,0 -2435,0 -2436,0 -2437,1 -2438,0 -2439,0 -2440,0 -2441,0 -2442,0 -2443,0 -2444,0 -2445,0 -2446,0 -2447,0 -2448,0 -2449,0 -2450,0 -2451,0 -2452,0 -2453,0 -2454,0 -2455,0 -2456,0 -2457,0 -2458,0 -2459,0 -2460,0 -2461,0 -2462,0 -2463,0 -2464,0 -2465,0 -2466,0 -2467,0 -2468,0 -2469,0 -2470,0 -2471,0 -2472,0 -2473,0 -2474,0 -2475,0 -2476,0 -2477,1 -2478,0 -2479,0 -2480,0 -2481,0 -2482,0 -2483,0 -2484,0 -2485,0 -2486,0 -2487,0 -2488,0 -2489,0 -2490,0 -2491,0 -2492,0 -2493,0 -2494,0 -2495,0 -2496,0 -2497,0 -2498,0 -2499,0 -2500,0 -2501,0 -2502,0 -2503,0 -2504,0 -2505,0 -2506,0 -2507,0 -2508,1 -2509,0 -2510,0 -2511,0 -2512,0 -2513,0 -2514,0 -2515,0 -2516,0 -2517,0 -2518,0 -2519,0 -2520,0 -2521,0 -2522,0 -2523,0 -2524,0 -2525,0 -2526,0 -2527,0 -2528,0 -2529,0 -2530,0 -2531,0 -2532,0 -2533,0 -2534,0 -2535,0 -2536,0 -2537,0 -2538,0 -2539,0 -2540,1 -2541,0 -2542,0 -2543,0 -2544,0 -2545,0 -2546,0 -2547,0 -2548,0 -2549,0 -2550,0 -2551,0 -2552,0 -2553,0 -2554,0 -2555,0 -2556,0 -2557,0 -2558,0 -2559,0 -2560,0 -2561,0 -2562,0 -2563,0 -2564,0 -2565,0 -2566,0 -2567,0 -2568,0 -2569,0 -2570,0 -2571,0 -2572,0 -2573,0 -2574,0 -2575,0 -2576,0 -2577,0 -2578,0 -2579,0 -2580,0 -2581,0 -2582,0 -2583,0 -2584,0 -2585,0 -2586,0 -2587,0 -2588,0 -2589,0 -2590,0 -2591,0 
-2592,0 -2593,0 -2594,0 -2595,1 -2596,0 -2597,0 -2598,0 -2599,0 -2600,0 -2601,0 -2602,0 -2603,0 -2604,0 -2605,0 -2606,0 -2607,0 -2608,0 -2609,0 -2610,0 -2611,0 -2612,0 -2613,0 -2614,0 -2615,0 -2616,0 -2617,0 -2618,0 -2619,0 -2620,0 -2621,0 -2622,0 -2623,0 -2624,0 -2625,0 -2626,0 -2627,0 -2628,0 -2629,0 -2630,0 -2631,0 -2632,0 -2633,0 -2634,0 -2635,0 -2636,0 -2637,0 -2638,0 -2639,0 -2640,0 -2641,0 -2642,0 -2643,0 -2644,0 -2645,0 -2646,0 -2647,0 -2648,0 -2649,1 -2650,0 -2651,0 -2652,0 -2653,0 -2654,0 -2655,0 -2656,0 -2657,0 -2658,0 -2659,0 -2660,0 -2661,0 -2662,0 -2663,1 -2664,0 -2665,0 -2666,0 -2667,0 -2668,0 -2669,0 -2670,0 -2671,0 -2672,0 -2673,0 -2674,0 -2675,0 -2676,0 -2677,0 -2678,0 -2679,0 -2680,0 -2681,0 -2682,0 -2683,0 -2684,0 -2685,0 -2686,0 -2687,0 -2688,0 -2689,0 -2690,0 -2691,0 -2692,0 -2693,0 -2694,0 -2695,0 -2696,0 -2697,0 -2698,0 -2699,0 -2700,0 -2701,0 -2702,0 -2703,0 -2704,0 -2705,0 -2706,0 -2707,0 -2708,0 -2709,0 -2710,0 -2711,0 -2712,0 -2713,0 -2714,0 -2715,0 -2716,0 -2717,0 -2718,0 -2719,0 -2720,0 -2721,0 -2722,0 -2723,0 -2724,0 -2725,0 -2726,0 -2727,0 -2728,0 -2729,0 -2730,0 -2731,0 -2732,0 -2733,0 -2734,0 -2735,0 -2736,0 -2737,0 -2738,0 -2739,0 -2740,0 -2741,0 -2742,0 -2743,0 -2744,0 -2745,0 -2746,0 -2747,0 -2748,0 -2749,0 -2750,0 -2751,0 -2752,0 -2753,0 -2754,0 -2755,0 -2756,0 -2757,0 -2758,0 -2759,0 -2760,0 -2761,0 -2762,0 -2763,0 -2764,0 -2765,0 -2766,0 -2767,0 -2768,0 -2769,0 -2770,0 -2771,0 -2772,0 -2773,0 -2774,0 -2775,0 -2776,0 -2777,0 -2778,0 -2779,0 -2780,0 -2781,0 -2782,1 -2783,0 -2784,0 -2785,0 -2786,0 -2787,0 -2788,0 -2789,0 -2790,0 -2791,0 -2792,0 -2793,1 -2794,0 -2795,0 -2796,0 -2797,0 -2798,0 -2799,0 -2800,0 -2801,0 -2802,0 -2803,0 -2804,0 -2805,0 -2806,0 -2807,0 -2808,0 -2809,0 -2810,0 -2811,0 -2812,0 -2813,0 -2814,0 -2815,0 -2816,0 -2817,0 -2818,0 -2819,0 -2820,0 -2821,0 -2822,0 -2823,0 -2824,0 -2825,0 -2826,1 -2827,0 -2828,0 -2829,0 -2830,0 -2831,0 -2832,1 -2833,0 -2834,0 -2835,0 -2836,0 -2837,0 -2838,0 -2839,0 -2840,0 -2841,0 
-2842,0 -2843,0 -2844,0 -2845,0 -2846,0 -2847,0 -2848,0 -2849,0 -2850,0 -2851,0 -2852,0 -2853,0 -2854,0 -2855,0 -2856,0 -2857,0 -2858,0 -2859,0 -2860,0 -2861,0 -2862,0 -2863,0 -2864,0 -2865,0 -2866,0 -2867,0 -2868,0 -2869,0 -2870,0 -2871,1 -2872,0 -2873,0 -2874,0 -2875,0 -2876,0 -2877,0 -2878,0 -2879,0 -2880,0 -2881,0 -2882,0 -2883,0 -2884,0 -2885,0 -2886,0 -2887,0 -2888,0 -2889,0 -2890,0 -2891,0 -2892,0 -2893,0 -2894,0 -2895,0 -2896,0 -2897,0 -2898,0 -2899,0 -2900,0 -2901,0 -2902,0 -2903,0 -2904,0 -2905,0 -2906,0 -2907,0 -2908,0 -2909,0 -2910,0 -2911,0 -2912,0 -2913,0 -2914,0 -2915,0 -2916,0 -2917,0 -2918,0 -2919,0 -2920,1 -2921,0 -2922,0 -2923,0 -2924,0 -2925,0 -2926,0 -2927,0 -2928,0 -2929,0 -2930,0 -2931,0 -2932,0 -2933,0 -2934,0 -2935,0 -2936,0 -2937,0 -2938,0 -2939,0 -2940,0 -2941,0 -2942,0 -2943,0 -2944,0 -2945,0 -2946,0 -2947,0 -2948,0 -2949,0 -2950,0 -2951,0 -2952,0 -2953,0 -2954,0 -2955,0 -2956,0 -2957,0 -2958,0 -2959,0 -2960,0 -2961,0 -2962,0 -2963,0 -2964,0 -2965,0 -2966,0 -2967,0 -2968,0 -2969,0 -2970,0 -2971,0 -2972,0 -2973,0 -2974,0 -2975,0 -2976,0 -2977,0 -2978,0 -2979,0 -2980,0 -2981,0 -2982,0 -2983,0 -2984,0 -2985,0 -2986,0 -2987,0 -2988,0 -2989,0 -2990,0 -2991,0 -2992,0 -2993,0 -2994,0 -2995,0 -2996,0 -2997,0 -2998,0 -2999,0 -3000,0 -3001,0 -3002,0 -3003,0 -3004,0 -3005,0 -3006,0 -3007,0 -3008,0 -3009,0 -3010,0 -3011,0 -3012,0 -3013,0 -3014,0 -3015,0 -3016,0 -3017,0 -3018,0 -3019,0 -3020,0 -3021,0 -3022,0 -3023,0 -3024,0 -3025,0 -3026,0 -3027,0 -3028,0 -3029,1 -3030,0 -3031,0 -3032,0 -3033,0 -3034,0 -3035,0 -3036,0 -3037,0 -3038,0 -3039,0 -3040,0 -3041,0 -3042,1 -3043,0 -3044,1 -3045,0 -3046,0 -3047,0 -3048,0 -3049,0 -3050,0 -3051,0 -3052,0 -3053,0 -3054,0 -3055,0 -3056,0 -3057,0 -3058,0 -3059,0 -3060,1 -3061,1 -3062,0 -3063,0 -3064,0 -3065,0 -3066,0 -3067,0 -3068,0 -3069,0 -3070,0 -3071,0 -3072,0 -3073,0 -3074,0 -3075,0 -3076,0 -3077,0 -3078,0 -3079,0 -3080,0 -3081,0 -3082,0 -3083,0 -3084,0 -3085,0 -3086,0 -3087,0 -3088,0 -3089,0 -3090,0 -3091,0 
-3092,0 -3093,0 -3094,0 -3095,0 -3096,0 -3097,0 -3098,0 -3099,0 -3100,0 -3101,0 -3102,0 -3103,0 -3104,0 -3105,0 -3106,0 -3107,0 -3108,0 -3109,0 -3110,0 -3111,0 -3112,0 -3113,0 -3114,0 -3115,0 -3116,0 -3117,0 -3118,0 -3119,0 -3120,0 -3121,0 -3122,0 -3123,0 -3124,0 -3125,0 -3126,0 -3127,1 -3128,0 -3129,0 -3130,0 -3131,0 -3132,0 -3133,0 -3134,0 -3135,0 -3136,0 -3137,0 -3138,0 -3139,0 -3140,0 -3141,0 -3142,0 -3143,0 -3144,0 -3145,0 -3146,0 -3147,0 -3148,0 -3149,0 -3150,0 -3151,0 -3152,0 -3153,0 -3154,0 -3155,0 -3156,0 -3157,0 -3158,0 -3159,0 -3160,0 -3161,0 -3162,0 -3163,0 -3164,0 -3165,0 -3166,0 -3167,0 -3168,0 -3169,0 -3170,0 -3171,0 -3172,0 -3173,0 -3174,0 -3175,0 -3176,0 -3177,0 -3178,0 -3179,0 -3180,0 -3181,0 -3182,0 -3183,0 -3184,0 -3185,0 -3186,0 -3187,0 -3188,0 -3189,0 -3190,0 -3191,0 -3192,0 -3193,0 -3194,0 -3195,0 -3196,0 -3197,0 -3198,0 -3199,0 -3200,0 -3201,0 -3202,0 -3203,0 -3204,0 -3205,0 -3206,0 -3207,0 -3208,0 -3209,0 -3210,0 -3211,0 -3212,0 -3213,0 -3214,0 -3215,0 -3216,0 -3217,0 -3218,0 -3219,0 -3220,0 -3221,0 -3222,0 -3223,0 -3224,0 -3225,0 -3226,0 -3227,0 -3228,0 -3229,0 -3230,0 -3231,0 -3232,0 -3233,0 -3234,0 -3235,0 -3236,0 -3237,0 -3238,0 -3239,0 -3240,0 -3241,0 -3242,1 -3243,0 -3244,0 -3245,0 -3246,0 -3247,0 -3248,0 -3249,0 -3250,0 -3251,0 -3252,0 -3253,0 -3254,0 -3255,0 -3256,0 -3257,0 -3258,1 -3259,0 -3260,0 -3261,0 -3262,0 -3263,0 -3264,0 -3265,0 -3266,0 -3267,0 -3268,0 -3269,0 -3270,0 -3271,0 -3272,0 -3273,0 -3274,0 -3275,0 -3276,0 -3277,0 -3278,0 -3279,0 -3280,0 -3281,0 -3282,0 -3283,0 -3284,0 -3285,0 -3286,0 -3287,0 -3288,0 -3289,0 -3290,0 -3291,0 -3292,0 -3293,0 -3294,0 -3295,0 -3296,0 -3297,0 -3298,0 -3299,0 -3300,1 -3301,0 -3302,0 -3303,0 -3304,0 -3305,0 -3306,0 -3307,0 -3308,0 -3309,0 -3310,0 -3311,0 -3312,0 -3313,0 -3314,0 -3315,0 -3316,0 -3317,0 -3318,0 -3319,0 -3320,0 -3321,0 -3322,0 -3323,0 -3324,0 -3325,0 -3326,0 -3327,0 -3328,0 -3329,0 -3330,0 -3331,0 -3332,0 -3333,0 -3334,0 -3335,0 -3336,0 -3337,0 -3338,0 -3339,1 -3340,0 -3341,0 
-3342,0 -3343,0 -3344,0 -3345,0 -3346,0 -3347,0 -3348,0 -3349,0 -3350,0 -3351,1 -3352,0 -3353,0 -3354,0 -3355,0 -3356,0 -3357,0 -3358,0 -3359,0 -3360,0 -3361,0 -3362,0 -3363,0 -3364,0 -3365,0 -3366,0 -3367,0 -3368,0 -3369,0 -3370,0 -3371,0 -3372,0 -3373,0 -3374,0 -3375,0 -3376,0 -3377,0 -3378,0 -3379,0 -3380,0 -3381,0 -3382,0 -3383,0 -3384,0 -3385,0 -3386,0 -3387,0 -3388,0 -3389,0 -3390,0 -3391,0 -3392,0 -3393,0 -3394,0 -3395,0 -3396,0 -3397,1 -3398,0 -3399,0 -3400,0 -3401,0 -3402,0 -3403,0 -3404,0 -3405,0 -3406,0 -3407,1 -3408,0 -3409,0 -3410,0 -3411,0 -3412,0 -3413,0 -3414,0 -3415,0 -3416,0 -3417,0 -3418,0 -3419,0 -3420,0 -3421,0 -3422,0 -3423,0 -3424,0 -3425,0 -3426,0 -3427,0 -3428,0 -3429,0 -3430,0 -3431,0 -3432,0 -3433,0 -3434,0 -3435,0 -3436,0 -3437,1 -3438,0 -3439,0 -3440,0 -3441,0 -3442,0 -3443,0 -3444,0 -3445,0 -3446,0 -3447,0 -3448,0 -3449,0 -3450,0 -3451,0 -3452,0 -3453,0 -3454,0 -3455,0 -3456,0 -3457,1 -3458,0 -3459,0 -3460,0 -3461,0 -3462,0 -3463,0 -3464,0 -3465,0 -3466,0 -3467,0 -3468,0 -3469,0 -3470,0 -3471,0 -3472,0 -3473,0 -3474,0 -3475,0 -3476,0 -3477,0 -3478,0 -3479,0 -3480,0 -3481,0 -3482,0 -3483,0 -3484,1 -3485,0 -3486,0 -3487,0 -3488,0 -3489,0 -3490,0 -3491,0 -3492,0 -3493,0 -3494,0 -3495,1 -3496,0 -3497,0 -3498,0 -3499,0 -3500,0 -3501,0 -3502,0 -3503,0 -3504,0 -3505,0 -3506,0 -3507,1 -3508,0 -3509,0 -3510,0 -3511,0 -3512,0 -3513,0 -3514,0 -3515,0 -3516,0 -3517,0 -3518,0 -3519,0 -3520,0 -3521,0 -3522,0 -3523,0 -3524,0 -3525,0 -3526,0 -3527,1 -3528,0 -3529,0 -3530,0 -3531,0 -3532,0 -3533,0 -3534,0 -3535,0 -3536,0 -3537,0 -3538,0 -3539,0 -3540,0 -3541,0 -3542,0 -3543,0 -3544,0 -3545,0 -3546,0 -3547,0 -3548,0 -3549,0 -3550,0 -3551,0 -3552,0 -3553,0 -3554,0 -3555,0 -3556,0 -3557,0 -3558,0 -3559,0 -3560,0 -3561,1 -3562,0 -3563,0 -3564,0 -3565,0 -3566,0 -3567,1 -3568,0 -3569,0 -3570,0 -3571,0 -3572,0 -3573,0 -3574,0 -3575,0 -3576,0 -3577,0 -3578,1 -3579,1 -3580,0 -3581,0 -3582,0 -3583,0 -3584,0 -3585,0 -3586,0 -3587,0 -3588,0 -3589,0 -3590,0 -3591,0 
-3592,0 -3593,1 -3594,0 -3595,0 -3596,0 -3597,0 -3598,0 -3599,0 -3600,0 -3601,0 -3602,0 -3603,0 -3604,0 -3605,0 -3606,1 -3607,0 -3608,0 -3609,0 -3610,0 -3611,0 -3612,0 -3613,0 -3614,0 -3615,0 -3616,0 -3617,0 -3618,0 -3619,0 -3620,0 -3621,0 -3622,0 -3623,0 -3624,0 -3625,0 -3626,0 -3627,0 -3628,0 -3629,0 -3630,0 -3631,0 -3632,1 -3633,0 -3634,0 -3635,0 -3636,0 -3637,0 -3638,0 -3639,0 -3640,0 -3641,0 -3642,0 -3643,0 -3644,0 -3645,0 -3646,0 -3647,0 -3648,0 -3649,0 -3650,0 -3651,0 -3652,0 -3653,0 -3654,0 -3655,0 -3656,0 -3657,0 -3658,0 -3659,0 -3660,0 -3661,0 -3662,0 -3663,0 -3664,1 -3665,0 -3666,0 -3667,0 -3668,0 -3669,0 -3670,0 -3671,0 -3672,0 -3673,0 -3674,0 -3675,0 -3676,0 -3677,0 -3678,1 -3679,0 -3680,0 -3681,0 -3682,0 -3683,0 -3684,0 -3685,0 -3686,0 -3687,0 -3688,0 -3689,1 -3690,0 -3691,0 -3692,0 -3693,0 -3694,1 -3695,0 -3696,0 -3697,0 -3698,0 -3699,0 -3700,0 -3701,0 -3702,0 -3703,0 -3704,0 -3705,0 -3706,0 -3707,0 -3708,1 -3709,1 -3710,0 -3711,1 -3712,0 -3713,0 -3714,0 -3715,0 -3716,0 -3717,0 -3718,0 -3719,0 -3720,0 -3721,0 -3722,0 -3723,0 -3724,0 -3725,0 -3726,0 -3727,0 -3728,0 -3729,0 -3730,0 -3731,0 -3732,0 -3733,0 -3734,0 -3735,0 -3736,0 -3737,0 -3738,0 -3739,0 -3740,0 -3741,0 -3742,0 -3743,0 -3744,0 -3745,0 -3746,0 -3747,0 -3748,0 -3749,0 -3750,1 -3751,0 -3752,0 -3753,0 -3754,0 -3755,0 -3756,0 -3757,0 -3758,0 -3759,0 -3760,0 -3761,0 -3762,0 -3763,0 -3764,0 -3765,0 -3766,0 -3767,1 -3768,0 -3769,0 -3770,0 -3771,0 -3772,0 -3773,0 -3774,0 -3775,0 -3776,0 -3777,0 -3778,0 -3779,0 -3780,1 -3781,0 -3782,0 -3783,0 -3784,0 -3785,0 -3786,0 -3787,0 -3788,0 -3789,0 -3790,0 -3791,0 -3792,0 -3793,0 -3794,0 -3795,0 -3796,0 -3797,0 -3798,0 -3799,0 -3800,0 -3801,1 -3802,0 -3803,0 -3804,0 -3805,0 -3806,0 -3807,0 -3808,0 -3809,0 -3810,0 -3811,0 -3812,0 -3813,1 -3814,0 -3815,0 -3816,0 -3817,0 -3818,0 -3819,0 -3820,0 -3821,0 -3822,0 -3823,0 -3824,0 -3825,0 -3826,0 -3827,0 -3828,0 -3829,0 -3830,0 -3831,0 -3832,0 -3833,0 -3834,0 -3835,0 -3836,0 -3837,0 -3838,0 -3839,0 -3840,0 -3841,0 
-3842,0 -3843,0 -3844,0 -3845,0 -3846,0 -3847,1 -3848,0 -3849,0 -3850,0 -3851,0 -3852,0 -3853,0 -3854,0 -3855,0 -3856,0 -3857,0 -3858,0 -3859,0 -3860,0 -3861,0 -3862,0 -3863,0 -3864,0 -3865,0 -3866,0 -3867,0 -3868,0 -3869,0 -3870,0 -3871,0 -3872,0 -3873,0 -3874,0 -3875,0 -3876,0 -3877,0 -3878,0 -3879,0 -3880,0 -3881,0 -3882,1 -3883,0 -3884,0 -3885,0 -3886,0 -3887,0 -3888,0 -3889,0 -3890,0 -3891,0 -3892,0 -3893,0 -3894,0 -3895,0 -3896,0 -3897,0 -3898,0 -3899,0 -3900,0 -3901,0 -3902,1 -3903,0 -3904,0 -3905,0 -3906,0 -3907,0 -3908,0 -3909,0 -3910,0 -3911,0 -3912,0 -3913,0 -3914,1 -3915,0 -3916,0 -3917,0 -3918,0 -3919,0 -3920,0 -3921,0 -3922,0 -3923,0 -3924,0 -3925,0 -3926,0 -3927,0 -3928,0 -3929,0 -3930,0 -3931,0 -3932,0 -3933,0 -3934,0 -3935,0 -3936,0 -3937,0 -3938,0 -3939,0 -3940,0 -3941,0 -3942,0 -3943,0 -3944,1 -3945,0 -3946,0 -3947,0 -3948,0 -3949,0 -3950,0 -3951,0 -3952,0 -3953,0 -3954,0 -3955,0 -3956,0 -3957,0 -3958,0 -3959,0 -3960,0 -3961,0 -3962,0 -3963,0 -3964,0 -3965,0 -3966,0 -3967,0 -3968,0 -3969,0 -3970,0 -3971,0 -3972,0 -3973,0 -3974,0 -3975,0 -3976,0 -3977,0 -3978,0 -3979,0 -3980,0 -3981,0 -3982,0 -3983,0 -3984,0 -3985,0 -3986,0 -3987,0 -3988,0 -3989,0 -3990,0 -3991,0 -3992,0 -3993,0 -3994,0 -3995,0 -3996,0 -3997,0 -3998,0 -3999,0 -4000,0 -4001,0 -4002,0 -4003,0 -4004,0 -4005,0 -4006,0 -4007,0 -4008,0 -4009,0 -4010,0 -4011,0 -4012,0 -4013,0 -4014,0 -4015,0 -4016,0 -4017,0 -4018,0 -4019,0 -4020,0 -4021,0 -4022,0 -4023,0 -4024,0 -4025,0 -4026,0 -4027,0 -4028,1 -4029,0 -4030,0 -4031,0 -4032,0 -4033,0 -4034,0 -4035,0 -4036,0 -4037,0 -4038,0 -4039,0 -4040,0 -4041,0 -4042,0 -4043,0 -4044,0 -4045,0 -4046,0 -4047,1 -4048,0 -4049,0 -4050,0 -4051,0 -4052,0 -4053,0 -4054,0 -4055,0 -4056,0 -4057,0 -4058,0 -4059,0 -4060,0 -4061,0 -4062,0 -4063,0 -4064,0 -4065,0 -4066,0 -4067,0 -4068,0 -4069,0 -4070,0 -4071,0 -4072,0 -4073,0 -4074,0 -4075,0 -4076,0 -4077,0 -4078,0 -4079,0 -4080,0 -4081,0 -4082,0 -4083,0 -4084,0 -4085,0 -4086,0 -4087,0 -4088,0 -4089,0 -4090,0 -4091,0 
-4092,0 -4093,0 -4094,0 -4095,0 -4096,0 -4097,0 -4098,0 -4099,0 -4100,0 -4101,0 -4102,0 -4103,0 -4104,0 -4105,1 -4106,0 -4107,0 -4108,0 -4109,0 -4110,0 -4111,0 -4112,0 -4113,0 -4114,0 -4115,0 -4116,0 -4117,0 -4118,0 -4119,0 -4120,0 -4121,0 -4122,0 -4123,0 -4124,0 -4125,0 -4126,0 -4127,0 -4128,0 -4129,0 -4130,0 -4131,1 -4132,0 -4133,0 -4134,0 -4135,0 -4136,0 -4137,0 -4138,0 -4139,0 -4140,0 -4141,0 -4142,0 -4143,0 -4144,0 -4145,0 -4146,0 -4147,0 -4148,0 -4149,0 -4150,0 -4151,0 -4152,0 -4153,0 -4154,0 -4155,0 -4156,0 -4157,0 -4158,0 -4159,0 -4160,0 -4161,0 -4162,0 -4163,0 -4164,0 -4165,0 -4166,0 -4167,0 -4168,0 -4169,0 -4170,0 -4171,0 -4172,0 -4173,0 -4174,0 -4175,0 -4176,0 -4177,0 -4178,0 -4179,0 -4180,1 -4181,0 -4182,0 -4183,0 -4184,0 -4185,0 -4186,0 -4187,0 -4188,0 -4189,0 -4190,1 -4191,0 -4192,0 -4193,0 -4194,0 -4195,0 -4196,0 -4197,0 -4198,0 -4199,0 -4200,0 -4201,0 -4202,0 -4203,0 -4204,0 -4205,0 -4206,0 -4207,0 -4208,0 -4209,0 -4210,0 -4211,0 -4212,0 -4213,0 -4214,0 -4215,0 -4216,0 -4217,0 -4218,0 -4219,0 -4220,1 -4221,0 -4222,0 -4223,0 -4224,0 -4225,0 -4226,0 -4227,0 -4228,0 -4229,0 -4230,0 -4231,0 -4232,0 -4233,0 -4234,0 -4235,0 -4236,0 -4237,0 -4238,0 -4239,0 -4240,1 -4241,0 -4242,0 -4243,0 -4244,0 -4245,0 -4246,0 -4247,0 -4248,0 -4249,0 -4250,0 -4251,0 -4252,0 -4253,0 -4254,0 -4255,0 -4256,0 -4257,0 -4258,0 -4259,0 -4260,0 -4261,0 -4262,0 -4263,0 -4264,0 -4265,0 -4266,0 -4267,1 -4268,0 -4269,0 -4270,0 -4271,0 -4272,0 -4273,0 -4274,0 -4275,0 -4276,0 -4277,0 -4278,1 -4279,0 -4280,0 -4281,0 -4282,0 -4283,0 -4284,0 -4285,0 -4286,0 -4287,0 -4288,0 -4289,0 -4290,1 -4291,0 -4292,0 -4293,0 -4294,0 -4295,0 -4296,0 -4297,0 -4298,0 -4299,0 -4300,0 -4301,0 -4302,0 -4303,0 -4304,0 -4305,0 -4306,0 -4307,0 -4308,0 -4309,0 -4310,1 -4311,0 -4312,0 -4313,0 -4314,0 -4315,0 -4316,0 -4317,0 -4318,0 -4319,0 -4320,0 -4321,0 -4322,0 -4323,0 -4324,0 -4325,0 -4326,0 -4327,0 -4328,0 -4329,0 -4330,0 -4331,0 -4332,0 -4333,0 -4334,0 -4335,0 -4336,0 -4337,0 -4338,0 -4339,0 -4340,0 -4341,0 
-4342,0 -4343,0 -4344,1 -4345,0 -4346,0 -4347,0 -4348,0 -4349,0 -4350,1 -4351,0 -4352,0 -4353,0 -4354,0 -4355,0 -4356,0 -4357,0 -4358,0 -4359,0 -4360,0 -4361,1 -4362,1 -4363,0 -4364,0 -4365,0 -4366,0 -4367,0 -4368,0 -4369,0 -4370,0 -4371,0 -4372,0 -4373,0 -4374,0 -4375,0 -4376,1 -4377,0 -4378,0 -4379,0 -4380,0 -4381,0 -4382,0 -4383,0 -4384,0 -4385,0 -4386,0 -4387,0 -4388,0 -4389,1 -4390,0 -4391,0 -4392,0 -4393,0 -4394,0 -4395,0 -4396,0 -4397,0 -4398,0 -4399,0 -4400,0 -4401,0 -4402,0 -4403,0 -4404,0 -4405,0 -4406,0 -4407,0 -4408,0 -4409,0 -4410,0 -4411,0 -4412,0 -4413,0 -4414,0 -4415,1 -4416,0 -4417,0 -4418,0 -4419,0 -4420,0 -4421,0 -4422,0 -4423,0 -4424,0 -4425,0 -4426,0 -4427,0 -4428,0 -4429,0 -4430,0 -4431,0 -4432,0 -4433,0 -4434,0 -4435,0 -4436,0 -4437,0 -4438,0 -4439,0 -4440,0 -4441,0 -4442,0 -4443,0 -4444,0 -4445,0 -4446,0 -4447,1 -4448,0 -4449,0 -4450,0 -4451,0 -4452,0 -4453,0 -4454,0 -4455,0 -4456,0 -4457,0 -4458,0 -4459,0 -4460,0 -4461,1 -4462,0 -4463,0 -4464,0 -4465,0 -4466,0 -4467,0 -4468,0 -4469,0 -4470,0 -4471,0 -4472,1 -4473,0 -4474,0 -4475,0 -4476,0 -4477,1 -4478,0 -4479,0 -4480,0 -4481,0 -4482,0 -4483,0 -4484,0 -4485,0 -4486,0 -4487,0 -4488,0 -4489,0 -4490,0 -4491,1 -4492,1 -4493,0 -4494,1 -4495,0 -4496,0 -4497,0 -4498,0 -4499,0 -4500,0 -4501,0 -4502,0 -4503,0 -4504,0 -4505,0 -4506,0 -4507,0 -4508,0 -4509,0 -4510,0 -4511,0 -4512,0 -4513,0 -4514,0 -4515,0 -4516,0 -4517,0 -4518,0 -4519,0 -4520,0 -4521,0 -4522,0 -4523,0 -4524,0 -4525,0 -4526,0 -4527,0 -4528,0 -4529,0 -4530,0 -4531,0 -4532,0 -4533,1 -4534,0 -4535,0 -4536,0 -4537,0 -4538,0 -4539,0 -4540,0 -4541,0 -4542,0 -4543,0 -4544,0 -4545,0 -4546,0 -4547,0 -4548,0 -4549,0 -4550,1 -4551,0 -4552,0 -4553,0 -4554,0 -4555,0 -4556,0 -4557,0 -4558,0 -4559,0 -4560,0 -4561,0 -4562,0 -4563,1 -4564,0 -4565,0 -4566,0 -4567,0 -4568,0 -4569,0 -4570,0 -4571,0 -4572,0 -4573,0 -4574,0 -4575,0 -4576,0 -4577,0 -4578,0 -4579,0 -4580,0 -4581,0 -4582,0 -4583,0 -4584,1 -4585,0 -4586,0 -4587,0 -4588,0 -4589,0 -4590,0 -4591,0 
-4592,0 -4593,0 -4594,0 -4595,0 -4596,1 -4597,0 -4598,0 -4599,0 -4600,0 -4601,0 -4602,0 -4603,0 -4604,0 -4605,0 -4606,0 -4607,0 -4608,0 -4609,0 -4610,0 -4611,0 -4612,0 -4613,0 -4614,0 -4615,0 -4616,0 -4617,0 -4618,0 -4619,0 -4620,0 -4621,0 -4622,0 -4623,0 -4624,0 -4625,0 -4626,0 -4627,0 -4628,0 -4629,0 -4630,1 -4631,0 -4632,0 -4633,0 -4634,0 -4635,0 -4636,0 -4637,0 -4638,0 -4639,0 -4640,0 -4641,0 -4642,0 -4643,0 -4644,0 -4645,0 -4646,0 -4647,0 -4648,0 -4649,0 -4650,0 -4651,0 -4652,0 -4653,0 -4654,0 -4655,0 -4656,0 -4657,0 -4658,0 -4659,0 -4660,0 -4661,0 -4662,0 -4663,0 -4664,0 -4665,1 -4666,0 -4667,0 -4668,0 -4669,0 -4670,0 -4671,0 -4672,0 -4673,0 -4674,0 -4675,0 -4676,0 -4677,0 -4678,0 -4679,0 -4680,0 -4681,0 -4682,0 -4683,0 -4684,0 -4685,1 -4686,0 -4687,0 -4688,0 -4689,0 -4690,0 -4691,0 -4692,0 -4693,0 -4694,0 -4695,0 -4696,0 -4697,1 -4698,0 -4699,0 -4700,0 -4701,0 -4702,0 -4703,0 -4704,0 -4705,0 -4706,0 -4707,0 -4708,0 -4709,0 -4710,0 -4711,0 -4712,0 -4713,0 -4714,0 -4715,0 -4716,0 -4717,0 -4718,0 -4719,0 -4720,0 -4721,0 -4722,0 -4723,0 -4724,0 -4725,0 -4726,0 -4727,1 -4728,0 -4729,0 -4730,0 -4731,0 -4732,0 -4733,0 -4734,0 -4735,0 -4736,0 -4737,0 -4738,0 -4739,0 -4740,0 -4741,0 -4742,0 -4743,0 -4744,0 -4745,0 -4746,0 -4747,0 -4748,0 -4749,0 -4750,0 -4751,0 -4752,0 -4753,0 -4754,0 -4755,0 -4756,0 -4757,0 -4758,0 -4759,0 -4760,0 -4761,0 -4762,0 -4763,0 -4764,0 -4765,0 -4766,0 -4767,0 -4768,0 -4769,0 -4770,0 -4771,0 -4772,0 -4773,0 -4774,0 -4775,0 -4776,0 -4777,0 -4778,0 -4779,0 -4780,0 -4781,0 -4782,0 -4783,0 -4784,0 -4785,0 -4786,0 -4787,0 -4788,0 -4789,0 -4790,0 -4791,0 -4792,0 -4793,0 -4794,0 -4795,0 -4796,0 -4797,0 -4798,0 -4799,0 -4800,0 -4801,0 -4802,0 -4803,0 -4804,0 -4805,0 -4806,0 -4807,0 -4808,0 -4809,0 -4810,0 -4811,1 -4812,0 -4813,0 -4814,0 -4815,0 -4816,0 -4817,0 -4818,0 -4819,0 -4820,0 -4821,0 -4822,0 -4823,0 -4824,0 -4825,0 -4826,0 -4827,0 -4828,0 -4829,0 -4830,1 -4831,0 -4832,0 -4833,0 -4834,0 -4835,0 -4836,0 -4837,0 -4838,0 -4839,0 -4840,0 -4841,0 
-4842,0 -4843,0 -4844,0 -4845,0 -4846,0 -4847,0 -4848,0 -4849,0 -4850,0 -4851,0 -4852,0 -4853,0 -4854,0 -4855,0 -4856,0 -4857,0 -4858,0 -4859,0 -4860,0 -4861,0 -4862,0 -4863,0 -4864,0 -4865,0 -4866,0 -4867,0 -4868,0 -4869,0 -4870,0 -4871,0 -4872,0 -4873,0 -4874,0 -4875,0 -4876,0 -4877,0 -4878,0 -4879,0 -4880,0 -4881,0 -4882,0 -4883,0 -4884,0 -4885,0 -4886,0 -4887,0 -4888,1 -4889,0 -4890,0 -4891,0 -4892,0 -4893,0 -4894,0 -4895,0 -4896,0 -4897,0 -4898,0 -4899,0 -4900,0 -4901,0 -4902,0 -4903,0 -4904,0 -4905,0 -4906,0 -4907,0 -4908,0 -4909,0 -4910,0 -4911,0 -4912,0 -4913,0 -4914,1 -4915,0 -4916,0 -4917,0 -4918,0 -4919,0 -4920,0 -4921,0 -4922,0 -4923,0 -4924,0 -4925,0 -4926,0 -4927,0 -4928,0 -4929,0 -4930,0 -4931,0 -4932,0 -4933,0 -4934,0 -4935,0 -4936,0 -4937,0 -4938,0 -4939,0 -4940,0 -4941,0 -4942,0 -4943,0 -4944,0 -4945,0 -4946,0 -4947,0 -4948,0 -4949,0 -4950,0 -4951,0 -4952,0 -4953,0 -4954,0 -4955,0