diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..66cee35 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Datasource local storage ignored files +/../../../../../../../../../:\Users\ernes\Documents\UCBerkley_MIDS\Courses\Capstone\SEN12MS\.idea/dataSources/ +/dataSources.local.xml +# Editor-based HTTP Client requests +/httpRequests/ [.idea/ project metadata also added: SEN12MS.iml, deployment.xml, encodings.xml, inspectionProfiles/profiles_settings.xml, misc.xml, modules.xml, remote-mappings.xml, vcs.xml (IDE-generated XML, contents omitted)] diff --git a/README.md b/README.md index 8749d96..a8ab815 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ In this folder, other utilities that can help to load, process, or analyze the d ``` export WANDB_API_KEY= export WANDB_ENTITY=cal-capstone -export WANDB_PROJECT=SEN12MS +export WANDB_PROJECT=scene_classification #export WANDB_MODE=dryrun ``` @@ -95,38 +95,43 @@ export WANDB_PROJECT=SEN12MS CUDA_VISIBLE_DEVICES=0 python classification/main_train.py --exp_name sem12ms_baseline --data_dir /home/ubuntu/SEN12MS/data/sen12ms/data --label_split_dir /home/ubuntu/SEN12MS/splits --use_RGB --IGBP_simple --label_type multi_label --threshold 0.1 --model ResNet50 --lr 0.001 --decay 1e-5 --batch_size 64 --num_workers 4 --data_size 1000 --epochs 1 ``` - - #### finetune (training from pre-trained model) :anguished: - - These arguments will be saved into a .txt file automatically. This .txt file can be used in the testing for reading the arguments. The `threshold` parameter is used to filter out the labels with lower probabilities. Note that this threshold has no influence on single-label classification. More explanation of the arguments is in the `main_train.py` file. Note that the probability label file and the split lists should be put under the same folder during training and testing. The script reads .pkl format instead of .txt files. -- `test.py`: This python script is used to test the model.
It is a semi-automatic script and reads the argument file generated in the training process to decide the label type, model type etc. However, it still requires user to input some basic arguments, such as the path of data directory. Here is an example of the input arguments: -- `convert_moco_to_resnet50.py`: convert moco models to pytorch resnet50 format -download pretrained models from `s3://sen12ms/pretrained_sup` +#### convert Moco pretrained model for sen12ms eval + (optional) download pretrained models from `s3://sen12ms/pretrained` + +Some pretrained models: +- [vivid-resonance-73](https://wandb.ai/cjrd/BDOpenSelfSup-tools/runs/3qjvxo2p) +- [silvery-oath-7](https://wandb.ai/cal-capstone/hpt2/runs/2rr3864e) + ``` ## remove dryrun param -aws s3 sync s3://sen12ms/pretrained_sup . --dryrun +aws s3 sync s3://sen12ms/pretrained . --dryrun ``` -convert models +convert moco models to pytorch resnet50 format ``` -# convert backbone to resnet50 -python classification/models/convert_moco_to_resnet50.py -i pretrained/moco/silvery-oath7-2rr3864e.pth +# convert local file +python classification/models/convert_moco_to_resnet50.py -i pretrained/moco/silvery-oath7-2rr3864e.pth -o pretrained/moco/ + +# download the model from W&B and convert for 12 channels +python classification/models/convert_moco_to_resnet50.py -n 12 -i hpt4/367tz8vs -o pretrained/moco/ -# convert query-encoder to resnet50 -python classification/models/convert_moco_to_resnet50.py -i pretrained/moco/silvery-oath7-2rr3864e.pth -bb false +# rename file with more user-friendly name (TODO automate this) +mv pretrained/moco/367tz8vs_bb_converted.pth pretrained/moco/laced-water-61_bb_converted.pth ``` -finetune with pretrained models -- [vivid-resonance-73](https://wandb.ai/cjrd/BDOpenSelfSup-tools/runs/3qjvxo2p) -- [silvery-oath-7](https://wandb.ai/cal-capstone/hpt2/runs/2rr3864e) + #### finetune (training from pre-trained model) :anguished: + + These arguments will be saved into a .txt file automatically. This .txt file can be used in the testing for reading the arguments. The `threshold` parameter is used to filter out the labels with lower probabilities. Note that this threshold has no influence on single-label classification. More explanation of the arguments is in the `main_train.py` file. Note that the probability label file and the split lists should be put under the same folder during training and testing. The script reads .pkl format instead of .txt files. +- `test.py`: This python script is used to test the model. It is a semi-automatic script and reads the argument file generated in the training process to decide the label type, model type etc. However, it still requires user to input some basic arguments, such as the path of data directory. 
Here is an example of the input arguments: ``` -CUDA_VISIBLE_DEVICES=3 python classification/main_train.py --exp_name finetune --data_dir data/sen12ms/data --label_split_dir splits --use_RGB --IGBP_simple --label_type single_label --threshold 0.1 --model Moco --lr 0.001 --decay 1e-5 --batch_size 64 --num_workers 4 --data_size 2048 --epochs 500 --pt_name silvery-oath7-2rr3864e --pt_dir pretrained/moco --eval +CUDA_VISIBLE_DEVICES=3 python classification/main_train.py --exp_name finetune --data_dir data/sen12ms/data --label_split_dir splits --sensor_type s1s2 --IGBP_simple --label_type single_label --threshold 0.1 --model Moco --lr 0.001 --decay 1e-5 --batch_size 64 --num_workers 4 --data_size 2048 --epochs 500 --pt_name silvery-oath7-2rr3864e --pt_dir pretrained/moco --eval ``` - `pt_name`: the name of the model (wandb run name) - `--eval`: remove this param if you want to skip evaluating after finishing the training +- `sensor_type`: s1, s2, s1s2 Evaluate trained models for classification (this is only if you downloaded the trained model) ``` diff --git a/classification/dataset.py b/classification/dataset.py index 6e72e19..e299c33 100644 --- a/classification/dataset.py +++ b/classification/dataset.py @@ -58,13 +58,14 @@ def load_sample(sample, labels, label_type, threshold, imgTransform, use_s1, use img = load_s2(sample["s2"], imgTransform, s2_band=S2_BANDS_LD) else: img = load_s2(sample["s2"], imgTransform, s2_band=S2_BANDS_LD_BIGEARTHNET) - # load only RGB + + # load only RGB if use_RGB and use_s2==False: if not for_bigearthnet: img = load_s2(sample["s2"], imgTransform, s2_band=S2_BANDS_RGB) else: img = load_s2(sample["s2"], imgTransform, s2_band=S2_BANDS_RGB_BIGEARTHNET) - + # load s1 data if use_s1: if use_s2 or use_RGB: @@ -89,7 +90,7 @@ def load_sample(sample, labels, label_type, threshold, imgTransform, use_s1, use else: loc = np.argmax(lc, axis=-1) lc_hot = np.zeros_like(lc).astype(np.float32) - lc_hot[loc] = 1 + lc_hot[loc] = lc[0] rt_sample = {'image': img, 'label': lc_hot, 'id': sample["id"]} @@ -352,7 +353,8 @@ def __init__(self, path, ls_dir=None, imgTransform=None, if CLC_s == True: self.n_classes = 19 else: - self.n_classes = 43 + self.n_classes = 1 + # self.n_classes = 43 # make sure parent dir exists assert os.path.exists(path) @@ -364,7 +366,8 @@ def __init__(self, path, ls_dir=None, imgTransform=None, sample_list = None total_sample_size = 0 if subset == "train" or subset == "val": - file = os.path.join(ls_dir, f'bigearthnet_train_{data_size}.pkl') + # file = os.path.join(ls_dir, f'bigearthnet_train_{data_size}.pkl') + file = os.path.join(ls_dir, f'BigEarthNet_train_balanced_Permanently_irrigated_land_{data_size}.pkl') print("BigEarthNet: Loading file ",file) sample_list = pkl.load(open(file, "rb")) total_sample_size = len(sample_list) @@ -384,6 +387,7 @@ def __init__(self, path, ls_dir=None, imgTransform=None, else: pbar = tqdm(total=125866) # 125866 samples in test set file = os.path.join(ls_dir, 'bigearthnet_test.pkl') + sample_list = pkl.load(open(file, "rb")) print("bigearthnet_test should be 125866:", len(sample_list)) @@ -407,7 +411,8 @@ def __init__(self, path, ls_dir=None, imgTransform=None, "samples from the bigearthnet subset", subset) # import lables as a dictionary - label_file = os.path.join(ls_dir, 'BigEarthNet_labels.pkl') + # label_file = os.path.join(ls_dir, 'BigEarthNet_labels.pkl') + label_file = os.path.join(ls_dir, 'BigEarthNet_binary_labels_Permanently_irrigated_land.pkl') a_file = open(label_file, "rb") self.labels = pkl.load(a_file) diff --git 
a/classification/main_train.py b/classification/main_train.py index 0363e7c..9e00c5b 100644 --- a/classification/main_train.py +++ b/classification/main_train.py @@ -6,6 +6,7 @@ import numpy as np from datetime import datetime from tqdm import tqdm +import json import torch import torch.optim as optim @@ -65,7 +66,7 @@ # help='use sentinel-1 data') parser.add_argument('--use_RGB', action='store_true', default=False, help='use sentinel-2 RGB bands') -parser.add_argument('--simple_scheme', action='store_true', default=True, +parser.add_argument('--simple_scheme', action='store_true', default=False, help='use IGBP simplified scheme; otherwise: IGBP original scheme') parser.add_argument('--label_type', type=str, choices = label_choices, default='multi_label', @@ -132,6 +133,8 @@ os.makedirs(logs_dir) # ----------------------------- saving files --------------------------------- +sv_name_eval = '' # Used to save a file during the test evaluation + def write_arguments_to_file(args, filename): with open(filename, 'w') as f: for key, value in vars(args).items(): @@ -149,9 +152,10 @@ def save_checkpoint(state, is_best, name): # -------------------------------- Main Program ------------------------------- def main(): global args - + global sv_name_eval # save configuration to file sv_name = datetime.strftime(datetime.now(), '%Y%m%d_%H%M%S') + sv_name_eval = sv_name print('saving file name is ', sv_name) write_arguments_to_file(args, os.path.join(logs_dir, sv_name+'_arguments.txt')) @@ -162,35 +166,37 @@ def main(): use_s1 = (args.sensor_type == 's1') | (args.sensor_type == 's1s2') use_s2 = (args.sensor_type == 's2') | (args.sensor_type == 's1s2') - data_dir = os.path.join("data", args.dataset, "data") + dataset = args.dataset + data_dir = os.path.join("data", dataset, "data") bands_mean = {} bands_std = {} train_dataGen = None val_dataGen = None test_dataGen = None - if not args.use_bigearthnet: - print("Using SEN12MS dataset") + + print(f"Using {dataset} dataset") + if dataset == 'sen12ms': bands_mean = {'s1_mean': [-11.76858, -18.294598], 's2_mean': [1226.4215, 1137.3799, 1139.6792, 1350.9973, 1932.9058, 2211.1584, 2154.9846, 2409.1128, 2001.8622, 1356.0801]} bands_std = {'s1_std': [4.525339, 4.3586307], 's2_std': [741.6254, 740.883, 960.1045, 946.76056, 985.52747, 1082.4341, 1057.7628, 1136.1942, 1132.7898, 991.48016]} - else: - # Assume bigearthnet - print("Using BigEarthNet dataset") + elif dataset == 'bigearthnet': # THE S2 BAND STATISTICS WERE PROVIDED BY THE BIGEARTHNET TEAM - bands_mean = {'s1_mean': [-11.76858, -18.294598], + bands_mean = {'s1_mean': [0.4994, 0.2542], 's2_mean': [340.76769064,429.9430203,614.21682446,590.23569706,950.68368468,1792.46290469, 2075.46795189,2218.94553375,2266.46036911,2246.0605464,1594.42694882,1009.32729131]} - bands_std = {'s1_std': [4.525339, 4.3586307], + bands_std = {'s1_std': [0.1902, 0.1720], 's2_std': [554.81258967,572.41639287,582.87945694,675.88746967,729.89827633,1096.01480586, 1273.45393088,1365.45589904,1356.13789355,1302.3292881,1079.19066363,818.86747235]} + else: + raise NameError(f"unknown dataset: {dataset}") # load datasets imgTransform = transforms.Compose([ToTensor(),Normalize(bands_mean, bands_std)]) - if not args.use_bigearthnet: + if dataset == 'sen12ms': train_dataGen = SEN12MS(data_dir, args.label_split_dir, imgTransform=imgTransform, label_type=label_type, threshold=args.threshold, subset="train", @@ -261,7 +267,7 @@ def main(): cudnn.benchmark = True # define number of classes - if not args.use_bigearthnet: + if dataset == 
'sen12ms': if args.simple_scheme: numCls = 10 ORG_LABELS = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'] @@ -275,13 +281,14 @@ def main(): ORG_LABELS = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19'] else: - numCls = 43 - ORG_LABELS = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', - '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', - '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', - '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', - '41', '42', '43'] - + numCls = 1 + ORG_LABELS = ['0','1'] + # numCls = 43 + # ORG_LABELS = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', + # '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', + # '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', + # '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', + # '41', '42', '43'] print('num_class: ', numCls) @@ -421,11 +428,11 @@ def eval(test_data_loader, model, label_type, numCls, use_cuda, ORG_LABELS): f2_score_ = F2_score() hamming_loss_ = Hamming_loss() subset_acc_ = Subset_accuracy() - acc_score_ = Accuracy_score() # from original script, not recommeded, seems not correct + # acc_score_ = Accuracy_score() # from original script, not recommeded, seems not correct one_err_ = One_error() - coverage_err_ = Coverage_error() - rank_loss_ = Ranking_loss() - labelAvgPrec_score_ = LabelAvgPrec_score() + # coverage_err_ = Coverage_error() + # rank_loss_ = Ranking_loss() + # labelAvgPrec_score_ = LabelAvgPrec_score() calssification_report_ = calssification_report(ORG_LABELS) @@ -433,6 +440,8 @@ def eval(test_data_loader, model, label_type, numCls, use_cuda, ORG_LABELS): y_true = [] predicted_probs = [] + pred_dic = {} + with torch.no_grad(): for batch_idx, data in enumerate(tqdm(test_data_loader, desc="test")): @@ -459,6 +468,15 @@ def eval(test_data_loader, model, label_type, numCls, use_cuda, ORG_LABELS): predicted_probs += list(probs) y_true += list(labels) + for j in range(len(data['id'])): + pred_dic[data['id'][j]] = {'true': str(list(list(labels)[j])), + 'prediction': str(list(list(probs)[j])) + } + + fileout = f"{checkpoint_dir}/{sv_name_eval}_{args.model}_{label_type}.json" + with open(fileout,'w') as fp: + json.dump(pred_dic, fp) + predicted_probs = np.asarray(predicted_probs) # convert predicted probabilities into one/multi-hot labels if label_type == 'multi_label': @@ -473,18 +491,27 @@ def eval(test_data_loader, model, label_type, numCls, use_cuda, ORG_LABELS): # --------------------------- evaluation with metrics # general - macro_f1, micro_f1, sample_f1 = f1_score_(y_predicted, y_true) - macro_f2, micro_f2, sample_f2 = f2_score_(y_predicted, y_true) - macro_prec, micro_prec, sample_prec = prec_score_(y_predicted, y_true) - macro_rec, micro_rec, sample_rec = recal_score_(y_predicted, y_true) + # macro_f1, micro_f1, sample_f1 = f1_score_(y_predicted, y_true) + # macro_f2, micro_f2, sample_f2 = f2_score_(y_predicted, y_true) + # macro_prec, micro_prec, sample_prec = prec_score_(y_predicted, y_true) + # macro_rec, micro_rec, sample_rec = recal_score_(y_predicted, y_true) + # hamming_loss = hamming_loss_(y_predicted, y_true) + # subset_acc = subset_acc_(y_predicted, y_true) + # macro_acc, micro_acc, sample_acc = acc_score_(y_predicted, y_true) + + macro_f1, micro_f1 = f1_score_(y_predicted, y_true) + macro_f2, micro_f2 = f2_score_(y_predicted, y_true) + macro_prec, micro_prec = prec_score_(y_predicted, y_true) + macro_rec, micro_rec = recal_score_(y_predicted, y_true) hamming_loss = 
hamming_loss_(y_predicted, y_true) subset_acc = subset_acc_(y_predicted, y_true) - macro_acc, micro_acc, sample_acc = acc_score_(y_predicted, y_true) + # macro_acc, micro_acc = acc_score_(y_predicted, y_true) + # ranking-based one_error = one_err_(predicted_probs, y_true) - coverage_error = coverage_err_(predicted_probs, y_true) - rank_loss = rank_loss_(predicted_probs, y_true) - labelAvgPrec = labelAvgPrec_score_(predicted_probs, y_true) + # coverage_error = coverage_err_(predicted_probs, y_true) + # rank_loss = rank_loss_(predicted_probs, y_true) + # labelAvgPrec = labelAvgPrec_score_(predicted_probs, y_true) cls_report = calssification_report_(y_predicted, y_true) @@ -497,25 +524,25 @@ def eval(test_data_loader, model, label_type, numCls, use_cuda, ORG_LABELS): info = { "macroPrec": macro_prec, "microPrec": micro_prec, - "samplePrec": sample_prec, + # "samplePrec": sample_prec, "macroRec": macro_rec, "microRec": micro_rec, - "sampleRec": sample_rec, + # "sampleRec": sample_rec, "macroF1": macro_f1, "microF1": micro_f1, - "sampleF1": sample_f1, + # "sampleF1": sample_f1, "macroF2": macro_f2, "microF2": micro_f2, - "sampleF2": sample_f2, + # "sampleF2": sample_f2, "HammingLoss": hamming_loss, - "subsetAcc": subset_acc, - "macroAcc": macro_acc, - "microAcc": micro_acc, - "sampleAcc": sample_acc, + # "subsetAcc": subset_acc, + # "macroAcc": macro_acc, + # "microAcc": micro_acc, + # "sampleAcc": sample_acc, "oneError": one_error, - "coverageError": coverage_error, - "rankLoss": rank_loss, - "labelAvgPrec": labelAvgPrec, + # "coverageError": coverage_error, + # "rankLoss": rank_loss, + # "labelAvgPrec": labelAvgPrec, "clsReport": cls_report, "multilabel_conf_mat": conf_mat, "class-wise Acc": cls_acc, @@ -530,25 +557,25 @@ def eval(test_data_loader, model, label_type, numCls, use_cuda, ORG_LABELS): info = { "macroPrec": macro_prec, "microPrec": micro_prec, - "samplePrec": sample_prec, + # "samplePrec": sample_prec, "macroRec": macro_rec, "microRec": micro_rec, - "sampleRec": sample_rec, + # "sampleRec": sample_rec, "macroF1": macro_f1, "microF1": micro_f1, - "sampleF1": sample_f1, + # "sampleF1": sample_f1, "macroF2": macro_f2, "microF2": micro_f2, - "sampleF2": sample_f2, + # "sampleF2": sample_f2, "HammingLoss": hamming_loss, "subsetAcc": subset_acc, "macroAcc": macro_acc, "microAcc": micro_acc, - "sampleAcc": sample_acc, + # "sampleAcc": sample_acc, "oneError": one_error, - "coverageError": coverage_error, - "rankLoss": rank_loss, - "labelAvgPrec": labelAvgPrec, + # "coverageError": coverage_error, + # "rankLoss": rank_loss, + # "labelAvgPrec": labelAvgPrec, "clsReport": cls_report, "conf_mat": conf_mat, "AverageAcc": aa} @@ -577,9 +604,10 @@ def train(trainloader, model, optimizer, lossfunc, label_type, epoch, use_cuda): if label_type == 'multi_label': labels = data["label"] else: - labels = (torch.max(data["label"], 1)[1]).type(torch.long) - - # move data to gpu if model is on gpu + labels = (torch.max(data["label"], 1)[1]).type(torch.long) + # labels = data["label"] + + # move data to gpu if model is on gpu if use_cuda: bands = bands.to(torch.device("cuda")) labels = labels.to(torch.device("cuda")) @@ -612,11 +640,11 @@ def val(valloader, model, optimizer, label_type, epoch, use_cuda): f2_score_ = F2_score() hamming_loss_ = Hamming_loss() subset_acc_ = Subset_accuracy() - acc_score_ = Accuracy_score() + # acc_score_ = Accuracy_score() one_err_ = One_error() - coverage_err_ = Coverage_error() - rank_loss_ = Ranking_loss() - labelAvgPrec_score_ = LabelAvgPrec_score() + # coverage_err_ = 
Coverage_error() + # rank_loss_ = Ranking_loss() + # labelAvgPrec_score_ = LabelAvgPrec_score() # set model to evaluation mode model.eval() @@ -650,8 +678,7 @@ def val(valloader, model, optimizer, label_type, epoch, use_cuda): labels = labels.cpu().numpy() # keep true & pred label at same loc. predicted_probs += list(probs) y_true += list(labels) - - + predicted_probs = np.asarray(predicted_probs) # convert predicted probabilities into one/multi-hot labels if label_type == 'multi_label': @@ -665,44 +692,51 @@ def val(valloader, model, optimizer, label_type, epoch, use_cuda): y_true = np.asarray(y_true) - macro_f1, micro_f1, sample_f1 = f1_score_(y_predicted, y_true) - macro_f2, micro_f2, sample_f2 = f2_score_(y_predicted, y_true) - macro_prec, micro_prec, sample_prec = prec_score_(y_predicted, y_true) - macro_rec, micro_rec, sample_rec = recal_score_(y_predicted, y_true) + # macro_f1, micro_f1, sample_f1 = f1_score_(y_predicted, y_true) + # macro_f2, micro_f2, sample_f2 = f2_score_(y_predicted, y_true) + # macro_prec, micro_prec, sample_prec = prec_score_(y_predicted, y_true) + # macro_rec, micro_rec, sample_rec = recal_score_(y_predicted, y_true) + # hamming_loss = hamming_loss_(y_predicted, y_true) + # subset_acc = subset_acc_(y_predicted, y_true) + # macro_acc, micro_acc, sample_acc = acc_score_(y_predicted, y_true) + + macro_f1, micro_f1 = f1_score_(y_predicted, y_true) + macro_f2, micro_f2 = f2_score_(y_predicted, y_true) + macro_prec, micro_prec = prec_score_(y_predicted, y_true) + macro_rec, micro_rec = recal_score_(y_predicted, y_true) hamming_loss = hamming_loss_(y_predicted, y_true) subset_acc = subset_acc_(y_predicted, y_true) - macro_acc, micro_acc, sample_acc = acc_score_(y_predicted, y_true) - + # macro_acc, micro_acc = acc_score_(y_predicted, y_true) # Note that below 4 ranking-based metrics are not applicable to single-label # (multi-class) classification, but they will still show the scores during # validation on tensorboard one_error = one_err_(predicted_probs, y_true) - coverage_error = coverage_err_(predicted_probs, y_true) - rank_loss = rank_loss_(predicted_probs, y_true) - labelAvgPrec = labelAvgPrec_score_(predicted_probs, y_true) + # coverage_error = coverage_err_(predicted_probs, y_true) + # rank_loss = rank_loss_(predicted_probs, y_true) + # labelAvgPrec = labelAvgPrec_score_(predicted_probs, y_true) info = { "macroPrec" : macro_prec, "microPrec" : micro_prec, - "samplePrec" : sample_prec, + # "samplePrec" : sample_prec, "macroRec" : macro_rec, "microRec" : micro_rec, - "sampleRec" : sample_rec, + # "sampleRec" : sample_rec, "macroF1" : macro_f1, "microF1" : micro_f1, - "sampleF1" : sample_f1, + # "sampleF1" : sample_f1, "macroF2" : macro_f2, "microF2" : micro_f2, - "sampleF2" : sample_f2, + # "sampleF2" : sample_f2, "HammingLoss" : hamming_loss, "subsetAcc" : subset_acc, - "macroAcc" : macro_acc, - "microAcc" : micro_acc, - "sampleAcc" : sample_acc, + # "macroAcc" : macro_acc, + # "microAcc" : micro_acc, + # "sampleAcc" : sample_acc, "oneError" : one_error, - "coverageError" : coverage_error, - "rankLoss" : rank_loss, - "labelAvgPrec" : labelAvgPrec + # "coverageError" : coverage_error, + # "rankLoss" : rank_loss, + # "labelAvgPrec" : labelAvgPrec } wandb.run.summary.update(info) @@ -710,12 +744,13 @@ def val(valloader, model, optimizer, label_type, epoch, use_cuda): wandb.log({tag: value, 'epoch': epoch}) # val_writer.add_scalar(tag, value, epoch) - print('Validation microPrec: {:.6f} microF1: {:.6f} sampleF1: {:.6f} microF2: {:.6f} sampleF2: {:.6f}'.format( + # 
print('Validation microPrec: {:.6f} microF1: {:.6f} sampleF1: {:.6f} microF2: {:.6f} sampleF2: {:.6f}'.format( + print('Validation microPrec: {:.6f} microF1: {:.6f} microF2: {:.6f}'.format( micro_prec, micro_f1, - sample_f1, + # sample_f1, micro_f2, - sample_f2 + # sample_f2 )) return micro_f1 diff --git a/classification/metrics.py b/classification/metrics.py index 3db79fa..7adfdad 100644 --- a/classification/metrics.py +++ b/classification/metrics.py @@ -103,11 +103,11 @@ def __init__(self): def forward(self, predict_labels, true_labels): - sample_prec = precision_score(true_labels, predict_labels, average='samples') + # sample_prec = precision_score(true_labels, predict_labels, average='samples') micro_prec = precision_score(true_labels, predict_labels, average='micro') macro_prec = precision_score(true_labels, predict_labels, average='macro') - return macro_prec, micro_prec, sample_prec + return macro_prec, micro_prec#, sample_prec class Recall_score(nn.Module): @@ -117,11 +117,11 @@ def __init__(self): def forward(self, predict_labels, true_labels): - sample_rec = recall_score(true_labels, predict_labels, average='samples') + # sample_rec = recall_score(true_labels, predict_labels, average='samples') micro_rec = recall_score(true_labels, predict_labels, average='micro') macro_rec = recall_score(true_labels, predict_labels, average='macro') - return macro_rec, micro_rec, sample_rec + return macro_rec, micro_rec#, sample_rec class F1_score(nn.Module): @@ -133,9 +133,9 @@ def forward(self, predict_labels, true_labels): macro_f1 = f1_score(true_labels, predict_labels, average="macro") micro_f1 = f1_score(true_labels, predict_labels, average="micro") - sample_f1 = f1_score(true_labels, predict_labels, average="samples") + # sample_f1 = f1_score(true_labels, predict_labels, average="samples") - return macro_f1, micro_f1, sample_f1 + return macro_f1, micro_f1#, sample_f1 class F2_score(nn.Module): @@ -147,9 +147,9 @@ def forward(self, predict_labels, true_labels): macro_f2 = fbeta_score(true_labels, predict_labels, beta=2, average="macro") micro_f2 = fbeta_score(true_labels, predict_labels, beta=2, average="micro") - sample_f2 = fbeta_score(true_labels, predict_labels, beta=2, average="samples") + # sample_f2 = fbeta_score(true_labels, predict_labels, beta=2, average="samples") - return macro_f2, micro_f2, sample_f2 + return macro_f2, micro_f2#, sample_f2 class Hamming_loss(nn.Module): @@ -188,9 +188,9 @@ def forward(self, predict_labels, true_labels): TP_sample = TP.sum(axis=1) union_sample = union.sum(axis=1) - sample_Acc = TP_sample/union_sample + # sample_Acc = TP_sample/union_sample - assert np.isfinite(sample_Acc).all(), 'Nan found in sample accuracy' + # assert np.isfinite(sample_Acc).all(), 'Nan found in sample accuracy' FP = (np.logical_and((predict_labels == 1), (true_labels == 0))).astype(int) TN = (np.logical_and((predict_labels == 0), (true_labels == 0))).astype(int) @@ -207,7 +207,7 @@ def forward(self, predict_labels, true_labels): micro_Acc = (TP_cls.mean() + TN_cls.mean()) / (TP_cls.mean() + FP_cls.mean() + TN_cls.mean() + FN_cls.mean()) - return macro_Acc, micro_Acc, sample_Acc.mean() + return macro_Acc, micro_Acc#, sample_Acc.mean() class One_error(nn.Module): diff --git a/classification/models/ResNet_bu.py b/classification/models/ResNet_bu.py new file mode 100644 index 0000000..ee5af90 --- /dev/null +++ b/classification/models/ResNet_bu.py @@ -0,0 +1,463 @@ +# Modified from Jian Kang, https://www.rsim.tu-berlin.de/menue/team/dring_jian_kang/ +# Modified by Yu-Lun Wu, TUM 
+import os +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.nn.init as init +from torch.autograd import Function +from torchvision import models + + +def count_parameters(model): + return sum(p.numel() for p in model.parameters() if p.requires_grad) + +def weights_init_kaiming(m): + classname = m.__class__.__name__ + if classname.find('Conv2d') != -1: + init.kaiming_normal_(m.weight.data) + +def fc_init_weights(m): + if type(m) == nn.Linear: + init.kaiming_normal_(m.weight.data) + + +class ResNet18(nn.Module): + def __init__(self, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet18(pretrained=False) + + self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + self.encoder = nn.Sequential( + self.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + self.FC = nn.Linear(512, numCls) + + self.apply(weights_init_kaiming) + self.apply(fc_init_weights) + + def forward(self, x): + + x = self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + return logits + + + + +class ResNet34(nn.Module): + def __init__(self, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet34(pretrained=False) + + self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + self.encoder = nn.Sequential( + self.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + self.FC = nn.Linear(512, numCls) + + self.apply(weights_init_kaiming) + self.apply(fc_init_weights) + + def forward(self, x): + + x = self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + return logits + + + +class ResNet50(nn.Module): + def __init__(self, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet50(pretrained=False) + + self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + self.encoder = nn.Sequential( + self.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + self.FC = nn.Linear(2048, numCls) + + self.apply(weights_init_kaiming) + self.apply(fc_init_weights) + + def forward(self, x): + x = self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + return logits + +class ResNet50_1x1(nn.Module): + def __init__(self, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet50(pretrained=False) + + self.Conv1x1Block = nn.Sequential( + nn.Conv2d(n_inputs, 3, kernel_size=1, stride=1, bias=False), + nn.BatchNorm2d(3), + nn.ReLU(inplace=True) + ) + + self.conv1 = nn.Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + # self.conv1 = nn.Conv2d(3, 64, kernel_size=(256, 256), stride=(2, 2), padding=(3, 3), bias=False) + # self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + self.encoder = nn.Sequential( + self.Conv1x1Block, + self.conv1, # self.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + + self.FC = nn.Linear(2048, numCls) + + self.apply(weights_init_kaiming) + self.apply(fc_init_weights) + + def forward(self, x): + x = self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + 
return logits + + +class Moco_1x1(nn.Module): + def __init__(self, mocoModel, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet50(pretrained=False) + resnet.load_state_dict(mocoModel["state_dict"]) + + print("n_inputs :",n_inputs) + + Conv1x1Block = nn.Sequential( + nn.Conv2d(n_inputs, 3, kernel_size=1, stride=1, bias=False), + nn.BatchNorm2d(3), + nn.ReLU(inplace=True) + ) + + # Update input module + input_module_pre_trained = mocoModel["input_module"] + conv1x1_default_state_dict = Conv1x1Block.state_dict() + migrated_data_dict = {} + for k, v in input_module_pre_trained.items(): + if k == "input_module.net.0.weight": + if n_inputs == 10: + # Set the value only if the n_inputs are 10 (i.e only S2). If they are 12 (both S2 and S1), + # the below assignment will result in an error during execution. + # Error: "size mismatch for 0.weight: copying a param with shape torch.Size([3, 10, 1, 1]) from checkpoint, + # the shape in current model is torch.Size([3, 12, 1, 1])" + # The reason is that during pre-training, we have the input set to 10 (for the query block) + migrated_data_dict["0.weight"] = input_module_pre_trained["input_module.net.0.weight"] + + elif k == "input_module.net.1.weight": + migrated_data_dict["1.weight"] = input_module_pre_trained["input_module.net.1.weight"] + elif k == "input_module.net.1.bias": + migrated_data_dict["1.bias"] = input_module_pre_trained["input_module.net.1.bias"] + elif k == "input_module.net.1.running_mean": + migrated_data_dict["1.running_mean"] = input_module_pre_trained["input_module.net.1.running_mean"] + elif k == "input_module.net.1.running_var": + migrated_data_dict["1.running_var"] = input_module_pre_trained["input_module.net.1.running_var"] + + conv1x1_default_state_dict.update(migrated_data_dict) + Conv1x1Block.load_state_dict(conv1x1_default_state_dict) + + # self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + self.encoder = nn.Sequential( + Conv1x1Block, + # self.conv1, + resnet.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + + self.FC = nn.Linear(2048, numCls) + + # self.apply(weights_init_kaiming) + self.apply(fc_init_weights) + + def forward(self, x): + x = self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + return logits + +class Moco_1x1RND(nn.Module): + def __init__(self, mocoModel, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet50(pretrained=False) + + print("n_inputs :",n_inputs) + + self.Conv1x1Block = nn.Sequential( + nn.Conv2d(n_inputs, 3, kernel_size=1, stride=1, bias=False), + nn.BatchNorm2d(3), + nn.ReLU(inplace=True) + ) + + # self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + self.encoder = nn.Sequential( + self.Conv1x1Block, + resnet.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + + self.FC = nn.Linear(2048, numCls) + + # We don't need to initialize here as we are transferring the weights + #self.apply(weights_init_kaiming) + self.apply(fc_init_weights) + + def forward(self, x): + x = self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + return logits + +# This class uses Conv1x1Block block, but it doesn't get initialized from the pre-trained model. 
+# Only the backbone gets initialized from the pre-trained model +class Moco(nn.Module): + def __init__(self, mocoModel, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet50(pretrained=False) + resnet.load_state_dict(mocoModel["state_dict"]) + + self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + self.encoder = nn.Sequential( + self.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + + self.FC = nn.Linear(2048, numCls) + + self.apply(weights_init_kaiming) + self.apply(fc_init_weights) + + def forward(self, x): + x = self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + return logits + +#class ResNet50_em512(nn.Module): +# def __init__(self, n_inputs = 12, numCls = 17): +# super().__init__() +# +# resnet = models.resnet50(pretrained=False) +# +# self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) +# self.encoder = nn.Sequential( +# self.conv1, +# resnet.bn1, +# resnet.relu, +# resnet.maxpool, +# resnet.layer1, +# resnet.layer2, +# resnet.layer3, +# resnet.layer4, +# resnet.avgpool +# ) +# self.FC1 = nn.Linear(2048, 512) +# self.FC2 = nn.Linear(512, numCls) +# +# self.apply(weights_init_kaiming) +# self.apply(fc_init_weights) +# +# def forward(self, x): +# x = self.encoder(x) +# x = x.view(x.size(0), -1) +# +# x = self.FC1(x) +# logits = self.FC2(x) +# +# return logits +# +# +#class ResNet50_em(nn.Module): +# def __init__(self, n_inputs = 12, numCls = 17): +# super().__init__() +# +# resnet = models.resnet50(pretrained=False) +# +# self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) +# self.encoder = nn.Sequential( +# self.conv1, +# resnet.bn1, +# resnet.relu, +# resnet.maxpool, +# resnet.layer1, +# resnet.layer2, +# resnet.layer3, +# resnet.layer4, +# resnet.avgpool +# ) +# self.FC = nn.Linear(2048, numCls) +# +# self.apply(weights_init_kaiming) +# self.apply(fc_init_weights) +# +# def forward(self, x): +# x = self.encoder(x) +# x = x.view(x.size(0), -1) +# +# logits = self.FC(x) +# +# return logits, x + +class ResNet101(nn.Module): + def __init__(self, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet101(pretrained=False) + + self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + self.encoder = nn.Sequential( + self.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + self.FC = nn.Linear(2048, numCls) + + self.apply(weights_init_kaiming) + self.apply(fc_init_weights) + + def forward(self, x): + x = self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + return logits + + + +class ResNet152(nn.Module): + def __init__(self, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet152(pretrained=False) + + self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + self.encoder = nn.Sequential( + self.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + self.FC = nn.Linear(2048, numCls) + + self.apply(weights_init_kaiming) + self.apply(fc_init_weights) + + def forward(self, x): + x = self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + return logits + + +if __name__ 
== "__main__": + + inputs = torch.randn((1, 12, 256, 256)) # (how many images, spectral channels, pxl, pxl) + + net = ResNet18() + #net = ResNet34() + #net = ResNet50() + #net = ResNet101() + #net = ResNet152() + + outputs = net(inputs) + + print(outputs) + print(outputs.shape) + + numParams = count_parameters(net) + + print(f"{numParams:.2E}") + + diff --git a/classification/models/ResNet_nnedition.py b/classification/models/ResNet_nnedition.py new file mode 100644 index 0000000..0b63dc5 --- /dev/null +++ b/classification/models/ResNet_nnedition.py @@ -0,0 +1,467 @@ +# Modified from Jian Kang, https://www.rsim.tu-berlin.de/menue/team/dring_jian_kang/ +# Modified by Yu-Lun Wu, TUM +import os +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.nn.init as init +from torch.autograd import Function +from torchvision import models + + +def count_parameters(model): + return sum(p.numel() for p in model.parameters() if p.requires_grad) + +def weights_init_kaiming(m): + classname = m.__class__.__name__ + if classname.find('Conv2d') != -1: + init.kaiming_normal_(m.weight.data) + +def fc_init_weights(m): + if type(m) == nn.Linear: + init.kaiming_normal_(m.weight.data) + + +class ResNet18(nn.Module): + def __init__(self, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet18(pretrained=False) + + self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + self.encoder = nn.Sequential( + self.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + self.FC = nn.Linear(512, numCls) + + self.apply(weights_init_kaiming) + self.apply(fc_init_weights) + + def forward(self, x): + + x = self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + return logits + + + + +class ResNet34(nn.Module): + def __init__(self, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet34(pretrained=False) + + self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + self.encoder = nn.Sequential( + self.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + self.FC = nn.Linear(512, numCls) + + self.apply(weights_init_kaiming) + self.apply(fc_init_weights) + + def forward(self, x): + + x = self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + return logits + + + +class ResNet50(nn.Module): + def __init__(self, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet50(pretrained=False) + + self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + self.encoder = nn.Sequential( + self.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + self.FC = nn.Linear(2048, numCls) + + self.apply(weights_init_kaiming) + self.apply(fc_init_weights) + + def forward(self, x): + x = self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + return logits + +class ResNet50_1x1(nn.Module): + def __init__(self, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet50(pretrained=False) + + self.Conv1x1Block = nn.Sequential( + nn.Conv2d(n_inputs, 3, kernel_size=1, stride=1, bias=False), + nn.BatchNorm2d(3), + nn.ReLU(inplace=True) + ) + + self.conv1 = nn.Conv2d(3, 64, 
kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + # self.conv1 = nn.Conv2d(3, 64, kernel_size=(256, 256), stride=(2, 2), padding=(3, 3), bias=False) + # self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + self.encoder = nn.Sequential( + self.Conv1x1Block, + self.conv1, # self.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + + self.FC = nn.Linear(2048, numCls) + + self.apply(weights_init_kaiming) + self.apply(fc_init_weights) + + def forward(self, x): + x = self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + return logits + + +class Moco_1x1(nn.Module): + def __init__(self, mocoModel, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet50(pretrained=False) + resnet.load_state_dict(mocoModel["state_dict"]) + + print("n_inputs :",n_inputs) + + Conv1x1Block = nn.Sequential( + nn.Conv2d(n_inputs, 3, kernel_size=1, stride=1, bias=False), + nn.BatchNorm2d(3), + nn.ReLU(inplace=True) + ) + + # Update input module + input_module_pre_trained = mocoModel["input_module"] + conv1x1_default_state_dict = Conv1x1Block.state_dict() + migrated_data_dict = {} + for k, v in input_module_pre_trained.items(): + if k == "input_module.net.0.weight": + if n_inputs == 10: + # Set the value only if the n_inputs are 10 (i.e only S2). If they are 12 (both S2 and S1), + # the below assignment will result in an error during execution. + # Error: "size mismatch for 0.weight: copying a param with shape torch.Size([3, 10, 1, 1]) from checkpoint, + # the shape in current model is torch.Size([3, 12, 1, 1])" + # The reason is that during pre-training, we have the input set to 10 (for the query block) + migrated_data_dict["0.weight"] = input_module_pre_trained["input_module.net.0.weight"] + + elif k == "input_module.net.1.weight": + migrated_data_dict["1.weight"] = input_module_pre_trained["input_module.net.1.weight"] + elif k == "input_module.net.1.bias": + migrated_data_dict["1.bias"] = input_module_pre_trained["input_module.net.1.bias"] + elif k == "input_module.net.1.running_mean": + migrated_data_dict["1.running_mean"] = input_module_pre_trained["input_module.net.1.running_mean"] + elif k == "input_module.net.1.running_var": + migrated_data_dict["1.running_var"] = input_module_pre_trained["input_module.net.1.running_var"] + + conv1x1_default_state_dict.update(migrated_data_dict) + Conv1x1Block.load_state_dict(conv1x1_default_state_dict) + + # self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + self.encoder = nn.Sequential( + Conv1x1Block, + # self.conv1, + resnet.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + + self.FC = nn.Linear(2048, numCls) + + # self.apply(weights_init_kaiming) + self.apply(fc_init_weights) + + def forward(self, x): + x = self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + return logits + +class Moco_1x1RND(nn.Module): + def __init__(self, mocoModel, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet50(pretrained=False) + + print("n_inputs :",n_inputs) + + self.Conv1x1Block = nn.Sequential( + nn.Conv2d(n_inputs, 3, kernel_size=1, stride=1, bias=False), + nn.BatchNorm2d(3), + nn.ReLU(inplace=True) + ) + + # self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + 
self.encoder = nn.Sequential( + self.Conv1x1Block, + resnet.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + + self.FC = nn.Linear(2048, numCls) + + # We don't need to initialize here as we are transferring the weights + #self.apply(weights_init_kaiming) + self.apply(fc_init_weights) + + def forward(self, x): + x = self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + return logits + +# This class uses Conv1x1Block block, but it doesn't get initialized from the pre-trained model. +# Only the backbone gets initialized from the pre-trained model +class Moco(nn.Module): + def __init__(self, mocoModel, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet50(pretrained=False) + resnet.load_state_dict(mocoModel["state_dict"]) + + # self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + conv1.apply(weights_init_kaiming) + self.encoder = nn.Sequential( + # self.conv1, + conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + + self.FC = nn.Linear(2048, numCls) + + # self.conv1.apply(weights_init_kaiming) + + self.apply(fc_init_weights) + + def forward(self, x): + x = self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + return logits + +#class ResNet50_em512(nn.Module): +# def __init__(self, n_inputs = 12, numCls = 17): +# super().__init__() +# +# resnet = models.resnet50(pretrained=False) +# +# self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) +# self.encoder = nn.Sequential( +# self.conv1, +# resnet.bn1, +# resnet.relu, +# resnet.maxpool, +# resnet.layer1, +# resnet.layer2, +# resnet.layer3, +# resnet.layer4, +# resnet.avgpool +# ) +# self.FC1 = nn.Linear(2048, 512) +# self.FC2 = nn.Linear(512, numCls) +# +# self.apply(weights_init_kaiming) +# self.apply(fc_init_weights) +# +# def forward(self, x): +# x = self.encoder(x) +# x = x.view(x.size(0), -1) +# +# x = self.FC1(x) +# logits = self.FC2(x) +# +# return logits +# +# +#class ResNet50_em(nn.Module): +# def __init__(self, n_inputs = 12, numCls = 17): +# super().__init__() +# +# resnet = models.resnet50(pretrained=False) +# +# self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) +# self.encoder = nn.Sequential( +# self.conv1, +# resnet.bn1, +# resnet.relu, +# resnet.maxpool, +# resnet.layer1, +# resnet.layer2, +# resnet.layer3, +# resnet.layer4, +# resnet.avgpool +# ) +# self.FC = nn.Linear(2048, numCls) +# +# self.apply(weights_init_kaiming) +# self.apply(fc_init_weights) +# +# def forward(self, x): +# x = self.encoder(x) +# x = x.view(x.size(0), -1) +# +# logits = self.FC(x) +# +# return logits, x + +class ResNet101(nn.Module): + def __init__(self, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet101(pretrained=False) + + self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + self.encoder = nn.Sequential( + self.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + self.FC = nn.Linear(2048, numCls) + + self.apply(weights_init_kaiming) + self.apply(fc_init_weights) + + def forward(self, x): + x = 
self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + return logits + + + +class ResNet152(nn.Module): + def __init__(self, n_inputs = 12, numCls = 17): + super().__init__() + + resnet = models.resnet152(pretrained=False) + + self.conv1 = nn.Conv2d(n_inputs, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) + self.encoder = nn.Sequential( + self.conv1, + resnet.bn1, + resnet.relu, + resnet.maxpool, + resnet.layer1, + resnet.layer2, + resnet.layer3, + resnet.layer4, + resnet.avgpool + ) + self.FC = nn.Linear(2048, numCls) + + self.apply(weights_init_kaiming) + self.apply(fc_init_weights) + + def forward(self, x): + x = self.encoder(x) + x = x.view(x.size(0), -1) + + logits = self.FC(x) + + return logits + + +if __name__ == "__main__": + + inputs = torch.randn((1, 12, 256, 256)) # (how many images, spectral channels, pxl, pxl) + + net = ResNet18() + #net = ResNet34() + #net = ResNet50() + #net = ResNet101() + #net = ResNet152() + + outputs = net(inputs) + + print(outputs) + print(outputs.shape) + + numParams = count_parameters(net) + + print(f"{numParams:.2E}") + + diff --git a/classification/run_finetune.sh b/classification/run_finetune.sh new file mode 100644 index 0000000..b33e3cd --- /dev/null +++ b/classification/run_finetune.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +for dataset in bigearthnet sen12ms +do + for lr in 0.001 0.0005 0.0001 0.00005 0.00001 + do + for epoch in 200 + do + for label_tp in single_label multi_label + do + for model in Moco # Moco_1x1RND Moco_1x1 + do + python classification/main_train.py --exp_name finetune --simple_scheme \ + --lr ${lr} --use_lr_step --lr_step_size 30 --decay 1e-5 \ + --pt_name electric-mountain-33 --pt_dir pretrained/moco \ + --batch_size 64 --num_workers 4 --data_size 1024 \ + --dataset ${dataset} --label_split_dir splits \ + --label_type ${label_tp} \ + --model ${model} \ + --epochs ${epoch} \ + --sensor_type s1s2 --eval + done + done + done + done +done \ No newline at end of file diff --git a/classification/run_ft_s1s2.sh b/classification/run_ft_s1s2.sh deleted file mode 100644 index 487d5cf..0000000 --- a/classification/run_ft_s1s2.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash - -for lr in 0.00001 0.00005 0.0001 0.001 0.001 -do - for epoch in 200 - do - for label_tp in single_label multi_label - do - for model in Moco_1x1RND Moco_1x1 Moco - do - python classification/main_train.py --exp_name finetune --IGBP_simple \ - --lr ${lr} --use_lr_step --lr_step_size 30 --decay 1e-5 \ - --pt_name dainty-dragon-14 --pt_dir pretrained/moco \ - --batch_size 64 --num_workers 4 --data_size 1024 \ - --data_dir data/sen12ms/data --label_split_dir splits \ - --label_type ${label_tp} \ - --model ${model} \ - --epochs ${epoch} \ - --use_s2 --use_s1 --eval - done - done - done -done -# \ No newline at end of file diff --git a/classification/run_ft_s2.sh b/classification/run_ft_s2.sh deleted file mode 100644 index a973cdc..0000000 --- a/classification/run_ft_s2.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash - -for lr in 0.00001 0.00005 0.0001 0.001 0.001 -do - for epoch in 200 - do - for label_tp in single_label multi_label - do - for model in Moco_1x1RND Moco_1x1 Moco - do - python classification/main_train.py --exp_name finetune --IGBP_simple \ - --lr ${lr} --use_lr_step --lr_step_size 30 --decay 1e-5 \ - --pt_name dainty-dragon-14 --pt_dir pretrained/moco \ - --batch_size 64 --num_workers 4 --data_size 1024 \ - --data_dir data/sen12ms/data --label_split_dir splits \ - --label_type ${label_tp} \ - --model ${model} \ - --epochs 
${epoch} \ - --use_s2 --eval - done - done - done -done -# \ No newline at end of file diff --git a/classification/run_supervised.sh b/classification/run_supervised.sh new file mode 100644 index 0000000..d0f4a36 --- /dev/null +++ b/classification/run_supervised.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +for dataset in bigearthnet # sen12ms +do + for lr in 0.001 0.0005 0.0001 0.00005 0.00001 + do + for epoch in 200 + do + for label_tp in single_label multi_label + do + for model in Supervised # Supervised_1x1 + do + python classification/main_train2.py --exp_name sup_learning --simple_scheme \ + --lr ${lr} --use_lr_step --lr_step_size 30 --decay 1e-5 \ + --batch_size 64 --num_workers 4 --data_size 1024 \ + --dataset ${dataset} --label_split_dir splits \ + --label_type ${label_tp} \ + --model ${model} \ + --epochs ${epoch} \ + --sensor_type s1s2 --eval + done + done + done + done +done \ No newline at end of file diff --git a/classification/run_supervised_s1s2.sh b/classification/run_supervised_s1s2.sh deleted file mode 100644 index 5711d61..0000000 --- a/classification/run_supervised_s1s2.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -for lr in 0.00001 0.00005 0.0001 0.001 0.001 -do - for epoch in 200 - do - for label_tp in single_label multi_label - do - for model in Supervised_1x1 Supervised - do - python classification/main_train.py --exp_name sup_learning --IGBP_simple \ - --lr ${lr} --use_lr_step --lr_step_size 30 --decay 1e-5 \ - --batch_size 64 --num_workers 4 --data_size 1024 \ - --data_dir data/sen12ms/data --label_split_dir splits \ - --label_type ${label_tp} \ - --model ${model} \ - --epochs ${epoch} \ - --use_s1 --use_s2 --eval - done - done - done -done -# \ No newline at end of file diff --git a/classification/run_supervised_s2.sh b/classification/run_supervised_s2.sh deleted file mode 100644 index 6efec37..0000000 --- a/classification/run_supervised_s2.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -for lr in 0.00001 0.00005 0.0001 0.001 0.001 -do - for epoch in 200 - do - for label_tp in single_label multi_label - do - for model in Supervised_1x1 Supervised - do - python classification/main_train.py --exp_name sup_learning --IGBP_simple \ - --lr ${lr} --use_lr_step --lr_step_size 30 --decay 1e-5 \ - --batch_size 64 --num_workers 4 --data_size 1024 \ - --data_dir data/sen12ms/data --label_split_dir splits \ - --label_type ${label_tp} \ - --model ${model} \ - --epochs ${epoch} \ - --use_s2 --eval - done - done - done -done -# \ No newline at end of file
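
For reviewers who want to sanity-check the MoCo → ResNet-50 conversion step referenced in the README changes above: the finetuning code (the new `Moco` / `Moco_1x1` classes) expects a checkpoint whose `state_dict` loads directly into torchvision's `resnet50`. Below is a minimal, hypothetical sketch of the kind of key remapping such a converter typically performs. It is not the repository's `convert_moco_to_resnet50.py` (which also handles W&B run IDs, the 12-channel `-n` option, and the `-o` output directory), and the `module.encoder_q.` prefix and the output filename are assumptions based on the usual MoCo checkpoint layout.

```python
# Hypothetical sketch only -- not the repository's convert_moco_to_resnet50.py.
# Assumes the MoCo checkpoint stores the query encoder under "module.encoder_q.*",
# which is the common MoCo naming; the actual checkpoints here may differ.
import torch
from torchvision import models


def convert_moco_backbone(ckpt_path: str, out_path: str) -> None:
    ckpt = torch.load(ckpt_path, map_location="cpu")
    state = ckpt.get("state_dict", ckpt)

    prefix = "module.encoder_q."
    # keep the query-encoder weights, drop its projection/fc head
    backbone = {
        k[len(prefix):]: v
        for k, v in state.items()
        if k.startswith(prefix) and not k.startswith(prefix + "fc")
    }

    resnet = models.resnet50(pretrained=False)
    # strict=False: the classifier head is re-initialized by the finetuning model classes
    missing, unexpected = resnet.load_state_dict(backbone, strict=False)
    print("missing keys:", missing)
    print("unexpected keys:", unexpected)

    # save in the {"state_dict": ...} form that the new Moco model classes load
    torch.save({"state_dict": resnet.state_dict()}, out_path)


if __name__ == "__main__":
    # hypothetical paths, following the naming used in the README above
    convert_moco_backbone(
        "pretrained/moco/silvery-oath7-2rr3864e.pth",
        "pretrained/moco/silvery-oath7-2rr3864e_bb_converted.pth",
    )
```

A converted file of this form can then be referenced through the `--pt_dir` / `--pt_name` arguments in the finetune commands shown in the README diff.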