# GCAM-CDR-policy/plotting_script.py at master · PEESEgroup/GCAM-CDR-policy · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
import os
import constants
import plotting
import data_manipulation
import constants as c
import pandas as pd
import utilities
import numpy as np
import verification
import numpy_financial as npf


def main(config_fname, reference_year):
    """
    Entry point for the scripts that produce the figures and tables used in the article
    :param config_fname: scenario identifier; every "_" is turned into "/" and the result is used as the
        sub-folder in which plots and tables are saved
    :param reference_year: year to analyze outputs
    :return: N/A
    """
    # the identifier doubles as a nested output directory
    config_fname = config_fname.replace("_", "/")

    # make sure both output folders exist before any analysis step runs
    for output_root in ("./data/data_analysis/images/", "data/data_analysis/supplementary_tables/"):
        os.makedirs(output_root + config_fname + "/", exist_ok=True)

    # run every analysis step, in this fixed order, with the same two arguments
    analysis_steps = (CDR_cost, CDR_tech, social_cost, market_share, subsidy_expiration, costs_and_benefits)
    for step in analysis_steps:
        step(config_fname, reference_year)


def costs_and_benefits(config_fname, reference_year):
    """
    calculate the costs and benefits for a given policy scenario or baseline pathway

    Reads the "policy cost by technology.csv" tables of the scenario and of the baseline, splits them into cost
    categories with get_CB_dfs, nets government procurement out of the scenario CDR market when a scenario file
    provides "exo_CDR_demand_verify" data, and passes the results to fiscal_cost_benefit_analysis and
    social_cost_effectiveness.
    :param config_fname: name of the policy scenario and baseline pathway, formatted as "<scenario>/<baseline>"
    :param reference_year: not needed
    :return: N/A
    """
    # process scenario data
    name_parts = config_fname.split("/")
    baseline = name_parts[1]
    scenario = name_parts[0]
    # yearly columns 2025..2050 followed by the two label columns kept in every cost table
    year_cols = [str(2025 + i) for i in range(0, 26)]
    npv_cols = year_cols + ["cost_type", "Units"]

    # grab scenario config files
    xml_scenario_files = utilities.build_from_scenario(scenario)

    # get the costs of the scenario
    scenario_df = pd.read_csv("data/data_analysis/supplementary_tables/" + scenario + "/" + baseline +
                              "/policy cost by technology.csv")
    # get the costs of the baseline
    baseline_df = pd.read_csv("data/data_analysis/supplementary_tables/" + baseline + "/" + baseline +
                              "/policy cost by technology.csv")

    # only the subsidy and CDR market tables of the baseline are used below
    baseline_subsidy, _baseline_deadweight, _baseline_CTax, baseline_market, _baseline_innovation_costs = get_CB_dfs(
        baseline_df, npv_cols)
    # keep only the columns used below; this selection raises a KeyError if one of them is missing
    scenario_df = scenario_df[
        ["product_price", "technology_price", "2020", "2025", "2030", "2035", "2040", "2045", "2050", "Units",
         "scenario", "baseline", "product"]]
    scenario_subsidy, scenario_deadweight, _scenario_CTax, scenario_market, scenario_innovation_costs = get_CB_dfs(
        scenario_df, npv_cols)

    # discount rates used for the NPV calculations
    interest_rates = [0.02, 0.12, 0.20]
    # 12% is recommended by CATF, 2% is median of EPA report - https://www.epa.gov/system/files/documents/2023-12/epa_scghg_2023_report_final.pdf
    npv_net_zero = {}
    NPV_CB = {}

    # benefit/cost calculations
    # calculate the procurement costs and remove that much money from the CDR market
    # get procurement dollar amounts, if they exist
    procurement_costs = pd.Series()
    for xml_file in xml_scenario_files:
        if "exo_CDR_demand_verify" in xml_file.data_files:
            # avoid double counting subsidies - doesn't overwrite baseline data
            plot_years = [str(k) for k in constants.GCAMConstants.plotting_x]
            scenario_subsidy_calc = scenario_subsidy[plot_years]
            baseline_subsidy_calc = baseline_subsidy[plot_years]
            double_subsidy = scenario_subsidy_calc - baseline_subsidy_calc

            # get CDR demand
            CDR_demand = utilities.open_csv(xml_file.data_files)
            CDR_demand = CDR_demand["exo_CDR_demand_verify"]
            CDR_demand = pd.DataFrame(CDR_demand)

            # add in the double subsidy
            # relabel the subsidy columns with the demand table's column labels so the rows stack
            double_subsidy.columns = CDR_demand.columns
            # this assignment requires double_subsidy to have exactly one row
            double_subsidy.index = ["subsidy"]
            CDR_demand = pd.concat([CDR_demand, double_subsidy])
            CDR_demand = CDR_demand.T
            if "calc-avg-price" in CDR_demand.columns:
                # calculate the procurement costs
                CDR_demand["procurement_cost"] = CDR_demand['calc-avg-price'] * CDR_demand['govt-procurement'] - \
                                                 CDR_demand['subsidy']
                CDR_demand = CDR_demand.reset_index()
                # NOTE(review): "level_0" presumably holds the year labels of a multi-level header - confirm
                CDR_demand["year"] = CDR_demand["level_0"].astype(str)
                CDR_demand = CDR_demand.set_index("year")
                procurement_costs = CDR_demand["procurement_cost"]
                # turn the yearly series into a one-row table labelled like the other cost tables
                procurement_costs = pd.DataFrame(
                    pd.concat([procurement_costs, pd.Series(["procurement costs"], index=["cost_type"])])).T
                procurement_costs = data_manipulation.interpolate(procurement_costs, "truncated")
                procurement_costs["Units"] = "Million 2025$USD/yr"

                # remove procurement costs from the market
                remove_procure = pd.merge(scenario_market, procurement_costs, "inner", "Units",
                                          suffixes=("_market", "_procure"))
                # a dedicated loop name is used here so the outer loop variable is not rebound
                for year in year_cols:
                    # subtract off the amount spent on procurement from the market
                    remove_procure[year] = remove_procure[year + "_market"] - remove_procure[year + "_procure"]
                # replace scenario market df
                remove_procure["cost_type"] = "CDR Market"
                scenario_market = remove_procure[npv_cols]

    # combine costs
    fiscal_costs = pd.concat([scenario_subsidy, procurement_costs, scenario_innovation_costs])
    fiscal_costs = fiscal_costs.groupby(["Units"]).sum(min_count=1)

    fiscal_cost_benefit_analysis(NPV_CB, baseline_market, config_fname, fiscal_costs, interest_rates, scenario_market)

    social_cost_effectiveness(config_fname, interest_rates, npv_net_zero, procurement_costs, scenario_deadweight,
                              scenario_df, scenario_innovation_costs, scenario_market, scenario_subsidy)


def social_cost_effectiveness(config_fname, interest_rates, npv_net_zero, procurement_costs, scenario_deadweight,
                              scenario_df, scenario_innovation_costs, scenario_market, scenario_subsidy):
    """
    Calculate the social cost-effectiveness of a given policy scenario

    Two files are always written to the scenario's supplementary table folder: "npv of achieving net zero.csv"
    and "interpolated costs of achieving net zero.csv". If any column of scenario_df is entirely null, both files
    contain a single "error" row instead of results.
    :param config_fname: the policy scenario name
    :param interest_rates: discount rates; each one must be a key of the hard-coded nzn_cost table below
    :param npv_net_zero: dict that receives the labelled results (it is updated in place)
    :param procurement_costs: the costs of procurement
    :param scenario_deadweight: the amount of deadweight loss
    :param scenario_df: dataframe containing scenario data
    :param scenario_innovation_costs: the amount of fiscal spend on innovation
    :param scenario_market: dataframe containing market information
    :param scenario_subsidy: dataframe containing subsidy information
    :return: .csv files containing calculations of cost
    """
    out_dir = "data/data_analysis/supplementary_tables/" + str(config_fname).replace("_", "/") + "/"
    npv_path = out_dir + "/npv of achieving net zero.csv"
    interpolated_path = out_dir + "/interpolated costs of achieving net zero.csv"

    # if there is a missing C tax period (a column that is entirely null), the scenario is excluded from the analysis
    if scenario_df.isnull().all().any():
        # build the frame with one row: assigning a scalar to a column of an empty frame would leave it with
        # no rows and the message would never reach the file
        CB_error = pd.DataFrame({"error": ["error in computing the costs and benefits in this scenario"]})
        CB_error.to_csv(npv_path)
        CB_error.to_csv(interpolated_path)
        return

    # combine the information that is relevant to meeting the net-zero 2050 mandate:
    # all fiscal costs and abatement costs
    net_zero_mandate = pd.concat(
        [scenario_subsidy, procurement_costs, scenario_innovation_costs, scenario_deadweight, scenario_market])
    # a plain Series in the concat shows up as a column named 0; drop it together with incomplete rows
    net_zero_mandate = net_zero_mandate.drop(columns=["0", 0], errors='ignore')
    net_zero_mandate = net_zero_mandate.dropna()

    # reference costs that the scenario totals are compared against ("nzn" in the original comments),
    # keyed by discount rate, plus the 2050 value
    nzn_cost = {
        0.02: 5.417576989947383,
        0.12: 1.2146232405976478,
        0.20: 0.5051764571050655,
        "2050": 0.842909016916415,
    }
    year_cols = [str(2025 + i) for i in range(0, 26)]

    # calculate the npv of each cost category for every discount rate
    for k in interest_rates:
        net_zero_mandate["npv_" + str(k)] = net_zero_mandate.apply(
            lambda row: npf.npv(rate=k, values=row[year_cols].values) / 1000000, axis=1)

    # the CDR market row and the per-unit totals do not depend on the discount rate, so compute them once
    cdr_market = net_zero_mandate[net_zero_mandate["cost_type"] == "CDR Market"]
    net_zero_total_cost = net_zero_mandate.groupby(["Units"]).sum(min_count=1)

    for k in interest_rates:
        # total PV of the CDR market and of all costs at this discount rate
        CDR_market_size = cdr_market["npv_" + str(k)].values[0]
        total_net_zero_cost = net_zero_total_cost["npv_" + str(k)].values[0]

        # add labels to data
        npv_net_zero[
            "NPV of Net Zero Mandate" + " | " + str(k * 100) + "%" + " | Trillion $USD"] = total_net_zero_cost
        # percentage cost decrease necessary to get net zero compared to nzn
        npv_net_zero["Cost Decrease necessary in PV CDR market" + " | " + str(
            k * 100) + "%" + " | % of CDR market"] = 100 * (total_net_zero_cost - nzn_cost[k]) / CDR_market_size

    # calculate cost decreases in 2050; computed outside the rate loop so an empty rate list cannot leave
    # these names undefined
    CDR_market_2050 = cdr_market["2050"].values[0] / 1000000  # convert millions to trillions
    total_market_2050 = net_zero_total_cost["2050"].values[0] / 1000000  # convert millions to trillions
    npv_net_zero["Cost Decrease necessary in 2050"] = 100 * (total_market_2050 - nzn_cost["2050"]) / CDR_market_2050
    npv_net_zero["Abatement size in 2050"] = total_market_2050

    # prepare data for output
    npv_table = pd.DataFrame(npv_net_zero, index=[0])
    npv_table.to_csv(npv_path)
    net_zero_mandate.to_csv(interpolated_path)


def fiscal_cost_benefit_analysis(NPV_CB, baseline_market, config_fname, fiscal_costs, interest_rates, scenario_market):
    """
    Compare the NPV of fiscal costs with the NPV change in the CDR market and write the result to a .csv
    :param NPV_CB: dict that receives the labelled costs and benefits (it is updated in place)
    :param baseline_market: dataframe of the baseline CDR market
    :param config_fname: scenario name
    :param fiscal_costs: dataframe of the fiscal costs
    :param interest_rates: discount rates to evaluate
    :param scenario_market: dataframe of the scenario CDR market
    :return: "cost-benefit-analysis.csv" in the scenario's supplementary table folder
    """

    def without_labels(frame):
        # remove identifying information so only the yearly values remain
        return frame.drop(columns=['Units', 'cost_type', "0", 0], errors='ignore').copy(deep=True)

    # complete cost benefit analysis for fiscal costs, one discount rate at a time
    for rate in interest_rates:
        cost_flows = without_labels(fiscal_costs)
        scenario_flows = without_labels(scenario_market)
        baseline_flows = without_labels(baseline_market)

        # only the first row of each table enters the NPV
        cost_npv = npf.npv(rate=rate, values=cost_flows.values[0])
        scenario_npv = npf.npv(rate=rate, values=scenario_flows.values[0])
        baseline_npv = npf.npv(rate=rate, values=baseline_flows.values[0])
        # benefits are defined as lower costs in the CDR market compared to the baseline market,
        # so benefits are negative and costs are positive
        benefits = scenario_npv - baseline_npv

        # save cost benefit information
        rate_label = " | " + str(rate * 100) + "%"
        NPV_CB["Benefits" + rate_label] = benefits
        NPV_CB["Costs" + rate_label] = cost_npv
        NPV_CB["Benefits/Costs" + rate_label] = benefits / cost_npv
        NPV_CB["Benefits+Costs" + rate_label] = benefits + cost_npv

    # write out information in .csv
    NPV_CB["Units"] = "Million $USD or unitless"
    summary = pd.DataFrame(NPV_CB, index=[0])
    summary.to_csv("data/data_analysis/supplementary_tables/" + str(config_fname).replace("_", "/") + "/" +
                   "/cost-benefit-analysis.csv")


def get_CB_dfs(baseline_market, npv_cols):
    """
    Split a policy cost table into the cost categories used by the cost/benefit calculations
    :param baseline_market: dataframe of policy costs; the "technology_price", "product_price", "product" and
        "Units" columns are read here
    :param npv_cols: list of columns kept in each returned dataframe
    :return: tuple of dataframes in the order (subsidy, deadweight loss, C tax revenue, CDR market,
        innovation funding)
    """
    policy_costs = baseline_market

    def tax_slice(product_name):
        # rows of one product, interpolated, labelled with the product name and a fixed unit
        rows = policy_costs[policy_costs["product"] == product_name].copy(deep=True)
        rows = data_manipulation.interpolate(rows, "truncated")
        rows["cost_type"] = product_name
        rows["Units"] = "Million 2025$USD/yr"
        return rows[npv_cols]

    # subsidy information, summed into one row per unit
    is_subsidy = policy_costs["technology_price"] == "subsidy"
    subsidy = policy_costs[is_subsidy].copy(deep=True)
    subsidy = data_manipulation.interpolate(subsidy, "truncated")
    subsidy = subsidy.groupby(["technology_price", "Units"]).sum(min_count=1).reset_index()
    subsidy["cost_type"] = "Subsidy"
    subsidy = subsidy[npv_cols]

    # C tax revenue and deadweight loss information
    deadweight = tax_slice("Deadweight Loss")
    carbon_tax = tax_slice("C Tax Revenue")

    # innovation information (R&D and DAC hub investment)
    is_innovation = (policy_costs["product"] == "Investment in R&D") | (
            policy_costs["product"] == "Investment in DAC Hubs")
    innovation = policy_costs[is_innovation].copy(deep=True)
    if len(innovation) == 0:
        # nothing was invested: hand back an empty table with the expected columns
        innovation = pd.DataFrame(columns=npv_cols)
    else:
        innovation = data_manipulation.interpolate(innovation, "extended")
        innovation["cost_type"] = "Investment in R&D"
        innovation = innovation[npv_cols]
        innovation["Units"] = "Million 2025$USD/yr"

    # market information: everything that is neither a subsidy, nor the CO2 product price, nor innovation spend
    is_market = (policy_costs["technology_price"] != "subsidy") & (policy_costs["product_price"] != "CO2") & (
        ~is_innovation)
    market = policy_costs[is_market].copy(deep=True)
    market = data_manipulation.interpolate(market, "linear")
    market["cost_type"] = "CDR Market"
    market = market.groupby(["cost_type", "Units"]).sum(min_count=1).reset_index()
    market = market[npv_cols]

    # return files
    return subsidy, deadweight, carbon_tax, market, innovation


def subsidy_expiration(config_fname, reference_year):
    """
    calculate the effects of the expiration of subsidies

    A year counts as subsidized when at least one row has a price above its subsidized price. Supply in each
    subsidized year is compared with supply in the period 5 years after the last subsidized year. When no year
    is subsidized, or that later period is after 2050, nothing is plotted or written.
    :param config_fname: scenario name, formatted as "<scenario>/<baseline>"
    :param reference_year: year of interest (not used in this function)
    :return: .csv files with results from calculations
    """
    # get baseline info
    name_parts = config_fname.split("/")
    baseline = name_parts[1]
    scenario = name_parts[0]
    # get market data at the state level
    CDR = pd.read_csv("data/data_analysis/supplementary_tables/" + scenario + "/" + baseline +
                      "/sorted price and supply of CDR by technology.csv")

    list_of_subsidies = []
    # find out which years have subsidies
    for i in c.GCAMConstants.plotting_x:
        try:
            condition = CDR[str(i) + "_price"] > CDR[str(i) + "_subsidized"]
        except KeyError as e:
            # a year without price columns is reported and skipped
            print(e)
            continue
        if condition.any():
            list_of_subsidies.append(i)

    # without any subsidized year there is nothing to analyze (and no last year to index)
    if not list_of_subsidies:
        print("no subsidized years found for " + str(config_fname) + "; skipping subsidy expiration analysis")
        return

    # first period after the last subsidized year
    year_without_subsidies = list_of_subsidies[-1] + 5

    # calculate what happens when subsidies end; only possible if that period is not after 2050
    if year_without_subsidies >= 2051:
        return

    CDR = CDR.fillna(0)
    columns = ["GCAM", "product_price"]
    end_supply_col = str(year_without_subsidies) + "_supply"
    for i in list_of_subsidies:
        price_col = str(i) + "_price"
        subsidized_col = str(i) + "_subsidized"
        supply_col = str(i) + "_supply"
        total_col = str(i) + "_total_loss"
        market_col = str(i) + "_CDR-Market_loss"
        subsidy_col = str(i) + "_CDR-Subsidy_loss"

        # change in supply valued at the full price, only for rows that were subsidized in year i
        CDR[total_col] = CDR.apply(
            lambda row: -1 * (row[supply_col] - row[end_supply_col]) * row[price_col]
            if row[price_col] > row[subsidized_col] else 0, axis=1)
        # the same change valued at the subsidized price
        CDR[market_col] = CDR.apply(
            lambda row: -1 * (row[supply_col] - row[end_supply_col]) * row[subsidized_col]
            if row[price_col] > row[subsidized_col] else 0, axis=1)
        # the remainder is the part that was covered by the subsidy
        CDR[subsidy_col] = CDR[total_col] - CDR[market_col]
        columns.append(market_col)
        columns.append(subsidy_col)

    # refit df for histogram plotting
    CDR_df = CDR[columns]
    CDR_df = CDR_df.melt(id_vars=["GCAM", "product_price"], var_name='category', value_name='Change in market size')
    CDR_df["SSP"] = "na"
    CDR_df["Units"] = "Million USD/yr"
    # remove rows with no change
    CDR_df = CDR_df[CDR_df["Change in market size"] != 0]

    plotting.plot_regional_hist_avg(CDR_df, 'Change in market size',
                                    "change in size of markets once the subsidy ends",
                                    "category", config_fname)

    # get only wasted and good spend
    CDR_wasted_subsidy = CDR.copy(deep=True)
    CDR_good_subsidy = CDR.copy(deep=True)

    for i in list_of_subsidies:
        for loss_col in (str(i) + "_CDR-Subsidy_loss", str(i) + "_CDR-Market_loss"):
            # "wasted" keeps only the non-positive changes, "good" only the non-negative ones
            CDR_wasted_subsidy[loss_col] = CDR_wasted_subsidy[loss_col].apply(
                lambda value: 0 if value > 0 else value)
            CDR_good_subsidy[loss_col] = CDR_good_subsidy[loss_col].apply(
                lambda value: 0 if value < 0 else value)

    CDR_wasted_subsidy = CDR_wasted_subsidy.groupby(["product_price"]).sum(min_count=1)
    CDR_wasted_subsidy = CDR_wasted_subsidy.reset_index()
    CDR_wasted_subsidy["Units"] = "Million 2025$USD/yr"
    CDR_wasted_subsidy["spend"] = "Wasted"
    CDR_good_subsidy = CDR_good_subsidy.groupby(["product_price"]).sum(min_count=1)
    CDR_good_subsidy = CDR_good_subsidy.reset_index()
    CDR_good_subsidy["Units"] = "Million 2025$USD/yr"
    CDR_good_subsidy["spend"] = "Good"
    CDR = pd.concat([CDR_wasted_subsidy, CDR_good_subsidy]).reset_index()
    CDR["product"] = CDR["product_price"] + " " + CDR["spend"]

    CDR.to_csv("data/data_analysis/supplementary_tables/" + str(config_fname).replace("_", "/") + "/" +
               "/subsidy-and-market-spend-on-subsidized-techs.csv")

    # remove market spend to focus on subsidies: the plain year columns now hold the subsidy part only
    for i in list_of_subsidies:
        CDR[str(i)] = CDR[str(i) + "_CDR-Subsidy_loss"]
    plotting.plot_stacked_bar_product(CDR, list_of_subsidies, "product",
                                      "change in CDR market size from base year to year after subsidies end",
                                      config_fname)


def market_share(config_fname, reference_year):
    """
    Tabulate the supply and percentage market share of each CDR product in one year
    :param config_fname: scenario name, formatted as "<scenario>/<baseline>"
    :param reference_year: year of interest; its "<year>_supply" column is summed per product
    :return: "market share in <year>.csv" with the columns "Mt" and "%"
    """
    # get baseline info
    name_parts = config_fname.split("/")
    baseline = name_parts[1]
    scenario = name_parts[0]
    tables_root = "data/data_analysis/supplementary_tables/"

    # get market data from state
    market_csv = tables_root + scenario + "/" + baseline + "/sorted price and supply of CDR by technology.csv"
    supply_column = str(reference_year) + "_supply"

    # total supply per product
    supply_by_product = pd.read_csv(market_csv).groupby('product_price')[supply_column].sum()

    # each product's share of the overall supply, in percent
    share_pct = (supply_by_product / supply_by_product.sum()) * 100

    # format data and output
    share_table = pd.DataFrame({"Mt": supply_by_product, "%": share_pct})
    share_table.to_csv(tables_root + str(config_fname).replace("_", "/") + "/" +
                       "/market share in " + str(reference_year) + ".csv")


def social_cost(config_fname, reference_year):
    """
    Write the cost table built by get_C_costs (CDR market, C tax revenue and deadweight loss) to a .csv
    :param config_fname: scenario of interest, formatted as "<scenario>/<baseline>"
    :param reference_year: year of interest (not used in this function)
    :return: "CO2_CDR_social_costs.csv" in the scenario's supplementary table folder
    """
    name_parts = config_fname.split("/")
    baseline = name_parts[1]
    scenario = name_parts[0]

    # build the combined cost table
    cost_table = get_C_costs(baseline, config_fname, scenario)

    # save it next to the other supplementary tables of this scenario
    output_csv = ("data/data_analysis/supplementary_tables/" + str(config_fname).replace("_", "/") + "/" +
                  "/CO2_CDR_social_costs.csv")
    cost_table.to_csv(output_csv)


def get_C_costs(baseline, config_fname, scenario):
    """
    Build one table holding the yearly CDR market cost, C tax revenue and deadweight loss of a scenario
    :param baseline: baseline pathway
    :param config_fname: scenario information passed to data_manipulation.get_sensitivity_data
    :param scenario: policy scenario
    :return: dataframe with "<year>_total_cost" columns and one "product" label per cost category
    """
    years = [str(i) for i in c.GCAMConstants.plotting_x]
    co2_to_c = c.GCAMConstants.CO2_to_C
    usd_conversion = c.GCAMConstants.USD2025_tCO2_to_1990_tC

    # emissions of the USA regions, summed over every sector except CDR_regional (excluded from the C tax)
    emissions = data_manipulation.get_sensitivity_data([config_fname], "CO2_emissions_by_sector")
    emissions = emissions[emissions["GCAM"].isin(c.GCAMConstants.USA_region)]
    emissions = emissions[emissions["sector"] != "CDR_regional"]
    emissions = emissions.groupby(["scenario", "baseline", "Units"]).sum(min_count=1).reset_index()

    # get a baseline CO2 emissions
    baseline_emissions = pd.read_csv("data/data_analysis/baseline_co2_emissions.csv")

    # CO2 price of the USA market
    prices = data_manipulation.get_sensitivity_data([config_fname], "CO2_prices")
    prices = prices[(prices["GCAM"] == "USA") & (prices["product"] == "CO2")]

    # pair emissions with prices; year columns become "<year>_supply" and "<year>_price"
    CO2_tax_revenue = pd.merge(emissions, prices, how="left", on="baseline", suffixes=("_supply", "_price"))
    # untouched copy of the same pairing, used for the deadweight loss below
    CO2_tax_price = CO2_tax_revenue.copy(deep=True)

    # process emissions revenue: emissions times price, each divided by its conversion constant
    # NOTE(review): per the original unit note the result is meant to be million 2025$ - confirm the constants
    for year in years:
        CO2_tax_revenue[year + "_total_cost"] = (CO2_tax_revenue[year + "_supply"] / co2_to_c) * (
                CO2_tax_revenue[year + "_price"] / usd_conversion)

    # process deadweight loss: half of (baseline emissions - actual emissions) times the price
    CO2_tax_price["Units"] = "MTC"
    deadweight_loss = pd.merge(CO2_tax_price, baseline_emissions, how="left", on="Units",
                               suffixes=("_actual", "_baseline"))
    for year in years:
        deadweight_loss[year + "_total_cost"] = (
                0.5 * (deadweight_loss[year] - deadweight_loss[year + "_supply"]) / co2_to_c *
                (deadweight_loss[year + "_price"] / usd_conversion))

    # process total price of CDR: supply times price for every year
    cdr_market_cost = pd.read_csv(
        "data/data_analysis/supplementary_tables/" + scenario + "/" + baseline + "/sorted price and supply of CDR by technology.csv")
    for year in years:
        supply_col = year + "_supply"
        price_col = year + "_price"
        total_col = year + "_total_cost"
        try:
            cdr_market_cost[total_col] = cdr_market_cost[supply_col] * cdr_market_cost[price_col]
            cdr_market_cost = cdr_market_cost.drop([supply_col, price_col], axis=1)
        except KeyError as e:
            # a year missing from the table is reported and filled with NaN
            print(e)
            cdr_market_cost[total_col] = np.nan
    cdr_market_cost = cdr_market_cost.groupby(["Units_supply", "Units_price"]).sum(min_count=1).reset_index()

    # label the three tables and stack them
    cdr_market_cost["Units"] = "Million 2025$USD/yr"
    deadweight_loss["Units"] = "Million 2025$USD/yr"
    CO2_tax_revenue["Units"] = "Million 2025$USD/yr"
    cdr_market_cost["product"] = "CDR Market"
    deadweight_loss["product"] = "Deadweight Loss"
    CO2_tax_revenue["product"] = "C Tax Revenue"
    return pd.concat([cdr_market_cost, CO2_tax_revenue, deadweight_loss])


def CDR_tech(config_fname, year):
    """
    Plot CDR by region and technology as a stacked bar chart and as a map
    :param config_fname: where to store output data
    :param year: year being analyzed
    :return: N/A
    """
    # data processing: keep the USA regions and drop the "unsatisfied CDR demand" rows
    removal = data_manipulation.get_sensitivity_data([config_fname], "CDR_by_tech")
    in_usa = removal[['GCAM']].isin(c.GCAMConstants.USA_region).any(axis=1)
    removal = removal[in_usa]
    is_real_technology = removal['technology'] != "unsatisfied CDR demand"
    removal = removal[is_real_technology]

    year_label = str(year)

    # stacked bar plot
    bar_title = "CDR by technology in " + year_label
    plotting.plot_stacked_bar_product(removal, year, "technology", bar_title, config_fname)

    # choropleth map
    map_title = "plotting estimated CDR supply by technology in " + year_label
    plotting.plot_world_by_products(removal, "technology", [year], map_title, config_fname)


def _compare_policy_cost_to_baseline(dataframe, scenario, baseline, config_fname, table_name, plot_title,
                                     output_name, drop_c_tax=False):
    """
    plot and write out the change in policy cost between a scenario and its baseline run
    :param dataframe: policy cost of the scenario being analyzed (one column per plotted year)
    :param scenario: scenario name, used to select the scenario's own rows after the merge
    :param baseline: baseline name, used to locate the baseline's previously written cost table
    :param config_fname: retains information about where to save data
    :param table_name: file name of the baseline cost table to read
    :param plot_title: title handed to the stacked bar plot
    :param output_name: file name of the csv written for the scenario
    :param drop_c_tax: if True, remove the "C Tax Revenue" rows from the baseline table before merging
    :return: N/A
    """
    tables_dir = "data/data_analysis/supplementary_tables/"
    cost_diff = pd.read_csv(tables_dir + baseline + "/" + baseline + "/" + table_name)
    if drop_c_tax:
        cost_diff = cost_diff[cost_diff["product"] != "C Tax Revenue"]

    # baseline columns get the "_old" suffix, scenario columns get "_new"
    cost_diff = pd.merge(cost_diff, dataframe, "outer", on=["product", "baseline"], suffixes=("_old", "_new"))
    cost_diff["Units"] = "Million 2025$USD/yr"

    # rows that came from the scenario side of the merge; only the *_old / *_new columns are read below,
    # so the mask does not change while the per-year difference columns are being added
    scenario_rows = cost_diff["scenario_new"] == scenario
    for i in c.GCAMConstants.plotting_x:
        new_col = str(i) + "_new"
        old_col = str(i) + "_old"
        year_is_masked = (cost_diff.loc[scenario_rows, new_col].isnull().all()
                          or cost_diff.loc[scenario_rows, old_col].isnull().all())
        if year_is_masked:
            # if a year has been masked from the data, don't fill na
            cost_diff[str(i)] = cost_diff[new_col] - cost_diff[old_col]
        else:
            cost_diff[str(i)] = cost_diff[new_col].fillna(0) - cost_diff[old_col].fillna(0)
    plotting.plot_stacked_bar_product(cost_diff, c.GCAMConstants.plotting_x, "product", plot_title, config_fname)

    # add a total row
    cols = ["2025", "2030", "2035", "2040", "2045", "2050", "product", "scenario_new", "baseline", "Units"]
    cost_diff = cost_diff[cols]
    total = pd.DataFrame(cost_diff.sum(numeric_only=True)).T
    cost_diff = pd.concat([cost_diff, total])
    cost_diff.to_csv(tables_dir + str(config_fname).replace("_", "/") + "/" + "/" + output_name)


def CDR_cost(config_fname, year):
    """
    plot CDR costs (and policy costs from subsidies and R&D investment), write the policy cost tables,
    and, when the scenario differs from its baseline, compare them against the baseline's tables
    :param config_fname: retains information about where to save data; split on "/" into scenario and baseline
    :param year: year being analyzed (not read by this function)
    :return: N/A
    """
    # check if it is a baseline scenario
    name_parts = config_fname.split("/")
    scenario = name_parts[0]
    baseline = name_parts[1]

    tables_dir = "data/data_analysis/supplementary_tables/"
    year_cols = [str(i) for i in c.GCAMConstants.plotting_x]

    # build and write out scenario policy cost
    supply = data_manipulation.get_sensitivity_data([config_fname], "CDR_by_tech", source="unmasked")
    supply["Units"] = "Mt CO$_{2}$-eq"
    price = data_manipulation.get_sensitivity_data([config_fname], "prices_of_all_markets", source="unmasked")
    price["product"] = price.apply(lambda row: data_manipulation.price_subsidy(row), axis=1)

    # match subsidy market to the states
    # get the subsidy files
    xml_files_to_build = []
    for part in name_parts:
        xml_files_to_build.extend(utilities.build_from_scenario(str(part)))
    xml_files_to_build.reverse()
    subsidy_df = pd.DataFrame()
    meko_subsidy = pd.DataFrame()

    # get subsidy links and calculate subsidy name
    for xml in xml_files_to_build:
        for file_key in xml.data_files:
            csv_path = xml.data_files[file_key]
            if "subsidy" in file_key and "countersubsidy" not in file_key and "link" not in file_key:
                ground_truth = pd.read_csv(csv_path, skiprows=2)
                # meko_subsidy aliases the full table, so the "product" column added on the next line
                # is also present on it; ground_truth is rebound to a reduced copy afterwards
                meko_subsidy = ground_truth
                ground_truth["product"] = ground_truth["stub-technology"] + " subsidy"
                ground_truth = ground_truth[["product", "market"]].drop_duplicates()
                # one row per (subsidy, USA region)
                ground_truth["GCAM"] = [c.GCAMConstants.USA_region for _ in ground_truth.index]
                ground_truth = ground_truth.explode("GCAM")
                if not subsidy_df.equals(ground_truth):
                    subsidy_df = pd.concat([subsidy_df, ground_truth])

    if not subsidy_df.empty:
        subsidy_df.drop_duplicates(inplace=True)
        subsidy_df = pd.merge(subsidy_df, price, "left", on=["product"])

        # update columns of df to prepare for merger
        subsidy_df["GCAM"] = subsidy_df["GCAM_x"]
        subsidy_df[['product', 'technology']] = subsidy_df['product'].str.split(' ', expand=True)
        price = pd.concat([price, subsidy_df])

    # update the price to $2025USD/t C  from $1975USD/kg C - then to a CO2-eq basis
    price["Units"] = "2025USD/t CO$_{2}$-eq"
    for col in year_cols:
        # https://data.bls.gov/cgi-bin/cpicalc.pl?cost1=1.00&year1=197501&year2=202501
        price[col] = price[col] / c.GCAMConstants.USD2025_tCO2_to_1975_kgC
        supply[col] = supply[col] / c.GCAMConstants.CO2_to_C

    # merge dataframes and constrain to US regions
    dataframe = pd.merge(supply, price, "left", left_on=["technology", "GCAM"], right_on=["product", "GCAM"],
                         suffixes=("_supply", "_price"))
    dataframe = dataframe[dataframe["GCAM"].isin(c.GCAMConstants.USA_region)]
    dataframe.drop_duplicates(inplace=True)
    dataframe = dataframe[~dataframe["subsector_supply"].isin(["unsatisfiedDemand"])]

    # if there is supply less than 0.01 Mt CDR for a given tech and state, set supply and price to np.nan
    # the price column is rewritten first, so the supply pass sees the already-updated price in each row
    for col in year_cols:
        dataframe[col + "_price"] = dataframe.apply(
            lambda row: data_manipulation.remove_price_supply_outliers(col, row, "_price"), axis=1)
        dataframe[col + "_supply"] = dataframe.apply(
            lambda row: data_manipulation.remove_price_supply_outliers(col, row, "_supply"), axis=1)
    mari_df = dataframe[dataframe["technology_price"] != "subsidy"]

    # format ground truth
    # NOTE(review): if no subsidy csv matched above, meko_subsidy is still the empty frame and this pivot
    # will presumably raise on the missing columns - confirm every scenario ships a subsidy file
    meko_subsidy = meko_subsidy.pivot(index='stub-technology', columns='year')['fixedTax'].reset_index()
    meko_subsidy.columns = meko_subsidy.columns.astype(str)
    meko_subsidy["Units"] = "Mt"
    scenario_df = plotting.plot_marimekko(mari_df, c.GCAMConstants.plotting_x, "_supply", "_price", "product_price",
                                          "sorted price and supply of CDR by technology", config_fname, meko_subsidy)

    # and compare tech costs to default
    if scenario != baseline:
        baseline_df = pd.read_csv(
            tables_dir + baseline + "/" + baseline + "/sorted price and supply of CDR by technology.csv")
        plotting.compare_marimekko(scenario_df, baseline_df, config_fname)

    # calculate the total cost and plot
    for col in year_cols:
        # "Mt $CO_{2}$-eq"  "2025USD/t $CO_{2}$-eq" factor of a million is added to dollars
        dataframe[col] = dataframe[col + "_supply"] * dataframe[col + "_price"]
    # sum by technology
    dataframe = dataframe.groupby(["product_price", "technology_price"]).sum(min_count=1)
    dataframe = dataframe.reset_index()
    dataframe["Units"] = "Million 2025$USD/yr"
    dataframe['scenario'] = scenario
    dataframe['baseline'] = baseline
    dataframe['product'] = dataframe.apply(
        lambda row: row["product_price"] + " " + row["technology_price"] if row["technology_price"] != "missing" else
        row["product_price"], axis=1)

    # avoid double counting cost
    for col in year_cols:
        dataframe[col] = dataframe.apply(lambda row: data_manipulation.substract_subsidy(row, col, subsidy_df),
                                         axis=1)

    # add exogenous policy costs to the CDR cost dataframes
    if os.path.exists("./data/gcam_out/" + config_fname + "/exogenous_subsector_investment" + ".csv"):
        investments = data_manipulation.get_sensitivity_data([config_fname], "exogenous_subsector_investment",
                                                             source="not")
        investments["product"] = "Investment in " + investments["subsector"]

        # remove nan rows (the constants module is referenced through the same alias as the rest of the function)
        investments = investments.dropna(subset=year_cols, how='all')
        dataframe = pd.concat([dataframe, investments])

    # plot and save the policy cost before the carbon costs are added
    plotting.plot_stacked_bar_product(dataframe, c.GCAMConstants.plotting_x, "product",
                                      "policy cost by year (no mitigation cost)", config_fname)
    dataframe.to_csv(tables_dir + str(config_fname).replace("_", "/") + "/" +
                     "/policy cost by technology_no co2.csv")

    # compare this bar plot with default one (if this is not a default scenario)
    if baseline != scenario:
        _compare_policy_cost_to_baseline(dataframe, scenario, baseline, config_fname,
                                         "policy cost by technology_no co2.csv",
                                         "change in policy cost by year (no mitigation cost)",
                                         "change in policy cost by technology_no_C_tax.csv")

    # add the carbon costs; the "CDR Market" rows are left out
    C_costs = get_C_costs(baseline, config_fname, scenario)
    C_costs = C_costs[C_costs["product"] != "CDR Market"]

    for col in year_cols:
        C_costs[col] = C_costs[col + "_total_cost"]

    dataframe = pd.concat([dataframe, C_costs])
    dataframe.to_csv(tables_dir + str(config_fname).replace("_", "/") + "/" +
                     "/policy cost by technology.csv")
    # keep the C cost information in dataframe, but don't plot it
    dataframe = dataframe[dataframe["product"] != "C Tax Revenue"]
    plotting.plot_stacked_bar_product(dataframe, c.GCAMConstants.plotting_x, "product", "policy cost by year",
                                      config_fname)

    # compare this bar plot with default one (if this is not a default scenario)
    if baseline != scenario:
        _compare_policy_cost_to_baseline(dataframe, scenario, baseline, config_fname,
                                         "policy cost by technology.csv",
                                         "change in policy cost by year",
                                         "change in policy cost by technology.csv",
                                         drop_c_tax=True)

    # verify procurement
    if scenario != baseline:
        verification.verify_procurement(scenario, baseline, "./data/gcam_out/" + config_fname)


if __name__ == '__main__':
    # run identifiers handed to main(), processed in the order listed
    run_configs = (
        # reference runs
        "nzn_nzn", "nothing_nothing", "low_low", "high_high", "excess_excess", "4gt_4gt",
        # procurement runs
        "s1-procureScaling-n_nothing", "s1-procure3B-n_nothing", "s1-procureRhodium-n_nothing",
        "s1-procureScaling-l_low", "s1-procure3B-l_low", "s1-procureRhodium-l_low",
        "s1-procureScaling-h_high", "s1-procure3B-h_high", "s1-procureRhodium-h_high",
        # 45Q and CDRIA runs
        "45Q-2040_low", "45Q-2050_low", "CDRIA-2035_low", "CDRIA-2050_low",
        "45Q-2040_high", "45Q-2050_high", "CDRIA-2035_high", "CDRIA-2050_high",
        # innovation runs
        "innovation-DACHubs_low", "innovation-maintain_low", "innovation-rhodium6b_low",
        "innovation-rhodium18b_low", "innovation-triple_low",
        "innovation-DACHubs_high", "innovation-maintain_high", "innovation-rhodium6b_high",
        "innovation-rhodium18b_high", "innovation-triple_high", "CDRIA-rhodium18b_low",
        "CDRIA-rhodium18b_high",
    )
    # to process only a subset, loop over a shorter list instead, e.g.
    # ["CDRIA-2035_low", "CDRIA-2050_low", "CDRIA-2035_high", "CDRIA-2050_high"]
    for config_name in run_configs:
        main(config_name, "2050")