Source code for ecodynelec.preprocessing.load_impacts

"""
Module collecting the functions that load the impact information per generation unit type.
"""

import numpy as np
import pandas as pd

from ecodynelec.preprocessing.auxiliary import get_default_file
from ecodynelec.parameter import Parameter


# +
# The functions of this module extract the impact information from the files

# +

#################################
# ################################
# EXTRACT UNIT IMPACT VECTOR
# ################################
# ################################

# -


[docs]
def extract_UI(parameters: Parameter, path_ui, ctry: list = None, target: str = 'CH', residual: bool = False, cst_imports: bool = False):
    """
    Load the unit impact (UI) vector from a .csv file and tailor it.

    Parameters
    ----------
        parameters: Parameter
            only its `uvek_data` attribute is read, to pick the default file
        path_ui: str
            path to the .csv file; if None a default file is looked up
        ctry: list, default to None
            countries whose rows are kept (None keeps all countries)
        target: str, default to 'CH'
            the target country, left untouched by the constant imports
        residual: bool, default to False
            whether to keep the rows starting with 'Residual'
        cst_imports: bool, default to False
            whether to overwrite non-target rows with the 'Mix_Other' row

    Returns
    -------
    pandas.DataFrame
        the selected (and possibly overwritten) rows of the UI vector
    """
    # Fall back on one of the two packaged default files
    if path_ui is None:
        default_name = 'UVEK_Unit_Impact_Vector.csv' if parameters.uvek_data else 'Unit_Impact_Vector.csv'
        path_ui = get_default_file(name=default_name)

    # Read the table (first column as index) and keep the relevant rows
    selected = select_ui_indexes(pd.read_csv(path_ui, index_col=[0]),
                                 ctry=ctry, residual=residual)

    # Without constant imports, the selection is the final result
    if not cst_imports:
        return selected
    return set_constant_imports(selected, target=target)



# +

#################################
# ################################
# SET CONSTANT IMPORTS FROM UI
# ################################
# ################################

# -


[docs]
def set_constant_imports(ui, target: str = 'CH'):
    """
    Overwrite the impacts of all non-target rows with the 'Mix_Other' row.

    Parameters
    ----------
        ui: pandas.DataFrame
            unit impact table; must contain a row labelled 'Mix_Other'
        target: str, default to 'CH'
            rows whose label ends with '_<target>' are left untouched

    Returns
    -------
    pandas.DataFrame
        a modified copy of `ui`; the input table is not altered
    """
    # Rows to systematically leave as they are: the reference row and the target's rows
    target_suffix = f'_{target}'
    exclude = ['Mix_Other'] + [i for i in ui.index if str(i).endswith(target_suffix)]

    # The value to turn all other rows into
    how = ui.loc['Mix_Other', :]

    ### Change the information (on a copy)
    new_ui = ui.copy()
    to_overwrite = ~new_ui.index.isin(exclude)
    if to_overwrite.any():  # nothing to assign when every row is excluded
        new_ui.loc[to_overwrite, :] = how.values
    return new_ui



# +

#################################
# ################################
# SELECT UI INDEXES
# ################################
# ################################

# -


[docs]
def select_ui_indexes(ui, ctry: list = None, residual: bool = False):
    """
    Select the relevant rows from the complete UI vector.

    Parameters
    ----------
        ui: pandas.DataFrame
            unit impact table, one row per label
        ctry: list, default to None
            countries to keep; a row is kept when its label ends with
            '_<country>' or with '_Other'. None keeps every country.
            A single str is treated as one country.
        residual: bool, default to False
            if False, rows whose label starts with 'Residual' are removed

    Returns
    -------
    pandas.DataFrame
        the rows of `ui` matching the selection, in their original order
    """
    # Labels are needed by both filters, so build them before any branching
    labels = [str(label) for label in ui.index]

    if ctry is not None:
        if isinstance(ctry, str):  # a bare string would otherwise be split in characters
            ctry = [ctry]
        # Consider the "Mix Other" on top of the requested countries
        suffixes = tuple(f'_{p}' for p in list(ctry) + ['Other'])
        selection = np.array([label.endswith(suffixes) for label in labels], dtype=bool)
    else:  # Select for all countries
        selection = np.ones(len(labels), dtype=bool)

    # Deal with residual
    if not residual:
        is_residual = np.array([label.startswith('Residual') for label in labels], dtype=bool)
        selection = np.logical_and(selection, ~is_residual)

    return ui.loc[selection, :]



# +

#################################
# ################################
# EXTRACT MAPPING
# ################################
# ################################

# -


[docs]
def extract_mapping(ctry, mapping_path=None, cst_import=False, residual=False, target='CH', is_verbose=False):
    """
    Function to build the impact matrix from mapping stored in files.

    Parameters
    ----------
        ctry: list
            list of countries to load the impacts of (a single str is
            accepted and treated as a one-element list)
        mapping_path: str, default to None
            .xlsx file where to find the mapping data
        cst_import: bool, default to False
            whether to consider all impacts of non-target countries as
            the impact of 'Other'
        residual: bool, default to False
            whether to append the production residual rows; they are
            always loaded for 'CH', whatever `target` is
        target: str, default to 'CH'
            the target country
        is_verbose: bool, default to False
            to display information

    Returns
    -------
    pandas.DataFrame
        the 'Mix_Other' row followed by the rows of each country, in the
        order of `ctry`; residual rows come right after the 'CH' rows.

    Raises
    ------
    TypeError
        if `ctry` is neither a str nor an iterable
    ValueError
        if `residual` is requested while 'CH' is not in `ctry`
    """
    ### Check the country list
    if is_verbose: print("Extraction of impact vector...")
    # isinstance (not type equality) so that str subclasses count as one country too
    if isinstance(ctry, str):
        ctry = [ctry]
    elif not hasattr(ctry, '__iter__'):
        raise TypeError("Parameter ctry should be a list, tuple or str")

    ### Extract the impact information
    impacts = {}

    if is_verbose: print("\t. Mix_Other ", end="")  # Mix from other countries
    impacts['Other'] = other_from_excel(mapping=mapping_path)

    for c in ctry:
        if is_verbose: print(f"/ {c} ", end="")
        country_impacts = country_from_excel(mapping=mapping_path, place=c)
        if cst_import and c != target:  # Constant imports for other countries
            country_impacts = set_constant_impacts(country_impacts,
                                                   constant=impacts['Other'].loc['Mix_Other'])
        impacts[c] = country_impacts

    ### Add impact of residual
    if residual:  # Residual rows are appended to the CH rows (residual only in CH)
        if is_verbose: print("+ Residual ", end="")
        if 'CH' not in impacts:
            raise ValueError("Including residual only available for CH. Please include CH in the list of countries")
        impacts['CH'] = pd.concat([impacts['CH'],
                                   residual_from_excel(mapping=mapping_path, place='CH')])

    ### Gather impacts in one table (dict order: 'Other' first, then the countries)
    if is_verbose: print(".")
    impact_matrix = pd.concat(list(impacts.values()))

    return impact_matrix




[docs]
def extract_network_mapping(ctry, mapping_path=None, is_verbose=False):
    """
    Function to build the network impact dict from mapping stored in files.

    Parameters
    ----------
        ctry: list
            list of countries to load the impacts of (a single str is
            accepted and treated as a one-element list)
        mapping_path: str, default to None
            .xlsx file where to find the mapping data
        is_verbose: bool, default to False
            to display information

    Returns
    -------
    dict
        nested dict ``{country: {level: {category: value}}}`` where level
        is one of 'High Voltage', 'Medium Voltage', 'Low Voltage',
        'Infra PHS' (in that order) and category is one of
        'Carbon intensity', 'Land use', 'Fine particulate matter formation',
        'Human carcinogenic toxicity'.

    Raises
    ------
    TypeError
        if `ctry` is neither a str nor an iterable
    ValueError
        if no cell of the first column equals "Catégorie d'impact"
    """
    ### Check the country list
    if is_verbose: print("Extraction of network impact...")
    # isinstance (not type equality) so that str subclasses count as one country too
    if isinstance(ctry, str):
        ctry = [ctry]
    elif not hasattr(ctry, '__iter__'):
        raise TypeError("Parameter ctry should be a list, tuple or str")

    ### Locate the header row, then re-read the sheet with it as header
    df_temp = pd.read_excel(mapping_path, header=None)
    header_idx = df_temp.index[df_temp.iloc[:, 0] == "Catégorie d'impact"].tolist()

    if not header_idx:
        raise ValueError("Header 'Catégorie d'impact' not found in file.")

    matrix = pd.read_excel(mapping_path, header=header_idx[0])

    # Categories to keep (keys, as written in the file) and their output names (values)
    row_mapping = {
        "Climate change - Fossil": "Carbon intensity",
        "Land use": "Land use",
        "Particulate matter": "Fine particulate matter formation",
        "Human toxicity, cancer": "Human carcinogenic toxicity"
    }
    matrix = matrix.rename(columns={"Catégorie d'impact": 'Category'})
    matrix = matrix[matrix['Category'].isin(list(row_mapping))]

    # The second column is not an impact column and is discarded
    # NOTE(review): presumably it holds the units -- confirm against the file
    if len(matrix.columns) > 1:
        matrix = matrix.drop(matrix.columns[1], axis=1)
    matrix_clean = matrix.set_index('Category').rename(index=row_mapping)

    ### Extract the impact information
    impacts = {}

    for col in matrix_clean.columns:
        # Extract metadata from column name: "process.../unit/COUNTRY SUFFIX"
        # Example: "electricity.../kWh/CH U"
        if not isinstance(col, str) or '/' not in col:
            continue
        process_part, country_part = col.rsplit('/', 1)
        country_code = country_part.replace(' U', '').strip()
        if country_code not in ctry:
            continue

        country_impacts = impacts.setdefault(country_code, {})

        # Reset for every column: a column matching no marker must never
        # reuse the level found for a previous column.
        voltage_key = None
        if "high voltage" in process_part:
            voltage_key = "High Voltage"
        elif "medium voltage" in process_part:
            voltage_key = "Medium Voltage"
        elif "low voltage" in process_part:
            voltage_key = "Low Voltage"
        elif "infra at pumped storage" in process_part and country_code == "CH":
            # Pumped-storage infrastructure is only considered for CH
            voltage_key = "Infra PHS"

        if voltage_key is None:  # unrecognised process: nothing to store
            continue

        level_impacts = country_impacts.setdefault(voltage_key, {})
        for category, value in matrix_clean[col].items():
            level_impacts[category] = value

    # Impose a fixed order of the levels within each country
    desired_order = ['High Voltage', 'Medium Voltage', 'Low Voltage', 'Infra PHS']
    for code in impacts:
        impacts[code] = {k: impacts[code][k] for k in desired_order if k in impacts[code]}

    if is_verbose:
        print(f"Extraction complete for: {list(impacts.keys())}")

    return impacts



# +

#################################
# ################################
# Other from excel
# ################################
# ################################

# -


[docs]
def other_from_excel(mapping):
    """
    Read the impacts of the 'Other' mix from the excel file `mapping`.

    The sheet "ENTSOE_avg" is read (header on the second row, columns 2 to 6,
    first of them as index) and its row 'ENTSOE average mix' is returned as
    a one-row table relabelled 'Mix_Other'.
    """
    source_label = 'ENTSOE average mix'

    # Extract the sheet
    sheet = pd.read_excel(mapping,
                          sheet_name="ENTSOE_avg",
                          header=1,
                          usecols=np.arange(2, 7),
                          index_col=[0])

    # Keep the single relevant row (as a table) and give it its final label
    average_mix = sheet.loc[[source_label], :]
    return average_mix.rename(index={source_label: 'Mix_Other'})



# +

#################################
# ################################
# Country from excel
# ################################
# ################################

# -


[docs]
def country_from_excel(mapping, place):
    """
    Load the mapping of a given country (place) from an excel file (mapping).

    Parameters
    ----------
        mapping: str
            path to file with the mapping
        place: str
            country tag; also the name of the sheet to read

    Returns
    -------
    pandas.DataFrame
        float32 table of impacts, one row per unit type. Row labels have
        parentheses removed, ' Fos' lowered to ' fos', the country tag
        appended and spaces replaced by underscores.

    Raises
    ------
    ValueError
        if the sheet cannot be read; the original error is chained as cause
    """
    try:  # test if the country is available in the mapping file
        d = pd.read_excel(mapping, sheet_name=place, index_col=[0])  # Read and get index col
    except Exception as e:
        # Broad on purpose: a missing file, a missing sheet or a missing reader
        # engine all mean the mapping is unavailable. Keep the cause for debugging.
        raise ValueError(f"Mapping for {place} not available: {e} ") from e

    # Select last 'impact' column as key: impact columns span from there to the end
    key = [k for k in d.columns if 'impact' in str(k).lower()][-1]
    # The first row below the header holds the names of the impact columns
    columns = d.loc[:, key:].iloc[0]
    columns = columns[
        columns.apply(lambda x: not str(x).endswith('KBOB'))]  # Strike out the KBOB columns

    # Get only important data
    d = d.loc[:, columns.index].dropna(axis=0).rename(columns=columns.to_dict())
    to_drop = [k for k in d.index if 'sources entso-e' in str(k).lower()]
    d = d.loc[d.index.notnull()].drop(index=to_drop, errors='ignore')  # Select the correct indexes

    # Replace "-" with zeros.
    d = d.replace("-", 0).astype('float32')

    # Change indexes
    return d.rename({i: (i.replace('(', '').replace(')', '').replace(" Fos", ' fos')
                         + f" {place}").replace(' ', '_').replace('__', '_')
                     for i in d.index}, axis=0).rename_axis("")



# +

#################################
# ################################
# Residual from excel
# ################################
# ################################

# -


[docs]
def residual_from_excel(mapping, place):
    """
    Load the impact data of the production residual.

    Parameters
    ----------
        mapping: str
            path to file with the mapping (its sheet "Residual" is read)
        place: str
            country tag of the country, appended to the row labels

    Returns
    -------
    pandas.DataFrame
        float32 table with the impacts of the residual rows only (labels
        starting with 'Resid'), relabelled with 'Residue' turned into
        'Residual', spaces into underscores and '_<place>' appended.

    Raises
    ------
    ValueError
        if the sheet cannot be read; the original error is chained as cause
    """
    try:  # test if the "Residual" sheet is available in the mapping file
        d = pd.read_excel(mapping, sheet_name="Residual", index_col=0)
    except Exception as e:
        # Broad on purpose: a missing file, a missing sheet or a missing reader
        # engine all mean the residual is unavailable. Keep the cause for debugging.
        raise ValueError(f" Residual not available: {e}") from e

    # Select last 'impact' column as key: impact columns span from there to the end
    key = [k for k in d.columns if 'impact' in str(k).lower()][-1]
    # The first row below the header holds the names of the impact columns
    columns = d.loc[:, key:].iloc[0]
    columns = columns[
        columns.apply(lambda x: not str(x).endswith('KBOB'))]  # Strike out the KBOB columns

    # Select the right columns
    d = d.loc[:, columns.index].rename(columns=columns.to_dict()).rename_axis('')

    # Select the right indexes
    idx = pd.Series(d.index).apply(lambda x: str(x).startswith('Resid')).values
    d = d.loc[idx].astype('float32')

    # Rename indexes with the place & formatting
    return d.rename(index={i: (i.replace('Residue', 'Residual').replace(" ", "_")
                               + f"_{place}")
                           for i in d.index})



# +

#################################
# ################################
# Set constant impacts
# ################################
# ################################

# -


[docs]
def set_constant_impacts(impacts, constant):
    """
    Replace every row of `impacts` by `constant`.

    `constant` is handed back as the result of a row-wise `apply`, so the
    row labels of `impacts` are preserved in the output.
    """
    def _same_for_every_row(_row):
        # The content of the row is ignored on purpose
        return constant

    return impacts.apply(_same_for_every_row, axis=1)



# +

#################################
# ################################
# Units from Mapping
# ################################
# ################################

# -


[docs]
def get_impact_units(mapping):
    """
    Read the units of impacts from the excel file `mapping`.

    The sheet "ENTSOE_avg" is read (header on the second row, columns 2 to 6,
    first of them as index); its second data row is returned as a Series
    named 'Units', each value cast to str with the spaces removed.
    """
    # Extract the sheet
    sheet = pd.read_excel(mapping,
                          sheet_name="ENTSOE_avg",
                          header=1,
                          usecols=np.arange(2, 7),
                          index_col=[0])

    # Second data row, spaces stripped out of each cell
    units_row = sheet.iloc[1]
    compact = units_row.apply(lambda cell: str(cell).replace(' ', ''))
    return compact.rename('Units')