Source code for ecodynelec.preprocessing.load_impacts

"""
Module collecting functions that load the impact information per generation unit type.
"""

import numpy as np
import pandas as pd

from ecodynelec.preprocessing.auxiliary import get_default_file
from ecodynelec.parameter import Parameter


# +
# This module of functions extracts the impact information from the files

# +

#################################
# ################################
# EXTRACT UNIT IMPACT VECTOR
# ################################
# ################################

# -

def extract_UI(parameters: Parameter, path_ui, ctry: list = None, target: str = 'CH', residual: bool = False,
               cst_imports: bool = False):
    """
    Load the Unit Impact (UI) vector from a .csv file and tailor it to the request.

    Parameters
    ----------
    parameters: Parameter
        only its `uvek_data` attribute is read, and only when `path_ui` is None,
        to choose which default file name to look up.
    path_ui: str or None
        path to the .csv file. If None, a default file is resolved via `get_default_file`.
    ctry: list, default to None
        countries passed to `select_ui_indexes` for the row selection.
    target: str, default to 'CH'
        target country passed to `set_constant_imports`.
    residual: bool, default to False
        passed to `select_ui_indexes` to keep or discard residual rows.
    cst_imports: bool, default to False
        if True, the selected table is post-processed by `set_constant_imports`.

    Returns
    -------
    pandas.DataFrame
        the selected (and possibly modified) UI table.
    """
    # No path given: fall back on one of the two default file names
    if path_ui is None:
        default_name = 'UVEK_Unit_Impact_Vector.csv' if parameters.uvek_data else 'Unit_Impact_Vector.csv'
        path_ui = get_default_file(name=default_name)

    # Read the complete table, using the first column as index
    unit_impacts = pd.read_csv(path_ui, index_col=[0])

    # Restrict the rows to the requested countries
    unit_impacts = select_ui_indexes(unit_impacts, ctry=ctry, residual=residual)

    # Optionally overwrite the impacts of the non-target rows
    if not cst_imports:
        return unit_impacts
    return set_constant_imports(unit_impacts, target=target)
# + ################################# # ################################ # SET CONSTANT IMPORTS FROM UI # ################################ # ################################ # -
def set_constant_imports(ui, target: str = 'CH'):
    """
    Overwrite the impacts of all non-target rows with the values of the 'Mix_Other' row.

    Parameters
    ----------
    ui: pandas.DataFrame
        UI table whose index holds the unit labels. Must contain a 'Mix_Other' row.
    target: str, default to 'CH'
        rows whose label ends with ``_<target>`` are left untouched.

    Returns
    -------
    pandas.DataFrame
        a modified copy of `ui`; the input table itself is not altered.

    Raises
    ------
    KeyError
        if `ui` has no 'Mix_Other' row.
    """
    # Rows never overwritten: the reference row itself and every row of the target country.
    # Labels go through str() so that a non-string label cannot break the selection.
    target_suffix = f'_{target}'
    protected = ['Mix_Other'] + [label for label in ui.index if str(label).endswith(target_suffix)]

    # Values copied onto every other row
    reference = ui.loc['Mix_Other', :]

    # Work on a copy to leave the caller's table intact
    new_ui = ui.copy()
    new_ui.loc[~new_ui.index.isin(protected), :] = reference.values
    return new_ui
# + ################################# # ################################ # SELECT UI INDEXES # ################################ # ################################ # -
def select_ui_indexes(ui, ctry: list = None, residual: bool = False):
    """
    Select the relevant rows from the complete UI vector.

    Parameters
    ----------
    ui: pandas.DataFrame
        UI table whose index holds the unit labels.
    ctry: list, default to None
        countries to keep. A row is kept if its label ends with ``_<country>`` for
        one of these countries, or with ``_Other``. If None, rows of all countries are kept.
    residual: bool, default to False
        if False, rows whose label starts with 'Residual' are discarded.

    Returns
    -------
    pandas.DataFrame
        the rows of `ui` matching the selection, in their original order.
    """
    # Labels are needed by both the country filter and the residual filter, so they are
    # computed before branching (otherwise ctry=None with residual=False has nothing to test).
    labels = [str(label) for label in ui.index]

    if ctry is not None:
        # "Mix Other" is always kept alongside the requested countries
        suffixes = tuple(f'_{place}' for place in list(ctry) + ['Other'])
        selection = np.array([label.endswith(suffixes) for label in labels], dtype=bool)
    else:
        # Select for all countries
        selection = np.full((ui.shape[0],), True)

    # Deal with residual
    if not residual:
        is_residual = np.array([label.startswith('Residual') for label in labels], dtype=bool)
        selection = np.logical_and(selection, ~is_residual)

    return ui.loc[selection, :]
# + ################################# # ################################ # EXTRACT MAPPING # ################################ # ################################ # -
def extract_mapping(ctry, mapping_path=None, cst_import=False, residual=False, target='CH', is_verbose=False):
    """
    Function to build the impact matrix from mapping stored in files.

    Parameters
    ----------
    ctry: list
        list of countries to load the impacts of. A single str is treated as
        a one-element list.
    mapping_path: str, default to None
        .xlsx file where to find the mapping data
    cst_import: bool, default to False
        whether to consider all impacts of non-target countries as the impact of 'Other'
    residual: bool, default to False
        whether to add the production residual. The residual is only
        available for 'CH', which must then be part of `ctry`.
    target: str, default to 'CH'
        the target country
    is_verbose: bool, default to False
        to display information

    Returns
    -------
    pandas.DataFrame
        concatenation of the 'Mix_Other' row and of the impact tables of every
        country in `ctry`, in that order.

    Raises
    ------
    TypeError
        if `ctry` is neither a str nor an iterable.
    ValueError
        if `residual` is True but 'CH' is not among the loaded countries.
    """
    ### Check the country list
    if is_verbose:
        print("Extraction of impact vector...")

    # Test the type of country. isinstance also accepts str subclasses.
    if isinstance(ctry, str):
        ctry = [ctry]
    elif not hasattr(ctry, '__iter__'):
        raise TypeError("Parameter ctry should be a list, tuple or str")

    ### Extract the impact information
    impacts = {}
    if is_verbose:
        print("\t. Mix_Other ", end="")

    # Mix from other countries
    impacts['Other'] = other_from_excel(mapping=mapping_path)

    for c in ctry:
        if is_verbose:
            print(f"/ {c} ", end="")

        country_impacts = country_from_excel(mapping=mapping_path, place=c)
        if cst_import and c != target:
            # Constant imports for other countries: every row takes the 'Mix_Other' values
            country_impacts = set_constant_impacts(country_impacts,
                                                   constant=impacts['Other'].loc['Mix_Other'])
        impacts[c] = country_impacts

    ### Add impact of residual
    if residual:
        # Mix from the residual part -> direct after "Mix_Other" (residual only in CH)
        if is_verbose:
            print("+ Residual ", end="")
        if 'CH' not in impacts:
            raise ValueError("Including residual only available for CH. Please include CH in the list of countries")
        impacts['CH'] = pd.concat([impacts['CH'], residual_from_excel(mapping=mapping_path, place='CH')])

    ### Gather impacts in one table
    if is_verbose:
        print(".")
    # dict preserves insertion order: 'Other' first, then the countries as listed
    impact_matrix = pd.concat(list(impacts.values()))

    return impact_matrix
def extract_network_mapping(ctry, mapping_path=None, is_verbose=False):
    """
    Function to build the network impact dict from mapping stored in files.

    Parameters
    ----------
    ctry: list
        list of countries to load the impacts of. A single str is treated as
        a one-element list.
    mapping_path: str, default to None
        .xlsx file where to find the mapping data
    is_verbose: bool, default to False
        to display information

    Returns
    -------
    dict
        nested dict ``{country: {network level: {impact category: value}}}``.
        Network levels are ordered 'High Voltage', 'Medium Voltage', 'Low Voltage',
        'Infra PHS'; only the levels found in the file are present.

    Raises
    ------
    TypeError
        if `ctry` is neither a str nor an iterable.
    ValueError
        if no row of the first column holds the header "Catégorie d'impact".
    """
    ### Check the country list
    if is_verbose:
        print("Extraction of network impact...")

    # Test the type of country. isinstance also accepts str subclasses.
    if isinstance(ctry, str):
        ctry = [ctry]
    elif not hasattr(ctry, '__iter__'):
        raise TypeError("Parameter ctry should be a list, tuple or str")

    ### Locate the header row, then re-read the sheet with it as header
    df_temp = pd.read_excel(mapping_path, header=None)
    header_idx = df_temp.index[df_temp.iloc[:, 0] == "Catégorie d'impact"].tolist()
    if not header_idx:
        raise ValueError("Header 'Catégorie d'impact' not found in file.")
    matrix = pd.read_excel(mapping_path, header=header_idx[0])

    ### Keep only the impact categories of interest
    row_to_keep = ["Climate change - Fossil", "Land use", "Particulate matter", "Human toxicity, cancer"]
    matrix = matrix.rename(columns={"Catégorie d'impact": 'Category'})
    matrix = matrix[matrix['Category'].isin(row_to_keep)]

    # The second column is discarded before indexing by category
    if len(matrix.columns) > 1:
        matrix = matrix.drop(matrix.columns[1], axis=1)
    matrix = matrix.set_index('Category')

    # Rename the categories of the file
    row_mapping = {
        "Climate change - Fossil": "Carbon intensity",
        "Land use": "Land use",
        "Particulate matter": "Fine particulate matter formation",
        "Human toxicity, cancer": "Human carcinogenic toxicity"
    }
    matrix_clean = matrix.rename(index=row_mapping)

    ### Extract the impact information
    impacts = {}
    for col in matrix_clean.columns:
        # Extract metadata from column name: "process.../unit/COUNTRY SUFFIX"
        # Example: "electricity.../kWh/CH U"
        if not isinstance(col, str) or '/' not in col:
            continue

        process_part, country_part = col.rsplit('/', 1)
        country_code = country_part.replace(' U', '').strip()
        if country_code not in ctry:
            continue

        country_impacts = impacts.setdefault(country_code, {})

        # Columns matching no network level are skipped. Otherwise their values would be
        # stored under the level of a previously visited column (or fail on the first one).
        voltage_key = _network_voltage_key(process_part, country_code)
        if voltage_key is None:
            continue

        level_impacts = country_impacts.setdefault(voltage_key, {})
        for category, value in matrix_clean[col].items():
            level_impacts[category] = value

    ### Reorder the network levels of every country
    desired_order = ['High Voltage', 'Medium Voltage', 'Low Voltage', 'Infra PHS']
    for code in impacts:
        impacts[code] = {k: impacts[code][k] for k in desired_order if k in impacts[code]}

    if is_verbose:
        print(f"Extraction complete for: {list(impacts.keys())}")

    return impacts


def _network_voltage_key(process_part, country_code):
    """Return the network level label matching a process description, or None if none applies."""
    if "high voltage" in process_part:
        return "High Voltage"
    if "medium voltage" in process_part:
        return "Medium Voltage"
    if "low voltage" in process_part:
        return "Low Voltage"
    # Pumped storage infrastructure is only considered for CH
    if "infra at pumped storage" in process_part and country_code == "CH":
        return "Infra PHS"
    return None
# + ################################# # ################################ # Other from excel # ################################ # ################################ # -
def other_from_excel(mapping):
    """
    Load the mapping for 'Other' from an excel file (mapping).

    Reads the "ENTSOE_avg" sheet and returns its 'ENTSOE average mix' row
    as a one-row table whose index label is 'Mix_Other'.
    """
    ### Impact for production mix of 'other countries'
    # extract: second sheet row as header, sheet columns 2 to 6, the first of them as index
    sheet = pd.read_excel(mapping, sheet_name="ENTSOE_avg", header=1,
                          usecols=np.arange(2, 7), index_col=[0])

    # format: list-based selection keeps a DataFrame (not a Series)
    average_row = sheet.loc[['ENTSOE average mix'], :]
    return average_row.rename(index={'ENTSOE average mix': 'Mix_Other'})
# + ################################# # ################################ # Country from excel # ################################ # ################################ # -
def country_from_excel(mapping, place):
    """
    Load the mapping of a given country (place) from an excel file (mapping).

    Parameters
    ----------
    mapping: str
        path to file with the mapping
    place: str
        country tag, used both as the sheet name and as suffix of the row labels

    Returns
    -------
    pandas.DataFrame
        float32 table of impacts, one row per unit label. Labels are stripped from
        parentheses, suffixed with the country tag and use '_' instead of spaces.

    Raises
    ------
    ValueError
        if the sheet cannot be read (chained to the underlying error).
    IndexError
        if no column name of the sheet contains 'impact'.
    """
    try:  # test if the country is available in the mapping file
        d = pd.read_excel(mapping, sheet_name=place, index_col=[0])  # Read and get index col
    except Exception as e:
        # Chain the original error so the actual cause stays visible in the traceback
        raise ValueError(f"Mapping for {place} not available: {e} ") from e

    # Select last 'impact' column as key
    key = [k for k in d.columns if 'impact' in str(k).lower()][-1]

    # The first data row, from the key column onward, holds the final column names
    columns = d.loc[:, key:].iloc[0]
    # Columns whose name ends with 'KBOB' are not used
    columns = columns[columns.apply(lambda x: not str(x).endswith('KBOB'))]

    # Get only important data
    d = d.loc[:, columns.index].dropna(axis=0).rename(columns=columns.to_dict())

    # Select the correct indexes: no missing label, no 'sources entso-e' row
    to_drop = [k for k in d.index if 'sources entso-e' in str(k).lower()]
    d = d.loc[d.index.notnull()].drop(index=to_drop, errors='ignore')

    # Replace "-" with zeros.
    d = d.replace("-", 0).astype('float32')

    # Change indexes
    def _format_label(label):
        cleaned = label.replace('(', '').replace(')', '').replace(" Fos", ' fos') + f" {place}"
        return cleaned.replace(' ', '_').replace('__', '_')

    return d.rename({i: _format_label(i) for i in d.index}, axis=0).rename_axis("")
# + ################################# # ################################ # Residual from excel # ################################ # ################################ # -
def residual_from_excel(mapping, place):
    """
    Load impact data of the production residual.

    Parameters
    ----------
    mapping: str
        path to file with the mapping
    place: str
        country tag of the country, appended to the row labels

    Returns
    -------
    pandas.DataFrame
        float32 table with the impacts of the residual rows only (labels starting
        with 'Resid' in the "Residual" sheet). Labels use 'Residual' instead of
        'Residue', '_' instead of spaces, and end with ``_<place>``.

    Raises
    ------
    ValueError
        if the "Residual" sheet cannot be read (chained to the underlying error).
    IndexError
        if no column name of the sheet contains 'impact'.
    """
    try:  # test if the "Residual" sheet is available in the mapping file
        d = pd.read_excel(mapping, sheet_name="Residual", index_col=0)
    except Exception as e:
        # Chain the original error so the actual cause stays visible in the traceback
        raise ValueError(f" Residual not available: {e}") from e

    # Select last 'impact' column as key
    key = [k for k in d.columns if 'impact' in str(k).lower()][-1]

    # The first data row, from the key column onward, holds the final column names
    columns = d.loc[:, key:].iloc[0]
    # Columns whose name ends with 'KBOB' are not used
    columns = columns[columns.apply(lambda x: not str(x).endswith('KBOB'))]

    d = d.loc[:, columns.index].rename(columns=columns.to_dict()).rename_axis('')

    # Select the right indexes
    idx = pd.Series(d.index).apply(lambda x: str(x).startswith('Resid')).values
    d = d.loc[idx].astype('float32')

    # Rename indexes with the place & formatting
    return d.rename(index={i: (i.replace('Residue', 'Residual').replace(" ", "_") + f"_{place}")
                           for i in d.index})
# + ################################# # ################################ # Set constant impacts # ################################ # ################################ # -
def set_constant_impacts(impacts, constant):
    """
    Set the impacts to a constant value.

    Every row of `impacts` is replaced by `constant` through a row-wise ``apply``;
    the row labels of `impacts` are kept.
    """
    def _use_constant(_row):
        # The content of the row is ignored on purpose
        return constant

    return impacts.apply(_use_constant, axis=1)
# + ################################# # ################################ # Units from Mapping # ################################ # ################################ # -
def get_impact_units(mapping):
    """
    Read the units of impacts from the mapping.

    Reads the "ENTSOE_avg" sheet and returns its second data row as a Series
    named 'Units', every value converted to str with all spaces removed.
    """
    def _without_spaces(unit):
        return str(unit).replace(' ', '')

    # extract: same sheet layout as the one used for the 'Other' production mix
    sheet = pd.read_excel(mapping, sheet_name="ENTSOE_avg", header=1,
                          usecols=np.arange(2, 7), index_col=[0])

    units_row = sheet.iloc[1]
    return units_row.apply(_without_spaces).rename('Units')