Source code for meltPT.parse

"""
=====
parse
=====

Read data from a csv file.
"""

import warnings

import numpy as np
import pandas as pd

[docs]def parse_csv( infile, Ce_to_H2O=0., src_FeIII_totFe=0., src_Fo=0.9, min_MgO=0., param_co2=False): """ Read a csv and return a dataframe after some processing. Processing steps are: - check SiO2, MgO, and FexOx are specified; if not will crash - check other major elements are specified; if not will be set to zero - try to set some values via trace elements - redistribute Fe according to src_FeIII_totFe - add source forsterite numbers if not already specified - if desired, estimate CO2 from SiO2 - normalise major elements to 100% - reject samples with MgO less than some threshold. Parameters ---------- infile : str Path to a csv containing data to be read. Ce_to_H2O : float Ratio of Ce to H2O in mantle source. src_FeIII_totFe : float Ratio of Fe3+ to total Fe in the mantle source. src_Fo : float Forsterite number in the mantle source. min_MgO : float, optional Minimum amount of MgO in sample to be accepted. read_as_primary : bool If true, data from input_csv is assumed to be primary and backtracking is avoided. param_co2 : bool If true, CO2 is calculated from SiO2 concentration. Returns ------- df : pandas dataframe Dataframe containing processed data. """ # Read in file df = pd.read_csv(infile, delimiter=",") # Check for compulsory columns compulsory_cols = ['SiO2', 'MgO'] for col in compulsory_cols: if col not in df.columns: raise Exception("Input csv must contain a %s column." % col) if ("FeO" not in df.columns and "Fe2O3" not in df.columns and "FeO_tot" not in df.columns): raise Exception("Input csv must contain one of FeO, Fe2O3, FeO_tot columns.") # Replace empties and NaNs with zeros df = df.replace(r'^\s*$', 0., regex=True) df = df.replace(np.nan, 0., regex=True) # If major element columns do not exist in the input make them and # give them zeros for every row. major_columns = ['SiO2', 'TiO2', 'Al2O3', 'FeO', 'Fe2O3', 'MgO', 'CaO', 'Na2O', 'K2O', 'MnO', 'Cr2O3', 'P2O5', 'NiO', 'CoO', 'H2O', 'CO2'] for col in major_columns + ["FeO_tot"]: if col not in df.columns: message = "Input csv does not contain a %s column: we will try to fill it for you, or set it to zero." % col warnings.warn(message) df[col] = 0. # If not given compute some major oxides from trace element concentrations # df.loc[df['P2O5'] == 0, 'P2O5'] = df['P'] * 141.942524 / 2. / 10000. try: df.loc[df['NiO']==0., 'NiO'] = df['Ni'] * 1.2725 / 10000. except KeyError: pass try: df.loc[df['CoO']==0., 'CoO'] = df['Co'] * 1.2715 / 10000. except KeyError: pass try: df.loc[df['Cr2O3']==0., 'Cr2O3'] = df['Cr'] * 1.4616 / 10000. except KeyError: pass # Calculate H2O value if H2O value is zero # parameterizes water by converting Ce to H20 if (df['H2O'] == 0.).any(): try: df.loc[df['H2O']==0., 'H2O'] = df['Ce'] * Ce_to_H2O / 10000. except KeyError: message = "Some sample's H2O still zero after parameterization with Ce." warnings.warn(message) # Add chosen FeIII_totFe value if none are given if 'src_FeIII_totFe' not in df.columns: df['src_FeIII_totFe'] = src_FeIII_totFe df.loc[df['src_FeIII_totFe'] == 0, 'src_FeIII_totFe'] = src_FeIII_totFe df.loc[df['FeO_tot']==0., 'FeO_tot'] = df['FeO'] + (df['Fe2O3'] * (74.84 * 2.) / 159.69) # Calculate FeO and Fe2O3 based on FeIII_totFe # Only applicable if FeO and Fe2O3 not already stated. df.loc[(df['FeO']==0.) | (df['Fe2O3']==0.), 'FeO'] = df['FeO_tot'] * (1. - df['src_FeIII_totFe']) df.loc[(df['FeO']==0.) | (df['Fe2O3']==0.), 'Fe2O3'] = df['FeO_tot'] * df['src_FeIII_totFe'] * 159.69 / 71.84 / 2. # Add chosen Fo# if none are given if 'src_Fo' not in df.columns: df['src_Fo'] = src_Fo df.loc[df['src_Fo'] == 0, 'src_Fo'] = src_Fo # Calculate CO2 value if desired and CO2 value is zero # parameterize CO2 using Equation 8 of Sun & Dasgupta, 2020 if param_co2: df.loc[df['CO2'] == 0, 'CO2'] = 43.77 - 0.9 * df['SiO2'] # Calculate total & normalise df['Total'] = df[major_columns].sum(axis=1) for col in major_columns: df[col] = df[col] / df['Total'] * 100. df['Total'] = df[major_columns].sum(axis=1) # Filter dataset to only include usable samples # Remove lines with MgO < 8.5 wt% df = df.loc[(df['MgO'] > min_MgO)] return df