# Source code for meltPT.parse

"""
=====
parse
=====

Read data from a csv file.
"""

import warnings

import numpy as np
import pandas as pd

def parse_csv(
        infile, Ce_to_H2O=0., src_FeIII_totFe=0., src_Fo=0.9, min_MgO=0.,
        param_co2=False):
    """
    Read a csv and return a dataframe after some processing.

    Processing steps are:
    - check SiO2, MgO, and at least one of FeO, Fe2O3, FeO_tot are
      specified; if not an exception is raised
    - replace empty / NaN cells with zero
    - check other major elements are specified; if not they are set to zero
    - try to fill zero NiO, CoO, Cr2O3 and H2O from Ni, Co, Cr and Ce columns
    - redistribute Fe between FeO and Fe2O3 according to src_FeIII_totFe
    - add source forsterite numbers if not already specified
    - if desired, estimate CO2 from SiO2
    - normalise major elements to 100%
    - reject samples whose normalised MgO is not greater than min_MgO.

    Parameters
    ----------
    infile : str
        Path to a csv containing data to be read (passed straight to
        ``pandas.read_csv``).
    Ce_to_H2O : float
        Factor applied to the Ce column (then divided by 10000) to estimate
        H2O for rows whose H2O is zero.
    src_FeIII_totFe : float
        Ratio of Fe3+ to total Fe in the mantle source. Used for rows where
        the csv gives no (or a zero) src_FeIII_totFe value.
    src_Fo : float
        Forsterite number in the mantle source. Used for rows where the csv
        gives no (or a zero) src_Fo value.
    min_MgO : float, optional
        Rows are kept only if their normalised MgO exceeds this value.
    param_co2 : bool
        If true, CO2 is calculated from SiO2 concentration for rows whose
        CO2 is zero.

    Returns
    -------
    df : pandas dataframe
        Dataframe containing processed data. In addition to the input
        columns it holds every major-element column, FeO_tot,
        src_FeIII_totFe, src_Fo and Total.

    Raises
    ------
    Exception
        If the csv lacks a SiO2 or MgO column, or contains none of the
        FeO, Fe2O3, FeO_tot columns.

    Warns
    -----
    UserWarning
        For each major-element column missing from the csv, and when H2O
        is zero for some rows but no Ce column is available to estimate it.
    """

    # Molar masses used to convert between FeO and Fe2O3. The same FeO value
    # must be used in both directions so that the conversions round-trip.
    feo_mass = 71.84
    fe2o3_mass = 159.69

    # Read in file
    df = pd.read_csv(infile, delimiter=",")

    # Check for compulsory columns
    for col in ('SiO2', 'MgO'):
        if col not in df.columns:
            raise Exception("Input csv must contain a %s column." % col)
    if not any(col in df.columns for col in ("FeO", "Fe2O3", "FeO_tot")):
        raise Exception("Input csv must contain one of FeO, Fe2O3, FeO_tot columns.")

    # Replace empties and NaNs with zeros
    df = df.replace(r'^\s*$', 0., regex=True)
    df = df.replace(np.nan, 0., regex=True)

    # If major element columns do not exist in the input make them and
    # give them zeros for every row.
    major_columns = ['SiO2', 'TiO2', 'Al2O3', 'FeO', 'Fe2O3', 'MgO', 'CaO',
                     'Na2O', 'K2O', 'MnO', 'Cr2O3', 'P2O5', 'NiO', 'CoO',
                     'H2O', 'CO2']
    for col in major_columns + ["FeO_tot"]:
        if col not in df.columns:
            message = "Input csv does not contain a %s column: we will try to fill it for you, or set it to zero." % col
            warnings.warn(message)
            df[col] = 0.

    # If not given, compute some major oxides from trace element columns.
    # NOTE(review): the factors look like element-to-oxide mass conversions
    # and the 10000 like a ppm -> wt% conversion -- confirm units.
    trace_to_oxide = (
        ('NiO', 'Ni', 1.2725),
        ('CoO', 'Co', 1.2715),
        ('Cr2O3', 'Cr', 1.4616),
    )
    for oxide, element, factor in trace_to_oxide:
        if element in df.columns:
            df.loc[df[oxide] == 0., oxide] = df[element] * factor / 10000.

    # Calculate H2O value if H2O value is zero
    # parameterizes water by converting Ce to H2O
    if (df['H2O'] == 0.).any():
        if 'Ce' in df.columns:
            df.loc[df['H2O'] == 0., 'H2O'] = df['Ce'] * Ce_to_H2O / 10000.
        else:
            message = "Some sample's H2O still zero after parameterization with Ce."
            warnings.warn(message)

    # Add chosen FeIII_totFe value if none are given
    if 'src_FeIII_totFe' not in df.columns:
        df['src_FeIII_totFe'] = src_FeIII_totFe
    df.loc[df['src_FeIII_totFe'] == 0, 'src_FeIII_totFe'] = src_FeIII_totFe

    # Where total iron is not given, express all iron as FeO:
    # one Fe2O3 is equivalent to two FeO.
    df.loc[df['FeO_tot'] == 0., 'FeO_tot'] = (
        df['FeO'] + (df['Fe2O3'] * (feo_mass * 2.) / fe2o3_mass))

    # Calculate FeO and Fe2O3 based on FeIII_totFe.
    # Only applicable if FeO and Fe2O3 are not both already stated.
    # The row selection is evaluated once, before either column is written,
    # so that FeO and Fe2O3 are always updated together for the same rows.
    needs_split = (df['FeO'] == 0.) | (df['Fe2O3'] == 0.)
    df.loc[needs_split, 'FeO'] = df['FeO_tot'] * (1. - df['src_FeIII_totFe'])
    df.loc[needs_split, 'Fe2O3'] = (
        df['FeO_tot'] * df['src_FeIII_totFe'] * fe2o3_mass / feo_mass / 2.)

    # Add chosen Fo# if none are given
    if 'src_Fo' not in df.columns:
        df['src_Fo'] = src_Fo
    df.loc[df['src_Fo'] == 0, 'src_Fo'] = src_Fo

    # Calculate CO2 value if desired and CO2 value is zero
    # parameterize CO2 using Equation 8 of Sun & Dasgupta, 2020
    if param_co2:
        df.loc[df['CO2'] == 0, 'CO2'] = 43.77 - 0.9 * df['SiO2']

    # Calculate total & normalise
    df['Total'] = df[major_columns].sum(axis=1)
    for col in major_columns:
        df[col] = df[col] / df['Total'] * 100.
    df['Total'] = df[major_columns].sum(axis=1)

    # Filter dataset to only include usable samples:
    # keep rows whose normalised MgO exceeds min_MgO.
    df = df.loc[(df['MgO'] > min_MgO)]

    return df