Source code for aiida_raspa.utils.base_parser

# -*- coding: utf-8 -*-
"""Basic raspa output parser."""
import re

from math import isnan, isinf

float_base = float  # pylint: disable=invalid-name


def float(number):  # pylint: disable=redefined-builtin
    """Convert ``number`` to a float, mapping non-finite results to ``None``.

    This deliberately shadows the builtin ``float`` inside this module, so every
    ``float(...)`` call below goes through this wrapper. The builtin remains
    available as ``float_base``.

    :param number: any value accepted by the builtin ``float``
    :return: the converted value, or ``None`` if it is NaN or +/- infinity
    :raises ValueError, TypeError: propagated from the builtin ``float`` conversion
    """
    number = float_base(number)
    if isnan(number) or isinf(number):
        return None
    return number


# Factor applied to energies read in Kelvin to express them in kJ/mol
# (exactly the same value as used by Raspa).
KELVIN_TO_KJ_PER_MOL = float(8.314464919 / 1000.0)

# Blocks of the first type
# --------------------------------------------------------------------------------------------
# Layout of each entry:
#   (text searched for in the output line,
#    prefix of the keys written to the result dictionary,
#    (value, unit, dev) token positions on the block's "Average" line,
#    number of lines skipped after the block has been parsed)
#
# Entries that are intentionally disabled:
#   ("Average temperature:", "temperature", (1, 2, 4), 0)               -> misleading property!
#   ("Average Pressure:", "pressure", (1, 2, 4), 0)                     -> misleading property!
#   ("Average Heat Capacity", "framework_heat_capacity", (1, 2, 4), 0)  -> misleading property!
#   ("Total energy:$", "total_energy", (1, 2, 4), 0)                    -> not important property!
BLOCK_1_LIST = [
    (
        "Average Volume:",
        "cell_volume",
        (1, 2, 4),
        0,
    ),
    (
        "Average Density:",
        "adsorbate_density",
        (1, 2, 4),
        0,
    ),
    (
        "Enthalpy of adsorption:",
        "enthalpy_of_adsorption",
        (1, 4, 3),
        4,
    ),
    (
        "Tail-correction energy:",
        "tail_correction_energy",
        (1, 2, 4),
        0,
    ),
]

# Box properties: (text searched for in the output line, property label).
BOX_PROP_LIST = [("Average Box-lengths:", "box")]


# pylint: disable=too-many-arguments
def parse_block1(flines, result_dict, prop, value=1, unit=2, dev=4):
    """Parse a block-averaged property and store its summary in ``result_dict``.

    Parses blocks that look as follows::

        Average Volume:
        =================
            Block[ 0]        12025.61229 [A^3]
            Block[ 1]        12025.61229 [A^3]
            Block[ 2]        12025.61229 [A^3]
            Block[ 3]        12025.61229 [A^3]
            Block[ 4]        12025.61229 [A^3]
            ------------------------------------------------------------------------------
            Average          12025.61229 [A^3] +/-            0.00000 [A^3]

    Lines are consumed from ``flines`` up to and including the first one that
    contains ``'Average'``; that line is split on whitespace and three of its
    tokens are stored. If no such line is found the iterator is exhausted and
    ``result_dict`` is left untouched.

    :param flines: iterator over the lines following the block header
    :param result_dict: dictionary updated in place with ``<prop>_average``,
        ``<prop>_unit`` and ``<prop>_dev``
    :param prop: prefix of the keys written to ``result_dict``
    :param value: index of the token holding the average value
    :param unit: index of the token holding the unit (brackets are stripped)
    :param dev: index of the token holding the deviation
    """
    for line in flines:
        if 'Average' in line:
            # Split once and reuse the tokens for all three entries.
            tokens = line.split()
            result_dict[prop + '_average'] = float(tokens[value])
            result_dict[prop + '_unit'] = re.sub(r"[{}()\[\]]", '', tokens[unit])
            result_dict[prop + '_dev'] = float(tokens[dev])
            break


# manage energy reading
# --------------------------------------------------------------------------------------------
# Entries read from the "Current Energy Status" sections:
#   (text searched for in the output line, interaction pair, energy contribution)
ENERGY_CURRENT_LIST = [
    # host/adsorbate interactions
    ("Host/Adsorbate energy:", "host/ads", "tot"),
    ("Host/Adsorbate VDW energy:", "host/ads", "vdw"),
    ("Host/Adsorbate Coulomb energy:", "host/ads", "coulomb"),
    # adsorbate/adsorbate interactions
    ("Adsorbate/Adsorbate energy:", "ads/ads", "tot"),
    ("Adsorbate/Adsorbate VDW energy:", "ads/ads", "vdw"),
    ("Adsorbate/Adsorbate Coulomb energy:", "ads/ads", "coulomb"),
]

# Headers of the averaged energy blocks:
#   (text searched for in the output line, interaction pair)
ENERGY_AVERAGE_LIST = [
    ("Average Adsorbate-Adsorbate energy:", "ads/ads"),
    ("Average Host-Adsorbate energy:", "host/ads"),
]


def parse_block_energy(flines, res_dict, prop):
    """Parse an averaged energy block and store it, converted with ``KELVIN_TO_KJ_PER_MOL``.

    Parse block that looks as follows::

        Average Adsorbate-Adsorbate energy:
        ===================================
            Block[ 0] -443.23204         Van der Waals: -443.23204         Coulomb: 0.00000            [K]
            Block[ 1] -588.20205         Van der Waals: -588.20205         Coulomb: 0.00000            [K]
            Block[ 2] -538.43355         Van der Waals: -538.43355         Coulomb: 0.00000            [K]
            Block[ 3] -530.00960         Van der Waals: -530.00960         Coulomb: 0.00000            [K]
            Block[ 4] -484.15106         Van der Waals: -484.15106         Coulomb: 0.00000            [K]
            ------------------------------------------------------------------------------
            Average   -516.80566         Van der Waals: -516.805659        Coulomb: 0.00000            [K]
                  +/- 98.86943                      +/- 98.869430               +/- 0.00000            [K]

    Lines are consumed from ``flines`` until the first one containing ``'+/-'``
    has been processed. The keys written are ``energy_<prop>_<term>_average``
    and ``energy_<prop>_<term>_dev`` with ``<term>`` in ``tot``, ``vdw``,
    ``coulomb``.

    :param flines: iterator over the lines following the block header
    :param res_dict: dictionary updated in place
    :param prop: interaction label inserted in the keys (e.g. ``"ads/ads"``)
    """

    def to_kj_per_mol(token):
        """Convert a token in K to kJ/mol; ``None`` from ``float`` is passed through."""
        # The module-level ``float`` yields None for nan/inf; multiplying None
        # would raise a TypeError, so propagate None instead.
        energy = float(token)
        return None if energy is None else energy * KELVIN_TO_KJ_PER_MOL

    for line in flines:
        if 'Average' in line:
            tokens = line.split()
            res_dict["energy_{}_tot_average".format(prop)] = to_kj_per_mol(tokens[1])
            res_dict["energy_{}_vdw_average".format(prop)] = to_kj_per_mol(tokens[5])
            res_dict["energy_{}_coulomb_average".format(prop)] = to_kj_per_mol(tokens[7])
        if '+/-' in line:
            tokens = line.split()
            res_dict["energy_{}_tot_dev".format(prop)] = to_kj_per_mol(tokens[1])
            res_dict["energy_{}_vdw_dev".format(prop)] = to_kj_per_mol(tokens[3])
            res_dict["energy_{}_coulomb_dev".format(prop)] = to_kj_per_mol(tokens[5])
            # The deviation line closes the block.
            return


# manage lines with components
# --------------------------------------------------------------------------------------------
# Per-component summary lines: (text searched for in the output line, result key prefix).
# The whitespace inside the search strings is significant.
LINES_WITH_COMPONENT_LIST = [
    (
        " Average Widom Rosenbluth-weight:",
        "widom_rosenbluth_factor",
    ),
    (
        " Average chemical potential: ",
        "chemical_potential",
    ),
    (
        " Average Henry coefficient: ",
        "henry_coefficient",
    ),
    (
        " Average  <U_gh>_1-<U_h>_0:",
        "adsorption_energy_widom",
    ),
]


def parse_lines_with_component(res_components, components, line, prop):
    """Store the average, deviation and unit found on a per-component line.

    For every component whose name appears in ``line`` enclosed in square
    brackets, the whitespace-separated tokens of the line are read from the
    end: the last one is the unit (brackets stripped), the second to last the
    deviation and the fourth from last the average.

    :param res_components: list of per-component result dictionaries, updated
        in place; indexed in the same order as ``components``
    :param components: list of component names
    :param line: the output line to analyse
    :param prop: prefix of the keys written (``<prop>_unit``, ``<prop>_dev``,
        ``<prop>_average``)
    """
    # The tokens do not depend on the component, so split only once.
    words = line.split()
    for i, component in enumerate(components):
        if '[' + component + ']' in line:
            res_components[i][prop + '_unit'] = re.sub(r'[{}()\[\]]', '', words[-1])
            res_components[i][prop + '_dev'] = float(words[-2])
            res_components[i][prop + '_average'] = float(words[-4])

# pylint: disable=too-many-locals, too-many-arguments, too-many-statements, too-many-branches
def parse_base_output(output_abs_path, system_name, ncomponents):
    """Parse a RASPA output file into general and per-component results.

    The file is read sequentially in four consecutive parts whose start/end
    markers are documented inline, then re-read once to collect warnings.

    :param output_abs_path: path of the RASPA output file
    :param system_name: label stored together with every collected warning line
    :param ncomponents: number of components for which results are collected
    :return: tuple ``(results, warnings)``; ``results`` is
        ``{"general": {...}, "components": {<component name>: {...}}}`` and
        ``warnings`` is a list of ``(system_name, line)`` tuples, one per distinct
        line containing ``"WARNING"``, in order of first appearance
    :raises KeyError: if for some component the ``henry_coefficient``,
        ``widom_rosenbluth_factor`` or ``chemical_potential`` deviation was not
        found in the file
    :raises StopIteration: if the file ends while lines of an averaged block
        are being skipped with ``next``
    """
    res_per_component = [{} for _ in range(ncomponents)]
    # 'exceeded_walltime' is only initialised here; this function never sets it to True.
    result_dict = {'exceeded_walltime': False}

    with open(output_abs_path, "r") as fobj:

        # 1st parsing part: input settings
        # --------------------------------
        # from: start of file
        # to: "Current (initial full energy) Energy Status"

        icomponent = 0
        component_names = []
        # With ncomponents == 0 there is no dictionary to fill: use a throw-away
        # one instead of failing on res_per_component[0].
        res_cmp = res_per_component[0] if res_per_component else {}
        for line in fobj:
            if "Component" in line and "molecule)" in line:
                component_names.append(line.split()[2][1:-1])
                if "(Adsorbate" in line:
                    res_cmp['molecule_type'] = 'adsorbate'
                elif "(Cation" in line:
                    res_cmp['molecule_type'] = 'cation'

            # Consider to change it with parse_line()
            if "Conversion factor molecules/unit cell -> mol/kg:" in line:
                res_cmp['conversion_factor_molec_uc_to_mol_kg'] = float(line.split()[6])
                res_cmp['conversion_factor_molec_uc_to_mol_kg_unit'] = "(mol/kg)/(molec/uc)"
            # this line was corrected in Raspa's commit c1ad4de (Nov19), since "gr/gr" should read "mg/g"
            if "Conversion factor molecules/unit cell -> gr/gr:" in line \
            or "Conversion factor molecules/unit cell -> mg/g:" in line:
                res_cmp['conversion_factor_molec_uc_to_mg_g'] = float(line.split()[6])
                res_cmp['conversion_factor_molec_uc_to_mg_g_unit'] = "(mg/g)/(molec/uc)"
            if "Conversion factor molecules/unit cell -> cm^3 STP/gr:" in line:
                res_cmp['conversion_factor_molec_uc_to_cm3stp_gr'] = float(line.split()[7])
                res_cmp['conversion_factor_molec_uc_to_cm3stp_gr_unit'] = "(cm^3_STP/gr)/(molec/uc)"
            if "Conversion factor molecules/unit cell -> cm^3 STP/cm^3:" in line:
                res_cmp['conversion_factor_molec_uc_to_cm3stp_cm3'] = float(line.split()[7])
                res_cmp['conversion_factor_molec_uc_to_cm3stp_cm3_unit'] = "(cm^3_STP/cm^3)/(molec/uc)"
            if "MolFraction:" in line:
                res_cmp['mol_fraction'] = float(line.split()[1])
                res_cmp['mol_fraction_unit'] = "-"
            if "Partial pressure:" in line:
                res_cmp['partial_pressure'] = float(line.split()[2])
                res_cmp['partial_pressure_unit'] = "Pa"
            if "Partial fugacity:" in line:
                res_cmp['partial_fugacity'] = float(line.split()[2])
                res_cmp['partial_fugacity_unit'] = "Pa"
                # The fugacity line is the last one read for a component: move to the next dictionary.
                icomponent += 1
                if icomponent < ncomponents:
                    res_cmp = res_per_component[icomponent]
            if "Framework Density" in line:
                # NOTE(review): unlike the other quantities this value is stored as the raw
                # string token, not converted with float() -- confirm whether callers rely on it.
                result_dict['framework_density'] = line.split()[2]
                result_dict['framework_density_unit'] = re.sub(r'[{}()\[\]]', '', line.split()[3])
            if "Current (initial full energy) Energy Status" in line:
                break

        # 2nd parsing part: initial and final configurations
        # --------------------------------------------------
        # from: "Current (initial full energy) Energy Status"
        # to: "Average properties of the system"

        reading = 'initial'
        result_dict['energy_unit'] = 'kJ/mol'

        for line in fobj:
            # Understand if it is the initial or final "Current Energy Status" section
            if "Current (full final energy) Energy Status" in line:
                reading = 'final'

            # Read the entries of "Current Energy Status" section
            if reading:
                for parse in ENERGY_CURRENT_LIST:
                    if parse[0] in line:
                        # The module-level float() returns None for nan/inf: propagate None
                        # instead of failing on the multiplication.
                        energy = float(line.split()[-1])
                        if energy is not None:
                            energy = energy * KELVIN_TO_KJ_PER_MOL
                        result_dict['energy_{}_{}_{}'.format(parse[1], parse[2], reading)] = energy
                        # The ads/ads Coulomb term is the last entry of a section: stop reading
                        # until the next section header is met.
                        if parse[1] == "ads/ads" and parse[2] == "coulomb":
                            reading = None

            if "Average properties of the system" in line:
                break

        # 3rd parsing part: average system properties
        # --------------------------------------------------
        # from: "Average properties of the system"
        # to: "Number of molecules"

        for line in fobj:
            # NOTE: the helpers below consume lines from fobj and `line` is re-bound while
            # skipping; the remaining checks of this iteration then run on the most recently
            # read line (no early jump to the next iteration is performed).
            for parse in BLOCK_1_LIST:
                if parse[0] in line:
                    parse_block1(fobj, result_dict, parse[1], *parse[2])
                    # I assume here that properties per component are present further in the output file.
                    # so I need to skip some lines:
                    skip_nlines_after = parse[3]
                    while skip_nlines_after > 0:
                        line = next(fobj)
                        skip_nlines_after -= 1
                    for i, cmpnt in enumerate(component_names):
                        # The order of properties per molecule is the same as the order of molecules in the
                        # input file. So if component name was not found in the next line, I break the loop
                        # immediately as there is no reason to continue it
                        line = next(fobj)
                        if cmpnt in line:
                            parse_block1(fobj, res_per_component[i], parse[1], *parse[2])
                        else:
                            break
                        skip_nlines_after = parse[3]
                        while skip_nlines_after > 0:
                            line = next(fobj)
                            skip_nlines_after -= 1
            for parse in ENERGY_AVERAGE_LIST:
                if parse[0] in line:
                    parse_block_energy(fobj, result_dict, prop=parse[1])
            for parse in BOX_PROP_LIST:
                if parse[0] in line:
                    # parse three cell vectors
                    parse_block1(fobj, result_dict, prop='box_ax', value=2, unit=3, dev=5)
                    parse_block1(fobj, result_dict, prop='box_by', value=2, unit=3, dev=5)
                    parse_block1(fobj, result_dict, prop='box_cz', value=2, unit=3, dev=5)
                    # parse angles between the cell vectors
                    parse_block1(fobj, result_dict, prop='box_alpha', value=3, unit=4, dev=6)
                    parse_block1(fobj, result_dict, prop='box_beta', value=3, unit=4, dev=6)
                    parse_block1(fobj, result_dict, prop='box_gamma', value=3, unit=4, dev=6)

            if "Number of molecules:" in line:
                break

        # 4th parsing part: average molecule properties
        # --------------------------------------------------
        # from: "Number of molecules"
        # to: end of file

        icomponent = 0
        for line in fobj:
            # Consider to change it with parse_line?
            if 'Average loading absolute [molecules/unit cell]' in line:
                res_per_component[icomponent]['loading_absolute_average'] = float(line.split()[5])
                res_per_component[icomponent]['loading_absolute_dev'] = float(line.split()[7])
                res_per_component[icomponent]['loading_absolute_unit'] = 'molecules/unit cell'
            elif 'Average loading excess [molecules/unit cell]' in line:
                res_per_component[icomponent]['loading_excess_average'] = float(line.split()[5])
                res_per_component[icomponent]['loading_excess_dev'] = float(line.split()[7])
                res_per_component[icomponent]['loading_excess_unit'] = 'molecules/unit cell'
                # The excess loading closes the data of a component.
                icomponent += 1
            if icomponent >= ncomponents:
                break

        for line in fobj:
            for to_parse in LINES_WITH_COMPONENT_LIST:
                if to_parse[0] in line:
                    parse_lines_with_component(res_per_component, component_names, line, to_parse[1])

        # Assigning to None all the quantities that are meaningless if not running a Widom insertion calculation
        for res_comp in res_per_component:
            for prop in ["henry_coefficient", "widom_rosenbluth_factor", "chemical_potential"]:
                if res_comp["{}_dev".format(prop)] == 0.0:
                    res_comp["{}_average".format(prop)] = None
                    res_comp["{}_dev".format(prop)] = None

            # The section "Adsorption energy from Widom-insertion" is not showing in the output if no widom is performed
            if "adsorption_energy_widom_average" not in res_comp:
                res_comp["adsorption_energy_widom_unit"] = "kJ/mol"
                res_comp["adsorption_energy_widom_dev"] = None
                res_comp["adsorption_energy_widom_average"] = None

    return_dictionary = {"general": result_dict, "components": dict(zip(component_names, res_per_component))}

    # Parsing all the warnings that are printed in the output file, avoiding redundancy.
    # system_name is the same for every entry, so uniqueness is decided on the line alone;
    # the set keeps the membership test cheap while the list preserves the order.
    warnings = []
    seen_lines = set()
    with open(output_abs_path, "r") as fobj:
        for line in fobj:
            if "WARNING" in line and line not in seen_lines:
                seen_lines.add(line)
                warnings.append((system_name, line))
    return return_dictionary, warnings