# Source code for aiida_raspa.utils.base_parser

# -*- coding: utf-8 -*-
"""Basic raspa output parser."""
import re

from math import isnan, isinf

float_base = float  # pylint: disable=invalid-name


def float(number):  # pylint: disable=redefined-builtin
    """Convert ``number`` to a builtin float, mapping non-finite results to ``None``.

    The builtin ``float`` is kept available as ``float_base``; this wrapper
    intentionally shadows the builtin name for the rest of the module.

    :param number: any value accepted by the builtin ``float`` constructor.
    :return: the converted value, or ``None`` when the result is NaN or +/-infinity.
    :raises ValueError, TypeError: propagated unchanged from the builtin conversion.
    """
    number = float_base(number)
    if isnan(number) or isinf(number):
        return None
    return number
KELVIN_TO_KJ_PER_MOL = float(8.314464919 / 1000.0)  # exactly the same as Raspa

# manage block of the first type
# --------------------------------------------------------------------------------------------
# Each entry is (header text searched in a line, result key prefix,
# (value, unit, dev) token indices passed to parse_block1,
# number of lines to skip after the block has been parsed).
BLOCK_1_LIST = [
    #("Average temperature:", "temperature", (1, 2, 4), 0), # misleading property!
    #("Average Pressure:", "pressure", (1, 2, 4), 0), # misleading property!
    ("Average Volume:", "cell_volume", (1, 2, 4), 0),
    ("Average Density:", "adsorbate_density", (1, 2, 4), 0),
    #("Average Heat Capacity", "framework_heat_capacity", (1, 2, 4), 0), # misleading property!
    ("Enthalpy of adsorption:", "enthalpy_of_adsorption", (1, 4, 3), 4),
    ("Tail-correction energy:", "tail_correction_energy", (1, 2, 4), 0)
    #("Total energy:$", "total_energy", (1, 2, 4), 0), # not important property!
]

# block of box properties.
# Each entry is (header text searched in a line, result key prefix).
BOX_PROP_LIST = [
    ("Average Box-lengths:", 'box'),
]


# pylint: disable=too-many-arguments
def parse_block1(flines, result_dict, prop, value=1, unit=2, dev=4):
    """Parse the summary line of a block-averaged property.

    Parses blocks that look as follows::

        Average Volume:
        =================
            Block[ 0]        12025.61229 [A^3]
            Block[ 1]        12025.61229 [A^3]
            Block[ 2]        12025.61229 [A^3]
            Block[ 3]        12025.61229 [A^3]
            Block[ 4]        12025.61229 [A^3]
            ------------------------------------------------------------------------------
            Average          12025.61229 [A^3] +/-            0.00000 [A^3]

    Lines are consumed from ``flines`` up to and including the first one that
    contains ``Average``; if no such line exists the iterator is exhausted and
    ``result_dict`` is left untouched.

    :param flines: iterable of lines positioned after the block header.
    :param result_dict: dictionary that receives ``<prop>_average``,
        ``<prop>_unit`` and ``<prop>_dev``.
    :param prop: key prefix used for the three stored entries.
    :param value: index of the whitespace-separated token holding the average.
    :param unit: index of the token holding the unit; brackets, braces and
        parentheses are stripped from it.
    :param dev: index of the token holding the deviation.
    :raises IndexError: if the summary line has fewer tokens than the indices require.
    """
    for line in flines:
        if 'Average' in line:
            # Split once and reuse the tokens for all three entries.
            words = line.split()
            result_dict[prop + '_average'] = float(words[value])
            result_dict[prop + '_unit'] = re.sub(r"[{}()\[\]]", '', words[unit])
            result_dict[prop + '_dev'] = float(words[dev])
            break
# manage energy reading
# --------------------------------------------------------------------------------------------
# Each entry is (text searched in a line, interaction label, energy term label);
# the two labels are combined into the result key.
ENERGY_CURRENT_LIST = [
    ("Host/Adsorbate energy:", "host/ads", "tot"),
    ("Host/Adsorbate VDW energy:", "host/ads", "vdw"),
    ("Host/Adsorbate Coulomb energy:", "host/ads", "coulomb"),
    ("Adsorbate/Adsorbate energy:", "ads/ads", "tot"),
    ("Adsorbate/Adsorbate VDW energy:", "ads/ads", "vdw"),
    ("Adsorbate/Adsorbate Coulomb energy:", "ads/ads", "coulomb"),
]

# Each entry is (header text searched in a line, interaction label).
ENERGY_AVERAGE_LIST = [
    ("Average Adsorbate-Adsorbate energy:", "ads/ads"),
    ("Average Host-Adsorbate energy:", "host/ads"),
]
def parse_block_energy(flines, res_dict, prop):
    """Parse the summary lines of a block-averaged energy section.

    Parse block that looks as follows::

        Average Adsorbate-Adsorbate energy:
        ===================================
            Block[ 0] -443.23204   Van der Waals: -443.23204   Coulomb: 0.00000 [K]
            Block[ 1] -588.20205   Van der Waals: -588.20205   Coulomb: 0.00000 [K]
            Block[ 2] -538.43355   Van der Waals: -538.43355   Coulomb: 0.00000 [K]
            Block[ 3] -530.00960   Van der Waals: -530.00960   Coulomb: 0.00000 [K]
            Block[ 4] -484.15106   Van der Waals: -484.15106   Coulomb: 0.00000 [K]
            ------------------------------------------------------------------------------
            Average   -516.80566   Van der Waals: -516.805659  Coulomb: 0.00000 [K]
                  +/- 98.86943                +/- 98.869430        +/- 0.00000 [K]

    Every parsed number is multiplied by ``KELVIN_TO_KJ_PER_MOL`` before being
    stored. Lines are consumed from ``flines`` up to and including the first one
    that contains ``+/-``.

    :param flines: iterable of lines positioned after the section header.
    :param res_dict: dictionary that receives ``energy_<prop>_{tot,vdw,coulomb}_average``
        and ``energy_<prop>_{tot,vdw,coulomb}_dev``.
    :param prop: interaction label inserted into the result keys.
    :raises IndexError: if a summary line has fewer tokens than expected.
    """

    def to_kj_per_mol(text):
        """Convert one token to kJ/mol, keeping ``None`` for NaN/inf values."""
        # The module-level ``float`` returns None for NaN/inf; multiplying that
        # by the conversion factor would raise TypeError, so pass None through.
        energy = float(text)
        return None if energy is None else energy * KELVIN_TO_KJ_PER_MOL

    for line in flines:
        words = line.split()
        if 'Average' in line:
            res_dict["energy_{}_tot_average".format(prop)] = to_kj_per_mol(words[1])
            res_dict["energy_{}_vdw_average".format(prop)] = to_kj_per_mol(words[5])
            res_dict["energy_{}_coulomb_average".format(prop)] = to_kj_per_mol(words[7])
        if '+/-' in line:
            res_dict["energy_{}_tot_dev".format(prop)] = to_kj_per_mol(words[1])
            res_dict["energy_{}_vdw_dev".format(prop)] = to_kj_per_mol(words[3])
            res_dict["energy_{}_coulomb_dev".format(prop)] = to_kj_per_mol(words[5])
            # The deviation line closes the section; stop consuming lines here.
            return
# manage lines with components
# --------------------------------------------------------------------------------------------
# Each entry is (text searched in a line, result key prefix).
# NOTE(review): matching is a plain substring test, so the whitespace inside
# these literals is significant - confirm it against real RASPA output.
LINES_WITH_COMPONENT_LIST = [
    (" Average Widom Rosenbluth-weight:", "widom_rosenbluth_factor"),
    (" Average chemical potential: ", "chemical_potential"),
    (" Average Henry coefficient: ", "henry_coefficient"),
    (" Average <U_gh>_1-<U_h>_0:", "adsorption_energy_widom"),
]
def parse_lines_with_component(res_components, components, line, prop):
    """Parse a single line that reports a property for one named component.

    The component is identified by the presence of ``[<name>]`` in ``line``.
    For every matching component the last token is stored as the unit (with
    brackets, braces and parentheses stripped), the second-to-last token as the
    deviation and the fourth-to-last token as the average.

    :param res_components: sequence of per-component result dictionaries,
        indexed in the same order as ``components``.
    :param components: sequence of component names.
    :param line: the output line to analyse.
    :param prop: key prefix used for the ``_unit``, ``_dev`` and ``_average`` entries.
    :raises IndexError: if a matching line has fewer than four tokens.
    """
    # The tokens do not depend on the component, so split the line only once.
    words = line.split()
    for i, component in enumerate(components):
        if '[' + component + ']' in line:
            res_components[i][prop + '_unit'] = re.sub(r'[{}()\[\]]', '', words[-1])
            res_components[i][prop + '_dev'] = float(words[-2])
            res_components[i][prop + '_average'] = float(words[-4])
# pylint: disable=too-many-locals, too-many-arguments, too-many-statements, too-many-branches
def parse_base_output(output_abs_path, system_name, ncomponents):
    """Parse RASPA output file: it is divided in different parts, whose start/end is carefully documented.

    The file is read top to bottom in four consecutive parts, each one ending at
    a marker line, and is then re-read once to collect the warnings.

    :param output_abs_path: path of the RASPA output file to open.
    :param system_name: value stored as first item of every warning tuple.
    :param ncomponents: number of per-component result dictionaries to fill.
    :return: tuple ``(return_dictionary, warnings)`` where ``return_dictionary``
        has the keys ``"general"`` (system-wide results) and ``"components"``
        (results keyed by component name), and ``warnings`` is a list of unique
        ``(system_name, line)`` tuples in order of first appearance.
    :raises IndexError: if ``ncomponents`` is 0 or a parsed line has fewer tokens than expected.
    :raises KeyError: if a Widom-related ``*_dev`` entry was never found for a component.
    :raises StopIteration: if the file ends while lines are being skipped inside an averaged block.
    """
    warnings = []
    res_per_component = [{} for _ in range(ncomponents)]
    result_dict = {'exceeded_walltime': False}

    with open(output_abs_path, "r") as fobj:
        # 1st parsing part: input settings
        # --------------------------------
        # from: start of file
        # to: "Current (initial full energy) Energy Status"
        icomponent = 0
        component_names = []
        res_cmp = res_per_component[0]
        for line in fobj:
            if "Component" in line and "molecule)" in line:
                # Third token is "[name]"; strip the surrounding brackets.
                component_names.append(line.split()[2][1:-1])
                if "(Adsorbate" in line:
                    res_cmp['molecule_type'] = 'adsorbate'
                elif "(Cation" in line:
                    res_cmp['molecule_type'] = 'cation'

            # Consider to change it with parse_line()
            if "Conversion factor molecules/unit cell -> mol/kg:" in line:
                res_cmp['conversion_factor_molec_uc_to_mol_kg'] = float(line.split()[6])
                res_cmp['conversion_factor_molec_uc_to_mol_kg_unit'] = "(mol/kg)/(molec/uc)"
            # this line was corrected in Raspa's commit c1ad4de (Nov19), since "gr/gr" should read "mg/g"
            if "Conversion factor molecules/unit cell -> gr/gr:" in line \
            or "Conversion factor molecules/unit cell -> mg/g:" in line:
                res_cmp['conversion_factor_molec_uc_to_mg_g'] = float(line.split()[6])
                res_cmp['conversion_factor_molec_uc_to_mg_g_unit'] = "(mg/g)/(molec/uc)"
            if "Conversion factor molecules/unit cell -> cm^3 STP/gr:" in line:
                res_cmp['conversion_factor_molec_uc_to_cm3stp_gr'] = float(line.split()[7])
                res_cmp['conversion_factor_molec_uc_to_cm3stp_gr_unit'] = "(cm^3_STP/gr)/(molec/uc)"
            if "Conversion factor molecules/unit cell -> cm^3 STP/cm^3:" in line:
                res_cmp['conversion_factor_molec_uc_to_cm3stp_cm3'] = float(line.split()[7])
                res_cmp['conversion_factor_molec_uc_to_cm3stp_cm3_unit'] = "(cm^3_STP/cm^3)/(molec/uc)"
            if "MolFraction:" in line:
                res_cmp['mol_fraction'] = float(line.split()[1])
                res_cmp['mol_fraction_unit'] = "-"
            if "Partial pressure:" in line:
                res_cmp['partial_pressure'] = float(line.split()[2])
                res_cmp['partial_pressure_unit'] = "Pa"
            if "Partial fugacity:" in line:
                res_cmp['partial_fugacity'] = float(line.split()[2])
                res_cmp['partial_fugacity_unit'] = "Pa"
                # The fugacity line is treated as the end of a component's
                # settings: switch to the next component dictionary, if any.
                icomponent += 1
                if icomponent < ncomponents:
                    res_cmp = res_per_component[icomponent]
            if "Framework Density" in line:
                # NOTE(review): stored as the raw string token, unlike the other
                # numeric entries - confirm that callers expect a string here.
                result_dict['framework_density'] = line.split()[2]
                result_dict['framework_density_unit'] = re.sub(r'[{}()\[\]]', '', line.split()[3])

            if "Current (initial full energy) Energy Status" in line:
                break

        # 2nd parsing part: initial and final configurations
        # --------------------------------------------------
        # from: "Current (initial full energy) Energy Status"
        # to: "Average properties of the system"
        reading = 'initial'
        result_dict['energy_unit'] = 'kJ/mol'

        for line in fobj:
            # Understand if it is the initial or final "Current Energy Status" section
            if "Current (full final energy) Energy Status" in line:
                reading = 'final'

            # Read the entries of "Current Energy Status" section
            if reading:
                for parse in ENERGY_CURRENT_LIST:
                    if parse[0] in line:
                        # The module-level ``float`` returns None for NaN/inf;
                        # keep None instead of failing on the multiplication.
                        energy = float(line.split()[-1])
                        if energy is not None:
                            energy = energy * KELVIN_TO_KJ_PER_MOL
                        result_dict['energy_{}_{}_{}'.format(parse[1], parse[2], reading)] = energy
                        # Once this entry is stored, stop reading until the next section starts.
                        if parse[1] == "ads/ads" and parse[2] == "coulomb":
                            reading = None

            if "Average properties of the system" in line:
                break

        # 3rd parsing part: average system properties
        # --------------------------------------------------
        # from: "Average properties of the system"
        # to: "Number of molecules"
        for line in fobj:
            for parse in BLOCK_1_LIST:
                if parse[0] in line:
                    parse_block1(fobj, result_dict, parse[1], *parse[2])
                    # I assume here that properties per component are present further in the output file.
                    # so I need to skip some lines:
                    skip_nlines_after = parse[3]
                    while skip_nlines_after > 0:
                        line = next(fobj)
                        skip_nlines_after -= 1
                    for i, cmpnt in enumerate(component_names):
                        # The order of properties per molecule is the same as the order of molecules in the
                        # input file. So if component name was not found in the next line, I break the loop
                        # immediately as there is no reason to continue it
                        line = next(fobj)
                        if cmpnt in line:
                            parse_block1(fobj, res_per_component[i], parse[1], *parse[2])
                        else:
                            break
                        skip_nlines_after = parse[3]
                        while skip_nlines_after > 0:
                            line = next(fobj)
                            skip_nlines_after -= 1
                    # NOTE(review): this only continues the loop over BLOCK_1_LIST;
                    # the checks below still run on the current value of ``line``.
                    continue  # no need to perform further checks, property has been found already
            for parse in ENERGY_AVERAGE_LIST:
                if parse[0] in line:
                    parse_block_energy(fobj, result_dict, prop=parse[1])
                    continue  # no need to perform further checks, property has been found already
            for parse in BOX_PROP_LIST:
                if parse[0] in line:
                    # parse three cell vectors
                    parse_block1(fobj, result_dict, prop='box_ax', value=2, unit=3, dev=5)
                    parse_block1(fobj, result_dict, prop='box_by', value=2, unit=3, dev=5)
                    parse_block1(fobj, result_dict, prop='box_cz', value=2, unit=3, dev=5)
                    # parse angles between the cell vectors
                    parse_block1(fobj, result_dict, prop='box_alpha', value=3, unit=4, dev=6)
                    parse_block1(fobj, result_dict, prop='box_beta', value=3, unit=4, dev=6)
                    parse_block1(fobj, result_dict, prop='box_gamma', value=3, unit=4, dev=6)

            if "Number of molecules:" in line:
                break

        # 4th parsing part: average molecule properties
        # --------------------------------------------------
        # from: "Number of molecules"
        # to: end of file
        icomponent = 0
        for line in fobj:
            # Consider to change it with parse_line?
            if 'Average loading absolute [molecules/unit cell]' in line:
                res_per_component[icomponent]['loading_absolute_average'] = float(line.split()[5])
                res_per_component[icomponent]['loading_absolute_dev'] = float(line.split()[7])
                res_per_component[icomponent]['loading_absolute_unit'] = 'molecules/unit cell'
            elif 'Average loading excess [molecules/unit cell]' in line:
                res_per_component[icomponent]['loading_excess_average'] = float(line.split()[5])
                res_per_component[icomponent]['loading_excess_dev'] = float(line.split()[7])
                res_per_component[icomponent]['loading_excess_unit'] = 'molecules/unit cell'
                # The excess loading is the last entry read for a component.
                icomponent += 1
            if icomponent >= ncomponents:
                break

        for line in fobj:
            for to_parse in LINES_WITH_COMPONENT_LIST:
                if to_parse[0] in line:
                    parse_lines_with_component(res_per_component, component_names, line, to_parse[1])

    # Assigning to None all the quantities that are meaningless if not running a Widom insertion calculation
    for res_comp in res_per_component:
        for prop in ["henry_coefficient", "widom_rosenbluth_factor", "chemical_potential"]:
            if res_comp["{}_dev".format(prop)] == 0.0:
                res_comp["{}_average".format(prop)] = None
                res_comp["{}_dev".format(prop)] = None

        # The section "Adsorption energy from Widom-insertion" is not showing in the output if no widom is performed
        if "adsorption_energy_widom_average" not in res_comp:
            res_comp["adsorption_energy_widom_unit"] = "kJ/mol"
            res_comp["adsorption_energy_widom_dev"] = None
            res_comp["adsorption_energy_widom_average"] = None

    return_dictionary = {"general": result_dict, "components": {}}

    for name, value in zip(component_names, res_per_component):
        return_dictionary["components"][name] = value

    # Parsing all the warnings that are printed in the output file, avoiding redundancy.
    # ``system_name`` is the same for every tuple, so uniqueness is decided by the
    # line text alone; a set keeps the membership test cheap.
    seen_warning_lines = set()
    with open(output_abs_path, "r") as fobj:
        for line in fobj:
            if "WARNING" in line and line not in seen_warning_lines:
                seen_warning_lines.add(line)
                warnings.append((system_name, line))
    return return_dictionary, warnings