The Python API for EdgeMBAR
Learning objectives
Learn how to interact with the Python API for edgembar-generated data
Learn how to port that data to other programs.
Activities
In this Activity, you will learn how to interact with edgembar-generated data through the edgembar Python API.
Introduction to the Edgembar Python API
Data generated from the edgembar package in FE-Toolkit Giese et al.[1] is typically stored in one of three sources. The first is an edge Python file (e.g. edge_ejm31.py), the second is a NetCDF file (e.g. edge_ejm31.nc), and the third is an HTML report (e.g. edge_ejm31.html). Each serves a particular purpose; however, when developing outside analysis tools that interact with them, the choice can make a significant impact. For instance, the .py file is dependent on relative paths, so if you move it to a different directory, then it will no longer be able to generate an HTML report. The HTML report includes a significant amount of information and is portable; however, it’s challenging to parse in Python. Therefore, the NC file exists as a portable (and parseable) solution.
To start, download the NetCDF file for an ABFE run on ejm31: This file
from __future__ import annotations
import argparse
from pathlib import Path
from edgembar.HtmlUtils import GetReplExchData
from edgembar.NcIO import LoadEdgeNc
def format_energy(value: float, error: float) -> str:
return f"{value:8.3f} +- {error:6.3f} kcal/mol"
def print_bar_summary(label: str, obj, prod_data) -> None:
value, error = obj.GetValueAndError(prod_data)
print(f"{label:<28} {format_energy(value, error)}")
def print_ti_summary(edge) -> None:
ti_data = edge.GetTIValuesAndErrors()
if ti_data is None:
print("TI estimates: unavailable")
return
print("TI estimates:")
for method in ("Linear", "Natural", "Clamped"):
if method in ti_data:
value, error = ti_data[method]
print(f" {method:<8} {format_energy(value, error)}")
def print_message_summary(edge) -> None:
messages = edge.GetErrorMsgs()
errors = [msg for msg in messages if msg.iserr]
warnings = [msg for msg in messages if not msg.iserr and msg.kind != "outlier"]
print(f"Errors: {len(errors)}")
print(f"Warnings: {len(warnings)}")
def print_trial_summary(trial, prod_data) -> None:
print_bar_summary(f" trial {trial.name}", trial, prod_data)
rem_data = GetReplExchData(trial)
if rem_data is None:
return
single_pass = rem_data["Average single pass steps:"]
trips_per_replica = rem_data["Round trips per replica:"]
total_round_trips = rem_data["Total round trips:"]
print(
" " * 8
+ "RE summary: "
+ f"single-pass={single_pass:.1f}, "
+ f"round-trips/replica={trips_per_replica:.2f}, "
+ f"total-round-trips={total_round_trips:.1f}"
)
def build_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(
description="Read an EdgeMBAR .nc file and print a simple energy summary."
)
parser.add_argument("edge_nc", type=Path, help="Path to an EdgeMBAR NetCDF file")
return parser
def main() -> None:
args = build_parser().parse_args()
if args.edge_nc.suffix != ".nc":
raise SystemExit("Expected a NetCDF edge file ending in .nc")
if not args.edge_nc.exists():
raise SystemExit(f"File not found: {args.edge_nc}")
edge = LoadEdgeNc(str(args.edge_nc))
prod_data = edge.results.prod
print(f"Edge: {edge.name}")
print_bar_summary("edge", edge, prod_data)
print_ti_summary(edge)
print_message_summary(edge)
print()
for env in edge.GetEnvs():
print_bar_summary(f" env {env.name}", env, prod_data)
for stage in env.stages:
print_bar_summary(f" stage {stage.name}", stage, prod_data)
for trial in stage.trials:
print_trial_summary(trial, prod_data)
print()
if __name__ == "__main__":
main()
Above is a Python script that you can use to access the data that you were looking at in the HTML report from the NetCDF file.
The key steps are:
Load an edge with
LoadEdgeNcThe free energy is obtained by running
edge.GetValueAndError(edge.results.prod)(orstage, orenv, ortrial)The replica exchange information is provided by
GetReplExchData(trial)