Skip to content

Instantly share code, notes, and snippets.

@andersx
Created April 15, 2019 09:15
Show Gist options
  • Select an option

  • Save andersx/6c9accc51f2a6f91c67a22a7a1900f95 to your computer and use it in GitHub Desktop.

Select an option

Save andersx/6c9accc51f2a6f91c67a22a7a1900f95 to your computer and use it in GitHub Desktop.
KRR and OQML energy regression tests
from __future__ import print_function
import os
import csv
import ast
from copy import deepcopy
import scipy
import scipy.stats
from scipy.linalg import lstsq
import numpy as np
import qml
from qml.math import cho_solve
from qml.fchl import generate_representation
from qml.fchl import get_local_kernels
from qml.fchl import get_local_symmetric_kernels
from qml.fchl import get_atomic_local_kernels
test_dir = os.path.dirname(os.path.realpath(__file__))
CSV_FILE = test_dir + "/data/amons_small.csv"
SIGMAS = [0.64]
TRAINING = 13
TEST = 7
DX = 0.005
CUT_DISTANCE = 1e6
KERNEL_ARGS = {
"verbose": False,
"cut_distance": CUT_DISTANCE,
"kernel": "gaussian",
"kernel_args": {
"sigma": SIGMAS,
},
}
LLAMBDA_ENERGY = 1e-7
LLAMBDA_FORCE = 1e-7
def mae(a, b):
return np.mean(np.abs(a.flatten() - b.flatten()))
def csv_to_molecular_reps(csv_filename):
np.random.seed(667)
x = []
e = []
distance = []
max_atoms = 5
with open(csv_filename, 'r') as csvfile:
df = csv.reader(csvfile, delimiter=";", quotechar='#')
for row in df:
coordinates = np.array(ast.literal_eval(row[2]))
nuclear_charges = ast.literal_eval(row[5])
atomtypes = ast.literal_eval(row[1])
energy = float(row[6])
rep = generate_representation(coordinates, nuclear_charges,
max_size=max_atoms, cut_distance=CUT_DISTANCE)
x.append(rep)
e.append(energy)
return np.array(x), e
def test_krr_derivative():
Xall, Eall, = csv_to_molecular_reps(CSV_FILE)
Eall = np.array(Eall)
X = Xall[:TRAINING]
E = Eall[:TRAINING]
Xs = Xall[-TEST:]
Es = Eall[-TEST:]
K = get_local_symmetric_kernels(X, **KERNEL_ARGS)
Ks = get_local_kernels(Xs, X, **KERNEL_ARGS)
Y = np.array(E)
for i, sigma in enumerate(SIGMAS):
C = deepcopy(K[i])
for j in range(K.shape[2]):
C[j,j] += LLAMBDA_ENERGY
alpha = cho_solve(C, Y)
Ess = np.dot(Ks[i], alpha)
Et = np.dot(K[i], alpha)
print(mae(Ess, Es))
print(mae(Et, E))
assert mae(Ess, Es) < 0.7, "Error in KRR test energy"
assert mae(Et, E) < 0.02, "Error in KRR training energy"
def test_non_square_derivative():
Xall, Eall, = csv_to_molecular_reps(CSV_FILE)
Eall = np.array(Eall)
X = Xall[:TRAINING]
E = Eall[:TRAINING]
Xs = Xall[-TEST:]
Es = Eall[-TEST:]
Kt_energy = get_atomic_local_kernels(X, X, **KERNEL_ARGS)
Ks_energy = get_atomic_local_kernels(X, Xs, **KERNEL_ARGS)
Y = np.array(E)
for i, sigma in enumerate(SIGMAS):
C = Kt_energy[i].T
alphas, residuals, singular_values, rank = lstsq(C, Y, cond=1e-9, lapack_driver="gelsd")
Ess = np.dot(Ks_energy[i].T, alphas)
Et = np.dot(Kt_energy[i].T, alphas)
print(mae(Ess, Es))
print(mae(Et, E))
assert mae(Ess, Es) < 4.0, "Error in operator test energy"
assert mae(Et, E) < 0.04, "Error in operator training energy"
if __name__ == "__main__":
test_krr_derivative()
test_non_square_derivative()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment