GaN findTDE Analysis Example

Contents

GaN findTDE Analysis Example#

[3]:
import os
import glob
from pathlib import Path
import seedir as sd

import subprocess
import re
import pprint

import numpy as np
import pandas as pd

import matplotlib as mpl
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.io as pio

from pymatgen.io.vasp.inputs import Poscar
from findtde.tde_analysis import *

VASP#

[4]:
vasp_path = Path(os.path.abspath('')) / 'GaN_VASP'
print(vasp_path)
/storage/work/ash5615/bin/findTDE/examples/GaN_VASP
[5]:
sd.seedir(vasp_path)
GaN_VASP/
├─.ipynb_checkpoints/
│ ├─GaN_TDE_example-checkpoint.md
│ ├─all_tde_data-checkpoint.csv
│ ├─all_tde_data_ALL-checkpoint.csv
│ ├─find_tde_lineplot_ALL-checkpoint.png
│ ├─gan_ga_tde_ALL-checkpoint.png
│ ├─gan_vasp_pseudo_keys_ALL-checkpoint.csv
│ ├─latt_dirs_to_calc-checkpoint.csv
│ ├─latt_dirs_to_calc_ALL-checkpoint.csv
│ ├─tde_lineplot-checkpoint.png
│ └─tde_scatter-checkpoint.png
├─1L_ga34/
│ ├─1L_ga34_data.csv
│ ├─1L_ga34_out.txt
│ ├─25eV/
│ │ ├─INCAR
│ │ ├─KPOINTS
│ │ ├─OUTCAR
│ │ ├─POSCAR
│ │ └─cgm/
│ │   ├─INCAR
│ │   ├─KPOINTS
│ │   ├─OUTCAR
│ │   └─POSCAR
│ ├─33eV/
│ │ ├─INCAR
│ │ ├─KPOINTS
│ │ ├─OUTCAR
│ │ ├─POSCAR
│ │ └─cgm/
│ │   ├─INCAR
│ │   ├─KPOINTS
│ │   ├─OUTCAR
│ │   └─POSCAR
│ ├─34eV/
│ │ ├─INCAR
│ │ ├─KPOINTS
│ │ ├─OUTCAR
│ │ ├─POSCAR
│ │ └─cgm/
│ │   ├─INCAR
│ │   ├─KPOINTS
│ │   ├─OUTCAR
│ │   └─POSCAR
│ ├─35eV/
│ │ ├─INCAR
│ │ ├─KPOINTS
│ │ ├─OUTCAR
│ │ ├─POSCAR
│ │ └─cgm/
│ │   ├─INCAR
│ │   ├─KPOINTS
│ │   ├─OUTCAR
│ │   └─POSCAR
│ ├─37eV/
│ │ ├─INCAR
│ │ ├─KPOINTS
│ │ ├─OUTCAR
│ │ ├─POSCAR
│ │ └─cgm/
│ │   ├─INCAR
│ │   ├─KPOINTS
│ │   ├─OUTCAR
│ │   └─POSCAR
│ ├─41eV/
│ │ ├─INCAR
│ │ ├─KPOINTS
│ │ ├─OUTCAR
│ │ ├─POSCAR
│ │ └─cgm/
│ │   ├─INCAR
│ │   ├─KPOINTS
│ │   ├─OUTCAR
│ │   └─POSCAR
│ └─KE_calcs_list.txt
├─GaN_TDE_example.md
├─all_tde_data.csv
├─all_tde_data_ALL.csv
├─find_tde_lineplot_ALL.png
├─gan_ga_tde_ALL.png
├─gan_vasp_pseudo_keys_ALL.csv
├─inp/
│ ├─INCAR_cgm
│ ├─INCAR_md
│ ├─KPOINTS
│ ├─POSCAR
│ └─POTCAR
├─latt_dirs_to_calc.csv
├─latt_dirs_to_calc_ALL.csv
├─perfect/
│ └─OUTCAR
├─sph_directions.csv
├─tde_lineplot.png
└─tde_scatter.png
[7]:
# read in POSCAR file using pymatgen
vasp_ref_file = vasp_path / 'inp' / 'POSCAR'
pos_perf = Poscar.from_file(vasp_ref_file)
vasp_lattice_vecs = pos_perf.structure.lattice.matrix
vasp_params = pos_perf.structure.lattice.abc

print('GaN 5x5x3 Supercell Lattice Vectors')
pprint.pprint(vasp_lattice_vecs)
print('\nGaN 5x5x3 Supercell Lattice Constants')
pprint.pprint(vasp_params)
print('\nGaN Unitcell Lattice Constants')
pprint.pprint((round(vasp_params[0]/5, 4), round(vasp_params[1]/5, 4), round(vasp_params[2]/3, 4)))
GaN 5x5x3 Supercell Lattice Vectors
array([[ 8.04829654e+00, -1.39400701e+01, -3.15196198e-07],
       [ 8.04829067e+00,  1.39400667e+01, -2.25419314e-07],
       [-5.26840221e-07,  4.87389220e-08,  1.57345155e+01]])

GaN 5x5x3 Supercell Lattice Constants
(16.096603103259092, 16.09659723098588, 15.734515530175035)

GaN Unitcell Lattice Constants
(3.2193, 3.2193, 5.2448)

Each calculation direction is checked for common errors. A tuple of nested dictionaries is created, with the first dictionary summarizing any errors found and the second dictionary showing all the checks performed. Each dictionary is keyed by the direction pseudo combined with the atom type and number (e.g., 1L_ga34), with secondary dictionaries as the values. These secondary dictionaries show whether the TDE is found, whether the velocity vector correctly corresponds to the chosen atom type and index, whether the velocity vector correctly corresponds to the chosen KE value (within a specified tolerance), and whether the final temperature at the end of the CGM relaxation is sufficiently low (below a specified tolerance ratio).

[8]:
check_find_tde_runs(tde_calc_dir=vasp_path, program='vasp', ke_tol=1, temp_tol=0.6)
[8]:
({},
 {'1L_ga34': {'tde': [True],
   'knockout': [True, True, True, True, True, True],
   'ke': [True, True, True, True, True, True],
   'temp': [True, True, True, True, True, True]}})

The data from all direction calculations in the directory is gathered into a single CSV file. The final energy difference from the energy of the perfect supercell calculation is also computed, where the perfect supercell energy is read from the OUTCAR file in the perfect directory.

[26]:
tde_data_gather(ofile=(vasp_path / 'all_tde_data.csv'), tde_calc_dir=vasp_path)

The analysis function returns a tuple with two dictionaries. The first dictionary uses the knockout atom type and number (e.g., ga34) as the keys and tuples containing two pandas DataFrames as the values. The first DataFrame is the final energy for each calculation direction at each simulated kinetic energy, and the second DataFrame is the same but for the final energy difference from the perfect crystal energy (calculated in the data gather step). The second dictionary maps the lattice direction pseudos (e.g., 1L) to the associated knockout directions (e.g., [-1 4 1]).

[9]:
all_find_tde_dfs, pseudo_keys = find_tde_analysis(['ga'], [34], datafile=(vasp_path / 'all_tde_data.csv'), keyfile=(vasp_path / 'latt_dirs_to_calc.csv'))
[10]:
all_find_tde_dfs
[10]:
{'ga34': (             1L
  25 -1823.366892
  33 -1823.366856
  34 -1811.867811
  35 -1811.868307
  37 -1811.829686
  41 -1811.869515,
             1L
  25   0.000913
  33   0.000949
  34  11.499994
  35  11.499498
  37  11.538119
  41  11.498290)}
[11]:
pseudo_keys
[11]:
{'1L': array(['-1', '4', '1'], dtype='<U2')}
[12]:
find_tde_df = all_find_tde_dfs['ga34'][1]
[13]:
find_tde_df
[13]:
1L
25 0.000913
33 0.000949
34 11.499994
35 11.499498
37 11.538119
41 11.498290

The DataFrames can also be converted into arrays that associate the directions with the TDE values, which are better suited for plotting purposes. Within this conversion, any runs above the kinetic energy cutoff can be removed, and the directions can be re-oriented if the reference structure does not correspond with the desired \(x\)-axis. In this example, the \(a\) lattice vector is re-oriented along the \(x\)-axis.

[14]:
tde_sph_arr, tde_pseudos = generate_tde_sph_arr(find_tde_df, pseudo_keys, lattice_vecs=vasp_lattice_vecs, e_tol=1.0, ke_cut=45, polar_offset=angle_between([1., 0., 0.], vasp_lattice_vecs[0]))
[15]:
tde_sph_arr
[15]:
array([[130.8900206,  77.96     ,  34.       ]])
[16]:
tde_pseudos
[16]:
dict_keys(['1L'])
[ ]:
generate_tde_line_plot(find_tde_df, im_write=False, im_name='tde_lineplot.png')

TDE Lineplot

[ ]:
generate_tde_scatter_plot(tde_sph_arr, tde_pseudos, txt_show=False, im_write=False, im_name='tde_scatter.png')

TDE Scatterplot

[21]:
print(tde_sph_arr.shape)
(1, 3)
[22]:
# if tde_sph_arr.shape[0] is a low number, RES should be manually chosen
ps_tde, ts_tde, es_tde = idw_heatmap(tde_sph_arr, RES=tde_sph_arr.shape[0], P=5)
[ ]:
generate_tde_heatmap_plot(ps_tde, ts_tde, es_tde, im_write=False, im_name='tde_heatmap.png')

TDE Heatmap

Alternatively, the majority of analysis functions can be executed simultaneously using the evaluate_tdes function.

[ ]:
evaluate_tdes('ga', 34, run_type='vasp', base_path=vasp_path, annotate_pseudos=False, interpolate_heatmap=False)

Examples of the lineplot and scatterplot functionalities containing multiple calculation directions for a Ga displacement are included below.

TDE Lineplot All

TDE Scatterplot All

LAMMPS#

[24]:
lmp_path = Path(os.path.abspath('')) / 'GaN_LAMMPS'
print(lmp_path)
/storage/work/ash5615/bin/findTDE/examples/GaN_LAMMPS
[25]:
sd.seedir(lmp_path)
GaN_LAMMPS/
├─.ipynb_checkpoints/
│ ├─all_tde_data-checkpoint.csv
│ ├─all_tde_data_lmp-checkpoint.csv
│ ├─gan_lmp_pseudo_keys_ALL-checkpoint.csv
│ └─tde_lineplot-checkpoint.png
├─176S_ga34_lmp/
│ ├─10eV/
│ │ ├─AlGaN.sw
│ │ ├─POSCAR
│ │ ├─dump.final
│ │ ├─input.tde
│ │ ├─log.lammps
│ │ ├─read_data.lmp
│ │ └─slurm-6231657.out
│ ├─176S_ga34_data.csv
│ ├─176S_ga34_out.txt
│ ├─18eV/
│ │ ├─AlGaN.sw
│ │ ├─POSCAR
│ │ ├─dump.final
│ │ ├─input.tde
│ │ ├─log.lammps
│ │ ├─read_data.lmp
│ │ └─slurm-6231924.out
│ ├─26eV/
│ │ ├─AlGaN.sw
│ │ ├─POSCAR
│ │ ├─dump.final
│ │ ├─input.tde
│ │ ├─log.lammps
│ │ ├─read_data.lmp
│ │ └─slurm-6232421.out
│ ├─30eV/
│ │ ├─AlGaN.sw
│ │ ├─POSCAR
│ │ ├─dump.final
│ │ ├─input.tde
│ │ ├─log.lammps
│ │ ├─read_data.lmp
│ │ └─slurm-6233480.out
│ ├─32eV/
│ │ ├─AlGaN.sw
│ │ ├─POSCAR
│ │ ├─dump.final
│ │ ├─input.tde
│ │ ├─log.lammps
│ │ ├─read_data.lmp
│ │ └─slurm-6233955.out
│ ├─33eV/
│ │ ├─AlGaN.sw
│ │ ├─POSCAR
│ │ ├─dump.final
│ │ ├─input.tde
│ │ ├─log.lammps
│ │ ├─read_data.lmp
│ │ └─slurm-6234581.out
│ ├─34eV/
│ │ ├─AlGaN.sw
│ │ ├─POSCAR
│ │ ├─dump.final
│ │ ├─input.tde
│ │ ├─log.lammps
│ │ ├─read_data.lmp
│ │ └─slurm-6233053.out
│ └─KE_calcs_list.txt
├─GaN_LAMMPS_TDE_example.md
├─gan_lmp_errs_run_info_ALL.txt
├─gan_lmp_pseudo_keys_ALL.csv
├─inp/
│ ├─AlGaN.sw
│ ├─AlN.tersoff
│ ├─GaN.sw
│ ├─GaN.tersoff
│ ├─INCAR_cgm
│ ├─INCAR_md
│ ├─KPOINTS
│ ├─POSCAR
│ ├─POTCAR
│ ├─input.tde
│ └─read_data_perfect.lmp
├─latt_dirs_to_calc.csv
├─perfect/
│ └─OUTCAR
├─sph_directions_ext_set1.csv
├─sph_directions_ext_set2.csv
├─sph_directions_ext_set3.csv
└─tde_lineplot.png

All analysis functions are compatible with LAMMPS as well as VASP, but some require specification of the program used within the function call. Again, the majority of analysis functions can be executed simultaneously using the evaluate_tdes function.

[26]:
evaluate_tdes('ga', 34, run_type='lammps', base_path=lmp_path, annotate_pseudos=False, interpolate_heatmap=False)
{}
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[26], line 1
----> 1 evaluate_tdes('ga', 34, run_type='lammps', base_path=lmp_path, annotate_pseudos=False, interpolate_heatmap=False)

File /storage/work/ash5615/bin/.conda/envs/findtde/lib/python3.12/site-packages/findtde/tde_analysis.py:454, in evaluate_tdes(knockout_atom_type, knockout_atom_num, run_type, base_path, annotate_pseudos, interpolate_heatmap)
    451 generate_tde_line_plot(find_tde_lmp_df, im_write=True, im_name=(base_path / 'tde_lineplot.png'))
    453 # reorganize data into array and plot scatter plot
--> 454 tde_lmp_sph_arr, tde_lmp_pseudos = generate_tde_sph_arr(find_tde_lmp_df, lmp_pseudo_keys, lattice_vecs=vasp_lattice_vecs, e_tol=1.0, ke_cut=100, polar_offset=angle_between([1., 0., 0.], vasp_lattice_vecs[0]))
    456 generate_tde_scatter_plot(tde_lmp_sph_arr, tde_lmp_pseudos, txt_show=annotate_pseudos, im_write=True, im_name=(base_path / 'tde_scatter.png'))
    458 # from Victor, read data into a (nsamples x 3) array (x, y, f(x, y)), interpolate and plot heatmap data

File /storage/work/ash5615/bin/.conda/envs/findtde/lib/python3.12/site-packages/findtde/tde_analysis.py:296, in generate_tde_sph_arr(tde_data_df, pseudo_keys, lattice_vecs, e_tol, ke_cut, polar_offset)
    292 def generate_tde_sph_arr(tde_data_df, pseudo_keys, lattice_vecs, e_tol=1.0, ke_cut=45, polar_offset=60):
    293     """
    294     Function to reorganize dataframe into a Nx3 array with spherical coordinates.
    295     """
--> 296     find_tde_sph_dict = find_tde_sph_analysis(tde_data_df, pseudo_keys=pseudo_keys, lattice_vecs=lattice_vecs, e_tol=e_tol, ke_cut=ke_cut)
    297     find_tde_pseudos = find_tde_sph_dict.keys()
    298     find_tde_sph_arr = np.zeros((len(find_tde_pseudos), 3))

File /storage/work/ash5615/bin/.conda/envs/findtde/lib/python3.12/site-packages/findtde/tde_analysis.py:282, in find_tde_sph_analysis(find_tde_dE, pseudo_keys, lattice_vecs, e_tol, ke_cut)
    279     tde_info_dict['TDE'] = ke_cut
    281 if find_tde_dE.columns[i][-1] == 'S':
--> 282     tde_info_dict['phi'], tde_info_dict['theta'] = pseudo_keys[find_tde_dE.columns[i]].astype(float)
    283 elif find_tde_dE.columns[i][-1] == 'L':
    284     tde_info_dict['latt_dir'] = pseudo_keys[find_tde_dE.columns[i]]

ValueError: could not convert string to float: '9S'
[ ]: