I started to work in the field of computational chemistry and I was ask to do Principal Component Analysis on some trajectory from molecular dynamics. I was told to use MDAnalysis package, thus I find one tutorial on their page a tried to follow it (but I included my own inputs of course) to see if it will be working. I have never done analysis like this ad I am also new to python coding. I attached my code inspired by tutorial. But it doesnt work for me, it raises many errors, one of the errors is that it cant take my inputs (topology is PDB file, coordinate is XTC file), but those are formats which are listed in supported formats or other error is that "class PCA" is not defined. I didnt find much about dealing with PCA using MDAanalysis from other people, thus I hoped that here I could find someone, who have ever done something like this and could, please, help me. I have alreadz tried related subreddits, but without result.
from __future__ import division, absolute_import
import MDAnalysis as mda
import MDAnalysis.analysis.pca as pca
from six.moves import range
import warnings
import numpy as np
import scipy.integrate
from MDAnalysis import Universe
from MDAnalysis.analysis.align import _fit_to
from MDAnalysis.lib.log import ProgressMeter
u = mda.Universe("L22trial.pdb", "L22trial.xtc")
PCA = mda.analysis.pca.PCA
class PCA():
pca = PCA(u, select='backbone').run()
pca_space = pca.transform(u.select_atoms('backbone'))
def __init__(self, universe, select='all', align=False, mean=None,
n_components=None, **kwargs):
super(PCA, self).__init__(universe.trajectory, **kwargs)
self._u = universe
self.align = align
self._calculated = False
self.n_components = n_components
self._select = select
self._mean = mean
def _prepare(self):
self._u.trajectory[self.start]
self._reference = self._u.select_atoms(self._select)
self._atoms = self._u.select_atoms(self._select)
self._n_atoms = self._atoms.n_atoms
if self._mean is None:
self.mean = np.zeros(self._n_atoms*3)
self._calc_mean = True
else:
self.mean = self._mean.positions
self._calc_mean = False
if self.n_frames == 1:
raise ValueError('No covariance information can be gathered from a single trajectory frame.\n')
n_dim = self._n_atoms * 3
self.cov = np.zeros((n_dim, n_dim))
self._ref_atom_positions = self._reference.positions
self._ref_cog = self._reference.center_of_geometry()
self._ref_atom_positions -= self._ref_cog
if self._calc_mean:
interval = int(self.n_frames // 100)
interval = interval if interval > 0 else 1
format = ("Mean Calculation Step %(step)5d/%(numsteps)d [%(percentage)5.1f%%]")
mean_pm = ProgressMeter(self.n_frames if self.n_frames else 1, interval=interval, verbose=self._verbose, format=format)
for i, ts in enumerate(self._u.trajectory[self.start:self.stop:self.step]):
if self.align:
mobile_cog = self._atoms.center_of_geometry()
mobile_atoms, old_rmsd = _fit_to(self._atoms.positions, self._ref_atom_positions, self._atoms, mobile_com=mobile_cog, ref_com=self._ref_cog)
else:
self.mean += self._atoms.positions.ravel()
mean_pm.echo(i)
self.mean /= self.n_frames
self.mean_atoms = self._atoms
self.mean_atoms.positions = self._atoms.positions
def _single_frame(self):
if self.align:
mobile_cog = self._atoms.center_of_geometry()
mobile_atoms, old_rmsd = _fit_to(self._atoms.positions, self._ref_atom_positions, self._atoms, mobile_com=mobile_cog, ref_com=self._ref_cog)
x = mobile_atoms.positions.ravel()
else:
x = self._atoms.positions.ravel()
x -= self.mean
self.cov += np.dot(x[:, np.newaxis], x[:, np.newaxis].T)
def _conclude(self):
self.cov /= self.n_frames - 1
e_vals, e_vects = np.linalg.eig(self.cov)
sort_idx = np.argsort(e_vals)[::-1]
self.variance = e_vals[sort_idx]
self.variance = self.variance[:self.n_components]
self.p_components = e_vects[:self.n_components, sort_idx]
self.cumulated_variance = (np.cumsum(self.variance) / np.sum(self.variance))
self._calculated = True
def transform(self, atomgroup, n_components=None, start=None, stop=None, step=None):
if not self._calculated:
raise ValueError('Call run() on the PCA before using transform')
if isinstance(atomgroup, Universe):
atomgroup = atomgroup.atoms
if(self._n_atoms != atomgroup.n_atoms):
raise ValueError('PCA has been fit for {} atoms. Your atomgroup has {} atoms'.format(self._n_atoms, atomgroup.n_atoms))
if not (self._atoms.types == atomgroup.types).all():
warnings.warn('Atom types do not match with types used to fit PCA')
traj = atomgroup.universe.trajectory
start, stop, step = traj.check_slice_indices(start, stop, step)
n_frames = len(range(start, stop, step))
dim = (n_components if n_components is not None else self.p_components.shape[1])
dot = np.zeros((n_frames, dim))
for i, ts in enumerate(traj[start:stop:step]):
xyz = atomgroup.positions.ravel() - self.mean
dot[i] = np.dot(xyz, self.p_components[:, :n_components])
return dot
def cosine_content(pca_space, i):
t = np.arange(len(pca_space))
T = len(pca_space)
cos = np.cos(np.pi * t * (i + 1) / T)
return ((2.0 / T) * (scipy.integrate.simps(cos*pca_space[:, i])) ** 2 /
scipy.integrate.simps(pca_space[:, i] ** 2))
it seems you copied and pasted the PCA class itsefl. My guess is that you don't need to do this (I have never used that module so it s just a guess). The documentation ( https://www.mdanalysis.org/docs/documentation_pages/analysis/pca.html ) seems to indicate the only thing you need to do is the following
import MDAnalysis as mda
import MDAnalysis.analysis.pca as pca
u = mda.Universe("L22trial.pdb", "L22trial.xtc")
mypca = pca.PCA(u, select='backbone').run()
pca_space = mypca.transform(u.select_atoms('backbone'))
If you have an error message "No module named 'MDAnalysis.analysis.pca.PCA'; 'MDAnalysis.analysis.pca' is not a package" it means what it says :-). That means there is no package on your computer named MDAnalysis. to fix this you need to install using pip install command or conda if you use conda package manager. See this link https://www.mdanalysis.org/pages/installation_quick_start/
Looking at the link https://www.mdanalysis.org/docs/_modules/MDAnalysis/analysis/pca.html from which you got inspired it confirmed my first guess and I think my answer should allow you using that package.