32 #ifndef CDPL_CHEM_CHEMBLSTANDARDIZER_HPP
33 #define CDPL_CHEM_CHEMBLSTANDARDIZER_HPP
38 #include <unordered_set>
41 #include <boost/functional/hash.hpp>
82 EXPLICIT_HYDROGENS_REMOVED = 0x2,
84 UNKNOWN_STEREO_STANDARDIZED = 0x4,
86 BONDS_KEKULIZED = 0x8,
88 STRUCTURE_NORMALIZED = 0x10,
90 CHARGES_REMOVED = 0x20,
92 TARTRATE_STEREO_CLEARED = 0x40,
94 STRUCTURE_2D_CORRECTED = 0x80,
96 ISOTOPE_INFO_CLEARED = 0x100,
98 SALT_COMPONENTS_REMOVED = 0x200,
100 SOLVENT_COMPONENTS_REMOVED = 0x400,
102 DUPLICATE_COMPONENTS_REMOVED = 0x800
// Sequence of pointers to mutable Atom objects.
// NOTE(review): raw pointers - who owns the atoms and how long they stay valid is not visible here.
160 typedef std::vector<Atom*> AtomList;
// Const member overload operating on a whole Molecule (read-only); yields a bool verdict.
// NOTE(review): presumably the exclusion test that the public entry points control through their
// proc_excld / check_exclusion arguments - confirm in the implementation file. Which outcome
// 'true' stands for (excluded vs. acceptable) cannot be told from this declaration.
164 bool checkExclusionCriterions(
const Molecule& mol)
const;
// Const member overload operating on an arbitrary MolecularGraph (read-only).
// boron_cnt is passed by non-const reference, so the callee is able to update the caller's counter.
// NOTE(review): presumably a running count of boron atoms accumulated across calls - confirm in the
// implementation file; the meaning of the bool result is not visible here.
165 bool checkExclusionCriterions(
const MolecularGraph& molgraph, std::size_t& boron_cnt)
const;
// Const member that receives the molecule by mutable reference, i.e. it may edit mol in place
// without touching the standardizer's own state.
// NOTE(review): the bool result presumably reports whether mol was changed (cf. the
// UNKNOWN_STEREO_STANDARDIZED change flag) - confirm in the implementation file.
167 bool standardizeUnknownStereochemistry(
Molecule& mol)
const;
// Const member that receives the molecule by mutable reference, i.e. it may edit mol in place.
// NOTE(review): the bool result presumably reports whether any atom was removed (cf. the
// EXPLICIT_HYDROGENS_REMOVED change flag) - confirm in the implementation file.
171 bool removeExplicitHydrogens(
Molecule& mol)
const;
// Const predicate evaluated on a single, read-only atom.
// NOTE(review): presumably the per-atom test used when stripping explicit hydrogens; the actual
// criteria live in the implementation file and are not visible here.
172 bool isRemovableHydrogen(
const Atom& atom)
const;
// Non-const member taking the molecule by mutable reference. Unlike the const helpers declared
// before it, this one is allowed to modify the standardizer's own members as well as mol.
// NOTE(review): the bool result presumably reports whether mol was changed (cf. the
// STRUCTURE_NORMALIZED change flag) - confirm in the implementation file.
174 bool normalizeStructure(
Molecule& mol);
// Non-const member taking the molecule by mutable reference; may modify mol and member state.
// NOTE(review): the bool result presumably reports whether stereo information was cleared (cf. the
// TARTRATE_STEREO_CLEARED change flag) - confirm in the implementation file.
179 bool removeTartrateStereochemistry(
Molecule& mol);
// Non-const member taking the molecule by mutable reference; may modify mol and member state.
// NOTE(review): the bool result presumably reports whether the 2D depiction was altered (cf. the
// STRUCTURE_2D_CORRECTED change flag) - confirm in the implementation file.
181 bool cleanup2DStructure(
Molecule& mol);
// Returns a double computed from a read-only molecule and three read-only atoms: a center atom
// and two of its (presumed) neighbors.
// NOTE(review): the unit (radians vs. degrees), the sign convention and the source of the 2D
// coordinates are not visible from this declaration - confirm in the implementation file.
// The member is not const-qualified, so it may use or update member state.
182 double calc2DBondAngle(
const Molecule& mol,
const Atom& ctr_atom,
const Atom& nbr_atom1,
const Atom& nbr_atom2);
// Takes a read-only molecule, a center atom, a substituent atom and a rotation angle; returns nothing.
// All object arguments are const references, so any result has to be stored somewhere other than
// these arguments (the member is not const-qualified, so member state is a candidate).
// NOTE(review): the unit and direction of rot_ang and the place where rotated coordinates end up
// are not visible here - confirm in the implementation file.
183 void rotateSubstituent(
const Molecule& mol,
const Atom& ctr_atom,
const Atom& subst_atom,
double rot_ang);
// Identifier made of two unsigned 64-bit values.
// NOTE(review): presumably a pair of molecular-graph hash codes - confirm what each half encodes.
187 typedef std::pair<std::uint64_t, std::uint64_t> StructureID;
// Pointer to a read-only Fragment together with its StructureID.
// NOTE(review): raw pointer - ownership and lifetime of the Fragment are not visible here.
188 typedef std::pair<const Fragment*, StructureID> MoleculeComponent;
// Ordered collection of MoleculeComponent entries.
189 typedef std::vector<MoleculeComponent> MoleculeComponentList;
// Hash set of StructureID values; boost::hash supplies the hasher because the standard library
// provides no std::hash specialization for std::pair.
190 typedef std::unordered_set<StructureID, boost::hash<StructureID> > StructureIDSet;
// Two component lists and one set of structure identifiers held as member state.
// NOTE(review): presumably reusable scratch storage for the salt / solvent / duplicate component
// handling reported by the corresponding change flags - confirm how each container is used in the
// implementation file; the roles of list 1 vs. list 2 are not visible here.
201 MoleculeComponentList molCompList1;
202 MoleculeComponentList molCompList2;
203 StructureIDSet uniqueMolComps;
Definition of class CDPL::Chem::BasicMolecule.
Declaration of type CDPL::Util::BitSet.
Definition of the preprocessor macro CDPL_CHEM_API.
#define CDPL_CHEM_API
Tells the compiler/linker which classes, functions and variables are part of the library API.
Definition of class CDPL::Chem::Fragment.
Definition of class CDPL::Chem::HashCodeCalculator.
Definition of class CDPL::Chem::KekuleStructureCalculator.
Definition of class CDPL::Chem::ProtonationStateStandardizer.
Definition of class CDPL::Chem::SubstructureSearch.
Definition of class CDPL::Math::VectorArray.
Abstract base class representing a chemical atom and its bonded neighborhood.
Definition: Atom.hpp:57
Concrete Chem::Molecule implementation that owns Chem::BasicAtom and Chem::BasicBond instances.
Definition: BasicMolecule.hpp:60
Implementation of the ChEMBL structure preprocessing pipeline.
Definition: ChEMBLStandardizer.hpp:65
ChangeFlags getParent(const MolecularGraph &molgraph, Molecule &parent_mol, bool neutralize=true, bool check_exclusion=true)
Extracts the parent compound of molgraph into parent_mol.
ChangeFlags standardize(Molecule &mol, bool proc_excld=false)
Standardizes mol in place.
ChangeFlags
Bitwise-OR-combined flags reporting which standardization steps modified the input molecule.
Definition: ChEMBLStandardizer.hpp:75
ChangeFlags standardize(const MolecularGraph &molgraph, Molecule &std_mol, bool proc_excluded=false)
Writes a standardized copy of molgraph to std_mol.
ChEMBLStandardizer()
Constructs the ChEMBLStandardizer instance.
std::shared_ptr< ChEMBLStandardizer > SharedPointer
A reference-counted smart pointer [SHPTR] for dynamically allocated ChEMBLStandardizer instances.
Definition: ChEMBLStandardizer.hpp:69
ChEMBLStandardizer & operator=(const ChEMBLStandardizer &standardizer)
Replaces the state of this standardizer by a copy of the state of standardizer.
ChEMBLStandardizer(const ChEMBLStandardizer &standardizer)
Constructs a copy of the ChEMBLStandardizer instance standardizer.
ChangeFlags getParent(Molecule &mol, bool neutralize=true, bool check_exclusion=true)
Extracts the parent compound of mol in place (removing salt/solvent components).
Concrete Chem::MolecularGraph implementation that stores references to a selectable subset of atoms a...
Definition: Fragment.hpp:57
Computes a 64-bit hash code that identifies a molecular graph up to a configurable set of atom and bo...
Definition: HashCodeCalculator.hpp:67
Assigns an alternating single/double bond pattern (Kekulé structure) to the previously undefined bond...
Definition: KekuleStructureCalculator.hpp:55
Abstract base class for representations of a chemical structure as a graph of bonded atoms.
Definition: MolecularGraph.hpp:57
Abstract base class representing a mutable molecular graph that owns its atoms and bonds.
Definition: Molecule.hpp:53
Adjusts the protonation state of a molecule (atom formal charges and bonded hydrogen counts) accordin...
Definition: ProtonationStateStandardizer.hpp:58
Subgraph-isomorphism search of a query molecular graph against a target molecular graph,...
Definition: SubstructureSearch.hpp:74
constexpr unsigned int NONE
Represents an empty set of atom properties.
Definition: Biomol/AtomPropertyFlag.hpp:48
CDPL_CHEM_API void kekulizeBonds(MolecularGraph &molgraph)
Assigns Kekulé bond orders to the aromatic bonds of molgraph.
CDPL_CHEM_API void clearMatchConstraints(Atom &atom)
Removes the Chem::AtomProperty::MATCH_CONSTRAINTS property from atom.
VectorArray< Vector2D > Vector2DArray
Array storing vectors of type Math::Vector2D.
Definition: VectorArray.hpp:80
Array< std::size_t > STArray
Array storing unsigned integers of type std::size_t.
Definition: Array.hpp:575
boost::dynamic_bitset BitSet
Dynamic bitset class.
Definition: BitSet.hpp:46
The namespace of the Chemical Data Processing Library.