RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
MolOps.h
Go to the documentation of this file.
1//
2// Copyright (C) 2001-2024 Greg Landrum and other RDKit contributors
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10#include <RDGeneral/export.h>
11#ifndef RD_MOL_OPS_H
12#define RD_MOL_OPS_H
13
14#include <vector>
15#include <map>
16#include <list>
18#include <boost/smart_ptr.hpp>
19#include <boost/dynamic_bitset.hpp>
21#include <RDGeneral/types.h>
23#include "SanitException.h"
25
26RDKIT_GRAPHMOL_EXPORT extern const int ci_LOCAL_INF;
27namespace RDKit {
28class ROMol;
29class RWMol;
30class Atom;
31class Bond;
32class Conformer;
33typedef std::vector<double> INVAR_VECT;
34typedef INVAR_VECT::iterator INVAR_VECT_I;
35typedef INVAR_VECT::const_iterator INVAR_VECT_CI;
36
37//! \brief Groups a variety of molecular query and transformation operations.
38namespace MolOps {
39
40//! return the number of electrons available on an atom to donate for
41/// aromaticity
42/*!
43 The result is determined using the default valency, number of lone pairs,
44 number of bonds and the formal charge. Note that the atom may not donate
45 all of these electrons to a ring for aromaticity (also used in Conjugation
46 and hybridization code).
47
48 \param at the atom of interest
49
50 \return the number of electrons
51*/
53
54//! sums up all atomic formal charges and returns the result
56
57//! returns whether or not the given Atom is involved in a conjugated bond
59
60//! find fragments (disconnected components of the molecular graph)
61/*!
62
63 \param mol the molecule of interest
64 \param mapping used to return the mapping of Atoms->fragments.
65 On return \c mapping will be <tt>mol->getNumAtoms()</tt> long
66 and will contain the fragment assignment for each Atom
67
68 \return the number of fragments found.
69
70*/
71RDKIT_GRAPHMOL_EXPORT unsigned int getMolFrags(const ROMol &mol,
72 std::vector<int> &mapping);
73//! find fragments (disconnected components of the molecular graph)
74/*!
75
76 \param mol the molecule of interest
77 \param frags used to return the Atoms in each fragment
78 On return \c mapping will be \c numFrags long, and each entry
79 will contain the indices of the Atoms in that fragment.
80
81 \return the number of fragments found.
82
83*/
85 const ROMol &mol, std::vector<std::vector<int>> &frags);
86
87//! splits a molecule into its component fragments
88/// (disconnected components of the molecular graph)
89/*!
90
91 \param mol the molecule of interest
92 \param molFrags used to return the disconnected fragments as molecules.
93 Any contents on input will be cleared.
94 \param sanitizeFrags toggles sanitization of the fragments after
95 they are built
96 \param frags used to return the mapping of Atoms->fragments.
97 if provided, \c frags will be <tt>mol->getNumAtoms()</tt> long
98 on return and will contain the fragment assignment for each Atom.
99 \param fragsMolAtomMapping used to return the Atoms in each fragment
100 On return \c mapping will be \c numFrags long, and each entry
101 will contain the indices of the Atoms in that fragment.
102 \param copyConformers toggles copying conformers of the fragments after
103 they are built
104 \return the number of fragments found.
105
106*/
108 const ROMol &mol, std::vector<std::unique_ptr<ROMol>> &molFrags,
109 bool sanitizeFrags = true, std::vector<int> *frags = nullptr,
110 std::vector<std::vector<int>> *fragsMolAtomMapping = nullptr,
111 bool copyConformers = true);
112
113//! splits a molecule into its component fragments
114/// (disconnected components of the molecular graph)
115/*!
116
117 \param mol the molecule of interest
118 \param sanitizeFrags toggles sanitization of the fragments after
119 they are built
120 \param frags used to return the mapping of Atoms->fragments.
121 if provided, \c frags will be <tt>mol->getNumAtoms()</tt> long
122 on return and will contain the fragment assignment for each Atom
123 \param fragsMolAtomMapping used to return the Atoms in each fragment
124 On return \c mapping will be \c numFrags long, and each entry
125 will contain the indices of the Atoms in that fragment.
126 \param copyConformers toggles copying conformers of the fragments after
127 they are built
128 \return a vector of the fragments as smart pointers to ROMols
129
130*/
131RDKIT_GRAPHMOL_EXPORT std::vector<boost::shared_ptr<ROMol>> getMolFrags(
132 const ROMol &mol, bool sanitizeFrags = true,
133 std::vector<int> *frags = nullptr,
134 std::vector<std::vector<int>> *fragsMolAtomMapping = nullptr,
135 bool copyConformers = true);
136
137//! splits a molecule into pieces based on labels assigned using a query
138/*!
139
140 \param mol the molecule of interest
141 \param query the query used to "label" the molecule for fragmentation
142 \param sanitizeFrags toggles sanitization of the fragments after
143 they are built
144 \param whiteList if provided, only labels in the list will be kept
145 \param negateList if true, the white list logic will be inverted: only labels
146 not in the list will be kept
147
148 \return a map of the fragments and their labels
149
150*/
151
152template <typename T>
153RDKIT_GRAPHMOL_EXPORT std::map<T, boost::shared_ptr<ROMol>>
154getMolFragsWithQuery(const ROMol &mol, T (*query)(const ROMol &, const Atom *),
155 bool sanitizeFrags = true,
156 const std::vector<T> *whiteList = nullptr,
157 bool negateList = false);
158//! splits a molecule into pieces based on labels assigned using a query,
159//! putting them into a map of std::unique_ptr<ROMol>.
160/*!
161
162 \param mol the molecule of interest
163 \param query the query used to "label" the molecule for fragmentation
164 \param molFrags used to return the disconnected fragments as molecules.
165 Any contents on input will be cleared.
166 \param sanitizeFrags toggles sanitization of the fragments after
167 they are built
168 \param whiteList if provided, only labels in the list will be kept
169 \param negateList if true, the white list logic will be inverted: only labels
170 not in the list will be kept
171
172 \return the number of fragments
173
174*/
175template <typename T>
177 const ROMol &mol, T (*query)(const ROMol &, const Atom *),
178 std::map<T, std::unique_ptr<ROMol>> &molFrags, bool sanitizeFrags = true,
179 const std::vector<T> *whiteList = nullptr, bool negateList = false);
180
181//! \name Dealing with hydrogens
182//{@
183
185 bool explicitOnly = false; /**< only add explicit Hs */
186 bool addCoords = false; /**< add coordinates for the Hs */
187 bool addResidueInfo = false; /**< add residue info to the Hs */
189 false; /**< do not add Hs to query atoms or atoms with query bonds */
190};
191//! adds Hs to a molecule as explicit Atoms
192/*!
193 \param mol the molecule to add Hs to
194 \param params parameters controlling which Hs are added.
195 \param onlyOnAtoms (optional) if provided, this should be a vector of
196 IDs of the atoms that will be considered for H addition.
197
198 <b>Notes:</b>
199 - it makes no sense to use the \c addCoords option if the molecule's
200 heavy atoms don't already have coordinates.
201 - the molecule is modified
202 */
204 const UINT_VECT *onlyOnAtoms = nullptr);
205
206//! returns a copy of a molecule with hydrogens added in as explicit Atoms
207/*!
208 \param mol the molecule to add Hs to
209 \param explicitOnly (optional) if this \c true, only explicit Hs will be
210 added
211 \param addCoords (optional) If this is true, estimates for the atomic
212 coordinates
213 of the added Hs will be used.
214 \param onlyOnAtoms (optional) if provided, this should be a vector of
215 IDs of the atoms that will be considered for H addition.
216 \param addResidueInfo (optional) if this is true, add residue info to
217 hydrogen atoms (useful for PDB files).
218
219 \return the new molecule
220
221 <b>Notes:</b>
222 - it makes no sense to use the \c addCoords option if the molecule's
223 heavy
224 atoms don't already have coordinates.
225 - the caller is responsible for <tt>delete</tt>ing the pointer this
226 returns.
227 */
228inline ROMol *addHs(const ROMol &mol, bool explicitOnly = false,
229 bool addCoords = false,
230 const UINT_VECT *onlyOnAtoms = nullptr,
231 bool addResidueInfo = false) {
232 AddHsParameters ps{explicitOnly, addCoords, addResidueInfo};
233 std::unique_ptr<RWMol> res{new RWMol(mol)};
234 addHs(*res, ps, onlyOnAtoms);
235 return static_cast<ROMol *>(res.release());
236}
237//! \overload
238/// modifies the molecule in place
239inline void addHs(RWMol &mol, bool explicitOnly = false, bool addCoords = false,
240 const UINT_VECT *onlyOnAtoms = nullptr,
241 bool addResidueInfo = false) {
242 AddHsParameters ps{explicitOnly, addCoords, addResidueInfo};
243 addHs(mol, ps, onlyOnAtoms);
244}
245
246//! Sets Cartesian coordinates for a terminal atom.
247//! Useful for growing an atom off a molecule with sensible
248//! coordinates based on the geometry of the neighbor.
249/*!
250 NOTE: this sets appropriate coordinates in all of the molecule's
251 conformers.
252
253 \param mol the molecule the atoms belong to
254 \param idx index of the terminal atom whose coordinates are set
255 \param otherIdx index of the bonded neighbor atom
256*/
257
259 unsigned int otherIdx);
260
261//! returns a copy of a molecule with hydrogens removed
262/*!
263 \param mol the molecule to remove Hs from
264 \param implicitOnly if this \c true, only implicit Hs will be
265 removed
266 \param updateExplicitCount (optional) If this is \c true, when explicit
267 Hs are removed from the graph, the heavy atom to which they are bound will
268 have its counter of explicit Hs increased.
269 \param sanitize: (optional) If this is \c true, the final molecule will be
270 sanitized
271
272 \return the new molecule
273
274 <b>Notes:</b>
275 - Hydrogens which aren't connected to a heavy atom will not be
276 removed. This prevents molecules like <tt>"[H][H]"</tt> from having
277 all atoms removed.
278 - Labelled hydrogen (e.g. atoms with atomic number=1, but mass > 1),
279 will not be removed.
280 - two coordinate Hs, like the central H in C[H-]C, will not be removed
281 - Hs connected to dummy atoms will not be removed
282 - Hs that are part of the definition of double bond Stereochemistry
283 will not be removed
284 - Hs that are not connected to anything else will not be removed
285 - Hs that have a query defined (i.e. hasQuery() returns true) will not
286 be removed
287
288 - the caller is responsible for <tt>delete</tt>ing the pointer this
289 returns.
290*/
291[[deprecated(
292 "Please use the version with RemoveHsParameters")]] RDKIT_GRAPHMOL_EXPORT
293 ROMol *
294 removeHs(const ROMol &mol, bool implicitOnly,
295 bool updateExplicitCount = false, bool sanitize = true);
296//! \overload
297/// modifies the molecule in place
298[[deprecated(
299 "Please use the version with RemoveHsParameters")]] RDKIT_GRAPHMOL_EXPORT void
300removeHs(RWMol &mol, bool implicitOnly, bool updateExplicitCount = false,
301 bool sanitize = true);
303 bool removeDegreeZero = false; /**< hydrogens that have no bonds */
304 bool removeHigherDegrees = false; /**< hydrogens with two (or more) bonds */
306 false; /**< hydrogens with bonds only to other hydrogens */
307 bool removeIsotopes = false; /**< hydrogens with non-default isotopes */
308 bool removeAndTrackIsotopes = false; /**< removes hydrogens with non-default
309 isotopes and keeps track of the heavy atom the isotopes were attached to in
310 the private _isotopicHs atom property, so they are re-added by AddHs() as
311 the original isotopes if possible*/
313 false; /**< hydrogens with at least one dummy-atom neighbor */
315 false; /**< hydrogens defining bond stereochemistry */
316 bool removeWithWedgedBond = true; /**< hydrogens with wedged bonds to them */
317 bool removeWithQuery = false; /**< hydrogens with queries defined */
318 bool removeMapped = true; /**< mapped hydrogens */
319 bool removeInSGroups = true; /**< part of a SubstanceGroup.
320 An H atom will only be removed if it doesn't cause any SGroup to become empty,
321 and if it doesn't play a special role in the SGroup (XBOND, attach point
322 or a CState) */
323 bool showWarnings = true; /**< display warnings for Hs that are not removed */
324 bool removeNonimplicit = true; /**< DEPRECATED equivalent of !implicitOnly */
326 false; /**< DEPRECATED equivalent of updateExplicitCount */
327 bool removeHydrides = false; /**< Removing Hydrides */
329 false; /**< remove Hs which are bonded to atoms with specified
330 non-tetrahedral stereochemistry */
331};
332
333//! \overload
334/// modifies the molecule in place
336 RWMol &mol, const RemoveHsParameters &ps = RemoveHsParameters(),
337 bool sanitize = true);
338//! \overload
339/// The caller owns the pointer this returns
341 const ROMol &mol, const RemoveHsParameters &ps = RemoveHsParameters(),
342 bool sanitize = true);
343
344//! removes all Hs from a molecule
345RDKIT_GRAPHMOL_EXPORT void removeAllHs(RWMol &mol, bool sanitize = true);
346//! \overload
347/// The caller owns the pointer this returns
349 bool sanitize = true);
350
351//! returns a copy of a molecule with hydrogens removed and added as queries
352//! to the heavy atoms to which they are bound.
353/*!
354 This is really intended to be used with molecules that contain QueryAtoms
355
356 \param mol the molecule to remove Hs from
357
358 \return the new molecule
359
360 <b>Notes:</b>
361 - Atoms that do not already have hydrogen count queries will have one
362 added, other H-related queries will not be touched. Examples:
363 - C[H] -> [C;!H0]
364 - [C;H1][H] -> [C;H1]
365 - [C;H2][H] -> [C;H2]
366 - Hydrogens which aren't connected to a heavy atom will not be
367 removed. This prevents molecules like <tt>"[H][H]"</tt> from having
368 all atoms removed.
369 - the caller is responsible for <tt>delete</tt>ing the pointer this
370 returns.
371 - By default all hydrogens are removed, however if
372 mergeUnmappedOnly is true, any hydrogen participating
373 in an atom map will be retained
374
375*/
377 bool mergeUnmappedOnly = false,
378 bool mergeIsotopes = false);
379//! \overload
380/// modifies the molecule in place
382 bool mergeUnmappedOnly = false,
383 bool mergeIsotopes = false);
384
385//! returns a pair of booleans (hasQueryHs, hasUnmergaebleQueryHs)
386/*!
387 This is really intended to be used with molecules that contain QueryAtoms
388 such as when checking smarts patterns for explicit hydrogens
389
390
391 \param mol the molecule to check for query Hs from
392 \return std::pair if pair.first is true if the molecule has query
393 hydrogens, if pair.second is true, the queryHs cannot be removed my
394 mergeQueryHs
395*/
396RDKIT_GRAPHMOL_EXPORT std::pair<bool, bool> hasQueryHs(const ROMol &mol);
397
407
408//! Parameters controlling the behavior of MolOps::adjustQueryProperties
409/*!
410
411 Note that some of the options here are either directly contradictory or make
412 no sense when combined with each other. We generally assume that client code
413 is doing something sensible and don't attempt to detect possible conflicts
414 or problems.
415
416*/
418 bool adjustDegree = true; /**< add degree queries */
420
421 bool adjustRingCount = false; /**< add ring-count queries */
422 std::uint32_t adjustRingCountFlags =
424
425 bool makeDummiesQueries = true; /**< convert dummy atoms without isotope
426 labels to any-atom queries */
427
428 bool aromatizeIfPossible = true; /**< perceive and set aromaticity */
429
431 false; /**< convert bonds to generic queries (any bonds) */
433
435 false; /**< convert atoms to generic queries (any atoms) */
437
438 bool adjustHeavyDegree = false; /**< adjust the heavy-atom degree instead of
439 overall degree */
442
443 bool adjustRingChain = false; /**< add ring-chain queries */
445
447 false; /**< remove stereochemistry info from double bonds that do not
448 have the stereoCare property set */
449
451 false; /**< sets bond queries in conjugated five-rings to
452 SINGLE|DOUBLE|AROMATIC */
453
455 false; /**< uses the 5-ring aromaticity behavior of the (former) MDL
456 software as documented in the Chemical Representation Guide */
457
459 false; /**< sets single bonds between aromatic or conjugated atoms and
460 degree one neighbors to SINGLE|AROMATIC */
461
463 false; /**< sets non-ring single bonds between two aromatic or
464 conjugated atoms to SINGLE|AROMATIC */
465
466 //! \brief returns an AdjustQueryParameters object with all adjustments
467 //! disabled
470 res.adjustDegree = false;
471 res.makeDummiesQueries = false;
472 res.aromatizeIfPossible = false;
473 return res;
474 }
476};
477
478//! updates an AdjustQueryParameters object from a JSON string
480 MolOps::AdjustQueryParameters &p, const std::string &json);
481
482//! returns a copy of a molecule with query properties adjusted
483/*!
484 \param mol the molecule to adjust
485 \param params controls the adjustments made
486
487 \return the new molecule, the caller owns the memory
488*/
490 const ROMol &mol, const AdjustQueryParameters *params = nullptr);
491//! \overload
492/// modifies the molecule in place
494 RWMol &mol, const AdjustQueryParameters *params = nullptr);
495
496//! returns a copy of a molecule with the atoms renumbered
497/*!
498
499 \param mol the molecule to work with
500 \param newOrder the new ordering of the atoms (should be numAtoms long)
501 for example: if newOrder is [3,2,0,1], then atom 3 in the original
502 molecule will be atom 0 in the new one
503
504 \return the new molecule
505
506 <b>Notes:</b>
507 - the caller is responsible for <tt>delete</tt>ing the pointer this
508 returns.
509
510*/
512 const ROMol &mol, const std::vector<unsigned int> &newOrder);
513
514//! @}
515
516//! \name Sanitization
517/// {
518
519// clang-format off
520BETTER_ENUM(SanitizeFlags, unsigned int,
521 SANITIZE_NONE = 0x0,
522 SANITIZE_CLEANUP = 0x1,
523 SANITIZE_PROPERTIES = 0x2,
524 SANITIZE_SYMMRINGS = 0x4,
525 SANITIZE_KEKULIZE = 0x8,
526 SANITIZE_FINDRADICALS = 0x10,
527 SANITIZE_SETAROMATICITY = 0x20,
528 SANITIZE_SETCONJUGATION = 0x40,
529 SANITIZE_SETHYBRIDIZATION = 0x80,
530 SANITIZE_CLEANUPCHIRALITY = 0x100,
531 SANITIZE_ADJUSTHS = 0x200,
532 SANITIZE_CLEANUP_ORGANOMETALLICS = 0x400,
533 SANITIZE_CLEANUPATROPISOMERS = 0x800,
534 SANITIZE_ALL = 0xFFFFFFF
535);
536// clang-format on
537
538//! \brief carries out a collection of tasks for cleaning up a molecule and
539//! ensuring that it makes "chemical sense"
540/*!
541 This functions calls the following in sequence
542 -# MolOps::cleanUp()
543 -# mol.updatePropertyCache()
544 -# MolOps::symmetrizeSSSR()
545 -# MolOps::Kekulize()
546 -# MolOps::assignRadicals()
547 -# MolOps::setAromaticity()
548 -# MolOps::setConjugation()
549 -# MolOps::setHybridization()
550 -# MolOps::cleanupChirality()
551 -# MolOps::adjustHs()
552 -# mol.updatePropertyCache()
553
554 \param mol : the RWMol to be cleaned
555
556 \param operationThatFailed : the first (if any) sanitization operation that
557 fails is set here.
558 The values are taken from the \c SanitizeFlags
559 enum. On success, the value is \c
560 SanitizeFlags::SANITIZE_NONE
561
562 \param sanitizeOps : the bits here are used to set which sanitization
563 operations are carried out. The elements of the \c
564 SanitizeFlags enum define the operations.
565
566 <b>Notes:</b>
567 - If there is a failure in the sanitization, a \c MolSanitizeException
568 will be thrown.
569 - in general the user of this function should cast the molecule following
570 this function to a ROMol, so that new atoms and bonds cannot be added to
571 the molecule and screw up the sanitizing that has been done here
572*/
574 RWMol &mol, unsigned int &operationThatFailed,
575 unsigned int sanitizeOps = SanitizeFlags::SANITIZE_ALL);
576//! \overload
578
579//! \brief Identifies chemistry problems (things that don't make chemical
580//! sense) in a molecule
581/*!
582 This functions uses the operations in sanitizeMol but does not change
583 the input structure and returns a list of the problems encountered instead
584 of stopping at the first failure,
585
586 The problems this looks for come from the sanitization operations:
587 -# mol.updatePropertyCache() : Unreasonable valences
588 -# MolOps::Kekulize() : Unkekulizable ring systems, aromatic atoms not
589 in rings, aromatic bonds to non-aromatic atoms.
590
591 \param mol : the ROMol to be cleaned
592
593 \param sanitizeOps : the bits here are used to set which sanitization
594 operations are carried out. The elements of the \c
595 SanitizeFlags enum define the operations.
596
597 \return a vector of \c MolSanitizeException values that indicate what
598 problems were encountered
599
600*/
602std::vector<std::unique_ptr<MolSanitizeException>> detectChemistryProblems(
603 const ROMol &mol, unsigned int sanitizeOps = SanitizeFlags::SANITIZE_ALL);
604
605//! Possible aromaticity models
606/*!
607- \c AROMATICITY_DEFAULT at the moment always uses \c AROMATICITY_RDKIT
608- \c AROMATICITY_RDKIT is the standard RDKit model (as documented in the RDKit
609Book)
610- \c AROMATICITY_SIMPLE only considers 5- and 6-membered simple rings (it
611does not consider the outer envelope of fused rings)
612- \c AROMATICITY_MDL
613- \c AROMATICIT_MMFF94 the aromaticity model used by the MMFF94 force field
614- \c AROMATICITY_CUSTOM uses a caller-provided function
615*/
616typedef enum {
617 AROMATICITY_DEFAULT = 0x0, ///< future proofing
622 AROMATICITY_CUSTOM = 0xFFFFFFF ///< use a function
624
625//! sets the aromaticity model for a molecule to MMFF94
627
628//! Sets up the aromaticity for a molecule
629/*!
630
631 This is what happens here:
632 -# find all the simple rings by calling the findSSSR function
633 -# loop over all the Atoms in each ring and mark them if they are
634 candidates
635 for aromaticity. A ring atom is a candidate if it can spare electrons
636 to the ring and if it's from the first two rows of the periodic table.
637 -# based on the candidate atoms, mark the rings to be either candidates
638 or non-candidates. A ring is a candidate only if all its atoms are
639 candidates
640 -# apply Hueckel rule to each of the candidate rings to check if the ring
641 can be
642 aromatic
643
644 \param mol the RWMol of interest
645 \param model the aromaticity model to use
646 \param func a custom function for assigning aromaticity (only used when
647 model=\c AROMATICITY_CUSTOM)
648
649 \return >0 on success, <= 0 otherwise
650
651 <b>Assumptions:</b>
652 - Kekulization has been done (i.e. \c MolOps::Kekulize() has already
653 been called)
654
655*/
658 int (*func)(RWMol &) = nullptr);
659
660//! Designed to be called by the sanitizer to handle special cases before
661/// anything is done.
662/*!
663
664 Currently this:
665 - modifies nitro groups, so that the nitrogen does not have an
666 unreasonable valence of 5, as follows:
667 - the nitrogen gets a positive charge
668 - one of the oxygens gets a negative charge and the double bond to
669 this oxygen is changed to a single bond The net result is that nitro groups
670 can be counted on to be: \c "[N+](=O)[O-]"
671 - modifies halogen-oxygen containing species as follows:
672 \c [Cl,Br,I](=O)(=O)(=O)O -> [X+3]([O-])([O-])([O-])O
673 \c [Cl,Br,I](=O)(=O)O -> [X+3]([O-])([O-])O
674 \c [Cl,Br,I](=O)O -> [X+]([O-])O
675 - converts the substructure [N,C]=P(=O)-* to [N,C]=[P+](-[O-])-*
676
677 \param mol the molecule of interest
678
679*/
681
682//! Designed to be called by the sanitizer to handle special cases for
683//! organometallic species before valence is perceived
684/*!
685
686 \b Note that this function is experimental and may either change in
687 behavior or be replaced with something else in future releases.
688
689 Currently this:
690 - replaces single bonds between "hypervalent" organic atoms and metals
691 with dative bonds (this is following an IUPAC recommendation:
692 https://iupac.qmul.ac.uk/tetrapyrrole/TP8.html)
693
694 \param mol the molecule of interest
695
696*/
698
699//! Called by the sanitizer to assign radical counts to atoms
701
702//! adjust the number of implicit and explicit Hs for special cases
703/*!
704
705 Currently this:
706 - modifies aromatic nitrogens so that, when appropriate, they have an
707 explicit H marked (e.g. so that we get things like \c "c1cc[nH]cc1"
708
709 \param mol the molecule of interest
710
711 <b>Assumptions</b>
712 - this is called after the molecule has been sanitized,
713 aromaticity has been perceived, and the implicit valence of
714 everything has been calculated.
715
716*/
718
719//! Kekulizes the molecule
720/*!
721
722 \param mol the molecule of interest
723
724 \param markAtomsBonds if this is set to true, \c isAromatic boolean
725 settings on both the Bonds and Atoms are turned to false following the
726 Kekulization, otherwise they are left alone in their original state.
727
728 \param canonical controls atom traversal order during kekulization:
729 - \c false (the default): traverses atoms in atom-index order
730 (std::iota). Fast, but the resulting Kekulé bond assignment depends on
731 the order atoms appear in the molecule.
732 - \c true: uses canonical atom ranking (Canon::rankFragmentAtoms with a
733 wedge-end heuristic) so that the Kekulé bond assignment is independent
734 of atom ordering. Use this in output writers and any code that requires
735 a reproducible, chemistry-based Kekulé form.
736
737 \note <b>Behavioural difference from releases prior to 2026.03.1:</b>
738 Before the \c canonical parameter was added (i.e. in 2025.09.1 and
739 earlier), Kekulize traversed atoms in an order determined by the SSSR
740 ring-membership and adjacency-list iteration order — neither pure
741 atom-index order (\c canonical=false) nor rank-based order
742 (\c canonical=true). Tests and expected outputs written against those
743 releases may differ from both new modes; \c canonical=true is the closest
744 match for output-writer use cases, while \c canonical=false gives a
745 deterministic but potentially different Kekulé form from the old default.
746
747 Note that the canonical mode only really makes sense when the molecule's
748 chemistry is sane, like after sanitization. If stereochemistry hasn't
749 been perceived, the chemistry of the molecule is inconsistent, and
750 "canonical" atom ranks are only a technical artifact.
751
752 \param maxBackTracks the maximum number of attempts at back-tracking. The
753 algorithm uses a back-tracking procedure to revisit a previous setting of
754 double bond if we hit a wall in the kekulization process
755
756 <b>Notes:</b>
757 - this does not modify query bonds which have bond type queries (like
758 those which come from SMARTS) or rings containing them.
759 - even if \c markAtomsBonds is \c false the \c BondType for all modified
760 aromatic bonds will be changed from \c RDKit::Bond::AROMATIC to \c
761 RDKit::Bond::SINGLE or RDKit::Bond::DOUBLE during Kekulization.
762
763*/
764RDKIT_GRAPHMOL_EXPORT void Kekulize(RWMol &mol, bool markAtomsBonds = true,
765 bool canonical = true,
766 unsigned int maxBackTracks = 100);
767//! Kekulizes the molecule if possible. If the kekulization fails the molecule
768//! will not be modified
769/*!
770
771 \param mol the molecule of interest
772
773 \param markAtomsBonds if this is set to true, \c isAromatic boolean
774 settings on both the Bonds and Atoms are turned to false following the
775 Kekulization, otherwise they are left alone in their original state.
776
777 \param canonical controls atom traversal order; see the full description
778 on \c Kekulize() for the three-way distinction between \c canonical=false
779 (atom-index order, default), \c canonical=true (rank-based, order-
780 independent), and the pre-PR master behaviour.
781
782 \param maxBackTracks the maximum number of attempts at back-tracking. The
783 algorithm uses a back-tracking procedure to revisit a previous setting of
784 double bond if we hit a wall in the kekulization process
785
786 \returns whether or not the kekulization succeeded
787
788 <b>Notes:</b>
789 - even if \c markAtomsBonds is \c false the \c BondType for all aromatic
790 bonds will be changed from \c RDKit::Bond::AROMATIC to \c
791 RDKit::Bond::SINGLE or RDKit::Bond::DOUBLE during Kekulization.
792
793*/
795 bool markAtomsBonds = true,
796 bool canonical = true,
797 unsigned int maxBackTracks = 100);
798
799//! flags the molecule's conjugated bonds
801
802//! calculates and sets the hybridization of all a molecule's Stoms
804
805//! @}
806
807//! \name Ring finding and SSSR
808//! @{
809
810//! finds a molecule's Smallest Set of Smallest Rings
811/*!
812 Currently this implements a modified form of Figueras algorithm
813 (JCICS - Vol. 36, No. 5, 1996, 986-991)
814
815 \param mol the molecule of interest
816 \param res used to return the vector of rings. Each entry is a vector with
817 atom indices. This information is also stored in the molecule's
818 RingInfo structure, so this argument is optional (see overload)
819 \param includeDativeBonds - determines whether or not dative bonds are used
820 in the ring finding.
821 \param includeHydrogenBonds - determines whether or not hydrogen bonds are
822 used in the ring finding.
823
824 \return number of smallest rings found
825
826 Base algorithm:
827 - The original algorithm starts by finding representative degree 2
828 nodes.
829 - Representative because if a series of deg 2 nodes are found only
830 one of them is picked.
831 - The smallest ring around each of them is found.
832 - The bonds that connect to this degree 2 node are them chopped off,
833 yielding
834 new deg two nodes
835 - The process is repeated on the new deg 2 nodes.
836 - If no deg 2 nodes are found, a deg 3 node is picked. The smallest ring
837 with it is found. A bond from this is "carefully" (look in the paper)
838 selected and chopped, yielding deg 2 nodes. The process is same as
839 above once this is done.
840
841 Our Modifications:
842 - If available, more than one smallest ring around a representative deg 2
843 node will be computed and stored
844 - Typically 3 rings are found around a degree 3 node (when no deg 2s are
845 available)
846 and all the bond to that node are chopped.
847 - The extra rings that were found in this process are removed after all
848 the nodes have been covered.
849
850 These changes were motivated by several factors:
851 - We believe the original algorithm fails to find the correct SSSR
852 (finds the correct number of them but the wrong ones) on some sample
853 mols
854 - Since SSSR may not be unique, a post-SSSR step to symmetrize may be
855 done. The extra rings this process adds can be quite useful.
856*/
858 std::vector<std::vector<int>> &res,
859 bool includeDativeBonds = false,
860 bool includeHydrogenBonds = false);
861//! \overload
863 std::vector<std::vector<int>> *res = nullptr,
864 bool includeDativeBonds = false,
865 bool includeHydrogenBonds = false);
866
867//! use a DFS algorithm to identify ring bonds and atoms in a molecule
868/*!
869 \b NOTE: though the RingInfo structure is populated by this function,
870 the only really reliable calls that can be made are to check if
871 mol.getRingInfo().numAtomRings(idx) or mol.getRingInfo().numBondRings(idx)
872 return values >0
873*/
875
877 bool includeDativeBonds = false,
878 bool includeHydrogenBonds = false);
879
880//! symmetrize the molecule's Smallest Set of Smallest Rings
881/*!
882 SSSR rings obtained from "findSSSR" can be non-unique in some case.
883 For example, cubane has five SSSR rings, not six as one would hope.
884
885 This function adds additional rings to the SSSR list if necessary
886 to make the list symmetric, e.g. all atoms in cubane will be part of the
887 same number of SSSRs. This function choses these extra rings from the extra
888 rings computed and discarded during findSSSR. The new ring are chosen such
889 that:
890 - replacing a same sized ring in the SSSR list with an extra ring yields
891 the same union of bond IDs as the original SSSR list
892
893 \param mol - the molecule of interest
894 \param res used to return the vector of rings. Each entry is a vector with
895 atom indices. This information is also stored in the molecule's
896 RingInfo structure, so this argument is optional (see overload)
897 \param includeDativeBonds - determines whether or not dative bonds are used
898 in the ring finding.
899 \param includeHydrogenBonds - determines whether or not hydrogen bonds are
900 used in the ring finding.
901
902 \return the total number of rings = (new rings + old SSSRs)
903
904 <b>Notes:</b>
905 - if no SSSR rings are found on the molecule - MolOps::findSSSR() is called
906 first
907*/
909 std::vector<std::vector<int>> &res,
910 bool includeDativeBonds = false,
911 bool includeHydrogenBonds = false);
912//! \overload
914 bool includeDativeBonds = false,
915 bool includeHydrogenBonds = false);
916
917//! @}
918
919//! \name Shortest paths and other matrices
920//! @{
921
922//! returns a molecule's adjacency matrix
923/*!
924 \param mol the molecule of interest
925 \param useBO toggles use of bond orders in the matrix
926 \param emptyVal sets the empty value (for non-adjacent atoms)
927 \param force forces calculation of the matrix, even if already
928 computed
929 \param propNamePrefix used to set the cached property name
930 \param bondsToUse used to limit which bonds are considered
931
932 \return the adjacency matrix.
933
934 <b>Notes</b>
935 - The result of this is cached in the molecule's local property
936 dictionary, which will handle deallocation. The caller should <b>not</b> \c
937 delete this pointer.
938
939*/
941 const ROMol &mol, bool useBO = false, int emptyVal = 0, bool force = false,
942 const char *propNamePrefix = nullptr,
943 const boost::dynamic_bitset<> *bondsToUse = nullptr);
944
945//! Computes the molecule's topological distance matrix
946/*!
947 Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
948
949 \param mol the molecule of interest
950 \param useBO toggles use of bond orders in the matrix
951 \param useAtomWts sets the diagonal elements of the result to
952 6.0/(atomic number) so that the matrix can be used to calculate
953 Balaban J values. This does not affect the bond weights.
954 \param force forces calculation of the matrix, even if already
955 computed
956 \param propNamePrefix used to set the cached property name
957
958 \return the distance matrix.
959
960 <b>Notes</b>
961 - The result of this is cached in the molecule's local property
962 dictionary, which will handle deallocation. The caller should <b>not</b> \c
963 delete this pointer.
964
965
966*/
968 const ROMol &mol, bool useBO = false, bool useAtomWts = false,
969 bool force = false, const char *propNamePrefix = nullptr);
970
971//! Computes the molecule's topological distance matrix
972/*!
973 Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
974
975 \param mol the molecule of interest
976 \param activeAtoms only elements corresponding to these atom indices
977 will be included in the calculation
978 \param bonds only bonds found in this list will be included in the
979 calculation
980 \param useBO toggles use of bond orders in the matrix
981 \param useAtomWts sets the diagonal elements of the result to
982 6.0/(atomic number) so that the matrix can be used to calculate
983 Balaban J values. This does not affect the bond weights.
984
985 \return the distance matrix.
986
987 <b>Notes</b>
988 - The results of this call are not cached, the caller <b>should</b> \c
989 delete
990 this pointer.
991
992
993*/
995 const ROMol &mol, const std::vector<int> &activeAtoms,
996 const std::vector<const Bond *> &bonds, bool useBO = false,
997 bool useAtomWts = false);
998
999//! Computes the molecule's 3D distance matrix
1000/*!
1001
1002 \param mol the molecule of interest
1003 \param confId the conformer to use
1004 \param useAtomWts sets the diagonal elements of the result to
1005 6.0/(atomic number)
1006 \param force forces calculation of the matrix, even if already
1007 computed
1008 \param propNamePrefix used to set the cached property name
1009 (if set to an empty string, the matrix will not be
1010 cached)
1011
1012 \return the distance matrix.
1013
1014 <b>Notes</b>
1015 - If propNamePrefix is not empty the result of this is cached in the
1016 molecule's local property dictionary, which will handle deallocation.
1017 In other cases the caller is responsible for freeing the memory.
1018
1019*/
1021 const ROMol &mol, int confId = -1, bool useAtomWts = false,
1022 bool force = false, const char *propNamePrefix = nullptr);
1023
1024//! Find the shortest path between two atoms
1025/*!
1026 Uses the Bellman-Ford algorithm
1027
1028 \param mol molecule of interest
1029 \param aid1 index of the first atom
1030 \param aid2 index of the second atom
1031
1032 \return an std::list with the indices of the atoms along the shortest
1033 path
1034
1035 <b>Notes:</b>
1036 - the starting and end atoms are included in the path
1037 - if no path is found, an empty path is returned
1038
1039*/
1040RDKIT_GRAPHMOL_EXPORT std::list<int> getShortestPath(const ROMol &mol, int aid1,
1041 int aid2);
1042
1043//! @}
1044
1045//! \name Stereochemistry
1046//! @{
1047
1048// class to hold hybridizations
1049
1051 public:
1053 throw FileParseException("not to be called without a mol parameter");
1054 };
1057 throw FileParseException("not to be called without a mol parameter");
1058 };
1059
1060 ~Hybridizations() = default;
1061
1063 return static_cast<Atom::HybridizationType>(d_hybridizations[idx]);
1064 }
1065 // Atom::HybridizationType &operator[](unsigned int idx) {
1066 // return static_cast<Atom::HybridizationType>(d_hybridizations[idx]);
1067 // d_hybridizations[d_hybridizations[idx]];
1068 // }
1069
1070 // // void clear() { d_hybridizations.clear(); }
1071 // // void resize(unsigned int sz) { d_hybridizations.resize(sz); }
1072 unsigned int size() const { return d_hybridizations.size(); }
1073
1074 private:
1075 std::vector<int> d_hybridizations;
1076};
1077
1078//! removes bogus chirality markers (e.g. tetrahedral flags on non-sp3
1079//! centers):
1081
1082//! removes bogus atropisomeric markers (e.g. those without sp2 begin and end
1083//! atoms):
1085 Hybridizations &hybridizations);
1086//! \overload
1088
1089//! \brief Uses a conformer to assign ChiralTypes to a molecule's atoms
1090/*!
1091 \param mol the molecule of interest
1092 \param confId the conformer to use
1093 \param replaceExistingTags if this flag is true, any existing atomic chiral
1094 tags will be replaced
1095
1096 If the conformer provided is not a 3D conformer, nothing will be done.
1097
1098
1099 NOTE that this does not check to see if atoms are chiral centers (i.e. all
1100 substituents are different), it merely sets the chiral type flags based on
1101 the coordinates and atom ordering. Use \c assignStereochemistryFrom3D() if
1102 you want chiral flags only on actual stereocenters.
1103*/
1105 ROMol &mol, int confId = -1, bool replaceExistingTags = true);
1106
1107//! \brief Uses a conformer to assign ChiralTypes to a molecule's atoms and
1108//! stereo flags to its bonds
1109/*!
1110
1111 \param mol the molecule of interest
1112 \param confId the conformer to use
1113 \param replaceExistingTags if this flag is true, any existing info about
1114 stereochemistry will be replaced
1115
1116 If the conformer provided is not a 3D conformer, nothing will be done.
1117*/
1119 ROMol &mol, int confId = -1, bool replaceExistingTags = true);
1120
1121//! \brief Use bond directions to assign ChiralTypes to a molecule's atoms
1122/*!
1123
1124 \param mol the molecule of interest
1125 \param confId the conformer to use
1126 \param replaceExistingTags if this flag is true, any existing info about
1127 stereochemistry will be replaced
1128*/
1130 ROMol &mol, int confId = -1, bool replaceExistingTags = true);
1131
1132//! \deprecated: this function will be removed in a future release. Use
1133//! setDoubleBondNeighborDirections() instead
1135 int confId = -1);
1136//! Sets bond directions based on double bond stereochemistry
1138 ROMol &mol, const Conformer *conf = nullptr);
1139//! removes directions from single bonds. The property _UnknownStereo will be
1140//! set on wiggly bonds
1142 bool onlyWedgeFlags = false);
1143
1144//! removes directions from all bonds. The property _UnknownStereo will be set
1145//! on wiggly bonds
1147//! removes directions from all bonds. The property _UnknownStereo will be set
1148//! on wiggly bonds
1150 bool onlyWedgeFlags = false);
1151
1152//! Assign CIS/TRANS bond stereochemistry tags based on neighboring
1153//! directions
1155
1156//! Assign stereochemistry tags to atoms and bonds.
1157/*!
1158 If useLegacyStereoPerception is true, it also does the CIP stereochemistry
1159 assignment for the molecule's atoms (R/S) and double bonds (Z/E).
1160 This assignment is based on legacy code which is fast, but is
1161 known to incorrectly assign CIP labels in some cases.
1162 instead, to assign CIP labels based on an accurate, though slower,
1163 implementation of the CIP rules, call CIPLabeler::assignCIPLabels().
1164 Chiral atoms will have a property '_CIPCode' indicating their chiral code.
1165
1166 \param mol the molecule to use
1167 \param cleanIt if true, any existing values of the property `_CIPCode`
1168 will be cleared, atoms with a chiral specifier that aren't
1169 actually chiral (e.g. atoms with duplicate
1170 substituents or only 2 substituents, etc.) will have
1171 their chiral code set to CHI_UNSPECIFIED. Bonds with
1172 STEREOCIS/STEREOTRANS specified that have duplicate
1173 substituents based upon the CIP atom ranks will be
1174 marked STEREONONE.
1175 \param force causes the calculation to be repeated even if it has
1176 already been done
1177 \param flagPossibleStereoCenters set the _ChiralityPossible property on
1178 atoms that are possible stereocenters
1179
1180 <b>Notes:M</b>
1181 - Throughout we assume that we're working with a hydrogen-suppressed
1182 graph.
1183
1184*/
1186 ROMol &mol, bool cleanIt = false, bool force = false,
1187 bool flagPossibleStereoCenters = false);
1188//! Removes all stereochemistry information from atoms (i.e. R/S) and bonds
1189/// i.e. Z/E)
1190/*!
1191
1192 \param mol the molecule of interest
1193*/
1195
1196//! \brief finds bonds that could be cis/trans in a molecule and mark them as
1197//! Bond::STEREOANY.
1198/*!
1199 \param mol the molecule of interest
1200 \param cleanIt toggles removal of stereo flags from double bonds that can
1201 not have stereochemistry
1202
1203 This function finds any double bonds that can potentially be part of
1204 a cis/trans system. No attempt is made here to mark them cis or
1205 trans. No attempt is made to detect double bond stereo in ring systems.
1206
1207 This function is useful in the following situations:
1208 - when parsing a mol file; for the bonds marked here, coordinate
1209 information on the neighbors can be used to identify cis or trans
1210 states
1211 - when writing a mol file; bonds that can be cis/trans but not marked as
1212 either need to be specially marked in the mol file
1213 - finding double bonds with unspecified stereochemistry so they
1214 can be enumerated for downstream 3D tools
1215
1216 The CIPranks on the neighboring atoms are checked in this function. The
1217 _CIPCode property if set to any on the double bond.
1218*/
1220 bool cleanIt = false);
1221//! \brief Uses the molParity atom property to assign ChiralType to a
1222//! molecule's atoms
1223/*!
1224 \param mol the molecule of interest
1225 \param replaceExistingTags if this flag is true, any existing atomic chiral
1226 tags will be replaced
1227*/
1229 ROMol &mol, bool replaceExistingTags = true);
1230
1231//! @}
1232
1233//! returns the number of atoms which have a particular property set
1235 const ROMol &mol, const std::string_view &prop);
1236
1237//! returns whether or not a molecule needs to have Hs added to it.
1239
1240//! \brief Replaces haptic bond with explicit dative bonds.
1241/*!
1242 *
1243 * @param mol the molecule of interest
1244 *
1245 * One way of showing haptic bonds (such as cyclopentadiene to iron in
1246 * ferrocene) is to use a dummy atom with a dative bond to the iron atom with
1247 * the bond labelled with the atoms involved in the organic end of the bond.
1248 * Another way is to have explicit dative bonds from the atoms of the haptic
1249 * group to the metal atom. This function converts the former representation
1250 * to the latter.
1251 */
1253
1254//! \overload modifies molecule in place.
1256
1257//! \brief Replaces explicit dative bonds with haptic.
1258/*!
1259 *
1260 * @param mol the molecule of interest
1261 *
1262 * Does the reverse of hapticBondsToDative. If there are multiple contiguous
1263 * atoms attached by dative bonds to an atom (probably a metal atom), the
1264 * dative bonds will be replaced by a dummy atom in their centre attached to
1265 * the (metal) atom by a dative bond, which is labelled with ENDPTS of the
1266 * atoms that had the original dative bonds.
1267 */
1269
1270//! \overload modifies molecule in place.
1272
1273/*!
1274 Calculates a molecule's average molecular weight
1275
1276 \param mol the molecule of interest
1277 \param onlyHeavy (optional) if this is true (the default is false),
1278 only heavy atoms will be included in the MW calculation
1279
1280 \return the AMW
1281*/
1283 bool onlyHeavy = false);
1284/*!
1285 Calculates a molecule's exact molecular weight
1286
1287 \param mol the molecule of interest
1288 \param onlyHeavy (optional) if this is true (the default is false),
1289 only heavy atoms will be included in the MW calculation
1290
1291 \return the exact MW
1292*/
1294 bool onlyHeavy = false);
1295
1296/*!
1297 Calculates a molecule's formula
1298
1299 \param mol the molecule of interest
1300 \param separateIsotopes if true, isotopes will show up separately in the
1301 formula. So C[13CH2]O will give the formula: C[13C]H6O
1302 \param abbreviateHIsotopes if true, 2H and 3H will be represented as
1303 D and T instead of [2H] and [3H]. This only applies if \c
1304 separateIsotopes is true
1305
1306 \return the formula as a string
1307*/
1309 const ROMol &mol, bool separateIsotopes = false,
1310 bool abbreviateHIsotopes = true);
1311
1312namespace details {
1313//! not recommended for use in other code
1315 RWMol &mol, const boost::dynamic_bitset<> &atomsToUse,
1316 boost::dynamic_bitset<> bondsToUse, bool markAtomsBonds = true,
1317 bool canonical = true, unsigned int maxBackTracks = 100);
1318
1319// If the bond is dative, and it has a common_properties::MolFileBondEndPts
1320// prop, returns a vector of the indices of the atoms mentioned in the prop.
1321RDKIT_GRAPHMOL_EXPORT std::vector<int> hapticBondEndpoints(const Bond *bond);
1322
1323} // namespace details
1324
1325//! attachment points encoded as attachPt properties are added to the graph as
1326/// dummy atoms
1327/*!
1328 *
1329 * @param mol the molecule of interest
1330 * @param addAsQueries if true, the dummy atoms will be added as null queries
1331 * (i.e. they will match any atom in a substructure search)
1332 * @param addCoords if true and the molecule has one or more conformers,
1333 * positions for the attachment points will be added to the conformer(s).
1334 *
1335 */
1337 bool addAsQueries = true,
1338 bool addCoords = true);
1339//! dummy atoms in the graph are removed and replaced with attachment point
1340//! annotations on the attached atoms
1341/*!
1342 *
1343 * @param mol the molecule of interest
1344 * @param markedOnly if true, only dummy atoms with the _fromAttachPoint
1345 * property will be collapsed
1346 *
1347 * In order for a dummy atom to be considered for collapsing it must have:
1348 * - degree 1 with a single or unspecified bond
1349 * - the bond to it can not be wedged
1350 * - either no query or be an AtomNullQuery
1351 *
1352 */
1354 bool markedOnly = true);
1355
1356namespace details {
1357//! attachment points encoded as attachPt properties are added to the graph as
1358/// dummy atoms
1359/*!
1360 *
1361 * @param mol the molecule of interest
1362 * @param atomIdx the index of the atom to which the attachment point should
1363 * be added
1364 * @param val the attachment point value. Should be 1 or 2
1365 * @param addAsQueries if true, the dummy atoms will be added as null queries
1366 * (i.e. they will match any atom in a substructure search)
1367 * @param addCoords if true and the molecule has one or more conformers,
1368 * positions for the attachment points will be added to the conformer(s).
1369 *
1370 */
1372 RWMol &mol, unsigned int atomIdx, unsigned int val, bool addAsQuery = true,
1373 bool addCoords = true);
1374
1375//! returns whether or not an atom is an attachment point
1376/*!
1377 *
1378 * @param mol the molecule of interest
1379 * @param markedOnly if true, only dummy atoms with the _fromAttachPoint
1380 * property will be collapsed
1381 *
1382 * In order for a dummy atom to be considered for collapsing it must have:
1383 * - degree 1 with a single or unspecified bond
1384 * - the bond to it can not be wedged
1385 * - either no query or be an AtomNullQuery
1386 *
1387 */
1389 bool markedOnly = true);
1390
1391} // namespace details
1392
1393} // namespace MolOps
1394} // namespace RDKit
1395
1396#endif
#define BETTER_ENUM(Enum, Underlying,...)
Definition BetterEnums.h:17
RDKIT_GRAPHMOL_EXPORT const int ci_LOCAL_INF
The class for representing atoms.
Definition Atom.h:74
HybridizationType
store hybridization
Definition Atom.h:92
class for representing a bond
Definition Bond.h:46
The class for representing 2D or 3D conformation of a molecule.
Definition Conformer.h:46
used by various file parsing classes to indicate a parse error
unsigned int size() const
Definition MolOps.h:1072
Atom::HybridizationType operator[](int idx)
Definition MolOps.h:1062
Hybridizations(const Hybridizations &)
Definition MolOps.h:1056
Hybridizations(const ROMol &mol)
RWMol is a molecule class that is intended to be edited.
Definition RWMol.h:32
#define RDKIT_GRAPHMOL_EXPORT
Definition export.h:257
RDKIT_GRAPHMOL_EXPORT std::vector< int > hapticBondEndpoints(const Bond *bond)
RDKIT_GRAPHMOL_EXPORT bool isAttachmentPoint(const Atom *atom, bool markedOnly=true)
returns whether or not an atom is an attachment point
RDKIT_GRAPHMOL_EXPORT void KekulizeFragment(RWMol &mol, const boost::dynamic_bitset<> &atomsToUse, boost::dynamic_bitset<> bondsToUse, bool markAtomsBonds=true, bool canonical=true, unsigned int maxBackTracks=100)
not recommended for use in other code
RDKIT_GRAPHMOL_EXPORT unsigned int addExplicitAttachmentPoint(RWMol &mol, unsigned int atomIdx, unsigned int val, bool addAsQuery=true, bool addCoords=true)
Groups a variety of molecular query and transformation operations.
Definition MolOps.h:38
RDKIT_GRAPHMOL_EXPORT void cleanUp(RWMol &mol)
RDKIT_GRAPHMOL_EXPORT void assignStereochemistry(ROMol &mol, bool cleanIt=false, bool force=false, bool flagPossibleStereoCenters=false)
Assign stereochemistry tags to atoms and bonds.
RDKIT_GRAPHMOL_EXPORT void findRingFamilies(const ROMol &mol, bool includeDativeBonds=false, bool includeHydrogenBonds=false)
RDKIT_GRAPHMOL_EXPORT ROMol * renumberAtoms(const ROMol &mol, const std::vector< unsigned int > &newOrder)
returns a copy of a molecule with the atoms renumbered
RDKIT_GRAPHMOL_EXPORT std::string getMolFormula(const ROMol &mol, bool separateIsotopes=false, bool abbreviateHIsotopes=true)
RDKIT_GRAPHMOL_EXPORT void cleanupAtropisomers(RWMol &mol, Hybridizations &hybridizations)
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFromBondDirs(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Use bond directions to assign ChiralTypes to a molecule's atoms.
RDKIT_GRAPHMOL_EXPORT int setAromaticity(RWMol &mol, AromaticityModel model=AROMATICITY_DEFAULT, int(*func)(RWMol &)=nullptr)
Sets up the aromaticity for a molecule.
RDKIT_GRAPHMOL_EXPORT void sanitizeMol(RWMol &mol, unsigned int &operationThatFailed, unsigned int sanitizeOps=SanitizeFlags::SANITIZE_ALL)
carries out a collection of tasks for cleaning up a molecule and ensuring that it makes "chemical sen...
RDKIT_GRAPHMOL_EXPORT double getExactMolWt(const ROMol &mol, bool onlyHeavy=false)
RDKIT_GRAPHMOL_EXPORT bool needsHs(const ROMol &mol)
returns whether or not a molecule needs to have Hs added to it.
RDKIT_GRAPHMOL_EXPORT void fastFindRings(const ROMol &mol)
use a DFS algorithm to identify ring bonds and atoms in a molecule
RDKIT_GRAPHMOL_EXPORT std::pair< bool, bool > hasQueryHs(const ROMol &mol)
returns a pair of booleans (hasQueryHs, hasUnmergaebleQueryHs)
RDKIT_GRAPHMOL_EXPORT std::map< T, boost::shared_ptr< ROMol > > getMolFragsWithQuery(const ROMol &mol, T(*query)(const ROMol &, const Atom *), bool sanitizeFrags=true, const std::vector< T > *whiteList=nullptr, bool negateList=false)
splits a molecule into pieces based on labels assigned using a query
RDKIT_GRAPHMOL_EXPORT int getFormalCharge(const ROMol &mol)
sums up all atomic formal charges and returns the result
AromaticityModel
Possible aromaticity models.
Definition MolOps.h:616
@ AROMATICITY_RDKIT
Definition MolOps.h:618
@ AROMATICITY_MDL
Definition MolOps.h:620
@ AROMATICITY_CUSTOM
use a function
Definition MolOps.h:622
@ AROMATICITY_DEFAULT
future proofing
Definition MolOps.h:617
@ AROMATICITY_MMFF94
Definition MolOps.h:621
@ AROMATICITY_SIMPLE
Definition MolOps.h:619
RDKIT_GRAPHMOL_EXPORT void cleanUpOrganometallics(RWMol &mol)
RDKIT_GRAPHMOL_EXPORT double * getDistanceMat(const ROMol &mol, bool useBO=false, bool useAtomWts=false, bool force=false, const char *propNamePrefix=nullptr)
Computes the molecule's topological distance matrix.
RDKIT_GRAPHMOL_EXPORT ROMol * hapticBondsToDative(const ROMol &mol)
Replaces haptic bond with explicit dative bonds.
RDKIT_GRAPHMOL_EXPORT void setTerminalAtomCoords(ROMol &mol, unsigned int idx, unsigned int otherIdx)
RDKIT_GRAPHMOL_EXPORT void removeStereochemistry(ROMol &mol)
RDKIT_GRAPHMOL_EXPORT void clearSingleBondDirFlags(ROMol &mol, bool onlyWedgeFlags=false)
RDKIT_GRAPHMOL_EXPORT ROMol * adjustQueryProperties(const ROMol &mol, const AdjustQueryParameters *params=nullptr)
returns a copy of a molecule with query properties adjusted
RDKIT_GRAPHMOL_EXPORT unsigned getNumAtomsWithDistinctProperty(const ROMol &mol, const std::string_view &prop)
returns the number of atoms which have a particular property set
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFromMolParity(ROMol &mol, bool replaceExistingTags=true)
Uses the molParity atom property to assign ChiralType to a molecule's atoms.
RDKIT_GRAPHMOL_EXPORT ROMol * mergeQueryHs(const ROMol &mol, bool mergeUnmappedOnly=false, bool mergeIsotopes=false)
RDKIT_GRAPHMOL_EXPORT void expandAttachmentPoints(RWMol &mol, bool addAsQueries=true, bool addCoords=true)
RDKIT_GRAPHMOL_EXPORT unsigned int getMolFrags(const ROMol &mol, std::vector< int > &mapping)
find fragments (disconnected components of the molecular graph)
RDKIT_GRAPHMOL_EXPORT void adjustHs(RWMol &mol)
adjust the number of implicit and explicit Hs for special cases
RDKIT_GRAPHMOL_EXPORT ROMol * dativeBondsToHaptic(const ROMol &mol)
Replaces explicit dative bonds with haptic.
RDKIT_GRAPHMOL_EXPORT void assignStereochemistryFrom3D(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Uses a conformer to assign ChiralTypes to a molecule's atoms and stereo flags to its bonds.
RDKIT_GRAPHMOL_EXPORT bool KekulizeIfPossible(RWMol &mol, bool markAtomsBonds=true, bool canonical=true, unsigned int maxBackTracks=100)
RDKIT_GRAPHMOL_EXPORT int countAtomElec(const Atom *at)
RDKIT_GRAPHMOL_EXPORT void detectBondStereochemistry(ROMol &mol, int confId=-1)
RDKIT_GRAPHMOL_EXPORT void setMMFFAromaticity(RWMol &mol)
sets the aromaticity model for a molecule to MMFF94
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
RDKIT_GRAPHMOL_EXPORT void parseAdjustQueryParametersFromJSON(MolOps::AdjustQueryParameters &p, const std::string &json)
updates an AdjustQueryParameters object from a JSON string
RDKIT_GRAPHMOL_EXPORT void removeAllHs(RWMol &mol, bool sanitize=true)
removes all Hs from a molecule
RDKIT_GRAPHMOL_EXPORT void clearAllBondDirFlags(ROMol &mol)
RDKIT_GRAPHMOL_EXPORT void setBondStereoFromDirections(ROMol &mol)
RDKIT_GRAPHMOL_EXPORT double * get3DDistanceMat(const ROMol &mol, int confId=-1, bool useAtomWts=false, bool force=false, const char *propNamePrefix=nullptr)
Computes the molecule's 3D distance matrix.
RDKIT_GRAPHMOL_EXPORT bool atomHasConjugatedBond(const Atom *at)
returns whether or not the given Atom is involved in a conjugated bond
RDKIT_GRAPHMOL_EXPORT std::vector< std::unique_ptr< MolSanitizeException > > detectChemistryProblems(const ROMol &mol, unsigned int sanitizeOps=SanitizeFlags::SANITIZE_ALL)
Identifies chemistry problems (things that don't make chemical sense) in a molecule.
RDKIT_GRAPHMOL_EXPORT void clearDirFlags(ROMol &mol, bool onlyWedgeFlags=false)
RDKIT_GRAPHMOL_EXPORT int symmetrizeSSSR(ROMol &mol, std::vector< std::vector< int > > &res, bool includeDativeBonds=false, bool includeHydrogenBonds=false)
symmetrize the molecule's Smallest Set of Smallest Rings
RDKIT_GRAPHMOL_EXPORT void cleanupChirality(RWMol &mol)
RDKIT_GRAPHMOL_EXPORT double * getAdjacencyMatrix(const ROMol &mol, bool useBO=false, int emptyVal=0, bool force=false, const char *propNamePrefix=nullptr, const boost::dynamic_bitset<> *bondsToUse=nullptr)
returns a molecule's adjacency matrix
RDKIT_GRAPHMOL_EXPORT void assignRadicals(RWMol &mol)
Called by the sanitizer to assign radical counts to atoms.
RDKIT_GRAPHMOL_EXPORT void findPotentialStereoBonds(ROMol &mol, bool cleanIt=false)
finds bonds that could be cis/trans in a molecule and mark them as Bond::STEREOANY.
RDKIT_GRAPHMOL_EXPORT void addHs(RWMol &mol, const AddHsParameters &params, const UINT_VECT *onlyOnAtoms=nullptr)
adds Hs to a molecule as explicit Atoms
RDKIT_GRAPHMOL_EXPORT void setHybridization(ROMol &mol)
calculates and sets the hybridization of all a molecule's Stoms
RDKIT_GRAPHMOL_EXPORT int findSSSR(const ROMol &mol, std::vector< std::vector< int > > &res, bool includeDativeBonds=false, bool includeHydrogenBonds=false)
finds a molecule's Smallest Set of Smallest Rings
RDKIT_GRAPHMOL_EXPORT void collapseAttachmentPoints(RWMol &mol, bool markedOnly=true)
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFrom3D(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Uses a conformer to assign ChiralTypes to a molecule's atoms.
RDKIT_GRAPHMOL_EXPORT std::list< int > getShortestPath(const ROMol &mol, int aid1, int aid2)
Find the shortest path between two atoms.
RDKIT_GRAPHMOL_EXPORT double getAvgMolWt(const ROMol &mol, bool onlyHeavy=false)
RDKIT_GRAPHMOL_EXPORT void setConjugation(ROMol &mol)
flags the molecule's conjugated bonds
RDKIT_GRAPHMOL_EXPORT void setDoubleBondNeighborDirections(ROMol &mol, const Conformer *conf=nullptr)
Sets bond directions based on double bond stereochemistry.
RDKIT_GRAPHMOL_EXPORT void Kekulize(RWMol &mol, bool markAtomsBonds=true, bool canonical=true, unsigned int maxBackTracks=100)
Kekulizes the molecule.
AdjustQueryWhichFlags
Definition MolOps.h:398
@ ADJUST_IGNORERINGS
Definition MolOps.h:401
@ ADJUST_IGNORENONE
Definition MolOps.h:399
@ ADJUST_IGNOREMAPPED
Definition MolOps.h:404
@ ADJUST_IGNORENONDUMMIES
Definition MolOps.h:403
@ ADJUST_IGNOREDUMMIES
Definition MolOps.h:402
@ ADJUST_IGNORECHAINS
Definition MolOps.h:400
@ ADJUST_IGNOREALL
Definition MolOps.h:405
Std stuff.
std::vector< double > INVAR_VECT
Definition MolOps.h:33
INVAR_VECT::iterator INVAR_VECT_I
Definition MolOps.h:34
INVAR_VECT::const_iterator INVAR_VECT_CI
Definition MolOps.h:35
std::vector< UINT > UINT_VECT
Definition types.h:245
Parameters controlling the behavior of MolOps::adjustQueryProperties.
Definition MolOps.h:417
static AdjustQueryParameters noAdjustments()
returns an AdjustQueryParameters object with all adjustments disabled
Definition MolOps.h:468