/** * Returns a fingerprint from a Base64 encoded Pubchem fingerprint. * * @param enc The Base64 encoded fingerprint * @return A BitSet corresponding to the input fingerprint */ public static BitSet decode(String enc) { byte[] fp = base64Decode(enc); if (fp.length < 4) { throw new IllegalArgumentException("Input is not a proper PubChem base64 encoded fingerprint"); } int len = (fp[0] << 24) | (fp[1] << 16) | (fp[2] << 8) | (fp[3] & 0xff); if (len != FP_SIZE) { throw new IllegalArgumentException("Input is not a proper PubChem base64 encoded fingerprint"); } // note the IChemObjectBuilder is passed as null because the SMARTSQueryTool // isn't needed when decoding PubchemFingerprinter pc = new PubchemFingerprinter(null); for (int i = 0; i < pc.m_bits.length; ++i) { pc.m_bits[i] = fp[i + 4]; } BitSet ret = new BitSet(FP_SIZE); for (int i = 0; i < FP_SIZE; i++) { if (pc.isBitOn(i)) ret.set(i); } return ret; }
/**
 * Supplies a new {@code PubchemFingerprinter} constructed with the
 * {@code DefaultChemObjectBuilder} singleton.
 *
 * @return a freshly created fingerprinter instance
 */
@Override
public IFingerprinter getBitFingerprinter() {
    final IFingerprinter fingerprinter =
            new PubchemFingerprinter(DefaultChemObjectBuilder.getInstance());
    return fingerprinter;
}
/**
 * Creates new instances of six fingerprinters, in this order: OpenBabel,
 * substructure, MACCS, PubChem, Klekota-Roth and ECFP.
 *
 * @return a fixed-size list view over the newly created fingerprinters
 */
public static List<IFingerprinter> createListOfFingerprints() {
    final IFingerprinter[] fingerprinters = {
        new OpenBabelFingerprinter(),
        new SubstructureFingerprinter(),
        new MACCSFingerprinter(),
        new PubchemFingerprinter(DefaultChemObjectBuilder.getInstance()),
        new KlekotaRothFingerprinter(),
        new ECFPFingerprinter()
    };
    return Arrays.asList(fingerprinters);
}
/**
 * Creates new instances of six fingerprinters, in this order: OpenBabel,
 * substructure, MACCS, PubChem, Klekota-Roth and the spherical fingerprint.
 *
 * @return a fixed-size list view over the newly created fingerprinters
 */
public static List<IFingerprinter> createExtendedListOfFingerprints() {
    final IFingerprinter[] fingerprinters = {
        new OpenBabelFingerprinter(),
        new SubstructureFingerprinter(),
        new MACCSFingerprinter(),
        new PubchemFingerprinter(DefaultChemObjectBuilder.getInstance()),
        new KlekotaRothFingerprinter(),
        new SphericalFingerprint()
    };
    return Arrays.asList(fingerprinters);
}
public static List<IFingerprinter> createListOfAllFingerprints() { return Arrays.asList( (IFingerprinter) new OpenBabelFingerprinter(), // 55 (0..54) (IFingerprinter) new SubstructureFingerprinter(), // 307 (55..361) (IFingerprinter) new MACCSFingerprinter(),// 166 (362..527) (IFingerprinter) new PubchemFingerprinter( DefaultChemObjectBuilder.getInstance()), // 881 (528..1408) (IFingerprinter) new KlekotaRothFingerprinter(), // 4860 (1409..6269) (IFingerprinter) new SphericalFingerprint(), (IFingerprinter) new ECFPFingerprinter() ); }
/**
 * Builds a {@code Fingerprinter} from the fingerprint types selected in the options.
 *
 * <p>One fingerprinter is added per enabled option, in this order: OpenBabel,
 * MACCS, PubChem, Klekota-Roth, path, CFM, neighbourhood. If no option is
 * enabled, a {@code Fingerprinter} created with the no-argument constructor is
 * returned instead.
 *
 * <p>Every option that contributes a fingerprinter also counts as a selection,
 * so enabling only the path or neighbourhood option no longer falls through to
 * the default fingerprinter.
 *
 * @param opts the options naming the fingerprint types to use
 * @return a fingerprinter for the selected types, or the default one if none is selected
 * @throws CDKException declared by this method; presumably raised while
 *         constructing a fingerprinter (not visible here)
 */
public Fingerprinter getFingerprinter(ComputeOpts opts) throws CDKException {
    ArrayList<IFingerprinter> fingerprinters = new ArrayList<IFingerprinter>();
    if (opts.isOpenbabel()) {
        fingerprinters.add(new OpenBabelFingerprinter());
    }
    if (opts.isMaccs()) {
        fingerprinters.add(new MACCSFingerprinter());
    }
    if (opts.isPubchem()) {
        fingerprinters.add(new PubchemFingerprinter(DefaultChemObjectBuilder.getInstance()));
    }
    if (opts.isKlekotha()) {
        fingerprinters.add(new KlekotaRothFingerprinter());
    }
    if (opts.isPath()) {
        fingerprinters.add(new MarcusPathFingerprinter());
    }
    if (opts.isCfm()) {
        fingerprinters.add(new CFMFingerprinter());
    }
    if (opts.isNeighbourhood()) {
        fingerprinters.add(new NeighbourhoodFingerprinter());
    }

    // Decide on the collected list rather than on a separate flag check, so the
    // guard cannot drift out of sync with the options handled above.
    if (fingerprinters.isEmpty()) {
        return new Fingerprinter();
    }
    return new Fingerprinter(fingerprinters);
}
/**
 * Chooses the fingerprinters to use for the given options.
 *
 * <p>The "all" option takes precedence and yields
 * {@code Fingerprinter.createListOfAllFingerprints()}; next, the "extended"
 * option yields {@code Fingerprinter.createExtendedListOfFingerprints()}.
 * Otherwise one fingerprinter is collected per enabled option (the OpenBabel
 * option contributes both the OpenBabel and the substructure fingerprinter).
 * If nothing was collected, {@code Fingerprinter.createListOfFingerprints()}
 * is returned.
 *
 * @param opts the options naming the fingerprint types to use
 * @return the list of fingerprinters matching the options
 */
private static List<IFingerprinter> getFingerprintList(ComputeOpts opts) {
    if (opts.isAll()) {
        return Fingerprinter.createListOfAllFingerprints();
    }
    if (opts.isExtended()) {
        return Fingerprinter.createExtendedListOfFingerprints();
    }

    final List<IFingerprinter> selected = new ArrayList<IFingerprinter>();
    if (opts.isOpenbabel()) {
        selected.add(new OpenBabelFingerprinter());
        selected.add(new SubstructureFingerprinter());
    }
    if (opts.isMaccs()) {
        selected.add(new MACCSFingerprinter());
    }
    if (opts.isPubchem()) {
        selected.add(new PubchemFingerprinter(DefaultChemObjectBuilder.getInstance()));
    }
    if (opts.isKlekotha()) {
        selected.add(new KlekotaRothFingerprinter());
    }
    if (opts.isPath()) {
        selected.add(new MarcusPathFingerprinter());
    }
    if (opts.isNeighbourhood()) {
        selected.add(new NeighbourhoodFingerprinter());
    }
    if (opts.isSpherical()) {
        selected.add(new SphericalFingerprint());
    }

    return selected.isEmpty() ? Fingerprinter.createListOfFingerprints() : selected;
}
/**
 * Creates a new fingerprinter for the given name.
 *
 * <p>Matching is case-insensitive. Recognised names are {@code openbabel},
 * {@code substructure}, {@code maccs}, {@code pubchem}, {@code klekota},
 * {@code klekota_roth}, {@code path}, {@code neighbours}, {@code spheres} and
 * {@code ecfp}.
 *
 * @param name the fingerprinter name; must not be {@code null}
 * @return a newly created fingerprinter of the requested kind
 * @throws IllegalArgumentException if the name is not one of the recognised names
 * @throws NullPointerException if {@code name} is {@code null}
 */
public static IFingerprinter getFingerprinterByName(String name) {
    // Lower-case with Locale.ROOT: the default-locale variant maps 'I' to a
    // dotless i under e.g. a Turkish locale, so "NEIGHBOURS" would not match.
    switch (name.toLowerCase(java.util.Locale.ROOT)) {
        case "openbabel":
            return new OpenBabelFingerprinter();
        case "substructure":
            return new SubstructureFingerprinter();
        case "maccs":
            return new MACCSFingerprinter();
        case "pubchem":
            return new PubchemFingerprinter(DefaultChemObjectBuilder.getInstance());
        case "klekota":
        case "klekota_roth":
            return new KlekotaRothFingerprinter();
        case "path":
            return new MarcusPathFingerprinter();
        case "neighbours":
            return new NeighbourhoodFingerprinter();
        case "spheres":
            return new SphericalFingerprint();
        case "ecfp":
            return new ECFPFingerprinter();
        default:
            throw new IllegalArgumentException("Unknown fingerprinter: " + name);
    }
}
/**
 * Creates a new fingerprinter for the given fingerprint type.
 *
 * @param fp the fingerprint type; must not be {@code null}
 * @return a newly created fingerprinter of the requested type
 * @throws IllegalArgumentException if no fingerprinter is mapped to {@code fp}
 * @throws NullPointerException if {@code fp} is {@code null} (raised by the switch)
 */
public static IFingerprinter getFingerprinter(CdkFingerprintVersion.USED_FINGERPRINTS fp) {
    switch (fp) {
        case OPENBABEL:
            return new OpenBabelFingerprinter();
        case SUBSTRUCTURE:
            return new SubstructureFingerprinter();
        case MACCS:
            return new MACCSFingerprinter();
        case PUBCHEM:
            return new PubchemFingerprinter(DefaultChemObjectBuilder.getInstance());
        case KLEKOTA_ROTH:
            return new KlekotaRothFingerprinter();
        case ECFP:
            return new ECFPFingerprinter();
        case CLASSYFIRE_SMARTS:
            return new ClassyFireSmartsFingerprint();
        case SHORTEST_PATH:
            return new ShortestPathFingerprinter();
        case BIOSMARTS:
            return new BiosmartsFingerprinter();
        case RINGSYSTEMS:
            return new RingsystemFingerprinter();
        default:
            // Name the offending constant so the failure can be diagnosed from the message.
            throw new IllegalArgumentException("Unsupported fingerprint type: " + fp);
    }
}
/**
 * Checks that a PubChem fingerprinter reports a size of 881.
 */
@Test
public void testGetSize() throws Exception {
    final int expectedSize = 881;
    IFingerprinter fingerprinter = new PubchemFingerprinter(DefaultChemObjectBuilder.getInstance());
    Assert.assertEquals(expectedSize, fingerprinter.getSize());
}
// Apply the Aromaticity.cdkLegacy() model to mol2, then fingerprint mol1 and mol2
// with one shared PubchemFingerprinter and keep each result as a BitSet.
Aromaticity.cdkLegacy().apply(mol2); IFingerprinter fp = new PubchemFingerprinter(DefaultChemObjectBuilder.getInstance()); BitSet bs1 = fp.getBitFingerprint(mol1).asBitSet(); BitSet bs2 = fp.getBitFingerprint(mol2).asBitSet();
/**
 * Checks that {@code getFingerprintAsBytes()} agrees with the BitSet produced by
 * the preceding {@code getBitFingerprint} call on the same fingerprinter.
 *
 * <p>The expected bytes come from {@code toByteArray} applied to the BitSet,
 * resized to the length of the actual byte array.
 *
 * @throws CDKException if any CDK preparation or fingerprinting step fails
 */
@Test
public void testGetFingerprintAsBytes() throws CDKException {
    final String smiles = "C=C(C1=CC=C(C=C1)O)NNC2=C(C(=NC(=C2Cl)Cl)C(=O)O)Cl";
    IAtomContainer molecule = parser.parseSmiles(smiles);

    // Prepare the molecule: atom types, explicit hydrogens, then aromaticity.
    AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(molecule);
    CDKHydrogenAdder hydrogenAdder = CDKHydrogenAdder.getInstance(molecule.getBuilder());
    hydrogenAdder.addImplicitHydrogens(molecule);
    AtomContainerManipulator.convertImplicitToExplicitHydrogens(molecule);
    Aromaticity.cdkLegacy().apply(molecule);

    PubchemFingerprinter fingerprinter = new PubchemFingerprinter(molecule.getBuilder());
    // The fingerprint must be computed before the byte form is requested.
    BitSet bits = fingerprinter.getBitFingerprint(molecule).asBitSet();
    byte[] actualBytes = fingerprinter.getFingerprintAsBytes();

    byte[] bitsAsBytes = toByteArray(bits);
    byte[] expectedBytes = Arrays.copyOf(bitsAsBytes, actualBytes.length);
    Assert.assertArrayEquals(expectedBytes, actualBytes);
}
// Call addImplicitHydrogens on subStructure, then fingerprint superStructure and
// subStructure with one shared PubchemFingerprinter built on SilentChemObjectBuilder.
addImplicitHydrogens(subStructure); IFingerprinter fpr = new PubchemFingerprinter(SilentChemObjectBuilder.getInstance()); IBitFingerprint superBits = fpr.getBitFingerprint(superStructure); IBitFingerprint subBits = fpr.getBitFingerprint(subStructure);
/**
 * Checks the computed fingerprint of benzene ({@code c1ccccc1}) against a
 * Base64 encoded reference fingerprint.
 *
 * @throws CDKException if any CDK preparation or fingerprinting step fails
 */
@Test
public void testBenzene() throws CDKException {
    final String encodedReference =
            "AAADcYBgAAAAAAAAAAAAAAAAAAAAAAAAAAAwAAAAAAAAAAABAAAAGAAAAAAACACAEAAwAIAAAACAACBCAAACAAAgAAAIiAAAAIgIICKAERCAIAAggAAIiAcAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==";

    IAtomContainer molecule = parser.parseSmiles("c1ccccc1");

    // Prepare the molecule: atom types, explicit hydrogens, then aromaticity.
    AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(molecule);
    CDKHydrogenAdder hydrogenAdder = CDKHydrogenAdder.getInstance(molecule.getBuilder());
    hydrogenAdder.addImplicitHydrogens(molecule);
    AtomContainerManipulator.convertImplicitToExplicitHydrogens(molecule);
    Aromaticity.cdkLegacy().apply(molecule);

    IFingerprinter fingerprinter = new PubchemFingerprinter(DefaultChemObjectBuilder.getInstance());
    BitSet computed = fingerprinter.getBitFingerprint(molecule).asBitSet();
    BitSet reference = PubchemFingerprinter.decode(encodedReference);

    Assert.assertEquals(reference, computed);
}
/**
 * Test case for Pubchem CID 25181308: the computed fingerprint must equal the
 * decoded Base64 reference fingerprint.
 *
 * @throws CDKException if any CDK preparation or fingerprinting step fails
 * @cdk.inchi InChI=1S/C13H24O10S/c1-20-12-8(18)6(16)10(4(2-14)21-12)23-13-9(19)7(17)11(24)5(3-15)22-13/h4-19,24H,2-3H2,1H3/t4-,5-,6-,7-,8-,9-,10-,11-,12-,13+/m1/s1
 */
@Test
public void testCID2518130() throws CDKException {
    final String encodedReference =
            "AAADceBwPABAAAAAAAAAAAAAAAAAAAAAAAAkSAAAAAAAAAAAAAAAGgQACAAACBS0wAOCCAAABgQAAAAAAAAAAAAAAAAAAAAAAAAREAIAAAAiQAAFAAAHAAHAYAwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==";

    IAtomContainer molecule = parser.parseSmiles("COC1C(C(C(C(O1)CO)OC2C(C(C(C(O2)CO)S)O)O)O)O");

    // Prepare the molecule: atom types, explicit hydrogens, then aromaticity.
    AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(molecule);
    CDKHydrogenAdder hydrogenAdder = CDKHydrogenAdder.getInstance(molecule.getBuilder());
    hydrogenAdder.addImplicitHydrogens(molecule);
    AtomContainerManipulator.convertImplicitToExplicitHydrogens(molecule);
    Aromaticity.cdkLegacy().apply(molecule);

    IFingerprinter fingerprinter = new PubchemFingerprinter(DefaultChemObjectBuilder.getInstance());
    BitSet computed = fingerprinter.getBitFingerprint(molecule).asBitSet();
    BitSet reference = PubchemFingerprinter.decode(encodedReference);

    Assert.assertEquals(reference, computed);
}
/**
 * Test case for Pubchem CID 25181289: the computed fingerprint must equal the
 * decoded Base64 reference fingerprint.
 *
 * @throws CDKException if any CDK preparation or fingerprinting step fails
 * @cdk.inchi InChI=1S/C14H10Cl3N3O3/c1-6(7-2-4-8(21)5-3-7)19-20-11-9(15)12(14(22)23)18-13(17)10(11)16/h2-5,19,21H,1H2,(H,18,20)(H,22,23)
 */
@Test
public void testCID25181289() throws CDKException {
    final String encodedReference =
            "AAADccBzMAAGAAAAAAAAAAAAAAAAAAAAAAA8QAAAAAAAAAABwAAAHgIYCAAADA6BniAwzpJqEgCoAyTyTASChCAnJiIYumGmTtgKJnLD1/PEdQhkwBHY3Qe82AAOIAAAAAAAAABAAAAAAAAAAAAAAAAAAA==";

    IAtomContainer molecule = parser.parseSmiles("C=C(C1=CC=C(C=C1)O)NNC2=C(C(=NC(=C2Cl)Cl)C(=O)O)Cl");

    // Prepare the molecule: atom types, explicit hydrogens, then aromaticity.
    AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(molecule);
    CDKHydrogenAdder hydrogenAdder = CDKHydrogenAdder.getInstance(molecule.getBuilder());
    hydrogenAdder.addImplicitHydrogens(molecule);
    AtomContainerManipulator.convertImplicitToExplicitHydrogens(molecule);
    Aromaticity.cdkLegacy().apply(molecule);

    IFingerprinter fingerprinter = new PubchemFingerprinter(DefaultChemObjectBuilder.getInstance());
    BitSet computed = fingerprinter.getBitFingerprint(molecule).asBitSet();
    BitSet reference = PubchemFingerprinter.decode(encodedReference);

    Assert.assertEquals(reference, computed);
}
/**
 * Test case for Pubchem CID 5934166: the computed fingerprint must equal the
 * decoded Base64 reference fingerprint.
 *
 * @throws CDKException if any CDK preparation or fingerprinting step fails
 * @cdk.inchi InChI=1S/C32H26N/c1-5-13-26(14-6-1)21-22-31-23-30(28-17-9-3-10-18-28)24-32(29-19-11-4-12-20-29)33(31)25-27-15-7-2-8-16-27/h1-24H,25H2/q+1/b22-21+
 */
@Test
public void testCID5934166() throws CDKException {
    final String encodedReference =
            "AAADceB+AAAAAAAAAAAAAAAAAAAAAAAAAAA8YMGCAAAAAAAB1AAAHAAAAAAADAjBHgQwgJMMEACgAyRiRACCgCAhAiAI2CA4ZJgIIOLAkZGEIAhggADIyAcQgMAOgAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==";

    IAtomContainer molecule =
            parser.parseSmiles("C1=CC=C(C=C1)C[N+]2=C(C=C(C=C2C=CC3=CC=CC=C3)C4=CC=CC=C4)C5=CC=CC=C5");

    // Prepare the molecule: atom types, explicit hydrogens, then aromaticity.
    AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(molecule);
    CDKHydrogenAdder hydrogenAdder = CDKHydrogenAdder.getInstance(molecule.getBuilder());
    hydrogenAdder.addImplicitHydrogens(molecule);
    AtomContainerManipulator.convertImplicitToExplicitHydrogens(molecule);
    Aromaticity.cdkLegacy().apply(molecule);

    IFingerprinter fingerprinter = new PubchemFingerprinter(DefaultChemObjectBuilder.getInstance());
    BitSet computed = fingerprinter.getBitFingerprint(molecule).asBitSet();
    BitSet reference = PubchemFingerprinter.decode(encodedReference);

    Assert.assertEquals(reference, computed);
}
/**
 * Fingerprints {@code CC(N)CCCN}, {@code CC(N)CCC} and {@code CCCC} with one
 * fingerprinter and asserts {@code FingerprinterTool.isSubset} for the first
 * against the second, and for the second against the third.
 */
@Test
public void testfp2() throws Exception {
    IFingerprinter fingerprinter = new PubchemFingerprinter(DefaultChemObjectBuilder.getInstance());

    IAtomContainer diamine = parser.parseSmiles("CC(N)CCCN");
    IAtomContainer amine = parser.parseSmiles("CC(N)CCC");
    IAtomContainer butane = parser.parseSmiles("CCCC");
    final IAtomContainer[] molecules = {diamine, amine, butane};

    // Same preparation order as before: type every molecule, then apply aromaticity to each.
    for (IAtomContainer molecule : molecules) {
        AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(molecule);
    }
    for (IAtomContainer molecule : molecules) {
        Aromaticity.cdkLegacy().apply(molecule);
    }

    BitSet diamineBits = fingerprinter.getBitFingerprint(diamine).asBitSet();
    BitSet amineBits = fingerprinter.getBitFingerprint(amine).asBitSet();
    BitSet butaneBits = fingerprinter.getBitFingerprint(butane).asBitSet();

    Assert.assertTrue(FingerprinterTool.isSubset(diamineBits, amineBits));
    Assert.assertTrue(FingerprinterTool.isSubset(amineBits, butaneBits));
}
/**
 * Fingerprints {@code c1ccccc1CCc1ccccc1} and {@code c1ccccc1CC} after adding
 * explicit hydrogens and applying aromaticity, then asserts that the
 * fingerprinter size is 881 and that {@code FingerprinterTool.isSubset} on the
 * two fingerprints is false.
 */
@Test
public void testFingerprint() throws Exception {
    IFingerprinter fingerprinter = new PubchemFingerprinter(DefaultChemObjectBuilder.getInstance());
    CDKHydrogenAdder hydrogenAdder = CDKHydrogenAdder.getInstance(DefaultChemObjectBuilder.getInstance());

    IAtomContainer bibenzyl = parser.parseSmiles("c1ccccc1CCc1ccccc1");
    IAtomContainer ethylbenzene = parser.parseSmiles("c1ccccc1CC");
    final IAtomContainer[] molecules = {bibenzyl, ethylbenzene};

    // Each preparation stage is applied to both molecules before the next stage starts.
    for (IAtomContainer molecule : molecules) {
        AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(molecule);
    }
    for (IAtomContainer molecule : molecules) {
        hydrogenAdder.addImplicitHydrogens(molecule);
    }
    for (IAtomContainer molecule : molecules) {
        AtomContainerManipulator.convertImplicitToExplicitHydrogens(molecule);
    }
    for (IAtomContainer molecule : molecules) {
        Aromaticity.cdkLegacy().apply(molecule);
    }

    BitSet bibenzylBits = fingerprinter.getBitFingerprint(bibenzyl).asBitSet();
    BitSet ethylbenzeneBits = fingerprinter.getBitFingerprint(ethylbenzene).asBitSet();

    Assert.assertEquals(881, fingerprinter.getSize());
    boolean subset = FingerprinterTool.isSubset(bibenzylBits, ethylbenzeneBits);
    Assert.assertFalse("c1ccccc1CC was detected as a subset of c1ccccc1CCc1ccccc1", subset);
}