Appendix C. Loading MDL SDFFile with Python

This appendix presents a Python script for loading a MDL SDF file into a MySQL database. The script requires the MySQLdb Python module. It can be downloaded from the Mysql-Python Project Web Page.

#!/bin/python

import sys
import MySQLdb


#-----------------------------------------------------------------
# Set some paramaters and connect to the database
#-----------------------------------------------------------------

host = "localhost" # Set the host
username = "" # Set the username
password = "" # Set the password
database = "" # Set the database name
table = "" #  Set the table name

try:
    link = MySQLdb.connect(host = host,
                           user = username,
                           passwd = password,
                           db = database)
    cursor = link.cursor()
except MySQLdb.Error, e:
    print "Error %d: %s" % (e.args[0], e.args[1])
    sys.exit(1)


#-----------------------------------------------------------------
# Parse the file and load the structures
#-----------------------------------------------------------------

if len(sys.argv) != 2:
    print "Usage: upload_sdf.py FILE"
    link.close()
    sys.exit(1)
f_in = open(sys.argv[1], 'r')

ctTable = ""
lineCount = 0
molCount = 0
ctEnd = False

while 1:
    line = f_in.readline()
    if not line:
        break
    lineCount += 1
    if line[0:4] == "$$$$":
        if name == "":
            name = "Mol" + str(molCount)
        query = "INSERT INTO `%s` (`name`)" % table \
              + " VALUES ('%s')" % name
        cursor.execute(query)
        molid = cursor.lastrowid
        query = "INSERT INTO `%s_mol` (`molid`, `mol`)" % table \
              + " VALUES (%i, '%s')" % (molid, ctTable)
        cursor.execute(query)
        ctTable = ""
        lineCount = 0
        molCount += 1
        ctEnd = False
    elif not ctEnd:
        ctTable += line
        if lineCount == 1:
            name = line.strip()
        if line[0:6] == "M  END":
            ctEnd = True


#-----------------------------------------------------------------
# Close the handlers and print a summary
#-----------------------------------------------------------------

link.commit()
cursor.close()
link.close()

print str(molCount) + " structures have been loaded."