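# Force Python 3 print syntax when this file is run under Python 2.x.
# (Not guarded by `if sys.hexversion < 0x03000000:` -- a __future__ import
# must be an unconditional statement at the top of the module.)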
from __future__ import print_function
"""Module with functions to parse the input file and convert
Psithon into standard Python. Particularly, forms PsiMod
module calls that access the C++ side of Psi4.
"""
import PsiMod
from pubchem import getPubChemResults, PubChemObj
import re
import os
import sys
from psiexceptions import *
def bad_option_syntax(line):
    """Reject an option line that the parser cannot interpret.

    Always raises ``ValidationError``; the offending *line* is embedded
    in the exception message.
    """
    message = 'Unsupported syntax:\n\n%s\n\n' % (line)
    raise ValidationError(message)
def process_word_quotes(matchobj):
    """Decide how one word of an option value appears in the generated Python.

    *matchobj* must expose the optional leading ``$`` as group 2 and the
    word itself as group 3 (the layout produced by the pattern used in
    :func:`quotify`).

    Returns the replacement text for the word:

    * ``$name``  -> ``name`` (a Python variable; it must start with a
      letter, otherwise a message is printed and the process exits with
      status 1),
    * a numeric literal -> returned unchanged,
    * text already wrapped in single or double quotes -> returned unchanged,
    * anything else -> wrapped in double quotes.
    """
    dollar = matchobj.group(2)
    val = matchobj.group(3)

    if dollar:
        # This is a python variable, make sure that it starts with a letter
        if re.match(r'^[A-Za-z][\w]*', val):
            return val
        print("Invalid Python variable: %s" % val)
        sys.exit(1)

    # A number must not be quoted.  Accept an optional sign, a leading-dot
    # mantissa (".5") and a signed exponent ("1e+5"); all of these are valid
    # Python literals but were previously misclassified as strings.
    if re.match(r'^[-+]?(?:\d+\.?\d*|\.\d+)(?:[Ee][-+]?\d+)?$', val):
        return val

    # Already wrapped in quotes, do nothing
    if re.match(r'^\'.*\'$', val) or re.match(r'^\".*\"$', val):
        return val

    # Everything else is treated as a bare string
    return "\"%s\"" % val
def quotify(string):
    """Quote the string-like words of *string* and drop ``$`` from variables.

    Each run of word-like characters (optionally preceded by ``$``) is
    handed to :func:`process_word_quotes`; whatever that returns replaces
    the run in the result.
    """
    # group 1: whole token, group 2: optional '$', group 3: the word itself
    word_pattern = r'(([$]?)([-+()*.\w\"\']+))'
    return re.sub(word_pattern, process_word_quotes, string)
def process_option(spaces, module, key, value, line):
    """Build the ``PsiMod`` call that sets a single keyword.

    *spaces* is prepended to the generated statement, *module* selects the
    scope, *key* is the keyword name and *value* its raw text (stripped and
    passed through :func:`quotify`).  *line* is accepted but not used here.

    An upper-cased *module* of ``GLOBALS``, ``GLOBAL``, the empty string or
    pure whitespace yields a ``set_global_option`` call; any other module
    yields ``set_local_option`` with the upper-cased module name.
    """
    quoted_value = quotify(value.strip())
    scope = module.upper()

    if scope in ("GLOBALS", "GLOBAL", "") or scope.isspace():
        # Global options take no module argument
        return spaces + "PsiMod.set_global_option(\"%s\", %s)\n" % (key, quoted_value)

    # Local options carry the module name as the first argument
    return spaces + "PsiMod.set_local_option(\"%s\", \"%s\", %s)\n" % (scope, key, quoted_value)
def process_set_command(matchobj):
    """Translate one ``set (module_list) key value`` match.

    Group 1 is the leading whitespace, group 2 the optional
    comma-separated module list, group 3 the keyword and group 4 its
    value.  One option statement is generated per listed module (a single
    statement with an empty module name when no list was given) and the
    statements are returned concatenated.
    """
    modules = matchobj.group(2) or ""
    return "".join(
        process_option(matchobj.group(1), module, matchobj.group(3),
                       matchobj.group(4), matchobj.group(0))
        for module in modules.split(",")
    )
def process_set_commands(matchobj):
    """Translate one ``set name? { ... }`` block into option statements.

    Group 1 of *matchobj* is the leading whitespace (prefixed to every
    generated statement), group 2 the optional comma-separated module
    list and group 3 the text between the braces.

    The block body is read line by line.  Lines are accumulated until
    their parentheses/brackets balance, so a value may span several
    lines; the completed command is then split into ``key value`` and
    passed to :func:`process_option`.  The whole body is processed once
    for every module in the list.

    Returns the concatenated generated statements.  A command that cannot
    be split into key and value, or whose brackets are still open when the
    block ends, is reported through :func:`bad_option_syntax`.
    """
    spaces = matchobj.group(1)
    module_string = matchobj.group(2) or ""
    # Splitting on '\n' already removes the line terminators.
    command_lines = matchobj.group(3).split('\n')

    result = ""
    for module in module_string.split(","):
        # Start each module with an empty accumulator so unterminated text
        # from one pass can never leak into the next.
        command = ""
        for line in command_lines:
            command += line
            if not check_parentheses_and_brackets(command, 0):
                # Brackets are still open: keep accumulating lines until
                # they match up, and only then process the command.
                continue
            # Ignore blank/empty lines
            if not line or line.isspace():
                continue
            option = re.match(r'^\s*(\w+)[\s=]+(.*?)$', command)
            if option:
                result += process_option(spaces, module, option.group(1),
                                         option.group(2), command)
                # Reset the accumulator for the next command
                command = ""
            else:
                bad_option_syntax(command)
        if command.strip():
            # The block ended while a command was still waiting for its
            # closing bracket; report it instead of silently dropping it.
            bad_option_syntax(command)
    return result
def process_pubchem_command(matchobj):
    """Replace a ``pubchem: ...`` line of a molecule block with geometry text.

    Group 2 of *matchobj* is the text after ``pubchem:``.  If it consists
    only of digits it is used as a Chemical ID passed to ``PubChemObj``;
    otherwise it is used as a search term for ``getPubChemResults``.

    Returns the molecule string of the selected compound, or a
    diagnostic message when nothing (or more than one candidate without an
    exact, case-insensitive name match) was found.  Exceptions raised
    while fetching are converted to their message text and returned rather
    than propagated.
    """
    string = matchobj.group(2)

    if re.match(r'^\s*[0-9]+\s*$', string):
        # This is just a number - must be a CID
        pcobj = PubChemObj(int(string), '', '')
        try:
            return pcobj.getMoleculeString()
        except Exception as e:
            # str(e) instead of e.message: the .message attribute does not
            # exist on Python 3 exceptions.
            return str(e)

    # Search pubchem for the provided string
    try:
        results = getPubChemResults(string)
    except Exception as e:
        return str(e)

    # N.B. (original author's note) Anything starting with PubchemError is
    # handled by the molecule parser in libmints, which prints the rest of
    # the string and exits gracefully.
    if not results:
        # Nothing!
        return "PubchemError\n\tNo results were found when searching PubChem for %s.\n" % string
    if len(results) == 1:
        # There's only 1 result - use it
        return results[0].getMoleculeString()

    # There are multiple results: list them, unless one is an exact match
    msg = "\tPubchemError\n"
    msg += "\tMultiple pubchem results were found. Replace\n\n\t\tpubchem:%s\n\n" % string
    msg += "\twith the Chemical ID number or exact name from one of the following and re-run.\n\n"
    msg += "\t Chemical ID IUPAC Name\n\n"
    for result in results:
        msg += "%s" % result
        if result.name().lower() == string.lower():
            # We've found an exact match!
            return result.getMoleculeString()
    return msg
def process_molecule_command(matchobj):
    """Translate one ``molecule name? { ... }`` match into Python.

    Group 1 is the leading whitespace, group 2 the molecule name (compared
    against the empty string to detect "no name") and group 3 the geometry
    text.  ``pubchem:`` lines inside the geometry are first expanded via
    :func:`process_pubchem_command`.  The result is a ``geometry(...)``
    call, assigned to and labelled with the name when one is present,
    followed by a ``set_default_namespace`` call.
    """
    spaces = matchobj.group(1)
    name = matchobj.group(2)
    body = matchobj.group(3)

    pubchem_line = re.compile(r'^(\s*pubchem\s*:\s*(.*)\n)$', re.MULTILINE | re.IGNORECASE)
    body = pubchem_line.sub(process_pubchem_command, body)

    has_name = name != ""
    pieces = [spaces]
    if has_name:
        pieces.append('%s = ' % (name))
    pieces.append('geometry("""%s"""' % (body))
    if has_name:
        pieces.append(',"%s"' % (name))
    pieces.append(")\n")
    pieces.append('%sPsiMod.IO.set_default_namespace("%s")' % (spaces, name))
    return "".join(pieces)
def process_extract_command(matchobj):
    """Append bookkeeping calls after an ``extract_subsets`` statement.

    The matched text (group 0) is kept as is and followed by statements
    that name the new molecule (group 2) after its variable, make it the
    active molecule and select its default namespace.  Group 1 is the
    indentation reused for every added statement.
    """
    indent = matchobj.group(1)
    mol = matchobj.group(2)
    statements = (
        matchobj.group(0) + indent + '%s.set_name("%s")' % (mol, mol),
        '%sPsiMod.set_active_molecule(%s)' % (indent, mol),
        '%sPsiMod.IO.set_default_namespace("%s")' % (indent, mol),
    )
    return "\n".join(statements)
def process_print_command(matchobj):
    """Turn a matched ``print`` statement into a ``PsiMod.print_out()`` call.

    Group 1 is the leading whitespace and group 2 the expression to print;
    the expression is wrapped in ``str(...)`` in the generated call.
    """
    indent = str(matchobj.group(1))
    expression = str(matchobj.group(2))
    return "%sPsiMod.print_out(str(%s))\n" % (indent, expression)
def process_memory_command(matchobj):
    """Turn a matched ``memory <amount> <units>`` into ``PsiMod.set_memory()``.

    Group 1 is the leading whitespace, group 2 the numeric amount and
    group 3 the unit.  ``KB``/``MB``/``GB`` (any case) scale the amount by
    1000, 1000000 and 1000000000; any other unit text leaves the amount
    unscaled.  The value is truncated to an integer in the generated call.
    """
    indent = str(matchobj.group(1))
    amount_text = str(matchobj.group(2))
    unit = str(matchobj.group(3))
    amount = float(amount_text)

    scale_by_unit = {'KB': 1000, 'MB': 1000000, 'GB': 1000000000}
    scaled = amount * scale_by_unit.get(unit.upper(), 1)

    return "%sPsiMod.set_memory(%d)\n" % (indent, int(scaled))
def process_basis_file(matchobj):
    """Turn a matched ``basis file ...`` into ``PsiMod.add_user_basis_file()``.

    Group 1 is the leading whitespace; group 2 is the file name, which is
    stripped of surrounding whitespace and emitted as a double-quoted
    string argument.
    """
    indent = str(matchobj.group(1))
    path = str(matchobj.group(2)).strip()
    return "%sPsiMod.add_user_basis_file(\"%s\")" % (indent, path)
def process_filename(matchobj):
    """Turn a matched ``filename ...`` into a ``set_pid()`` call.

    Group 1 is the leading whitespace; group 2 is the name, stripped of
    surrounding whitespace and passed as a double-quoted string to
    ``PsiMod.IO.shared_object().set_pid``.
    """
    indent = str(matchobj.group(1))
    label = str(matchobj.group(2)).strip()
    return "%sPsiMod.IO.shared_object().set_pid(\"%s\")" % (indent, label)
def _basis_scratch_statements(spacing, basisname, basisstring):
    """Return the statements that write one basis definition to scratch and register it."""
    return [
        "%spsi4tempbasisfile = psi4tempscratchdir + \"%s\"" % (spacing, basisname),
        "%sPsiMod.add_user_basis_file(psi4tempbasisfile)" % (spacing),
        "%stemppsioman.write_scratch_file(psi4tempbasisfile, \"\"\"\n%s\"\"\")" % (spacing, basisstring),
    ]


def process_basis_block(matchobj):
    """Translate one ``basis name { ... }`` match into Python statements.

    Group 1 of *matchobj* is the leading whitespace prefixed to every
    generated statement; group 2 is the block body.

    The body is handled in two passes:

    1. ``assign`` lines are converted to ``set_basis_by_label`` (atom
       label such as ``H1``), ``set_basis_by_symbol`` (element symbol) or
       ``set_basis_all_atoms`` calls, each followed by the matching
       ``set_global_option``.  An optional trailing word selects the basis
       type; it defaults to ``BASIS``.
    2. The remaining non-blank lines are treated as basis set data.  Each
       ``[name]`` line starts a new definition, which is written to a
       scratch file and registered with ``add_user_basis_file``.

    Returns the generated statements, one per line (no trailing newline).

    Raises ``ValidationError`` when basis set data appears before any
    ``[name]`` line (previously this surfaced as an unbound-variable
    error).
    """
    command_lines = re.split('\n', matchobj.group(2))
    spacing = str(matchobj.group(1))

    statements = [
        "%stemppsioman = PsiMod.IOManager.shared_object()" % spacing,
        "%spsi4tempscratchdir = temppsioman.get_file_path(100)" % spacing,
    ]

    basislabel = re.compile(r'\s*\[([-*\(\)\w]+)\]\s*')
    # Start by looking for assign lines, and remove them
    label_re = re.compile(r'^\s*assign\s*([A-Za-z]+\d+)\s+([-*\(\)\w]+)\s*(\w+)?\s*$')
    symbol_re = re.compile(r'^\s*assign\s*([A-Za-z]+)\s+([-*\(\)\w]+)\s*(\w+)?\s*$')
    all_re = re.compile(r'^\s*assign\s*([-*\(\)\w]+)\s*(\w+)?\s*$')

    leftover_lines = []
    for line in command_lines:
        basistype = "BASIS"

        m = label_re.match(line)
        if m:
            if m.group(3):
                basistype = m.group(3).upper()
            statements.append("%sPsiMod.get_active_molecule().set_basis_by_label(\"%s\",\"%s\",\"%s\")" % (spacing, m.group(1), m.group(2), basistype))
            statements.append("%sPsiMod.set_global_option(\"%s\", \"CUSTOM\")" % (spacing, basistype))
            continue

        m = symbol_re.match(line)
        if m:
            if m.group(3):
                basistype = m.group(3).upper()
            statements.append("%sPsiMod.get_active_molecule().set_basis_by_symbol(\"%s\",\"%s\",\"%s\")" % (spacing, m.group(1), m.group(2), basistype))
            statements.append("%sPsiMod.set_global_option(\"%s\", \"CUSTOM\")" % (spacing, basistype))
            continue

        m = all_re.match(line)
        if m:
            if m.group(2):
                basistype = m.group(2).upper()
            statements.append("%sPsiMod.get_active_molecule().set_basis_all_atoms(\"%s\",\"%s\")" % (spacing, m.group(1), basistype))
            statements.append("%sPsiMod.set_global_option(\"%s\", \"%s\")" % (spacing, basistype, m.group(1)))
            continue

        # Not an assign line: keep it for the second pass unless blank
        if line and not line.isspace():
            leftover_lines.append(line)

    # Now look for regular basis set definitions
    basisname = None
    basisstring = ""
    for line in leftover_lines:
        m = basislabel.match(line)
        if m:
            if basisstring != "":
                # A new [name] closes the definition collected so far
                statements.extend(_basis_scratch_statements(spacing, basisname, basisstring))
                basisstring = ""
            basisname = PsiMod.BasisSet.make_filename(m.group(1))
        elif basisname is None:
            # Data with no preceding [name] has no file to be written to
            raise ValidationError('Basis block: basis set data must be preceded by a [name] line:\n\n%s\n\n' % (line))
        basisstring += line + "\n"
    if basisstring != "":
        statements.extend(_basis_scratch_statements(spacing, basisname, basisstring))

    # Join with explicit newlines so the output is valid Python even when
    # *spacing* is plain indentation.
    # NOTE(review): the original concatenated the statements directly and
    # so depended on *spacing* itself containing a newline -- confirm
    # against the pattern used by the caller.
    return "\n".join(statements)
def process_external_command(matchobj):
    """Translate one ``external name? { ... }`` match into Python statements.

    Group 1 of *matchobj* is the leading whitespace prefixed to every
    generated statement, group 2 the optional variable name (``extern``
    when missing, empty or blank) and group 3 the block body.

    The body is consumed in stages; each stage removes the lines it
    understands:

    0. blank lines are dropped;
    1. ``units`` lines select ``ang`` (default) or ``bohr`` -- matched
       case-insensitively;
    2. ``basis`` / ``df_basis_scf`` lines override the defaults
       ``cc-pvdz`` / ``cc-pvdz-jkfit``;
    3. lines of four decimal numbers become point charges;
    4. the rest is split on ``--`` lines into fragments, each emitted as a
       ``geometry`` block wrapped in a ``Diffuse`` object.

    Returns the generated code.  If any line is left unrecognised, the
    lines are printed and the process exits with status 1.
    """
    spacing = str(matchobj.group(1))
    # Test the raw group: str(None) would be the truthy text "None" and
    # defeat the default name.
    name = matchobj.group(2)
    if not name or name.isspace():
        name = "extern"
    block = str(matchobj.group(3))

    extern = "%sqmmm = QMMM()\n" % (spacing)

    # A decimal number; note that a decimal point is required
    NUMBER = "((?:[-+]?\\d*\\.\\d+(?:[DdEe][-+]?\\d+)?)|(?:[-+]?\\d+\\.\\d*(?:[DdEe][-+]?\\d+)?))"

    # Comments are all removed by this point
    # 0. Remove blank lines
    re_blank = re.compile(r'^\s*$')
    lines = [line for line in re.split('\n', block) if not re_blank.match(line)]

    # 1. Look for units [ang|bohr|au|a.u.] defaults to ang
    re_units = re.compile(r'^\s*units?[\s=]+((ang)|(angstrom)|(bohr)|(au)|(a\.u\.))$\s*', re.IGNORECASE)
    units = 'ang'
    remaining = []
    for line in lines:
        mobj = re_units.match(line)
        if mobj:
            # The pattern ignores case, so the comparison must as well
            if mobj.group(1).lower() in ('bohr', 'au', 'a.u.'):
                units = 'bohr'
            else:
                units = 'ang'
        else:
            remaining.append(line)
    lines = remaining

    # 2. Look for basis basisname, defaults to cc-pvdz
    # 3. Look for df_basis_scf basisname, defaults to cc-pvdz-jkfit
    re_basis = re.compile(r'\s*basis[\s=]+(\S+)\s*$', re.IGNORECASE)
    re_df_basis = re.compile(r'\s*df_basis_scf[\s=]+(\S+)\s*$', re.IGNORECASE)
    basis = 'cc-pvdz'
    df_basis_scf = 'cc-pvdz-jkfit'
    remaining = []
    for line in lines:
        mobj = re_basis.match(line)
        if mobj:
            basis = mobj.group(1)
            continue
        mobj = re_df_basis.match(line)
        if mobj:
            df_basis_scf = mobj.group(1)
            continue
        remaining.append(line)
    lines = remaining

    # 4. Look for charge lines Z x y z, convert according to unit convention
    charge_re = re.compile(r'^\s*' + NUMBER + r'\s+' + NUMBER + r'\s+' + NUMBER + r'\s+' + NUMBER + r'\s*$')
    remaining = []
    for line in lines:
        mobj = charge_re.match(line)
        if mobj:
            if units == 'ang':
                extern += '%sqmmm.addChargeAngstrom(%s,%s,%s,%s)\n' % (spacing, mobj.group(1), mobj.group(2), mobj.group(3), mobj.group(4))
            if units == 'bohr':
                extern += '%sqmmm.addChargeBohr(%s,%s,%s,%s)\n' % (spacing, mobj.group(1), mobj.group(2), mobj.group(3), mobj.group(4))
        else:
            remaining.append(line)
    lines = remaining

    # 5. Look for diffuse regions, which are XYZ molecules separated by the usual -- lines
    spacer_re = re.compile(r'^\s*--\s*$')
    frags = [[]]
    for line in lines:
        if spacer_re.match(line):
            # Only open a new fragment if the current one has content
            if frags[-1]:
                frags.append([])
        else:
            frags[-1].append(line)

    extern += '%sextern_mol_temp = PsiMod.get_active_molecule()\n' % (spacing)

    mol_re = re.compile(r'\s*\S+\s+' + NUMBER + r'\s+' + NUMBER + r'\s+' + NUMBER + r'\s*$')
    # From here on `lines` collects whatever no stage recognised
    lines = []
    for frag in frags:
        if not frag:
            continue

        extern += '%sexternal_diffuse = geometry("""\n' % (spacing)
        extern += '%s0 1\n' % (spacing)

        for line in frag:
            if not mol_re.match(line):
                lines.append(line)
            else:
                extern += '%s%s\n' % (spacing, line)

        extern += '%sunits %s\n' % (spacing, units)
        extern += '%ssymmetry c1\n' % (spacing)
        extern += '%sno_reorient\n' % (spacing)
        extern += '%sno_com\n' % (spacing)
        extern += '%s""")\n' % (spacing)
        extern += "%sdiffuse = Diffuse(external_diffuse,'%s','%s')\n" % (spacing, basis, df_basis_scf)
        extern += '%sdiffuse.fitScf()\n' % (spacing)
        extern += '%sqmmm.addDiffuse(diffuse)\n' % (spacing)
        extern += '\n'

    extern += '%sPsiMod.set_active_molecule(extern_mol_temp)\n' % (spacing)

    # 6. If there is anything left, the user messed up
    if lines:
        print('Input parsing for external {}: Extra line(s) present:')
        for line in lines:
            print(line)
        sys.exit(1)

    # Return is actually an ExternalPotential, not a QMMM
    extern += '%sqmmm.populateExtern()\n' % (spacing)
    extern += '%s%s = qmmm.extern\n' % (spacing, name)

    return extern
def check_parentheses_and_brackets(input_string, exit_on_error):
    """Check that every ``()``, ``[]`` and ``{}`` in *input_string* is paired.

    Returns 1 when all brackets are correctly nested and closed, 0
    otherwise.  When *exit_on_error* is true, the first problem found is
    printed and the process exits with status 1 instead of returning 0.
    """
    # Map each opening bracket to the closer it requires
    closer_for = {"(": ")", "[": "]", "{": "}"}
    closers = set(closer_for.values())
    open_stack = []

    for ch in input_string:
        if ch in closer_for:
            open_stack.append(ch)
        elif ch in closers:
            if not open_stack:
                # Run out of opening brackets.  Return right away: there
                # is no opener to compare against (looking one up used to
                # raise KeyError).
                if exit_on_error:
                    print("Input error: extra %s" % ch)
                    sys.exit(1)
                return 0
            opench = open_stack.pop()
            if closer_for[opench] != ch:
                # Wrong type of bracket popped from the stack
                if exit_on_error:
                    print("Input error: %s closed with a %s" % (opench, ch))
                    sys.exit(1)
                return 0

    if open_stack:
        # Something was opened and never closed
        if exit_on_error:
            print("Input error: Unmatched %s" % open_stack.pop())
            sys.exit(1)
        return 0
    return 1
def parse_multiline_array(input_list):
    """Squash a multi-line array at the head of *input_list* into one line.

    The first element of *input_list* is removed and following elements
    are removed, stripped and appended to it until all parentheses and
    brackets are paired.  *input_list* is modified in place.

    Returns the combined line terminated by a newline.  If the input runs
    out before the brackets balance, the bracket error is reported via
    :func:`check_parentheses_and_brackets` with *exit_on_error* set, which
    prints the problem and exits.
    """
    line = input_list.pop(0)
    # Keep adding lines to the current one, until all parens match up
    while not check_parentheses_and_brackets(line, 0):
        if not input_list:
            # Nothing left to append: report the unbalanced bracket
            # instead of failing with an IndexError from pop().
            check_parentheses_and_brackets(line, 1)
            break
        line += input_list.pop(0).strip()
    return "%s\n" % line
def process_multiline_arrays(inputfile):
    """Put every multi-line ``set ... [`` array of *inputfile* on one line.

    The text is split at newlines.  A line that starts a ``set`` command
    whose value begins with ``[`` is handed, together with the lines after
    it, to :func:`parse_multiline_array`; all other lines are copied
    unchanged.  Every emitted line is terminated with a newline.
    """
    pending = inputfile.split("\n")
    array_start = re.compile(r'^(\s*?)set\s+(?:([-,\w]+)\s+)?(\w+)[\s=]+\[.*', re.IGNORECASE)

    pieces = []
    while pending:
        if array_start.match(pending[0]):
            # Start of a "set ... [" array: the helper consumes as many
            # lines from `pending` as it needs
            pieces.append(parse_multiline_array(pending))
        else:
            # Ordinary line: copy it over as is
            pieces.append("%s\n" % pending.pop(0))
    return "".join(pieces)
if __name__ == "__main__":
    # Manual smoke test: translate a small sample input and show the result.
    sample_input = """
molecule h2 {
H
H 1 R
R = .9
}
set basis 6-31G**
"""
    result = process_input(sample_input)
    print("Result\n==========================")
    print(result)