[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[gnue] r7616 - trunk/gnue-common/src/datasources
From: johannes
Subject: [gnue] r7616 - trunk/gnue-common/src/datasources
Date: Sat, 18 Jun 2005 02:44:58 -0500 (CDT)
Author: johannes
Date: 2005-06-18 02:44:57 -0500 (Sat, 18 Jun 2005)
New Revision: 7616
Added:
trunk/gnue-common/src/datasources/readgsd.py
Log:
Added missing file to repository
Added: trunk/gnue-common/src/datasources/readgsd.py
===================================================================
--- trunk/gnue-common/src/datasources/readgsd.py 2005-06-17 15:24:00 UTC (rev 7615)
+++ trunk/gnue-common/src/datasources/readgsd.py 2005-06-18 07:44:57 UTC (rev 7616)
@@ -0,0 +1,724 @@
+# GNU Enterprise Common Library - Datasources - Read and import gsd-files
+#
+# Copyright 2001-2005 Free Software Foundation
+#
+# This file is part of GNU Enterprise
+#
+# GNU Enterprise is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public
+# License as published by the Free Software Foundation; either
+# version 2, or (at your option) any later version.
+#
+# GNU Enterprise is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied
+# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+# PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public
+# License along with program; see the file COPYING. If not,
+# write to the Free Software Foundation, Inc., 59 Temple Place
+# - Suite 330, Boston, MA 02111-1307, USA.
+#
+# $Id$
+
+import os
+import re
+import sets
+import mx.DateTime.ISO
+
+from gnue.common.apps import errors
+from gnue.common.apps.GClientApp import *
+from gnue.common.datasources import GSchema, GDataSource
+from gnue.common.utils.FileUtils import openResource
+from gnue.common.apps.i18n import translate as _ # for epydoc
+
+# =============================================================================
+# Exceptions
+# =============================================================================
+
+class MissingTableError (errors.ApplicationError):
+ def __init__ (self, name):
+ msg = u_("Table '%(table)s' not found in the schema") % {'table': name}
+ errors.ApplicationError.__init__ (self, msg)
+
+class MissingKeyFieldError (errors.ApplicationError):
+ def __init__ (self, table, row, keySet, rowSet):
+ msg = u_("Key field(s) '%(fields)s' missing in row '%(row)s' of " \
+ "table '%(table)s'") \
+ % {'table' : table, 'row': row,
+ 'fields': ", ".join (keySet.difference (rowSet))}
+ errors.ApplicationError.__init__ (self, msg)
+
+class InvalidFieldsError (errors.ApplicationError):
+ def __init__ (self, table, row, fields):
+ msg = u_("Table '%(table)s' has no field(s) '%(fields)s'") \
+ % {'table': table, 'fields': ", ".join (fields)}
+ errors.ApplicationError.__init__ (self, msg)
+
+class CircularReferenceError (errors.ApplicationError):
+ def __init__ (self):
+ msg = u_("Tables have circular or unresolveable references")
+ errors.ApplicationError.__init__ (self, msg)
+
+class InvalidNumberError (errors.ApplicationError):
+ def __init__ (self, value, length, scale):
+ msg = u_("The value '%(value)s' is not a valid " \
+ "number (%(length)s.%(scale)s)") \
+ % {'value': value, 'length': length, 'scale': scale}
+ errors.ApplicationError.__init__ (self, msg)
+
+class OutOfRangeError (errors.ApplicationError):
+ def __init__ (self, value, length, scale):
+ msg = u_("The value '%(value)s' is out of range (%(length)s.%(scale)s)") \
+ % {'value': value, 'length': length, 'scale': scale}
+ errors.ApplicationError.__init__ (self, msg)
+
+class InvalidBooleanError (errors.ApplicationError):
+ def __init__ (self, value):
+ msg = u_("'%(value)s' is not a valid boolean value") % {'value': value}
+ errors.ApplicationError.__init__ (self, msg)
+
+class InvalidDateError (errors.ApplicationError):
+ def __init__ (self, value):
+ msg = u_("'%s' is not a vaild date, use 'YYYY-MM-DD' (ISO)") % value
+ errors.ApplicationError.__init__ (self, msg)
+
+class InvalidTimeError (errors.ApplicationError):
+ def __init__ (self, value):
+ msg = u_("'%s' is not a vaild time, use 'HH[:MM[:SS[.ss]]]' (ISO)") % value
+ errors.ApplicationError.__init__ (self, msg)
+
+class InvalidDateTimeError (errors.ApplicationError):
+ def __init__ (self, value):
+ msg = u_("'%s' is not a vaild date/time, use 'YYYY-MM-DD " \
+ "HH[:MM[:SS[.ss]]]' (ISO)") % value
+ errors.ApplicationError.__init__ (self, msg)
+
+class InvalidTypeError (errors.ApplicationError):
+ def __init__ (self, ftype):
+ msg = u_("'%s' is not a recognized field type") % ftype
+ errors.ApplicationError.__init__ (self, msg)
+
+
+# =============================================================================
+# Client application reading and importing GNUe Schema Definition files
+# =============================================================================
+
+class gsdClient (GClientApp):
+ """
+ Client application reading GNUe Schema Definition (gsd) files and importing
+ them into a connection.
+ """
+
+ # Identification of the application
+ # NOTE(review): presumably evaluated by GClientApp - confirm
+ NAME = "readgsd"
+ COMMAND = "readgsd"
+ VERSION = "0.1.0"
+ # Usage text: the usage of GClientApp followed by the list of input files
+ USAGE = "%s file [, file, ...]" % GClientApp.USAGE
+ SUMMARY = _("Import GNUe Schema Definition files into a given connection")
+
+
+ # ---------------------------------------------------------------------------
+ # Constructor
+ # ---------------------------------------------------------------------------
+
+ def __init__ (self, connections = None):
+
+ self.addCommandOption ('connection', 'c', argument='connectionname',
+ default = "gnue",
+ help = _("Use the connection <connectionname> for creating the
schema"))
+
+ self.addCommandOption ('output','o', argument='filename',
+ help = _("Also send the code for creating the schema to this file."))
+
+ self.addCommandOption ('file-only', 'f', default = False,
+ help = _("If this flag is set, only code is sent to the output file "
+ "and the schema is not created automatically."))
+
+ self.addCommandOption ('mode', 'm', argument='both|schema|data',
+ default = 'both',
+ help = _("Mode of operation. If mode is 'schema', only schema "
+ "creation is done. If mode is 'data' only data integration "
+ "is done."))
+
+ self.addCommandOption ('username', 'u', argument="user",
+ help = _("Set the username for the database. If the database is to be "
+ "created, this username will be it's owner."))
+
+ self.addCommandOption ('password', 'p', argument="password",
+ help = _("Set the password for the database."))
+
+ self.addCommandOption ('createdb', 'd', default = False,
+ help = _("If this option is set, the database will be created before "
+ "any schema creation is done. There must be a username "
+ "either from the given connection-configuration or from the "
+ "command line. This user becomes the owner of the database "
+ "and will be implicitly created."))
+
+ self.addCommandOption ('yes', 'y', default = False,
+ help = _("If this option is set, the program runs in batch-mode, "
+ "which means all questions are answered with 'yes' "
+ "automatically."))
+
+ GClientApp.__init__ (self, connections, 'schema', {})
+
+
+ # ---------------------------------------------------------------------------
+ # Run the import
+ # ---------------------------------------------------------------------------
+
+ def run (self):
+ """
+ """
+
+ self.__checkOptions ()
+ self.__loadInputFiles ()
+
+ if self.__doSchema:
+ self.__importSchema ()
+
+ if self.__doData:
+ self.__importData ()
+
+
+
+ # ---------------------------------------------------------------------------
+ # Check the command line arguments
+ # ---------------------------------------------------------------------------
+
+ def __checkOptions (self):
+
+ if not self.ARGUMENTS:
+ raise StartupError, u_("No input file specified.")
+
+ try:
+ self._files = []
+
+ for filename in self.ARGUMENTS:
+ self._files.append (openResource (filename))
+
+ except IOError:
+ raise StartupError, \
+ u_("Unable to open input file: %s") % errors.getException () [2]
+
+
+ self.outfile = self.OPTIONS ['output']
+
+ if self.OPTIONS ['file-only'] and self.outfile is None:
+ raise StartupError, \
+ u_("Output to file only requested, but no filename specified.")
+
+ self.__doSchema = self.OPTIONS ['mode'].lower () in ['both', 'schema']
+ self.__doData = self.OPTIONS ['mode'].lower () in ['both', 'data'] and \
+ not self.OPTIONS ['file-only']
+
+ if not (self.__doSchema or self.__doData):
+ raise StartupError, \
+ u_("Mode of operation must be one of 'both', 'schema' or 'data'.")
+
+ cName = self.OPTIONS ['connection']
+ self.connection = self.connections.getConnection (cName)
+
+ # if a username is given on the command line we pass both username and
+ # password to the connection parameters. If the password is not set, it
+ # defaults to 'gnue'.
+ username = self.connection.parameters.get ('username', 'gnue')
+ password = self.connection.parameters.get ('password', 'gnue')
+
+ if self.OPTIONS ['username'] is not None:
+ username = self.OPTIONS ['username']
+
+ if self.OPTIONS ['password'] is not None:
+ password = self.OPTIONS ['password']
+
+ self.connection.parameters ['username'] = username
+ self.connection.parameters ['password'] = password
+
+
+ # ---------------------------------------------------------------------------
+ # Load input files
+ # ---------------------------------------------------------------------------
+
+ def __loadInputFiles (self):
+
+ # Build up a single object tree from all input files
+ self._schema = None
+ for stream in self._files:
+ xmltree = GSchema.loadFile (stream)
+ if self._schema is None:
+ self._schema = xmltree
+ else:
+ self._schema.merge (xmltree)
+
+
+ # ---------------------------------------------------------------------------
+ # Import the given GSD files into the connection
+ # ---------------------------------------------------------------------------
+
+ def __importSchema (self):
+
+ if self.OPTIONS ['createdb']:
+ # Create a new database
+ if self.__ask (u_("You are about to create the new database '%s'. " \
+ "Continue") % self.connection.name,
+ [u_("y"), u_("n")], "n") == u_("n"):
+ return
+
+ self.connection.createDatabase ()
+
+ # Process schema information (if requested)
+ simulation = self.OPTIONS ['file-only']
+ if not simulation:
+ if self.__ask (u_("You are about to change the database '%s'. Continue")
\
+ % self.connection.name, [u_("y"), u_("n")], u_("n")) == u_("n"):
+ return
+
+ self.connections.loginToConnection (self.connection)
+
+ print o(u_("Updating schema ..."))
+ commands = self.connection.writeSchema (self._schema, simulation)
+
+ # Dump the commands to the output file (if requested)
+ if self.outfile is not None:
+ dest = open (self.outfile, 'w')
+ try:
+ for line in commands:
+ dest.write ("%s%s" % (line, os.linesep))
+
+ finally:
+ dest.close ()
+
+
+ # ---------------------------------------------------------------------------
+ # Import the given <data>
+ # ---------------------------------------------------------------------------
+
+ def __importData (self):
+
+ print o(u_("Updating data ..."))
+
+ # First fetch the current schema from the backend
+ self._current = self.connection.readSchema ()
+
+ tables = {}
+ pkeys = {}
+ fields = {}
+
+ # Then make sure to have valid key information for all tables
+ for tdata in self._schema.findChildrenOfType ('GSTableData', False, True):
+ table = self.__findTable (tdata.tablename)
+ tables [table.name.lower ()] = (table, tdata)
+ fields [table.name.lower ()] = sets.Set ([f.name.lower () for f in \
+ table.findChildrenOfType ('GSField', False, True)])
+
+ key = tdata.findChildOfType ('GSPrimaryKey')
+ if key is None:
+ key = self.__getKeyFromTable (table)
+ if key is not None:
+ GSchema.GSPrimaryKey (tdata).assign (key, True)
+
+ else:
+ q = u_("The table '%s' has no key defined. Shall i insert all rows")
\
+ % table.name
+ if self.__ask (q, [u_("y"), u_("n")], u_("n")) == u_("n"):
+ tdata.getParent ()._children.remove (tdata)
+ del tables [table.name.lower ()]
+
+ if key is not None:
+ pkeys [table.name.lower ()] = sets.Set ([f.name.lower () for f in \
+ key.findChildrenOfType ('GSPKField', False, True)])
+
+ # Since we have all key information available now, double check the rows
+ for item in self._schema.findChildrenOfType ('GSTableData', False, True):
+ keySet = pkeys.get (item.tablename.lower ())
+
+ for (n, r) in enumerate (item.findChildrenOfType ('GSRow',False,True)):
+ rowfields = sets.Set ([value.field.lower () \
+ for value in r.findChildrenOfType ('GSValue', False, True)])
+
+ # If the table has a key, are all keyfields available in the row
+ if keySet is not None and not keySet.issubset (rowfields):
+ raise MissingKeyFieldError, (item.tablename, n, keySet, rowfields)
+
+ # Are all fields in the row defined by the table
+ if not rowfields.issubset (fields [item.tablename.lower ()]):
+ raise InvalidFieldsError, \
+ (item.tablename, n, rowfields.difference (fields [item.tablename]))
+
+ # Order the tables so the do not violate constraints
+ references = {}
+ fishhooks = {}
+
+ for (table, tdata) in tables.values ():
+ deps = references.setdefault (table.name.lower (), [])
+
+ for fk in table.findChildrenOfType ('GSForeignKey', False, True):
+ fkname = fk.references.lower ()
+
+ if fkname == table.name.lower ():
+ fishhooks.setdefault (table.name.lower (), []).append (fk)
+
+ # Only add a dependency for a constraint, if we plan to add data for
+ # that table too
+ elif fkname in tables:
+ deps.append (fkname)
+
+ needCommit = False
+ for name in self.__orderByDependency (references):
+ (table, tdata) = tables [name]
+ if not name in pkeys:
+ needCommit |= self.__importAllInserts (table, tdata)
+ else:
+ needCommit |= self.__importTable (table, tdata, fishhooks.get (name))
+
+ if needCommit:
+ self.connection.commit ()
+
+
+ # ----------------------------------------------------------------------------
+ # Import a table having a primary key
+ # ----------------------------------------------------------------------------
+
+ def __importTable (self, table, tabledata, fishes):
+
+ fields = {}
+ for field in table.findChildrenOfType ('GSField', False, True):
+ fields [field.name.lower ()] = field
+
+ pkf = [f.name.lower () for f in \
+ tabledata.findChildrenOfType ('GSPKField', False, True)]
+ rows = {}
+ rowFields = {}
+ fishLookup = {}
+
+ for row in tabledata.findChildrenOfType ('GSRow', False, True):
+ record = {}
+ for value in row.findChildrenOfType ('GSValue'):
+ fname = value.field.lower ()
+ rowFields [fname] = True
+ record [fname] = self.__getValue (value, fields [fname])
+
+ pkey = tuple ([record [k] for k in pkf])
+ rows [pkey] = record
+
+ if fishes is not None:
+ for fkey in fishes:
+ ref = []
+ for fkfield in fkey.findChildrenOfType ('GSFKField'):
+ ref.append (record.get (fkfield.references.lower ()))
+
+ fishLookup.setdefault (fkey.name, {}) [tuple (ref)] = pkey
+
+
+ if fishes is not None:
+ sortdict = {}
+
+ for (key, data) in rows.items ():
+ deps = sortdict.setdefault (key, [])
+
+ for fkey in fishes:
+ k = tuple ([data.get (f.name.lower ()) for f in \
+ fkey.findChildrenOfType ('GSFKField')])
+ rkey = fishLookup [fkey.name].get (k)
+
+ if rkey is not None:
+ deps.append (rkey)
+
+ recOrder = self.__orderByDependency (sortdict)
+
+ else:
+ recOrder = rows.keys ()
+
+ # Build a datasource and insert the data
+ source = GDataSource.DataSourceWrapper (connections = self.connections,
+ attributes = {'name' : "dts_%s" % table.name,
+ 'connection': self.connection.name,
+ 'table' : table.name,
+ 'primarykey': ",".join (pkf)},
+ fields = rowFields.keys ())
+
+ # Build a mapping for the existing table, based on the primary key
+ existing = {}
+ resultSet = source.createResultSet ()
+
+ rec = resultSet.firstRecord ()
+ while rec is not None:
+ recKey = tuple ([rec [f] for f in pkf])
+ existing [recKey] = rec
+
+ rec = resultSet.nextRecord ()
+
+ # Now run over all datarows in the previously determined order
+ print o (u_(" updating table '%s' ...") % table.name)
+ doPost = False
+ new = 0
+ upd = 0
+
+ for key in recOrder:
+ if key in existing:
+ rs = existing [key]
+ changed = 0
+ for (field, value) in rows [key].items ():
+ if rs.getField (field) != value:
+ rs.setField (field, value)
+ changed = 1
+ upd += changed
+
+ else:
+ new += 1
+ newRec = resultSet.insertRecord ()
+ for (field, value) in rows [key].items ():
+ newRec.setField (field, value)
+
+ if new + upd:
+ resultSet.post ()
+
+ print o (u_(" Rows: %(ins)d inserted, %(upd)d updated, %(kept)d "
+ "unchanged.") \
+ % {'ins': new, 'upd': upd, 'kept': len (rows) - upd - new})
+
+ return (new + upd) > 0
+
+
+ # ---------------------------------------------------------------------------
+ # Import a table by inserting all its rows
+ # ---------------------------------------------------------------------------
+
+ def __importAllInserts (self, table, tabledata):
+
+ fields = {}
+ for field in table.findChildrenOfType ('GSField', False, True):
+ fields [field.name.lower ()] = field
+
+ rows = []
+ rowFields = {}
+
+ for row in tabledata.findChildrenOfType ('GSRow', False, True):
+ record = {}
+ for value in row.findChildrenOfType ('GSValue'):
+ fname = value.field.lower ()
+ rowFields [fname] = True
+ record [fname] = self.__getValue (value, fields [fname])
+
+ rows.append (record)
+
+ if not rows:
+ return False
+
+ # Build a datasource and insert the data
+ source = GDataSource.DataSourceWrapper (connections = self.connections,
+ attributes = {'name' : "dts_%s" % table.name,
+ 'connection': self.connection.name,
+ 'table' : table.name},
+ fields = rowFields.keys ())
+
+ print o (u_(" inserting into table '%s' ...") % table.name)
+
+ resultSet = source.createEmptyResultSet ()
+ for record in rows:
+ new = resultSet.insertRecord ()
+ for (field, value) in record.items ():
+ new.setField (field, value)
+
+ resultSet.post ()
+
+ print o (u_(" Rows: %(ins)d inserted") % {'ins': len (rows)})
+
+ return True
+
+
+ # ---------------------------------------------------------------------------
+ # Order a given dependency tree
+ # ---------------------------------------------------------------------------
+
+ def __orderByDependency (self, depTree):
+
+ result = []
+
+ while depTree:
+ addition = []
+
+ for (key, deps) in depTree.items ():
+ # If a key has no dependencies, add it to the result
+ if not len (deps):
+ addition.append (key)
+
+ # and remove that key from all other dependency sequences
+ for otherDeps in depTree.values ():
+ if key in otherDeps:
+ otherDeps.remove (key)
+
+ # finally remove it from the dictionary
+ del depTree [key]
+
+ # If no key without a dependency was found, but there are still
+ # entries in the tree, they *must* have circular references
+ if not addition and depTree:
+ raise CircularReferenceError
+
+ result.extend (addition)
+
+ return result
+
+
+ # ---------------------------------------------------------------------------
+ # Get a native python value from a GSValue instance using a given GSField
+ # ---------------------------------------------------------------------------
+
+ def __getValue (self, value, field):
+
+ ftype = field.type.lower ()
+ contents = value.getChildrenAsContent ()
+
+ # unquote the contents if it is quoted
+ if len (contents) > 1 and contents [0] in ["'", '"']:
+ if contents [-1] == contents [0]:
+ contents = contents [1:-1]
+
+ # If no value is given, just return None
+ if not len (contents):
+ return None
+
+ length = hasattr (field, 'length') and field.length or 0
+ scale = hasattr (field, 'precision') and field.precision or 0
+
+ # return string type fields with an optional length restriction
+ if ftype == 'string':
+ maxlen = length and length or len (contents)
+ return contents [:maxlen]
+
+ # Try to convert a numeric field according to length and scale
+ elif ftype == 'number':
+ value = contents.strip ()
+
+ if value in [u'TRUE', u'FALSE']:
+ return int (value == u'TRUE')
+
+ elif length or scale:
+ vmatch = re.compile ('^([+-]{0,1})(\d+)[\.]{0,1}(\d*)$').match (value)
+ if vmatch is None:
+ raise InvalidNumberError, (value, length, scale)
+
+ (sign, pre, frac) = vmatch.groups ()
+ if len (pre) > (length - scale) or len (frac) > scale:
+ OutOfRangeError, (value, length, scale)
+
+ if len (frac):
+ return float ("%s%s.%s" % (sign, pre, frac))
+
+ else:
+ return int ("%s%s" % (sign, pre))
+
+ # we don't know anything about precision
+ else:
+ if '.' in value or ',' in value:
+ return float (value)
+
+ else:
+ return int (value)
+
+ # booleans must be 'TRUE' or 'FALSE', otherwise they're None
+ elif ftype == 'boolean':
+ bool = contents.upper ().strip ()
+ if bool in ['TRUE', 'FALSE']:
+ return bool == 'TRUE'
+ else:
+ raise InvalidBooleanError, bool
+
+ # Dates must conform with the ISO spec: YYYY-MM-DD
+ elif ftype == 'date':
+ try:
+ return mx.DateTime.ISO.ParseDate (contents.strip ())
+
+ except ValueError:
+ raise InvalidDateError, contents.strip ()
+
+ # Times must conform with the ISO spec: HH:[MM:[:SS[.ss]]]
+ elif ftype == 'time':
+ try:
+ return mx.DateTime.ISO.ParseTime (contents.strip ())
+
+ except ValueError:
+ raise InvalidTimeError, contents.strip ()
+
+ elif ftype == 'datetime':
+ try:
+ return mx.DateTime.ISO.ParseDateTime (contents.strip ())
+
+ except ValueError:
+ raise InvalidDateTimeError, contents.strip ()
+
+ else:
+ raise InvalidTypeError, ftype
+
+
+ # ----------------------------------------------------------------------------
+ # Find a given GSTable instance in the current schema
+ # ----------------------------------------------------------------------------
+
+ def __findTable (self, name):
+
+ for item in self._current.findChildrenOfType ('GSTable', False, True):
+ if item.name.lower () == name.lower ():
+ return item
+
+ raise MissingTableError, (name)
+
+
+ # ----------------------------------------------------------------------------
+ # Get a usable key from the given table
+ # ----------------------------------------------------------------------------
+
+ def __getKeyFromTable (self, table):
+
+ # Is there a PK defined in the backend schema ?
+ pk = table.findChildOfType ('GSPrimaryKey')
+ if pk is not None:
+ return pk
+
+ # Maybe we could use a 'unique index' to as key, since it has the same
+ # nature as a primary key. But then we have to check wether all <rows>
+ # have all fields used by the index available. This is left for a future
+ # version though :)
+
+ return None
+
+
+ # ---------------------------------------------------------------------------
+ # Ask a question with a set of valid options and a default
+ # ---------------------------------------------------------------------------
+
+ def __ask (self, question, options, default):
+ """
+ Ask for a question, allowing a set of answers, using a default-value if the
+ user just presses <Enter>.
+
+ @param question: string with the question to ask
+ @param options: sequence of allowed options, i.e. ['y', 'n']
+ @param default: string with the default option
+
+ @return: string with the option selected
+ """
+
+ if self.OPTIONS ['yes']:
+ return u_("y")
+
+ answer = None
+ default = default.lower ()
+ lopts = [opt.lower () for opt in options]
+
+ dopts = []
+ for item in lopts:
+ dopts.append (item == default and item.upper () or item)
+
+ while True:
+ print o(question), o("[%s]:" % ",".join (dopts)),
+ answer = raw_input ().lower () or default
+
+ if answer in lopts:
+ break
+
+ return answer
+
+
+# =============================================================================
+# Main program
+# =============================================================================
+
+if __name__ == '__main__':
+ gsdClient ().run ()
Property changes on: trunk/gnue-common/src/datasources/readgsd.py
___________________________________________________________________
Name: svn:keywords
+ Id
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [gnue] r7616 - trunk/gnue-common/src/datasources,
johannes <=