# =============================================================================
# database.py - A Python interface to an SQL database
# Copyright (C) 2000, 2001 Ole Moller Nielsen & Peter Christen
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License (http://www.gnu.org/copyleft/gpl.html)
#    for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
#
#    
# Contact address: Ole.Nielsen@bigfoot.com
#                  Peter.Christen@anu.edu.au
#
# Version 1.0 March 2001
# Version 1.0.1 May 2001 [PC fixed bug in simple_query: forgotten mydb.close()]
# =============================================================================


# -----------------------------------------------------------------------------
# Options directory with default values - to be set by user

options = {
  'database_dir':  'my_db_dir/',       # Location of database
  'database_name': 'my_db_name',       # Database name
  'user_name':     'my_user',          # User name
  'user_password': 'my_password',      # User password
  'working_dir':   'my_working_dir/',  # Working directory for parallel cache 
  'block_size':    'my_block_size'     # Number of transactions for blocking
}

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

def set_option(key, value):
  """Function to set values in the options directory.

  USAGE:
    set_option(key, value)

  ARGUMENTS:
    key --   Key in options dictionary. (Required)
    value -- New value for key. (Required)

  DESCRIPTION:
    Function to set values in the options directory.
    Raises an exception if key is not in options.
  """

  from os import sep

  if (key=='database_dir') or (key=='working_dir'):
    if value[-1] != sep:
      value = value+sep  # Add separator for directories

  if options.has_key(key):
    options[key] = value
  else:
    raise KeyError(key)  # Key not found, raise an exception

# -----------------------------------------------------------------------------

def exec_query(queries, parallel=1, caching=0, dependencies=None,
               workdir=None):
  """Execute any valid SQL query or list of queries.

  USAGE:
    result_list = exec_query(queries, parallel, caching, dependencies, workdir)

  ARGUMENTS:
    queries --      A list of SQL queries. (Required)
    parallel --     Attempt parallel execution. (Default: 1)
    caching --      Cache final result for efficiency of subsequent calls.
                    (Default: 0)
    dependencies -- If present dependencies must be a list of files that the
                    queries depend explicitly on. If any file in this list is
                    newer than the cached queries, they are recomputed.
                    (Default: None)
    workdir --      Specifies where temporary cache files are to be stored.
                    (Default: Takes value from options directory)

  DESCRIPTION:
    This is the user interface to the database module. It executes any valid
    MySQL query using the default database, user and password. The argument
    'queries' can be a list of queries which are executed in parallel on
    multiprocessor architectures. Multiple queries are returned as a list of
    results.
  """

  # Ideas:
  #  
  # Let user specify number of processors and run multiple queries sequentially
  # on each of those. Can be done by calling exec_query recursively with
  # parallel=0.

  if not workdir:  # Use default working directory if not specified otherwise
    workdir = options['working_dir']

  if caching:
    from caching import cache

    argtup = (queries, parallel, workdir)
    result = cache(multi_query, argtup, verbose=0, dependencies=dependencies)

  else:
    result = multi_query(queries, parallel, workdir)

  return(result)

# -----------------------------------------------------------------------------

def get_table_list():
  """Return list of tables names in current database.

  USAGE:
    table_list = get_table_list()

  ARGUMENTS:
    None

  DESCRIPTION:
    This function extracts a list of tables of the currently used database and
    returns it as a list of strings.
  """

  result = simple_query('SHOW tables')
  def a(x): return("%s" % (x))
  return(map(a,result))

# -----------------------------------------------------------------------------

def get_table_attributes(table_list):
  """Return dictionary with attributes for tables (which are keys).

  USAGE:
    attr_dict = get_table_attributes(table_list)

  ARGUMENTS:
    table_list -- A list of database tables. (Required)

  DESCRIPTION:
    This function creates a dictionary of tables and their according
    attributes. If you reference a table, you will get the list of
    attributes belonging to that table.
  """

  from string import lower

  table_dict = {}
  for table in table_list:
    result = simple_query('DESCRIBE '+table)
    dict = []
    for l in result:
      dict.append(lower(l[0]))
    table_dict[table] = dict
  return(table_dict)

#==============================================================================
# Auxiliary functions
#==============================================================================
  
def multi_query(queries, parallel=1, workdir=None):
  """Core function for parallel database querying.

  USAGE:
    result_list = multi_query(queries, parallel=1, workdir=None)

  ARGUMENTS:
    queries --  A list of SQL queries. (Required)
    parallel -- Attempt parallel execution. (Default: 1)
    workdir --  Specifies where temporary cache files are to be stored.
                (Default: Takes value from options directory)

  DESCRIPTION:
    Used by function exec_query, see documentation for the latter.
  """

  from os import fork
  from sys import exit
  import types
  from caching import cache, checkdir

  if not workdir:  # Use default working directory if not specified otherwise
    workdir = options['working_dir']

  # Do not compress parallel queries, because of the danger of memory overflow
  # when they are decompressed.
  #
  compress = 0

  wd = checkdir(workdir)  # Create parallel work directory if needed

  if type(queries) != types.ListType:  # Make sure it's a list
    queries = [queries]

  num_queries = len(queries)
  if (parallel and num_queries > 1):

    # Run all queries in parallel and cache results for subsequent retrieval
    # by the master process. This loop does *not* rely on previously cached
    # results, it will always query the database. This choice has been made
    # to ensure integrity of data.
    #
    n = 0
    for query in queries:
      n = n+1

      if n < num_queries:
        # Spawn separate process for parallel execution (worker processes)
        #
        pid = fork()
        if pid == 0:

	  # Execute query in worker process
  	  #
          ll = cache(simple_query, query, evaluate=1, verbose=0, \
                     compression=compress, cachedir=wd)
          exit()

      else:
        # Run last query on master process
        #
        ll = cache(simple_query, query, evaluate=1, verbose=0, \
                   compression=compress, cachedir=wd)

    # Now retrieve cached results and return them in a list
    #
    result = []
    for query in queries:
      ll = cache(simple_query, query, evaluate=0, verbose=0, cachedir=wd)
      result.append(ll)

    # Clean-up temporary cached parallel queries
    #
    cache(simple_query, 'clear', verbose=1, cachedir=wd)

  else:
    # Sequential version
    #
    result = []
    for query in queries:
      ll = simple_query(query)
      result.append(ll)
    if num_queries == 1:
      result = result[0]

  return(result)

# -----------------------------------------------------------------------------

def simple_query(query):
  """Execute a general SQL query.

  USAGE:
    result_list = simple_query(query)

  ARGUMENTS:
    query -- A valid SQL query. (Required)

  DESCRIPTION:
    Execute any valid SQL query using the default database, user and password.
    This routine has been tested with MySQLdb 0.2.2
  """

  import MySQLdb

  database = options['database_name']
  username = options['user_name']
  password = options['user_password']

  mydb=MySQLdb.Connect(db=database, user=username, passwd=password)

  cu = mydb.cursor()

  lines = cu.execute(query)
  if lines > 0:
    result = cu.fetchall()
  else:
    result = []  # fetchall returns an empty tuple () if now rows are returned

  mydb.close()
  return(result)

# -----------------------------------------------------------------------------

