#!/usr/bin/env python

#import readdat as rd
import matplotlib.pyplot as plt
import pandas as pd
import sys

def _parse_dakota_table(ifp):
    """Parse an open Dakota tabular file object into a DataFrame.

    The first line must be a whitespace-separated header whose first label
    is ``%eval_id`` and which names at least one more column. Every
    following non-blank line must hold exactly one token per header label.

    Raises TypeError if the header or any data row is malformed, and
    ValueError if an ``%eval_id`` entry is not an integer.
    """
    # Not every file-like object (e.g. StringIO) carries a ``name``.
    source = getattr(ifp, "name", "<input>")
    bad_format = "%s does not appear to be a Dakota tabular graphics file."

    columns = ifp.readline().split()
    num_cols = len(columns)
    # Very basic error checking. Test the length first so that an empty
    # header is reported as a format error rather than an IndexError.
    if num_cols < 2 or columns[0] != "%eval_id":
        raise TypeError(bad_format % source)

    # Collect the raw string tokens column by column, keyed by label.
    data = dict((label, []) for label in columns)
    for line in ifp:
        tokens = line.split()
        if not tokens:
            # Blank lines (typically a trailing newline) hold no data.
            continue
        if len(tokens) != num_cols:
            raise TypeError(bad_format % source)
        for label, token in zip(columns, tokens):
            data[label].append(token)

    # The evaluation id is an integer. Every other column becomes float
    # when all of its values convert, and is otherwise kept as strings.
    for label in columns:
        if label == "%eval_id":
            data[label] = [int(d) for d in data[label]]
        else:
            try:
                data[label] = [float(d) for d in data[label]]
            except ValueError:
                # Non-numeric column (e.g. an interface id): keep strings.
                pass

    # Select by the header list so that the column order of the result
    # follows the file rather than the dictionary.
    return pd.DataFrame(data)[columns]


def from_dat(f):
    """Read in a dakota tabular file and create a DataFrame.

    f is either a file name (string) or an already open file-like object
    providing ``readline`` and line iteration. A file opened here is closed
    again before returning; a file object passed in is left open.

    Returns a pandas DataFrame with one column per header label, in file
    order. ``%eval_id`` holds ints, fully numeric columns hold floats, and
    any other column keeps its string values.

    Raises IOError if a named file cannot be opened, and TypeError if f is
    neither a string nor a file-like object or if the contents do not look
    like a Dakota tabular file.
    """
    # ``basestring`` only exists on Python 2, where it also covers unicode.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if isinstance(f, string_types):
        # Allow a potential IOError to be raised in the calling context.
        ifp = open(f, "r")
        opened_here = True
    elif hasattr(f, "readline"):
        ifp = f
        opened_here = False
    else:
        raise TypeError("Expected either a file object or string.")

    try:
        return _parse_dakota_table(ifp)
    finally:
        # Only close what this function opened.
        if opened_here:
            ifp.close()

# Command-line driver: read a Dakota tabular file and draw one scatter plot
# per (variable, response) pair. Each plot is saved as
# "<response>-<variable>.png" and shown in its own window.
#
# Messages go through sys.stderr.write / sys.stdout.write rather than the
# print statement so that the script parses on both Python 2 and Python 3.

# Collect command line arguments.
if len(sys.argv) != 4:
    sys.stderr.write("Usage: %s dakota_tabular_file " % sys.argv[0] +
                     "comma_separated_variables comma_separated_responses\n")
    sys.exit(1)
tabularFilename = sys.argv[1]
variables = sys.argv[2].split(",")
responses = sys.argv[3].split(",")

# Read in tabular data
try:
    raw = from_dat(tabularFilename)
except IOError:
    sys.stderr.write("ERROR: Failed to open %s.\n" % tabularFilename)
    sys.exit(1)
except TypeError as err:
    # from_dat reports malformed input with a TypeError; show its message
    # instead of a traceback.
    sys.stderr.write("ERROR: %s\n" % err)
    sys.exit(1)

# Confirm presence of requested variables and responses in the tabular file.
# The bookkeeping columns are not plottable quantities, so they are excluded.
c = set(raw.columns)
c.discard("%eval_id")
c.discard("interface")
for name in variables + responses:
    if name not in c:
        sys.stderr.write("ERROR: Variable or response '%s' not found in %s\n"
                         % (name, tabularFilename))
        sys.exit(1)

# One figure per pair: variable on the x axis, response on the y axis.
for v in variables:
    for r in responses:
        fig = plt.figure("Scatter - %s vs. %s" % (r, v))
        plt.scatter(raw[v], raw[r])
        plt.ylabel(r)
        plt.xlabel(v)
        plt.title("%s vs. %s" % (r, v))
        plt.savefig("%s-%s.png" % (r, v))
        fig.show()

# Figure.show() does not block, so wait for the user before exiting;
# otherwise the windows would close immediately.
sys.stdout.write("Press Enter to close plotting windows and exit..\n")
sys.stdout.flush()
sys.stdin.readline()





