#!/usr/bin/env python

#import readdat as rd
import matplotlib.pyplot as plt
import pandas as pd
import sys

def from_dat(f):
    """Read a Dakota tabular file and return its contents as a DataFrame.

    The expected layout is one whitespace-separated header row whose first
    label is ``%eval_id``, followed by one row per evaluation with the same
    number of fields as the header. Blank lines are ignored.

    Args:
        f: Either a file name (``str``), which is opened for reading and
            closed again before returning, or an already open file-like
            object providing ``readline`` and line iteration, which is left
            open for the caller.

    Returns:
        A pandas DataFrame whose columns appear in file order. The
        ``%eval_id`` column holds ints; every other column holds floats when
        all of its values parse as floats, and the raw strings otherwise.

    Raises:
        TypeError: If ``f`` is neither a string nor file-like, or if the
            contents do not look like a Dakota tabular file.
        IOError: If ``f`` is a file name that cannot be opened.
        ValueError: If an ``%eval_id`` entry is not an integer.
    """
    # Determine whether f is a string or a file-like object. If the former,
    # assume it is a file name and attempt to open it.
    if isinstance(f, str):
        # Allow a potential IOError to be raised in the calling context.
        ifp = open(f, "r")
        opened_here = True
    elif hasattr(f, "readline"):
        ifp = f
        opened_here = False
    else:
        raise TypeError("Expected either a file object or string.")

    # Not every file-like object carries a name (e.g. StringIO).
    source = getattr(ifp, "name", "<input>")
    not_dakota = ("%s does not appear to be a Dakota tabular graphics file."
                  % source)
    try:
        # Assumed format is a header row followed by rows of data.
        columns = ifp.readline().split()
        numCols = len(columns)
        # Very basic error checking. The length test comes first so that an
        # empty header is reported as a format error, not an IndexError.
        if numCols < 2 or columns[0] != "%eval_id":
            raise TypeError(not_dakota)
        rows = []
        for line in ifp:
            tokens = line.split()
            if not tokens:
                continue  # tolerate blank lines, e.g. at the end of the file
            if len(tokens) != numCols:
                raise TypeError(not_dakota)
            rows.append(tokens)
    finally:
        # Only close what this function opened; a caller-supplied file
        # object remains the caller's responsibility.
        if opened_here:
            ifp.close()

    # Build a dictionary of lists keyed by column label. The evaluation id is
    # an integer; for every other column attempt a float conversion and fall
    # back to the raw strings when any value is not numeric.
    data = {}
    for idx, key in enumerate(columns):
        col = [row[idx] for row in rows]
        if key == "%eval_id":
            data[key] = [int(d) for d in col]
        else:
            try:
                data[key] = [float(d) for d in col]
            except ValueError:  # conversion to float failed: keep strings
                data[key] = col

    # Passing columns= keeps the file ordering of the columns.
    return pd.DataFrame(data, columns=columns)

# Collect command line arguments: the tabular file name followed by
# comma-separated lists of variable labels and response labels.
if len(sys.argv) != 4:
    # sys.stderr.write is used instead of the print statement so this works
    # under both Python 2 and Python 3.
    sys.stderr.write(
        "Usage: %s dakota_tabular_file " % sys.argv[0]
        + "comma_separated_variables comma_separated_responses\n")
    sys.exit(1)
tabularFilename = sys.argv[1]
variables = sys.argv[2].split(",")
responses = sys.argv[3].split(",")

# Read in tabular data. A file that cannot be opened or that is not a valid
# Dakota tabular file is reported on stderr rather than as a traceback.
try:
    raw = from_dat(tabularFilename)
except IOError:
    sys.stderr.write("ERROR: Failed to open %s.\n" % tabularFilename)
    sys.exit(1)
except (TypeError, ValueError) as err:
    # from_dat raises TypeError for a malformed file; int() inside it raises
    # ValueError when an evaluation id is not an integer.
    sys.stderr.write("ERROR: %s\n" % err)
    sys.exit(1)

# Every requested variable and response must be a column of the tabular
# file. The bookkeeping columns (evaluation id and interface id) are not
# valid choices, so they are removed from the candidates first.
available = set(raw.columns).difference(["%eval_id", "interface"])
for label in variables + responses:
    if label in available:
        continue
    sys.stderr.write("ERROR: Variable or response '%s' not found in %s\n"
                     % (label, tabularFilename))
    sys.exit(1)
# A properly formatted tabular file for a centered parameter study begins
# with the eval at the nominal values. Then each variable is varied one at a
# time. The number of steps can differ from variable to variable, but the
# steps plus the nominal point must add up to an odd number of rows.
# The steps of a variable are the rows whose value differs from the nominal
# value. The nominal value itself also appears in every row that steps some
# other variable, so the nominal eval is selected by position (row 0) rather
# than by value.
if len(raw) == 0:
    sys.stderr.write("ERROR: %s contains no evaluations.\n" % tabularFilename)
    sys.exit(1)
nominal = raw.iloc[0]
for v in variables:
    uniqueVals = raw[v].unique()
    # unique() reports values in order of first appearance, so its first
    # entry should be the nominal value taken from row 0.
    if nominal[v] != uniqueVals[0]:
        sys.stderr.write(
            "ERROR: eval at nominal values must appear first in %s.\n"
            % tabularFilename)
        sys.exit(1)
    # Extract v plus the requested responses at the steps together with the
    # nominal evaluation, and sort by variable value. Selecting the rows with
    # one boolean mask keeps the column dtypes intact and avoids
    # DataFrame.append / DataFrame.sort, which newer pandas no longer has.
    isStep = raw[v].isin(uniqueVals[1:])
    isStep.iloc[0] = True
    steps = raw.loc[isStep, [v] + responses].sort_values(v)
    if len(steps) % 2 == 0:  # basic test: must have odd number of rows
        sys.stderr.write("ERROR: Unexpected number of steps for '%s'.\n" % v)
        sys.exit(1)
    # Figure out sensible x-axis limits (10% padding on either side) and the
    # nominal point at which to draw a vertical line.
    xmin = steps[v].iloc[0]
    xmax = steps[v].iloc[-1]
    xmin, xmax = xmin - 0.1*(xmax-xmin), xmax + 0.1*(xmax-xmin)
    center = nominal[v]
    # Loop over responses and create one figure (also saved as PNG) for each.
    for r in responses:
        fig = plt.figure("Centered Parameter Study - %s vs. %s" % (r, v))
        plt.plot(steps[v], steps[r], "+-")
        plt.ylabel(r)
        plt.xlabel(v)
        plt.title("%s vs. %s" % (r, v))
        plt.xlim(xmin, xmax)
        plt.axvline(center, color="black", linestyle="--")
        plt.savefig("%s-%s.png" % (r, v))
        fig.show()

# Keep the figure windows open until the user presses Enter.
sys.stdout.write("Press Enter to close plotting windows and exit..\n")
sys.stdout.flush()
try:
    wait_for_enter = raw_input  # Python 2
except NameError:
    wait_for_enter = input  # Python 3 renamed raw_input to input
try:
    wait_for_enter()
except EOFError:
    # stdin is closed (non-interactive run): there is nothing to wait for.
    pass





