Logo Search packages:      
Sourcecode: weka version File versions

CheckEstimator.java

/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    CheckEstimator.java
 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.estimators;

import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.TestInstances;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;

import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

/**
 * Class for examining the capabilities and finding problems with 
 * estimators. If you implement an estimator using the WEKA libraries,
 * you should run the checks on it to ensure robustness and correct
 * operation. Passing all the tests of this object does not mean
 * bugs in the estimator don't exist, but this will help find some
 * common ones. <p/>
 * 
 * Typical usage: <p/>
 * <code>java weka.estimators.CheckEstimator -W estimator_name 
 * estimator_options </code><p/>
 * 
 * This class uses code from the CheckEstimatorClass
 * ATTENTION! Current estimators can only 
 * 1. split on a nominal class attribute
 * 2. build estimators for nominal and numeric attributes
 * 3. build estimators independently of the class type
 * The functionality to test on other class and attribute types
 * is left in big parts in the code. 
 * 
 * CheckEstimator reports on the following:
 * <ul>
 *    <li> Estimator abilities 
 *      <ul>
 *         <li> Possible command line options to the estimator </li>
 *         <li> Whether the estimator can predict nominal, numeric, string, 
 *              date or relational class attributes. Warnings will be displayed if 
 *              performance is worse than ZeroR </li>
 *         <li> Whether the estimator can be trained incrementally </li>
 *         <li> Whether the estimator can build estimates for numeric attributes </li>
 *         <li> Whether the estimator can handle nominal attributes </li>
 *         <li> Whether the estimator can handle string attributes </li>
 *         <li> Whether the estimator can handle date attributes </li>
 *         <li> Whether the estimator can handle relational  attributes </li>
 *         <li> Whether the estimator can build estimates for multi-instance data </li>
 *         <li> Whether the estimator can handle missing attribute values </li>
 *         <li> Whether the estimator can handle missing class values </li>
 *         <li> Whether a nominal estimator only handles 2 class problems </li>
 *         <li> Whether the estimator can handle instance weights </li>
 *      </ul>
 *    </li>
 *    <li> Correct functioning 
 *      <ul>
 *         <li> Correct initialisation during addvalues (i.e. no result
 *              changes when addValues called repeatedly) </li>
 *         <li> Whether incremental training produces the same results
 *              as during non-incremental training (which may or may not 
 *              be OK) </li>
 *         <li> Whether the estimator alters the data passed to it 
 *              (number of instances, instance order, instance weights, etc) </li>
 *      </ul>
 *    </li>
 *    <li> Degenerate cases 
 *      <ul>
 *         <li> building estimator with zero training instances </li>
 *         <li> all but one of the attribute values missing </li>
 *         <li> all attribute values missing </li>
 *         <li> all but one class values missing </li>
 *         <li> all class values missing </li>
 *      </ul>
 *    </li>
 * </ul>
 * Running CheckEstimator with the debug option set will output the 
 * training and test datasets for any failed tests.<p/>
 *
 * The <code>weka.estimators.AbstractEstimatorTest</code> uses this
 * class to test all the estimators. Any changes here, have to be 
 * checked in that abstract test class, too. <p/>
 *
 <!-- options-start -->
 * Valid options are: <p/>
 * 
 * <pre> -D
 *  Turn on debugging output.</pre>
 * 
 * <pre> -S
 *  Silent mode - prints nothing to stdout.</pre>
 * 
 * <pre> -N &lt;num&gt;
 *  The number of instances in the datasets (default 100).</pre>
 * 
 * <pre> -W
 *  Full name of the estimator analysed.
 *  eg: weka.estimators.NormalEstimator</pre>
 * 
 * <pre> 
 * Options specific to estimator weka.estimators.NormalEstimator:
 * </pre>
 * 
 * <pre> -D
 *  If set, estimator is run in debug mode and
 *  may output additional info to the console</pre>
 * 
 <!-- options-end -->
 *
 * Options after -- are passed to the designated estimator.<p/>
 *
 * @author Len Trigg (trigg@cs.waikato.ac.nz)
 * @author FracPete (fracpete at waikato dot ac dot nz)
 * @version $Revision: 1.5 $
 * @see TestInstances
 */
00143 public class CheckEstimator implements OptionHandler, RevisionHandler {

  /*
   * Note about test methods:
   * - methods return array of booleans
   * - first index: success or not
   * - second index: acceptable or not (e.g., Exception is OK)
   * - in case the performance is worse than that of ZeroR both indices are true
   *
   * FracPete (fracpete at waikato dot ac dot nz)
   */
  
  /** a class for postprocessing the test-data 
   */
00157   public class PostProcessor
    implements RevisionHandler {
    /**
     * Provides a hook for derived classes to further modify the data. Currently,
     * the data is just passed through.
     * 
     * @param data      the data to process
     * @return          the processed data
     */
00166     protected Instances process(Instances data) {
      return data;
    }
    
    /**
     * Returns the revision string.
     * 
     * @return          the revision
     */
00175     public String getRevision() {
      return RevisionUtils.extract("$Revision: 1.5 $");
    }
  }
  
  /*** The estimator to be examined */
  protected Estimator m_Estimator = (Estimator) new weka.estimators.NormalEstimator(0.000001);
  
  /** The options to be passed to the base estimator. */
00184   protected String[] m_EstimatorOptions;
  
  /** The results of the analysis as a string */
00187   protected String m_AnalysisResults;
  
  /** Debugging mode, gives extra output if true */
00190   protected boolean m_Debug = false;
  
  /** Silent mode, for no output at all to stdout */
00193   protected boolean m_Silent = false;
  
  /** The number of instances in the datasets */
00196   protected int m_NumInstances = 100;
  
  /** for post-processing the data even further */
00199   protected PostProcessor m_PostProcessor = null;
  
  /** whether classpath problems occurred */
00202   protected boolean m_ClasspathProblems = false;
  
  /**
   * class that contains info about the attribute types the estimator can estimate
   * estimator work on one attribute only
   */
00208   public static class AttrTypes
    implements RevisionHandler {
    
    boolean nominal = false;
    boolean numeric = false; 
    boolean string = false;
    boolean date = false;
    boolean relational = false;
      
    AttrTypes() {
    }

    AttrTypes (AttrTypes newTypes) {
      nominal = newTypes.nominal;
      numeric = newTypes.numeric;
      string = newTypes.string;
      date = newTypes.date;
      relational = newTypes.relational;
    }
                  
    AttrTypes (int type) {
      if (type == Attribute.NOMINAL) nominal = true;
      if (type == Attribute.NUMERIC) numeric = true;
      if (type == Attribute.STRING) string = true;
      if (type == Attribute.DATE) date = true;
      if (type == Attribute.RELATIONAL) relational = true;
    }

    int getSetType() throws Exception {               
      int sum = 0;
      int type = -1;
      if (nominal) { sum ++; type = Attribute.NOMINAL; }
      if (numeric) { sum ++; type = Attribute.NUMERIC; }
      if (string) { sum ++; type = Attribute.STRING; }
      if (date) { sum ++; type = Attribute.DATE; }
      if (relational) { sum ++; type = Attribute.RELATIONAL; }
      if (sum > 1)
      throw new Exception("Expected to have only one type set used wrongly.");
      if (type < 0)
      throw new Exception("No type set.");
      return type;
    }

    boolean oneIsSet() {
      return (nominal || numeric || string || date || relational);
    }

    public Vector getVectorOfAttrTypes() {
      Vector attrs = new Vector();
      if (nominal) attrs.add(new Integer(Attribute.NOMINAL));
      if (numeric) attrs.add(new Integer(Attribute.NUMERIC));
      if (string) attrs.add(new Integer(Attribute.STRING));
      if (date) attrs.add(new Integer(Attribute.DATE));
      if (relational) attrs.add(new Integer(Attribute.RELATIONAL));
      return attrs;
    }   
    
    /**
     * Returns the revision string.
     * 
     * @return          the revision
     */
00270     public String getRevision() {
      return RevisionUtils.extract("$Revision: 1.5 $");
    }
  }

  /**
   * public class that contains info about the chosen attribute type
   * estimator work on one attribute only
   */
00279   public static class EstTypes
    implements RevisionHandler {
    
    boolean incremental = false;
    boolean weighted = false;
    boolean supervised = false;

    /**
     * Constructor
     */
00289     public EstTypes () {
    }

    /**
     * Constructor
     */
00295     public EstTypes (boolean i, boolean w, boolean s) {
      incremental = i;
      weighted    = w;
      supervised  = s;
    }
    
    /**
     * Returns the revision string.
     * 
     * @return          the revision
     */
00306     public String getRevision() {
      return RevisionUtils.extract("$Revision: 1.5 $");
    }
  }

  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options.
   */
00316   public Enumeration listOptions() {
    
    Vector newVector = new Vector(2);
    
    newVector.addElement(new Option(
        "\tTurn on debugging output.",
        "D", 0, "-D"));
    
    newVector.addElement(new Option(
        "\tSilent mode - prints nothing to stdout.",
        "S", 0, "-S"));
    
    newVector.addElement(new Option(
        "\tThe number of instances in the datasets (default 100).",
        "N", 1, "-N <num>"));
    
    newVector.addElement(new Option(
        "\tFull name of the estimator analysed.\n"
        +"\teg: weka.estimators.NormalEstimator",
        "W", 1, "-W"));
    
    if ((m_Estimator != null) 
        && (m_Estimator instanceof OptionHandler)) {
      newVector.addElement(new Option("", "", 0, 
          "\nOptions specific to estimator "
          + m_Estimator.getClass().getName()
          + ":"));
      Enumeration enu = ((OptionHandler)m_Estimator).listOptions();
      while (enu.hasMoreElements())
        newVector.addElement(enu.nextElement());
    }
    
    return newVector.elements();
  }
  
  /**
   * Parses a given list of options. 
   *
   <!-- options-start -->
   * Valid options are: <p/>
   * 
   * <pre> -D
   *  Turn on debugging output.</pre>
   * 
   * <pre> -S
   *  Silent mode - prints nothing to stdout.</pre>
   * 
   * <pre> -N &lt;num&gt;
   *  The number of instances in the datasets (default 100).</pre>
   * 
   * <pre> -W
   *  Full name of the estimator analysed.
   *  eg: weka.estimators.NormalEstimator</pre>
   * 
   * <pre> 
   * Options specific to estimator weka.estimators.NormalEstimator:
   * </pre>
   * 
   * <pre> -D
   *  If set, estimator is run in debug mode and
   *  may output additional info to the console</pre>
   * 
   <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
00383   public void setOptions(String[] options) throws Exception {
    String      tmpStr;
    
    setDebug(Utils.getFlag('D', options));
    
    setSilent(Utils.getFlag('S', options));
    
    tmpStr = Utils.getOption('N', options);
    if (tmpStr.length() != 0)
      setNumInstances(Integer.parseInt(tmpStr));
    else
      setNumInstances(100);
    
    tmpStr = Utils.getOption('W', options);
    if (tmpStr.length() == 0)
      throw new Exception("A estimator must be specified with the -W option.");
    setEstimator(Estimator.forName(tmpStr, Utils.partitionOptions(options)));
  }
  
  /**
   * Gets the current settings of the CheckEstimator.
   *
   * @return an array of strings suitable for passing to setOptions
   */
00407   public String[] getOptions() {
    Vector        result;
    String[]      options;
    int           i;
    
    result = new Vector();
    
    if (getDebug())
      result.add("-D");
    
    if (getSilent())
      result.add("-S");
    
    result.add("-N");
    result.add("" + getNumInstances());
    
    if (getEstimator() != null) {
      result.add("-W");
      result.add(getEstimator().getClass().getName());
    }
    
    if ((m_Estimator != null) && (m_Estimator instanceof OptionHandler))
      options = ((OptionHandler) m_Estimator).getOptions();
    else
      options = new String[0];
    
    if (options.length > 0) {
      result.add("--");
      for (i = 0; i < options.length; i++)
        result.add(options[i]);
    }
    
    return (String[]) result.toArray(new String[result.size()]);
  }
  
  /**
   * sets the PostProcessor to use
   * 
   * @param value the new PostProcessor
   * @see #m_PostProcessor
   */
00448   public void setPostProcessor(PostProcessor value) {
    m_PostProcessor = value;
  }
  
  /**
   * returns the current PostProcessor, can be null
   * 
   * @return            the current PostProcessor
   */
00457   public PostProcessor getPostProcessor() {
    return m_PostProcessor;
  }
  
  /**
   * returns TRUE if the estimator returned a "not in classpath" Exception
   * 
   * @return      true if CLASSPATH problems occurred
   */
00466   public boolean hasClasspathProblems() {
    return m_ClasspathProblems;
  }
  
  /**
   * Begin the tests, reporting results to System.out
   */
00473   public void doTests() {
    
    if (getEstimator() == null) {
      println("\n=== No estimator set ===");
      return;
    }
    println("\n=== Check on Estimator: "
        + getEstimator().getClass().getName()
        + " ===\n");
    
    m_ClasspathProblems = false;

    // Start tests with test for options
    canTakeOptions();

    // test what type of estimator it is 
    EstTypes estTypes = new EstTypes();
    estTypes.incremental = incrementalEstimator()[0];
    estTypes.weighted = weightedInstancesHandler()[0];
    estTypes.supervised = supervisedEstimator()[0];
   
    // in none of the estimators yet the functionality is depending on the class type
    // since this could change the basic structure taken from checkclassifiers is kept here
    int classType = Attribute.NOMINAL;
    AttrTypes attrTypes = testsPerClassType(classType, estTypes);
    
 
    // only nominal class can be split up so far
    canSplitUpClass(attrTypes, classType);
 }
  
  
  /**
   * Set debugging mode
   *
   * @param debug true if debug output should be printed
   */
00510   public void setDebug(boolean debug) {
    m_Debug = debug;

    // disable silent mode, if necessary
    if (getDebug())
      setSilent(false);
  }
  
  /**
   * Get whether debugging is turned on
   *
   * @return true if debugging output is on
   */
00523   public boolean getDebug() {
    return m_Debug;
  }
  
  /**
   * Set slient mode, i.e., no output at all to stdout
   *
   * @param value whether silent mode is active or not
   */
00532   public void setSilent(boolean value) {
    m_Silent = value;
  }
  
  /**
   * Get whether silent mode is turned on
   *
   * @return true if silent mode is on
   */
00541   public boolean getSilent() {
    return m_Silent;
  }
  
  /**
   * Sets the number of instances to use in the datasets (some estimators
   * might require more instances).
   *
   * @param value the number of instances to use
   */
00551   public void setNumInstances(int value) {
    m_NumInstances = value;
  }
  
  /**
   * Gets the current number of instances to use for the datasets.
   *
   * @return the number of instances
   */
00560   public int getNumInstances() {
    return m_NumInstances;
  }
  
  /**
   * Set the estimator for boosting. 
   *
   * @param newEstimator the Estimator to use.
   */
00569   public void setEstimator(Estimator newEstimator) {
    m_Estimator = newEstimator;
  }
  
  /**
   * Get the estimator used as the estimator
   *
   * @return the estimator used as the estimator
   */
00578   public Estimator getEstimator() {
    return m_Estimator;
  }
  
  /**
   * prints the given message to stdout, if not silent mode
   * 
   * @param msg         the text to print to stdout
   */
00587   protected void print(Object msg) {
    if (!getSilent())
      System.out.print(msg);
  }
  
  /**
   * prints the given message (+ LF) to stdout, if not silent mode
   * 
   * @param msg         the message to println to stdout
   */
00597   protected void println(Object msg) {
    print(msg + "\n");
  }
  
  /**
   * prints a LF to stdout, if not silent mode
   */
00604   protected void println() {
    print("\n");
  }
  
  /**
   * Run a battery of tests for a given class attribute type
   *
   * @param classType true if the class attribute should be numeric
   * @param estTypes types the estimator is, like incremental, weighted, supervised etc
   * @return attribute types estimator can work with
   */
00615   protected AttrTypes testsPerClassType(int classType, EstTypes estTypes) {
    
    // in none of the estimators yet is the estimation depending on the class type
    // since this could change the basic structure taken from checkclassifiers is kept here
    
    // test A: simple test - if can estimate
    AttrTypes attrTypes = new AttrTypes();
    AttrTypes at = new AttrTypes(Attribute.NOMINAL);
    attrTypes.nominal = canEstimate(at, estTypes.supervised, classType)[0];
    at = new AttrTypes(Attribute.NUMERIC);
    attrTypes.numeric = canEstimate(at, estTypes.supervised, classType)[0];
    attrTypes.string = false;
    attrTypes.date = false;
    attrTypes.relational = false;
    
//  if (!multiInstance)
//  PRel = canEstimate(false, false, false, false,  true, classType)[0];
//  else
//  PRel = false;
    
//  one of the attribute types succeeded
    
    if (attrTypes.oneIsSet()) {
      Vector attributesSet = attrTypes.getVectorOfAttrTypes();
      
      // make tests for each attribute
      for (int i = 0; i < attributesSet.size(); i++) {
        AttrTypes workAttrTypes = new AttrTypes(((Integer) attributesSet.elementAt(i)).intValue());
        
        // test B: weights change estimate or not
        if (estTypes.weighted)
          instanceWeights(workAttrTypes, classType);
        
        if (classType == Attribute.NOMINAL) {
          int numClasses = 4;
          canHandleNClasses(workAttrTypes, numClasses);
        }
        
        // tests with class not the last attribute and the attribute not the first
        
        //   if (!multiInstance) {
        int numAtt = 4; 
        
        canHandleClassAsNthAttribute(workAttrTypes, numAtt, 0, classType, 1);
        
        //TODOTODOcanHandleAttrAsNthAttribute(workAttrTypes, numAtt, 2, classType);
        //}
        
        canHandleZeroTraining(workAttrTypes, classType);
        boolean handleMissingAttributes = canHandleMissing(workAttrTypes, 
            classType, true, false, 20)[0];
        if (handleMissingAttributes)
          canHandleMissing(workAttrTypes, classType, true, false, 100);
        
        boolean handleMissingClass = canHandleMissing(workAttrTypes, 
            classType, 
            false, true, 20)[0];
        if (handleMissingClass)
          canHandleMissing(workAttrTypes, classType, false, true, 100);
        
        correctBuildInitialisation(workAttrTypes, classType);
        datasetIntegrity(workAttrTypes, classType,
            handleMissingAttributes, handleMissingClass);
        
        if (estTypes.incremental)
          incrementingEquality(workAttrTypes, classType);
      }
    }
    return attrTypes;
  }
  
  /**
   * Checks whether the scheme can take command line options.
   *
   * @return index 0 is true if the estimator can take options
   */
00691   protected boolean[] canTakeOptions() {
    
    boolean[] result = new boolean[2];
    
    print("options...");
    if (m_Estimator instanceof OptionHandler) {
      println("yes");
      if (m_Debug) {
        println("\n=== Full report ===");
        Enumeration enu = ((OptionHandler)m_Estimator).listOptions();
        while (enu.hasMoreElements()) {
          Option option = (Option) enu.nextElement();
          print(option.synopsis() + "\n" 
              + option.description() + "\n");
        }
        println("\n");
      }
      result[0] = true;
    }
    else {
      println("no");
      result[0] = false;
    }
    
    return result;
  }
  
  /**
   * Checks whether the scheme can build models incrementally.
   *
   * @return index 0 is true if the estimator can train incrementally
   */
00723   protected boolean[] incrementalEstimator() {
    
    boolean[] result = new boolean[2];
    
    print("incremental estimator...");
    if (m_Estimator instanceof IncrementalEstimator) {
      println("yes");
      result[0] = true;
    }
    else {
      println("no");
      result[0] = false;
    }
    
    return result;
  }
  
  /**
   * Checks whether the scheme says it can handle instance weights.
   *
   * @return true if the estimator handles instance weights
   */
00745   protected boolean[] weightedInstancesHandler() {
    
    boolean[] result = new boolean[2];
    
    print("weighted instances estimator...");
    if (m_Estimator instanceof WeightedInstancesHandler) {
      println("yes");
      result[0] = true;
    }
    else {
      println("no");
      result[0] = false;
    }
    
    return result;
  }

  /**
   * Checks whether the estimator is supervised.
   *
   * @return true if the estimator handles instance weights
   */
00767   protected boolean[] supervisedEstimator() {
    boolean[] result = new boolean[2];
    result[0] = false;
    return result;
  }

  /**
   * Checks basic estimation of one attribute of the scheme, for simple non-troublesome
   * datasets.
   *
   * @param attrTypes the types the estimator can work with
   * @param classType the class type (NOMINAL, NUMERIC, etc.)
   * @return index 0 is true if the test was passed, index 1 is true if test 
   *         was acceptable
   */
00782   protected boolean[] canEstimate(AttrTypes attrTypes, boolean supervised, int classType) {
    
  // supervised is ignored, no supervised estimators used yet
    
    print("basic estimation");
    printAttributeSummary(attrTypes, classType);
    print("...");
    FastVector accepts = new FastVector();
    accepts.addElement("nominal");
    accepts.addElement("numeric");
    accepts.addElement("string");
    accepts.addElement("date");
    accepts.addElement("relational");
    accepts.addElement("not in classpath");
    int numTrain = getNumInstances(), numTest = getNumInstances(), 
    numClasses = 2, missingLevel = 0;
    boolean attributeMissing = false, classMissing = false;
    int numAtts = 1, attrIndex = 0;

    return runBasicTest(attrTypes, numAtts, attrIndex,
                  classType, 
                  missingLevel, attributeMissing, classMissing,
                  numTrain, numTest, numClasses, 
                  accepts);
  }
  
  /**
   * Checks basic estimation of one attribute of the scheme, for simple non-troublesome
   * datasets.
   *
   * @param attrTypes the types the estimator can work with
   * @param classType the class type (NOMINAL, NUMERIC, etc.)
    */
00815   protected void canSplitUpClass(AttrTypes attrTypes, int classType) {
    
    if (attrTypes.nominal)
      canSplitUpClass(Attribute.NOMINAL, classType);
    if (attrTypes.numeric)
      canSplitUpClass(Attribute.NUMERIC, classType);
  }
  
  /**
   * Checks basic estimation of one attribute of the scheme, for simple non-troublesome
   * datasets.
   *
   * @param attrType the type of the estimator
   * @param classType the class type (NOMINAL, NUMERIC, etc.)
   * @return index 0 is true if the test was passed, index 1 is true if test 
   *         was acceptable
   */
00832   protected boolean[] canSplitUpClass(int attrType, int classType) {
    
    boolean[] result = new boolean[2];

    FastVector accepts = new FastVector();
    accepts.addElement("not in classpath");

    // supervised is ignored, no supervised estimators used yet
    print("split per class type ");
    printAttributeSummary(attrType, Attribute.NOMINAL);
    print("...");
      
    int numTrain = getNumInstances(), numTest = getNumInstances(), 
    numClasses = 2;
    boolean attributeMissing = false, classMissing = false;
    int numAtts = 3, attrIndex = 0, classIndex = 1;
    Instances train = null;
    Vector test;
    Estimator estimator = null;
    boolean built = false;
    
    try {
      AttrTypes at = new AttrTypes(attrType);
      train = makeTestDataset(42, numTrain, numAtts, at,
          numClasses, classType, classIndex);
      
       // prepare training data set and test value list
      test = makeTestValueList(24, numTest, train, attrIndex,
          attrType);
      
       estimator = Estimator.makeCopies(getEstimator(), 1)[0];
    } catch (Exception ex) {
      ex.printStackTrace();
      throw new Error("Error setting up for tests: " + ex.getMessage());
    }
    try {
      estimator.addValues(train, attrIndex, classType, classIndex);
      built = true;
      
      testWithTestValues(estimator, test);
      
      println("yes");
      result[0] = true;
    } 
    catch (Exception ex) {
      boolean acceptable = false;
      String msg;
      if (ex.getMessage() == null)
        msg = "";
      else
        msg = ex.getMessage().toLowerCase();
      if (msg.indexOf("not in classpath") > -1)
        m_ClasspathProblems = true;
      
      for (int i = 0; i < accepts.size(); i++) {
        if (msg.indexOf((String)accepts.elementAt(i)) >= 0) {
          acceptable = true;
        }
      }
      
      println("no" + (acceptable ? " (OK error message)" : ""));
      result[1] = acceptable;
      
      
      if (m_Debug) {
        println("\n=== Full Report ===");
        print("Problem during");
        if (built) {
          print(" testing");
        } else {
          print(" training");
        }
        println(": " + ex.getMessage() + "\n");
        if (!acceptable) {
          if (accepts.size() > 0) {
            print("Error message doesn't mention ");
            for (int i = 0; i < accepts.size(); i++) {
              if (i != 0) {
                print(" or ");
              }
              print('"' + (String)accepts.elementAt(i) + '"');
            }
          }
          println("here are the datasets:\n");
          println("=== Train Dataset ===\n"
              + train.toString() + "\n");
          println("=== Test Dataset ===\n"
              + test.toString() + "\n\n");
        }
        
      }
    }
    return result;
   }
  
  /**
   * Checks whether nominal schemes can handle more than two classes.
   * If a scheme is only designed for two-class problems it should
   * throw an appropriate exception for multi-class problems.
   *
   * @param attrTypes attribute types the estimator excepts 
   * @param numClasses the number of classes to test
   * @return index 0 is true if the test was passed, index 1 is true if test 
   *         was acceptable
   */
00937   protected boolean[] canHandleNClasses(AttrTypes attrTypes, int numClasses) {
    
    print("more than two class problems");
    printAttributeSummary(attrTypes, Attribute.NOMINAL);
    print("...");

    FastVector accepts = new FastVector();
    accepts.addElement("number");
    accepts.addElement("class");

    int numTrain = getNumInstances(), numTest = getNumInstances(), 
      missingLevel = 0;
    boolean attributeMissing = false, classMissing = false;
    int numAttr = 1, attrIndex = 0;

    return runBasicTest(attrTypes,
                        numAttr, attrIndex,
                        Attribute.NOMINAL,
                        missingLevel, attributeMissing, classMissing,
                        numTrain, numTest, numClasses, 
                        accepts);
  }
  
  /**
   * Checks whether the scheme can handle class attributes as Nth attribute.
   *
   * @param attrTypes the attribute types the estimator accepts
   * @param numAtts of attributes
   * @param attrIndex the index of the attribute
   * @param classType the class type (NUMERIC, NOMINAL, etc.)
   * @param classIndex the index of the class attribute (0-based, -1 means last attribute)
   * @return index 0 is true if the test was passed, index 1 is true if test 
   *         was acceptable
   * @see TestInstances#CLASS_IS_LAST
   */
00972   protected boolean[] canHandleClassAsNthAttribute(AttrTypes attrTypes,
                                       int numAtts,
                                       int attrIndex,
                                       int classType,
                                       int classIndex) {
    
    if (classIndex == TestInstances.CLASS_IS_LAST)
      print("class attribute as last attribute");
    else
      print("class attribute as " + (classIndex + 1) + ". attribute");
    printAttributeSummary(attrTypes, classType);
    print("...");
    FastVector accepts = new FastVector();
    int numTrain = getNumInstances(), numTest = getNumInstances(), numClasses = 2, 
    missingLevel = 0;
    boolean attributeMissing = false, classMissing = false;
    
    return runBasicTest(attrTypes,
                  numAtts, attrIndex,
                        classType, classIndex,
                        missingLevel, attributeMissing, classMissing,
                        numTrain, numTest, numClasses, 
                        accepts);
  }
  
  /**
   * Checks whether the scheme can handle zero training instances.
   *
   * @param attrTypes attribute types that can be estimated
   * @param classType the class type (NUMERIC, NOMINAL, etc.)
   * @return index 0 is true if the test was passed, index 1 is true if test 
   *         was acceptable
   */
01005   protected boolean[] canHandleZeroTraining(AttrTypes attrTypes, int classType) {
    
    print("handle zero training instances");
    printAttributeSummary(attrTypes, classType);

    print("...");
    FastVector accepts = new FastVector();
    accepts.addElement("train");
    accepts.addElement("value");
    int numTrain = 0, numTest = getNumInstances(), numClasses = 2, 
    missingLevel = 0;
    boolean attributeMissing = false, classMissing = false;
    int numAtts = 1;
    int attrIndex = 0;
    return runBasicTest(
              attrTypes, numAtts, attrIndex,
              classType, 
              missingLevel, attributeMissing, classMissing,
              numTrain, numTest, numClasses, 
              accepts);
  }
  
  /**
   * Checks whether the scheme correctly initialises models when 
   * buildEstimator is called. This test calls buildEstimator with
   * one training dataset and records performance on a test set. 
   * buildEstimator is then called on a training set with different
   * structure, and then again with the original training set. The
   * performance on the test set is compared with the original results
   * and any performance difference noted as incorrect build initialisation.
   *
   * @param attrTypes attribute types that can be estimated
   * @param classType the class type (NUMERIC, NOMINAL, etc.)
   * @return index 0 is true if the test was passed, index 1 is true if the
   *         scheme performs worse than ZeroR, but without error (index 0 is
   *         false)
   */
01042   protected boolean[] correctBuildInitialisation(AttrTypes attrTypes,
                                     int classType) {

    boolean[] result = new boolean[2];
    
    print("correct initialisation during buildEstimator");
    printAttributeSummary(attrTypes, classType);

    print("...");
    int numTrain = getNumInstances(), numTest = getNumInstances(), 
    numClasses = 2, missingLevel = 0;
    boolean attributeMissing = false, classMissing = false;
    
    Instances train1 = null;
    Instances test1 = null;
    Instances train2 = null;
    Instances test2 = null;
    Estimator estimator = null;
    Estimator estimator1 = null;
    
    boolean built = false;
    int stage = 0;
    int attrIndex1 = 1;
    int attrIndex2 = 2;

    try {
      
      // Make two sets of train/test splits with different 
      // numbers of attributes
      train1 = makeTestDataset(42, numTrain, 2, attrTypes,
                               numClasses, 
                               classType);
      train2 = makeTestDataset(84, numTrain, 3, attrTypes,
                               numClasses, 
                               classType);
      if (missingLevel > 0) {
        addMissing(train1, missingLevel, attributeMissing, classMissing, attrIndex1);
        addMissing(train2, missingLevel, attributeMissing, classMissing, attrIndex2);
      }
      
      estimator = Estimator.makeCopies(getEstimator(), 1)[0];
    } catch (Exception ex) {
      throw new Error("Error setting up for tests: " + ex.getMessage());
    }
    try {
      //TESTING??
      stage = 0;
      estimator.addValues(train1, attrIndex1);
      built = true;

      estimator1 = estimator.makeCopies(getEstimator(), 1)[0];
      
      stage = 1;
      built = false;
      estimator.addValues(train2, attrIndex2);
      built = true;
       
      stage = 2;
      built = false;
      estimator.addValues(train1, attrIndex1);
      built = true;
      
      stage = 3;
      if (!estimator.equals(estimator1)) {
        if (m_Debug) {
          println("\n=== Full report ===\n"
              + "\nFirst build estimator\n"+
                  estimator.toString() + "\n\n");
          println("\nSecond build estimator\n"+
              estimator.toString() + "\n\n");
      }
        throw new Exception("Results differ between buildEstimator calls");
      }
      println("yes");
      result[0] = true;
      
      if (false && m_Debug) {
        println("\n=== Full report ===\n"
            + "\nFirst buildEstimator()"
                + "\n\n");
        println("\nSecond buildEstimator()" 
            + "\n\n");
      }
    }
    catch (Exception ex) {
      String msg = ex.getMessage().toLowerCase();
      if (msg.indexOf("worse than zeror") >= 0) {
        println("warning: performs worse than ZeroR");
        result[0] = true;
        result[1] = true;
      } else {
        println("no");
        result[0] = false;
      }
      if (m_Debug) {
        println("\n=== Full Report ===");
        print("Problem during");
        if (built) {
          print(" testing");
        } else {
          print(" training");
        }
        switch (stage) {
          case 0:
            print(" of dataset 1");
            break;
          case 1:
            print(" of dataset 2");
            break;
          case 2:
            print(" of dataset 1 (2nd build)");
            break;
          case 3:
            print(", comparing results from builds of dataset 1");
            break;        
        }
        println(": " + ex.getMessage() + "\n");
        println("here are the datasets:\n");
        println("=== Train1 Dataset ===\n"
            + train1.toString() + "\n");
        println("=== Test1 Dataset ===\n"
            + test1.toString() + "\n\n");
        println("=== Train2 Dataset ===\n"
            + train2.toString() + "\n");
        println("=== Test2 Dataset ===\n"
            + test2.toString() + "\n\n");
      }
    }
    
    return result;
  }
  
  /**
   * Checks basic missing value handling of the scheme. If the missing
   * values cause an exception to be thrown by the scheme, this will be
   * recorded.
   *
   * @param attrTypes attribute types that can be estimated
   * @param classType the class type (NUMERIC, NOMINAL, etc.)
   * @param attributeMissing true if the missing values may be in 
   * the attributes
   * @param classMissing true if the missing values may be in the class
   * @param missingLevel the percentage of missing values
   * @return index 0 is true if the test was passed, index 1 is true if test 
   *         was acceptable
   */
01188   protected boolean[] canHandleMissing(AttrTypes attrTypes,
                               int classType,
                               boolean attributeMissing,
                               boolean classMissing,
                               int missingLevel) {
    
    if (missingLevel == 100)
      print("100% ");
    print("missing");
    if (attributeMissing) {
      print(" attribute");
      if (classMissing)
        print(" and");
    }
    if (classMissing)
      print(" class");
    print(" values");
    printAttributeSummary(attrTypes, classType);

    print("...");
    FastVector accepts = new FastVector();
    accepts.addElement("missing");
    accepts.addElement("value");
    accepts.addElement("train");
    int numTrain = getNumInstances(), numTest = getNumInstances(), 
    numClasses = 2;
    
    int numAtts = 1, attrIndex = 0;
    return runBasicTest(attrTypes,
                  numAtts, attrIndex,
                  classType, 
                  missingLevel, attributeMissing, classMissing,
                  numTrain, numTest, numClasses, 
                  accepts);
  }
  
  /**
   * Checks whether an incremental scheme produces the same model when
   * trained incrementally as when batch trained. The model itself
   * cannot be compared, so we compare the evaluation on test data
   * for both models. It is possible to get a false positive on this
   * test (likelihood depends on the estimator).
   *
   * @param attrTypes attribute types that can be estimated
   * @param classType the class type (NUMERIC, NOMINAL, etc.)
   * @return index 0 is true if the test was passed
   */
01235   protected boolean[] incrementingEquality(AttrTypes attrTypes,
                                 int classType) {
    
    print("incremental training produces the same results"
        + " as batch training");
    printAttributeSummary(attrTypes, classType);

    print("...");
    int numTrain = getNumInstances(), numTest = getNumInstances(), 
    numClasses = 2, missingLevel = 0;
    boolean attributeMissing = false, classMissing = false;
    
    boolean[] result = new boolean[2];
    Instances train = null;
    Estimator [] estimators = null;
    boolean built = false;
    int attrIndex = 0;
    Vector test;
    try {
      train = makeTestDataset(42, numTrain, 1, attrTypes,
                              numClasses, 
                              classType
                              );

        // prepare training data set and test value list
      test = makeTestValueList(24, numTest, train, attrIndex,
                         attrTypes.getSetType());

      if (missingLevel > 0) {
        addMissing(train, missingLevel, attributeMissing, classMissing, attrIndex);
      }
      estimators = Estimator.makeCopies(getEstimator(), 2);
      estimators[0].addValues(train, attrIndex);
    } catch (Exception ex) {
      throw new Error("Error setting up for tests: " + ex.getMessage());
    }
    try {
      for (int i = 0; i < train.numInstances(); i++) {
        ((IncrementalEstimator)estimators[1]).addValue(train.instance(i).value(attrIndex), 1.0);
      }
      built = true;
      if (!estimators[0].equals(estimators[1])) {
        println("no");
        result[0] = false;
       
        if (m_Debug) {
          println("\n=== Full Report ===");
          println("Results differ between batch and "
              + "incrementally built models.\n"
              + "Depending on the estimator, this may be OK");
          println("Here are the results:\n");
          println("batch built results\n" + estimators[0].toString());
          println("incrementally built results\n" + estimators[1].toString());
          println("Here are the datasets:\n");
          println("=== Train Dataset ===\n"
              + train.toString() + "\n");
          println("=== Test Dataset ===\n"
              + test.toString() + "\n\n");
        }
      }
      else {
        println("yes");
        result[0] = true;
      }
    } catch (Exception ex) {
      result[0] = false;
      
      print("Problem during");
      if (built)
        print(" testing");
      else
        print(" training");
      println(": " + ex.getMessage() + "\n");
    }
    
    return result;
  }
  
  
  /**
   * Checks whether the estimator can handle instance weights.
   * This test compares the estimator performance on two datasets
   * that are identical except for the training weights. If the 
   * results change, then the estimator must be using the weights. It
   * may be possible to get a false positive from this test if the 
   * weight changes aren't significant enough to induce a change
   * in estimator performance (but the weights are chosen to minimize
   * the likelihood of this).
   *
   * @param attrTypes attribute types that can be estimated
   * @param classType the class type (NUMERIC, NOMINAL, etc.)
   * @return index 0 true if the test was passed
   */
01328   protected boolean[] instanceWeights(AttrTypes attrTypes,
                              int classType) {
    
    print("estimator uses instance weights");
    printAttributeSummary(attrTypes, classType);

    print("...");

    int numTrain = 2 * getNumInstances(), numTest = getNumInstances(), 
      numClasses = 2, missingLevel = 0;
    boolean attributeMissing = false, classMissing = false;
    
    boolean[] result = new boolean[2];
    Instances train = null;
    Vector test = null;
    Estimator [] estimators = null;
    
    Vector resultProbsO = null;
    Vector resultProbsW = null;
    boolean built = false;
    boolean evalFail = false;
    int attrIndex = 0;
    try {
      train = makeTestDataset(42, numTrain, 1, 
                              attrTypes, numClasses, 
                              classType);
  
      // prepare training data set and test value list
      test = makeTestValueList(24, numTest, train, attrIndex,
                         attrTypes.getSetType());

      if (missingLevel > 0) {
        addMissing(train, missingLevel, attributeMissing, classMissing, attrIndex);
      }

      estimators = Estimator.makeCopies(getEstimator(), 2);

      estimators[0].addValues(train, attrIndex);
      resultProbsO = testWithTestValues(estimators[0], test);

    } catch (Exception ex) {
      throw new Error("Error setting up for tests: " + ex.getMessage());
    }
    try {
            
      // Now modify instance weights and re-built
      for (int i = 0; i < train.numInstances(); i++) {
        train.instance(i).setWeight(0);
      }
      Random random = new Random(1);
      for (int i = 0; i < train.numInstances() / 2; i++) {
        int inst = Math.abs(random.nextInt()) % train.numInstances();
        int weight = Math.abs(random.nextInt()) % 10 + 1;
        train.instance(inst).setWeight(weight);
      }
      estimators[1].addValues(train, attrIndex);
      resultProbsW = testWithTestValues(estimators[1], test);

      built = true;
      if (resultProbsO.equals(resultProbsW)) {
        //  println("no");
        evalFail = true;
        throw new Exception("evalFail");
      }
      
      println("yes");
      result[0] = true;
    } catch (Exception ex) {
      println("no");
      result[0] = false;
      
      if (m_Debug) {
        println("\n=== Full Report ===");
        
        if (evalFail) {
          println("Results don't differ between non-weighted and "
              + "weighted instance models.");
          println("Here are the results:\n");
          println(probsToString(resultProbsO));
        } else {
          print("Problem during");
          if (built) {
            print(" testing");
          } else {
            print(" training");
          }
          println(": " + ex.getMessage() + "\n");
        }
        println("Here are the datasets:\n");
        println("=== Train Dataset ===\n"
            + train.toString() + "\n");
        println("=== Train Weights ===\n");
        for (int i = 0; i < train.numInstances(); i++) {
          println(" " + (i + 1) 
              + "    " + train.instance(i).weight());
        }
        println("=== Test Dataset ===\n"
            + test.toString() + "\n\n");  
        println("(test weights all 1.0\n");
      }
    }
    
    return result;
  }
  
  /**
   * Checks whether the scheme alters the training dataset during
   * training. If the scheme needs to modify the training
   * data it should take a copy of the training data. Currently checks
   * for changes to header structure, number of instances, order of
   * instances, instance weights.
   *
   * @param attrTypes attribute types that can be estimated
   * @param classType the class type (NUMERIC, NOMINAL, etc.)
   * @param attributeMissing true if we know the estimator can handle
   * (at least) moderate missing attribute values
   * @param classMissing true if we know the estimator can handle
   * (at least) moderate missing class values
   * @return index 0 is true if the test was passed
   */
01448   protected boolean[] datasetIntegrity(AttrTypes attrTypes,
                               int classType,
                               boolean attributeMissing,
                               boolean classMissing) {
    
    Estimator estimator = null;
    print("estimator doesn't alter original datasets");
    printAttributeSummary(attrTypes, classType);
    print("...");
    int numTrain = getNumInstances(), numTest = getNumInstances(), 
    numClasses = 2, missingLevel = 100;
    
    boolean[] result = new boolean[2];
    Instances train = null;
     boolean built = false;
    try {
      train = makeTestDataset(42, numTrain, 1, attrTypes,
                              numClasses, 
                              classType);
      int attrIndex = 0;
 
      if (missingLevel > 0) {
        addMissing(train, missingLevel, attributeMissing, classMissing, attrIndex);
      }
      estimator = Estimator.makeCopies(getEstimator(), 1)[0];
    } catch (Exception ex) {
      throw new Error("Error setting up for tests: " + ex.getMessage());
    }
    try {
      Instances trainCopy = new Instances(train);
      int attrIndex = 0;
      estimator.addValues(trainCopy, attrIndex);
      compareDatasets(train, trainCopy);
      built = true;
      
      println("yes");
      result[0] = true;
    } catch (Exception ex) {
      println("no");
      result[0] = false;
      
      if (m_Debug) {
        println("\n=== Full Report ===");
        print("Problem during");
        if (built) {
          print(" testing");
        } else {
          print(" training");
        }
        println(": " + ex.getMessage() + "\n");
        println("Here are the datasets:\n");
        println("=== Train Dataset ===\n"
            + train.toString() + "\n");
      }
    }
    
    return result;
  }
  
  /**
   * Runs a text on the datasets with the given characteristics.
   * 
   * @param attrTypes attribute types that can be estimated
   * @param numAtts number of attributes
   * @param attrIndex attribute index 
   * @param classType the class type (NUMERIC, NOMINAL, etc.)
   * @param missingLevel the percentage of missing values
   * @param attributeMissing true if the missing values may be in 
   * the attributes
   * @param classMissing true if the missing values may be in the class
   * @param numTrain the number of instances in the training set
   * @param numTest the number of instaces in the test set
   * @param numClasses the number of classes
   * @param accepts the acceptable string in an exception
   * @return index 0 is true if the test was passed, index 1 is true if test 
   *         was acceptable
   */
01525   protected boolean[] runBasicTest(AttrTypes attrTypes,
                           int numAtts,
                           int attrIndex,
                           int classType,
                           int missingLevel,
                           boolean attributeMissing,
                           boolean classMissing,
                           int numTrain,
                           int numTest,
                           int numClasses,
                           FastVector accepts) {
    
    return runBasicTest(attrTypes,
                  numAtts,
                  attrIndex,
                  classType, 
                  TestInstances.CLASS_IS_LAST,
                  missingLevel,
                  attributeMissing,
                  classMissing,
                  numTrain,
                  numTest,
                  numClasses,
            accepts);
  }
  
  /**
   * Runs a text on the datasets with the given characteristics.
   * 
   * @param attrTypes attribute types that can be estimated
   * @param numAtts number of attributes
   * @param classType the class type (NUMERIC, NOMINAL, etc.)
   * @param classIndex the attribute index of the class
   * @param missingLevel the percentage of missing values
   * @param attributeMissing true if the missing values may be in 
   * the attributes
   * @param classMissing true if the missing values may be in the class
   * @param numTrain the number of instances in the training set
   * @param numTest the number of instaces in the test set
   * @param numClasses the number of classes
   * @param accepts the acceptable string in an exception
   * @return index 0 is true if the test was passed, index 1 is true if test 
   *         was acceptable
   */
01569   protected boolean[] runBasicTest(AttrTypes attrTypes,
                           int numAtts,
                           int attrIndex,
                           int classType,
                           int classIndex,
                           int missingLevel,
                           boolean attributeMissing,
                           boolean classMissing,
                           int numTrain,
                           int numTest,
                           int numClasses,
                           FastVector accepts) {
    
    boolean[] result = new boolean[2];
    Instances train = null;
    Vector test = null;
    Estimator estimator = null;
    boolean built = false;
   
    try {
      train = makeTestDataset(42, numTrain, numAtts, attrTypes,
          numClasses, 
          classType,
          classIndex);
            
      // prepare training data set and test value list
      if (numTrain > 0) {
        test = makeTestValueList(24, numTest, train, attrIndex,
            attrTypes.getSetType());
     
      } else {
        double min = -10.0;
        double max = 8.0;
        test = makeTestValueList(24, numTest, min, max,
            attrTypes.getSetType());
     }
      
      if (missingLevel > 0) {
        addMissing(train, missingLevel, attributeMissing, classMissing, attrIndex);
      }
      estimator = Estimator.makeCopies(getEstimator(), 1)[0];
    } catch (Exception ex) {
      ex.printStackTrace();
      throw new Error("Error setting up for tests: " + ex.getMessage());
    }
    try {
      estimator.addValues(train, attrIndex);
      built = true;
      
      testWithTestValues(estimator, test);
      
      println("yes");
      result[0] = true;
    } 
    catch (Exception ex) {
      boolean acceptable = false;
      String msg;
      if (ex.getMessage() == null)
        msg = "";
      else
        msg = ex.getMessage().toLowerCase();
      if (msg.indexOf("not in classpath") > -1)
        m_ClasspathProblems = true;
      
      for (int i = 0; i < accepts.size(); i++) {
        if (msg.indexOf((String)accepts.elementAt(i)) >= 0) {
          acceptable = true;
        }
      }
      
      println("no" + (acceptable ? " (OK error message)" : ""));
      result[1] = acceptable;
      
      
      if (m_Debug) {
        println("\n=== Full Report ===");
        print("Problem during");
        if (built) {
          print(" testing");
        } else {
          print(" training");
        }
        println(": " + ex.getMessage() + "\n");
        if (!acceptable) {
          if (accepts.size() > 0) {
            print("Error message doesn't mention ");
            for (int i = 0; i < accepts.size(); i++) {
              if (i != 0) {
                print(" or ");
              }
              print('"' + (String)accepts.elementAt(i) + '"');
            }
          }
          println("here are the datasets:\n");
          println("=== Train Dataset ===\n"
              + train.toString() + "\n");
          println("=== Test Dataset ===\n"
              + test.toString() + "\n\n");
        }
        
      }
    }
    return result;
  }
  
  /**
   * Compare two datasets to see if they differ.
   *
   * @param data1 one set of instances
   * @param data2 the other set of instances
   * @throws Exception if the datasets differ
   */
01681   protected void compareDatasets(Instances data1, Instances data2)
  throws Exception {
    if (!data2.equalHeaders(data1)) {
      throw new Exception("header has been modified");
    }
    if (!(data2.numInstances() == data1.numInstances())) {
      throw new Exception("number of instances has changed");
    }
    for (int i = 0; i < data2.numInstances(); i++) {
      Instance orig = data1.instance(i);
      Instance copy = data2.instance(i);
      for (int j = 0; j < orig.numAttributes(); j++) {
        if (orig.isMissing(j)) {
          if (!copy.isMissing(j)) {
            throw new Exception("instances have changed");
          }
        } else if (orig.value(j) != copy.value(j)) {
          throw new Exception("instances have changed");
        }
        if (orig.weight() != copy.weight()) {
          throw new Exception("instance weights have changed");
        }     
      }
    }
  }
  
  /**
   * Add missing values to a dataset.
   *
   * @param data the instances to add missing values to
   * @param level the level of missing values to add (if positive, this
   * is the probability that a value will be set to missing, if negative
   * all but one value will be set to missing (not yet implemented))
   * @param attributeMissing if true, attributes will be modified
   * @param classMissing if true, the class attribute will be modified
   * @param attrIndex index of the attribute
   */
01718   protected void addMissing(Instances data, int level,
                      boolean attributeMissing, boolean classMissing,
                      int attrIndex) {
    
    int classIndex = data.classIndex();
    Random random = new Random(1);
    for (int i = 0; i < data.numInstances(); i++) {
      Instance current = data.instance(i);

      for (int j = 0; j < data.numAttributes(); j++) {
        if (((j == classIndex) && classMissing) ||
            ((j == attrIndex) && attributeMissing)) {
          if (Math.abs(random.nextInt()) % 100 < level)
            current.setMissing(j);
        }
      }
    }
  }
  
  /**
   * Make a simple set of instances, which can later be modified
   * for use in specific tests.
   *
   * @param seed the random number seed
   * @param numInstances the number of instances to generate
   * @param numAttr the number of attributes
   * @param attrTypes the attribute types
   * @param numClasses the number of classes (if nominal class)
   * @param classType the class type (NUMERIC, NOMINAL, etc.)
   * @return the test dataset
   * @throws Exception if the dataset couldn't be generated
   * @see #process(Instances)
   */
01751   protected Instances makeTestDataset(int seed, 
                              int numInstances, 
                              int numAttr,
                              AttrTypes attrTypes,
                              int numClasses, 
                              int classType)
    throws Exception {
    
    return makeTestDataset(
                     seed,
                     numInstances,
                     numAttr,
                     attrTypes,
                     numClasses, 
                     classType,
                     TestInstances.CLASS_IS_LAST);
  }


  /**
   * Make a simple set of instances with variable position of the class 
   * attribute, which can later be modified for use in specific tests.
   *
   * @param seed the random number seed
   * @param numInstances the number of instances to generate
   * @param numAttr the number of attributes to generate
   * @param attrTypes the type of attrbute that is excepted
   * @param numClasses the number of classes (if nominal class)
   * @param classType the class type (NUMERIC, NOMINAL, etc.)
   * @param classIndex the index of the class (0-based, -1 as last)
   * @return the test dataset
   * @throws Exception if the dataset couldn't be generated
   * @see TestInstances#CLASS_IS_LAST
   * @see #process(Instances)
   */
01786   protected Instances makeTestDataset(int seed, int numInstances, 
                              int numAttr, AttrTypes attrTypes,
                              int numClasses, int classType,
                              int classIndex)
    throws Exception {
    
    TestInstances dataset = new TestInstances();
    
    dataset.setSeed(seed);
    dataset.setNumInstances(numInstances);
    dataset.setNumNominal   (attrTypes.nominal     ? numAttr : 0);
    dataset.setNumNumeric   (attrTypes.numeric     ? numAttr : 0);
    dataset.setNumString    (attrTypes.string      ? numAttr : 0);
    dataset.setNumDate      (attrTypes.date        ? numAttr : 0);
    dataset.setNumRelational(attrTypes.relational  ? numAttr : 0);
    dataset.setNumClasses(numClasses);
    dataset.setClassType(classType);
    dataset.setClassIndex(classIndex);
    
    return process(dataset.generate());
  }

  /**
   * Make a simple set of values. Only one of the num'type' parameters should be larger 0.
   * (just to make parameter similar to the makeTestDataset parameters)
   *
   * @param seed the random number seed
   * @param numValues the number of values to generate
   * @param data the dataset to make test examples for
   * @param attrIndex index of the attribute
   * @param attrType the class type (NUMERIC, NOMINAL, etc.)
   * @throws Exception if the dataset couldn't be generated
   * @see #process(Instances)
   */
01820   protected Vector makeTestValueList(int seed, int numValues, 
      Instances data, int attrIndex, int attrType)
  throws Exception {
    
    // get min max
    double []minMax = getMinimumMaximum(data, attrIndex);
    double minValue = minMax[0];
    double maxValue = minMax[1];
    
    // make value list and put into a VECTOR
    double range = maxValue - minValue; 
    Vector values = new Vector(numValues); 
    Random random = new Random(seed);
    
    if (attrType == Attribute.NOMINAL) {
      for (int i = 0; i < numValues; i++) {
        Double v = new Double((Math.abs(random.nextInt()) % (int)range)+ (int)minValue);
        values.add(v);
      }
    }
    if (attrType == Attribute.NUMERIC) {
      for (int i = 0; i < numValues; i++) {
        Double v = new Double(random.nextDouble() * range + minValue);
        values.add(v);
      }
    }
    return values;
  }

  /**
   * Make a simple set of values. Only one of the num'type' parameters should be larger 0.
   * (just to make parameter similar to the makeTestDataset parameters)
   *
   * @param seed the random number seed
   * @param numValues the number of values to generate
   * @param minValue the minimal data value
   * @param maxValue the maximal data value
   * @param attrType the class type (NUMERIC, NOMINAL, etc.)
   * @throws Exception if the dataset couldn't be generated
   * @see #process(Instances)
   */
01861   protected Vector makeTestValueList(int seed, int numValues, 
      double minValue, double maxValue, int attrType)
  throws Exception {
    
      
    // make value list and put into a VECTOR
    double range = maxValue - minValue; 
    Vector values = new Vector(numValues); 
    Random random = new Random(seed);
    
    if (attrType == Attribute.NOMINAL) {
      for (int i = 0; i < numValues; i++) {
        Double v = new Double((Math.abs(random.nextInt()) % (int)range)+ (int)minValue);
        values.add(v);
      }
    }
    if (attrType == Attribute.NUMERIC) {
      for (int i = 0; i < numValues; i++) {
        Double v = new Double(random.nextDouble() * range + minValue);
        values.add(v);
      }
    }
    return values;
  }

  /**
   * Test with test values.
   *
   * @param est estimator to be tested
   * @param test vector with test values
   *
   **/
01893   protected Vector testWithTestValues(Estimator est, Vector test) {
    
    Vector results = new Vector();
    for (int i = 0; i < test.size(); i++) {
      double testValue = ((Double)(test.elementAt(i))).doubleValue();
      double prob = est.getProbability(testValue);
      Double p = new Double(prob);
      results.add(p);
    }
    return results;
  }

  /**
   * Gets the minimum and maximum of the values a the first attribute
   * of the given data set
   *
   * @param inst the instance
   * @param attrIndex the index of the attribut to find min and max
   * @return the array with the minimum value on index 0 and the max on index 1
   */
  
01914   protected double[] getMinimumMaximum(Instances inst, int attrIndex) {
    double []minMax = new double[2];
    
    try {
      int num = getMinMax(inst, attrIndex, minMax);
    } catch (Exception ex) {
      ex.printStackTrace();
      System.out.println(ex.getMessage());
    }
    return minMax;
    //      double minValue = minMax[0];
    //      double maxValue = minMax[1];
  }
  
  /** 
   * Find the minimum and the maximum of the attribute and return it in 
   * the last parameter..
   * @param inst instances used to build the estimator
   * @param attrIndex index of the attribute
   * @param minMax the array to return minimum and maximum in
   * @return number of not missing values
   * @exception Exception if parameter minMax wasn't initialized properly
   */
01937   public static int getMinMax(Instances inst, int attrIndex, double [] minMax) 
    throws Exception {
    double min = Double.NaN;
    double max = Double.NaN;
    Instance instance = null;
    int numNotMissing = 0;
    if ((minMax == null) || (minMax.length < 2)) {
      throw new Exception("Error in Program, privat method getMinMax");
    }
    
    Enumeration enumInst = inst.enumerateInstances();
    if (enumInst.hasMoreElements()) {
      do {
      instance = (Instance) enumInst.nextElement();
      } while (instance.isMissing(attrIndex) && (enumInst.hasMoreElements()));
      
      // add values if not  missing
      if (!instance.isMissing(attrIndex)) {
      numNotMissing++;
      min = instance.value(attrIndex);
      max = instance.value(attrIndex);
      }
      while (enumInst.hasMoreElements()) {
      instance = (Instance) enumInst.nextElement();
      if (!instance.isMissing(attrIndex)) {
        numNotMissing++;
        if (instance.value(attrIndex) < min) {
          min = (instance.value(attrIndex));
        } else {
          if (instance.value(attrIndex) > max) {            
            max = (instance.value(attrIndex));
          }
        }
      }
      }
    }
    minMax[0] = min;
    minMax[1] = max;
    return numNotMissing;
  }

  /**
   * Print the probabilities after testing
   * @param probs vector with probability values
   * @return string with probability values printed
   */ 
01983   private String probsToString(Vector probs) {
    StringBuffer txt = new StringBuffer (" ");
    for (int i = 0; i < probs.size(); i++) {
      txt.append("" + ((Double)(probs.elementAt(i))).doubleValue() + " ");
    }
    return txt.toString();
  }
  
  /**
   * Provides a hook for derived classes to further modify the data. 
   * 
   * @param data  the data to process
   * @return            the processed data
   * @see #m_PostProcessor
   */
01998   protected Instances process(Instances data) {
    if (getPostProcessor() == null)
      return data;
    else
      return getPostProcessor().process(data);
  }
  
  /**
   * Print out a short summary string for the dataset characteristics
   *
   * @param attrTypes the attribute types used (NUMERIC, NOMINAL, etc.)
   * @param classType the class type (NUMERIC, NOMINAL, etc.)
   */
02011   protected void printAttributeSummary(AttrTypes attrTypes, int classType) {
    
    String str = "";
    
    if (attrTypes.numeric)
      str += " numeric";
    
    if (attrTypes.nominal) {
      if (str.length() > 0)
        str += " &";
      str += " nominal";
    }
    
    if (attrTypes.string) {
      if (str.length() > 0)
        str += " &";
      str += " string";
    }
    
    if (attrTypes.date) {
      if (str.length() > 0)
        str += " &";
      str += " date";
    }
    
    if (attrTypes.relational) {
      if (str.length() > 0)
        str += " &";
      str += " relational";
    }
    
    str += " attributes)";
    
    switch (classType) {
      case Attribute.NUMERIC:
        str = " (numeric class," + str;
        break;
      case Attribute.NOMINAL:
        str = " (nominal class," + str;
        break;
      case Attribute.STRING:
        str = " (string class," + str;
        break;
      case Attribute.DATE:
        str = " (date class," + str;
        break;
      case Attribute.RELATIONAL:
        str = " (relational class," + str;
        break;
    }
    
    print(str);
  }
  
  /**
   * Print out a short summary string for the dataset characteristics
   *
   * @param attrType the attribute type (NUMERIC, NOMINAL, etc.)
   * @param classType the class type (NUMERIC, NOMINAL, etc.)
   */
02071   protected void printAttributeSummary(int attrType, int classType) {
    
    String str = "";
    
    switch (attrType) {
    case Attribute.NUMERIC:
      str = " numeric" + str;
      break;
    case Attribute.NOMINAL:
      str = " nominal" + str;
      break;
    case Attribute.STRING:
      str = " string" + str;
      break;
    case Attribute.DATE:
      str = " date" + str;
      break;
    case Attribute.RELATIONAL:
      str = " relational" + str;
      break;
    }
    str += " attribute(s))";
    
    switch (classType) {
    case Attribute.NUMERIC:
      str = " (numeric class," + str;
      break;
    case Attribute.NOMINAL:
      str = " (nominal class," + str;
      break;
    case Attribute.STRING:
      str = " (string class," + str;
      break;
    case Attribute.DATE:
      str = " (date class," + str;
      break;
    case Attribute.RELATIONAL:
      str = " (relational class," + str;
      break;
    }
    
    print(str);
  }
  
  /**
   * Returns the revision string.
   * 
   * @return            the revision
   */
02120   public String getRevision() {
    return RevisionUtils.extract("$Revision: 1.5 $");
  }

  /**
   * Test method for this class
   * 
   * @param args the commandline parameters
   */
02129   public static void main(String [] args) {
    try {
      CheckEstimator check = new CheckEstimator();
      
      try {
        check.setOptions(args);
        Utils.checkForRemainingOptions(args);
      } catch (Exception ex) {
        String result = ex.getMessage() + "\n\n" + check.getClass().getName().replaceAll(".*\\.", "") + " Options:\n\n";
        Enumeration enu = check.listOptions();
        while (enu.hasMoreElements()) {
          Option option = (Option) enu.nextElement();
          result += option.synopsis() + "\n" + option.description() + "\n";
        }
        throw new Exception(result);
      }
      
      check.doTests();
    } catch (Exception ex) {
      System.err.println(ex.getMessage());
    }
  }
}


Generated by  Doxygen 1.6.0   Back to index