Logo Search packages:      
Sourcecode: weka version File versions

RaceSearch.java

/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    RaceSearch.java
 *    Copyright (C) 2000 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.attributeSelection;

import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.SelectedTag;
import weka.core.Statistics;
import weka.core.Tag;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.experiment.PairedStats;
import weka.experiment.Stats;

import java.util.BitSet;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

/** 
 <!-- globalinfo-start -->
 * Races the cross validation error of competing attribute subsets. Use in conjuction with a ClassifierSubsetEval. RaceSearch has four modes:<br/>
 * <br/>
 * forward selection races all single attribute additions to a base set (initially  no attributes), selects the winner to become the new base set and then iterates until there is no improvement over the base set. <br/>
 * <br/>
 * Backward elimination is similar but the initial base set has all attributes included and races all single attribute deletions. <br/>
 * <br/>
 * Schemata search is a bit different. Each iteration a series of races are run in parallel. Each race in a set determines whether a particular attribute should be included or not---ie the race is between the attribute being "in" or "out". The other attributes for this race are included or excluded randomly at each point in the evaluation. As soon as one race has a clear winner (ie it has been decided whether a particular attribute should be inor not) then the next set of races begins, using the result of the winning race from the previous iteration as new base set.<br/>
 * <br/>
 * Rank race first ranks the attributes using an attribute evaluator and then races the ranking. The race includes no attributes, the top ranked attribute, the top two attributes, the top three attributes, etc.<br/>
 * <br/>
 * It is also possible to generate a raked list of attributes through the forward racing process. If generateRanking is set to true then a complete forward race will be run---that is, racing continues until all attributes have been selected. The order that they are added in determines a complete ranking of all the attributes.<br/>
 * <br/>
 * Racing uses paired and unpaired t-tests on cross-validation errors of competing subsets. When there is a significant difference between the means of the errors of two competing subsets then the poorer of the two can be eliminated from the race. Similarly, if there is no significant difference between the mean errors of two competing subsets and they are within some threshold of each other, then one can be eliminated from the race.<br/>
 * <br/>
 * For more information see:<br/>
 * <br/>
 * Andrew W. Moore, Mary S. Lee: Efficient Algorithms for Minimizing Cross Validation Error. In: Eleventh International Conference on Machine Learning, 190-198, 1994.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- technical-bibtex-start -->
 * BibTeX:
 * <pre>
 * &#64;inproceedings{Moore1994,
 *    author = {Andrew W. Moore and Mary S. Lee},
 *    booktitle = {Eleventh International Conference on Machine Learning},
 *    pages = {190-198},
 *    publisher = {Morgan Kaufmann},
 *    title = {Efficient Algorithms for Minimizing Cross Validation Error},
 *    year = {1994}
 * }
 * </pre>
 * <p/>
 <!-- technical-bibtex-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 * 
 * <pre> -R &lt;0 = forward | 1 = backward race | 2 = schemata | 3 = rank&gt;
 *  Type of race to perform.
 *  (default = 0).</pre>
 * 
 * <pre> -L &lt;significance&gt;
 *  Significance level for comaparisons
 *  (default = 0.001(forward/backward/rank)/0.01(schemata)).</pre>
 * 
 * <pre> -T &lt;threshold&gt;
 *  Threshold for error comparison.
 *  (default = 0.001).</pre>
 * 
 * <pre> -A &lt;attribute evaluator&gt;
 *  Attribute ranker to use if doing a 
 *  rank search. Place any
 *  evaluator options LAST on 
 *  the command line following a "--".
 *  eg. -A weka.attributeSelection.GainRatioAttributeEval ... -- -M.
 *  (default = GainRatioAttributeEval)</pre>
 * 
 * <pre> -F &lt;0 = 10 fold | 1 = leave-one-out&gt;
 *  Folds for cross validation
 *  (default = 0 (1 if schemata race)</pre>
 * 
 * <pre> -Q
 *  Generate a ranked list of attributes.
 *  Forces the search to be forward
 *  and races until all attributes have
 *  selected, thus producing a ranking.</pre>
 * 
 * <pre> -N &lt;num to select&gt;
 *  Specify number of attributes to retain from 
 *  the ranking. Overides -T. Use in conjunction with -Q</pre>
 * 
 * <pre> -J &lt;threshold&gt;
 *  Specify a theshold by which attributes
 *  may be discarded from the ranking.
 *  Use in conjuction with -Q</pre>
 * 
 * <pre> -Z
 *  Verbose output for monitoring the search.</pre>
 * 
 * <pre> 
 * Options specific to evaluator weka.attributeSelection.GainRatioAttributeEval:
 * </pre>
 * 
 * <pre> -M
 *  treat missing values as a seperate value.</pre>
 * 
 <!-- options-end -->
 *
 * @author Mark Hall (mhall@cs.waikato.ac.nz)
 * @version $Revision: 1.26 $
 */
00140 public class RaceSearch 
  extends ASSearch 
  implements RankedOutputSearch, OptionHandler, TechnicalInformationHandler {

  /** for serialization */
00145   static final long serialVersionUID = 4015453851212985720L;

  /** the training instances */
00148   private Instances m_Instances = null;

  /** search types */
00151   private static final int FORWARD_RACE = 0;
  private static final int BACKWARD_RACE = 1;
  private static final int SCHEMATA_RACE = 2;
  private static final int RANK_RACE = 3;
  public static final Tag [] TAGS_SELECTION = {
    new Tag(FORWARD_RACE, "Forward selection race"),
    new Tag(BACKWARD_RACE, "Backward elimination race"),
    new Tag(SCHEMATA_RACE, "Schemata race"),
    new Tag(RANK_RACE, "Rank race")
      };
  
  /** the selected search type */
00163   private int m_raceType = FORWARD_RACE;
  
  /** xval types */
00166   private static final int TEN_FOLD = 0;
  private static final int LEAVE_ONE_OUT = 1;
  public static final Tag [] XVALTAGS_SELECTION = {
    new Tag(TEN_FOLD, "10 Fold"),
    new Tag(LEAVE_ONE_OUT, "Leave-one-out"),
      };

  /** the selected xval type */
00174   private int m_xvalType = TEN_FOLD;
  
  /** the class index */
00177   private int m_classIndex;

  /** the number of attributes in the data */
00180   private int m_numAttribs;

  /** the total number of partially/fully evaluated subsets */
00183   private int m_totalEvals;

  /** holds the merit of the best subset found */
00186   private double m_bestMerit = -Double.MAX_VALUE;

  /** the subset evaluator to use */
00189   private HoldOutSubsetEvaluator m_theEvaluator = null;

  /** the significance level for comparisons */
00192   private double m_sigLevel = 0.001;

  /** threshold for comparisons */
00195   private double m_delta = 0.001;

  /** the number of samples above which to begin testing for similarity
      between competing subsets */
00199   private int m_samples = 20;

  /** number of cross validation folds---equal to the number of instances
      for leave-one-out cv */
00203   private int m_numFolds = 10;

  /** the attribute evaluator to generate the initial ranking when
      doing a rank race */
00207   private ASEvaluation m_ASEval = new GainRatioAttributeEval();

  /** will hold the attribute ranking produced by the above attribute
      evaluator if doing a rank search */
00211   private int [] m_Ranking;

  /** verbose output for monitoring the search and debugging */
00214   private boolean m_debug = false;

  /** If true then produce a ranked list of attributes by fully traversing
      a forward hillclimb race */
00218   private boolean m_rankingRequested = false;

  /** The ranked list of attributes produced if m_rankingRequested is true */
00221   private double [][] m_rankedAtts;

  /** The number of attributes ranked so far (if ranking is requested) */
00224   private int m_rankedSoFar;

  /** The number of attributes to retain if a ranking is requested. -1
      indicates that all attributes are to be retained. Has precedence over
      m_threshold */
00229   private int m_numToSelect = -1;

  private int m_calculatedNumToSelect = -1;

  /** the threshold for removing attributes if ranking is requested */
00234   private double m_threshold = -Double.MAX_VALUE;

  /**
   * Returns a string describing this search method
   * @return a description of the search method suitable for
   * displaying in the explorer/experimenter gui
   */
00241   public String globalInfo() {
    return "Races the cross validation error of competing "
      +"attribute subsets. Use in conjuction with a ClassifierSubsetEval. "
      +"RaceSearch has four modes:\n\nforward selection "
      +"races all single attribute additions to a base set (initially "
      +" no attributes), selects the winner to become the new base set "
      +"and then iterates until there is no improvement over the base set. "
      +"\n\nBackward elimination is similar but the initial base set has all "
      +"attributes included and races all single attribute deletions. "
      +"\n\nSchemata search is a bit different. Each iteration a series of "
      +"races are run in parallel. Each race in a set determines whether "
      +"a particular attribute should be included or not---ie the race is "
      +"between the attribute being \"in\" or \"out\". The other attributes "
      +"for this race are included or excluded randomly at each point in the "
      +"evaluation. As soon as one race "
      +"has a clear winner (ie it has been decided whether a particular "
      +"attribute should be inor not) then the next set of races begins, "
      +"using the result of the winning race from the previous iteration as "
      +"new base set.\n\nRank race first ranks the attributes using an "
      +"attribute evaluator and then races the ranking. The race includes "
      +"no attributes, the top ranked attribute, the top two attributes, the "
      +"top three attributes, etc.\n\nIt is also possible to generate a "
      +"raked list of attributes through the forward racing process. "
      +"If generateRanking is set to true then a complete forward race will "
      +"be run---that is, racing continues until all attributes have been "
      +"selected. The order that they are added in determines a complete "
      +"ranking of all the attributes.\n\nRacing uses paired and unpaired "
      +"t-tests on cross-validation errors of competing subsets. When there "
      +"is a significant difference between the means of the errors of two "
      +"competing subsets then the poorer of the two can be eliminated from "
      +"the race. Similarly, if there is no significant difference between "
      +"the mean errors of two competing subsets and they are within some "
      +"threshold of each other, then one can be eliminated from the race.\n\n"
      + "For more information see:\n\n"
      + getTechnicalInformation().toString();
  }

  /**
   * Returns an instance of a TechnicalInformation object, containing 
   * detailed information about the technical background of this class,
   * e.g., paper reference or book this class is based on.
   * 
   * @return the technical information about this class
   */
00285   public TechnicalInformation getTechnicalInformation() {
    TechnicalInformation        result;
    
    result = new TechnicalInformation(Type.INPROCEEDINGS);
    result.setValue(Field.AUTHOR, "Andrew W. Moore and Mary S. Lee");
    result.setValue(Field.TITLE, "Efficient Algorithms for Minimizing Cross Validation Error");
    result.setValue(Field.BOOKTITLE, "Eleventh International Conference on Machine Learning");
    result.setValue(Field.YEAR, "1994");
    result.setValue(Field.PAGES, "190-198");
    result.setValue(Field.PUBLISHER, "Morgan Kaufmann");
    
    return result;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
00304   public String raceTypeTipText() {
    return "Set the type of search.";
  }

  /**
   * Set the race type
   *
   * @param d the type of race
   */
00313   public void setRaceType (SelectedTag d) {
    
    if (d.getTags() == TAGS_SELECTION) {
      m_raceType = d.getSelectedTag().getID();
    }
    if (m_raceType == SCHEMATA_RACE && !m_rankingRequested) {
      try {
        setFoldsType(new SelectedTag(LEAVE_ONE_OUT,
                                     XVALTAGS_SELECTION));
        setSignificanceLevel(0.01);
      } catch (Exception ex) {
      }
    } else {
      try {
        setFoldsType(new SelectedTag(TEN_FOLD,
                                     XVALTAGS_SELECTION));
        setSignificanceLevel(0.001);
      } catch (Exception ex) {
      }
    }
  }

  /**
   * Get the race type
   *
   * @return the type of race
   */
00340   public SelectedTag getRaceType() {
    return new SelectedTag(m_raceType, TAGS_SELECTION);
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
00349   public String significanceLevelTipText() {
    return "Set the significance level to use for t-test comparisons.";
  }

  /**
   * Sets the significance level to use
   * @param sig the significance level
   */
00357   public void setSignificanceLevel(double sig) {
    m_sigLevel = sig;
  }

  /**
   * Get the significance level
   * @return the current significance level
   */
00365   public double getSignificanceLevel() {
    return m_sigLevel;
  }
  
  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
00374   public String thresholdTipText() {
    return "Set the error threshold by which to consider two subsets "
      +"equivalent.";
  }

  /**
   * Sets the threshold for comparisons
   * @param t the threshold to use
   */
00383   public void setThreshold(double t) {
    m_delta = t;
  }

  /**
   * Get the threshold
   * @return the current threshold
   */
00391   public double getThreshold() {
    return m_delta;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
00400   public String foldsTypeTipText() {
    return "Set the number of folds to use for x-val error estimation; "
      +"leave-one-out is selected automatically for schemata search.";
  }

  /**
   * Set the xfold type
   *
   * @param d the type of xval
   */
00410   public void setFoldsType (SelectedTag d) {
    
    if (d.getTags() == XVALTAGS_SELECTION) {
      m_xvalType = d.getSelectedTag().getID();
    }
  }

  /**
   * Get the xfold type
   *
   * @return the type of xval
   */
00422   public SelectedTag getFoldsType () {
    return new SelectedTag(m_xvalType, XVALTAGS_SELECTION);
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
00431   public String debugTipText() {
    return "Turn on verbose output for monitoring the search's progress.";
  }

  /**
   * Set whether verbose output should be generated.
   * @param d true if output is to be verbose.
   */
00439   public void setDebug(boolean d) {
    m_debug = d;
  }

  /**
   * Get whether output is to be verbose
   * @return true if output will be verbose
   */
00447   public boolean getDebug() {
    return m_debug;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
00456   public String attributeEvaluatorTipText() {
    return "Attribute evaluator to use for generating an initial ranking. "
      +"Use in conjunction with a rank race";    
  }

  /**
   * Set the attribute evaluator to use for generating the ranking.
   * @param newEvaluator the attribute evaluator to use.
   */
00465   public void setAttributeEvaluator(ASEvaluation newEvaluator) {
    m_ASEval = newEvaluator;
  }

  /**
   * Get the attribute evaluator used to generate the ranking.
   * @return the evaluator used to generate the ranking.
   */
00473   public ASEvaluation getAttributeEvaluator() {
    return m_ASEval;
  }

    /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
00482   public String generateRankingTipText() {
    return "Use the racing process to generate a ranked list of attributes. "
      +"Using this mode forces the race to be a forward type and then races "
      +"until all attributes have been added, thus giving a ranked list";
  }
  
  /**
   * Records whether the user has requested a ranked list of attributes.
   * @param doRank true if ranking is requested
   */
00492   public void setGenerateRanking(boolean doRank) {
    m_rankingRequested = doRank;
    if (m_rankingRequested) {
      try {
        setRaceType(new SelectedTag(FORWARD_RACE,
                                    TAGS_SELECTION));
      } catch (Exception ex) {
      }
    }
  }

  /**
   * Gets whether ranking has been requested. This is used by the
   * AttributeSelection module to determine if rankedAttributes()
   * should be called.
   * @return true if ranking has been requested.
   */
00509   public boolean getGenerateRanking() {
    return m_rankingRequested;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
00518   public String numToSelectTipText() {
    return "Specify the number of attributes to retain. Use in conjunction "
      +"with generateRanking. The default value "
      +"(-1) indicates that all attributes are to be retained. Use either "
      +"this option or a threshold to reduce the attribute set.";
  }

  /**
   * Specify the number of attributes to select from the ranked list
   * (if generating a ranking). -1
   * indicates that all attributes are to be retained.
   * @param n the number of attributes to retain
   */
00531   public void setNumToSelect(int n) {
    m_numToSelect = n;
  }

  /**
   * Gets the number of attributes to be retained.
   * @return the number of attributes to retain
   */
00539   public int getNumToSelect() {
    return m_numToSelect;
  }

  /**
   * Gets the calculated number of attributes to retain. This is the
   * actual number of attributes to retain. This is the same as
   * getNumToSelect if the user specifies a number which is not less
   * than zero. Otherwise it should be the number of attributes in the
   * (potentially transformed) data.
   */
00550   public int getCalculatedNumToSelect() {
    if (m_numToSelect >= 0) {
      m_calculatedNumToSelect = m_numToSelect;
    }
    return m_calculatedNumToSelect;
  }

  /**
   * Returns the tip text for this property
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
00562   public String selectionThresholdTipText() {
    return "Set threshold by which attributes can be discarded. Default value "
      + "results in no attributes being discarded. Use in conjunction with "
      + "generateRanking";
  }

  /**
   * Set the threshold by which the AttributeSelection module can discard
   * attributes.
   * @param threshold the threshold.
   */
00573   public void setSelectionThreshold(double threshold) {
    m_threshold = threshold;
  }

  /**
   * Returns the threshold so that the AttributeSelection module can
   * discard attributes from the ranking.
   */
00581   public double getSelectionThreshold() {
    return m_threshold;
  }


  /**
   * Returns an enumeration describing the available options.
   * @return an enumeration of all the available options.
   **/
00590   public Enumeration listOptions () {
    Vector newVector = new Vector();
    
     newVector.addElement(new Option(
       "\tType of race to perform.\n"
       + "\t(default = 0).",
       "R", 1 ,"-R <0 = forward | 1 = backward race | 2 = schemata | 3 = rank>"));
     
     newVector.addElement(new Option(
       "\tSignificance level for comaparisons\n"
       + "\t(default = 0.001(forward/backward/rank)/0.01(schemata)).",
       "L",1,"-L <significance>"));
     
     newVector.addElement(new Option(
       "\tThreshold for error comparison.\n"
       + "\t(default = 0.001).",
       "T",1,"-T <threshold>"));
     
     newVector.addElement(new Option(
       "\tAttribute ranker to use if doing a \n"
       + "\trank search. Place any\n"
       + "\tevaluator options LAST on \n"
       + "\tthe command line following a \"--\".\n" 
       + "\teg. -A weka.attributeSelection.GainRatioAttributeEval ... -- -M.\n"
       + "\t(default = GainRatioAttributeEval)", 
       "A", 1, "-A <attribute evaluator>"));
    
     newVector.addElement(new Option(
       "\tFolds for cross validation\n"
       + "\t(default = 0 (1 if schemata race)",
       "F",1,"-F <0 = 10 fold | 1 = leave-one-out>"));
     
     newVector.addElement(new Option(
       "\tGenerate a ranked list of attributes.\n"
       +"\tForces the search to be forward\n"
       +"\tand races until all attributes have\n"
       +"\tselected, thus producing a ranking.",
       "Q",0,"-Q"));

    newVector.addElement(new Option(
      "\tSpecify number of attributes to retain from \n"
      + "\tthe ranking. Overides -T. Use in conjunction with -Q", 
      "N", 1, "-N <num to select>"));

    newVector.addElement(new Option(
      "\tSpecify a theshold by which attributes\n" 
      + "\tmay be discarded from the ranking.\n"
      +"\tUse in conjuction with -Q",
      "J",1, "-J <threshold>"));

     newVector.addElement(new Option(
       "\tVerbose output for monitoring the search.",
       "Z",0,"-Z"));
     
     if ((m_ASEval != null) && 
         (m_ASEval instanceof OptionHandler)) {
       newVector.addElement(new Option(
         "", 
         "", 0, "\nOptions specific to evaluator " 
         + m_ASEval.getClass().getName() + ":"));

       Enumeration enu = ((OptionHandler)m_ASEval).listOptions();
       while (enu.hasMoreElements()) {
         newVector.addElement(enu.nextElement());
       }
     }
     
     return newVector.elements();
  }

  /**
   * Parses a given list of options. <p/>
   *
   <!-- options-start -->
   * Valid options are: <p/>
   * 
   * <pre> -R &lt;0 = forward | 1 = backward race | 2 = schemata | 3 = rank&gt;
   *  Type of race to perform.
   *  (default = 0).</pre>
   * 
   * <pre> -L &lt;significance&gt;
   *  Significance level for comaparisons
   *  (default = 0.001(forward/backward/rank)/0.01(schemata)).</pre>
   * 
   * <pre> -T &lt;threshold&gt;
   *  Threshold for error comparison.
   *  (default = 0.001).</pre>
   * 
   * <pre> -A &lt;attribute evaluator&gt;
   *  Attribute ranker to use if doing a 
   *  rank search. Place any
   *  evaluator options LAST on 
   *  the command line following a "--".
   *  eg. -A weka.attributeSelection.GainRatioAttributeEval ... -- -M.
   *  (default = GainRatioAttributeEval)</pre>
   * 
   * <pre> -F &lt;0 = 10 fold | 1 = leave-one-out&gt;
   *  Folds for cross validation
   *  (default = 0 (1 if schemata race)</pre>
   * 
   * <pre> -Q
   *  Generate a ranked list of attributes.
   *  Forces the search to be forward
   *  and races until all attributes have
   *  selected, thus producing a ranking.</pre>
   * 
   * <pre> -N &lt;num to select&gt;
   *  Specify number of attributes to retain from 
   *  the ranking. Overides -T. Use in conjunction with -Q</pre>
   * 
   * <pre> -J &lt;threshold&gt;
   *  Specify a theshold by which attributes
   *  may be discarded from the ranking.
   *  Use in conjuction with -Q</pre>
   * 
   * <pre> -Z
   *  Verbose output for monitoring the search.</pre>
   * 
   * <pre> 
   * Options specific to evaluator weka.attributeSelection.GainRatioAttributeEval:
   * </pre>
   * 
   * <pre> -M
   *  treat missing values as a seperate value.</pre>
   * 
   <!-- options-end -->
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */
00720   public void setOptions (String[] options)
    throws Exception {
    String optionString;
    resetOptions();
    
    optionString = Utils.getOption('R', options);
    if (optionString.length() != 0) {
      setRaceType(new SelectedTag(Integer.parseInt(optionString),
                                  TAGS_SELECTION));
    }
    
    optionString = Utils.getOption('F', options);
    if (optionString.length() != 0) {
      setFoldsType(new SelectedTag(Integer.parseInt(optionString),
                                  XVALTAGS_SELECTION));
    }

    optionString = Utils.getOption('L', options);
    if (optionString.length() !=0) {
      setSignificanceLevel(Double.parseDouble(optionString));
    }

    optionString = Utils.getOption('T', options);
    if (optionString.length() !=0) {
      setThreshold(Double.parseDouble(optionString));
    }

    optionString = Utils.getOption('A', options);
    if (optionString.length() != 0) {
      setAttributeEvaluator(ASEvaluation.forName(optionString, 
                            Utils.partitionOptions(options)));
    }

    setGenerateRanking(Utils.getFlag('Q', options));

    optionString = Utils.getOption('J', options);
    if (optionString.length() != 0) {
      setSelectionThreshold(Double.parseDouble(optionString));
    }
    
    optionString = Utils.getOption('N', options);
    if (optionString.length() != 0) {
      setNumToSelect(Integer.parseInt(optionString));
    }

    setDebug(Utils.getFlag('Z', options));
  }

  /**
   * Gets the current settings of BestFirst.
   * @return an array of strings suitable for passing to setOptions()
   */
00772   public String[] getOptions () {
    int current = 0;
    String[] evaluatorOptions = new String[0];

    if ((m_ASEval != null) && 
        (m_ASEval instanceof OptionHandler)) {
      evaluatorOptions = ((OptionHandler)m_ASEval).getOptions();
    }
    String[] options = new String[17+evaluatorOptions.length];

    options[current++] = "-R"; options[current++] = ""+m_raceType;
    options[current++] = "-L"; options[current++] = ""+getSignificanceLevel();
    options[current++] = "-T"; options[current++] = ""+getThreshold();
    options[current++] = "-F"; options[current++] = ""+m_xvalType;
    if (getGenerateRanking()) {
      options[current++] = "-Q";
    }
    options[current++] = "-N"; options[current++] = ""+getNumToSelect();
    options[current++] = "-J"; options[current++] = ""+getSelectionThreshold();
    if (getDebug()) {
      options[current++] = "-Z";
    }
    
    if (getAttributeEvaluator() != null) {
      options[current++] = "-A";
      options[current++] = getAttributeEvaluator().getClass().getName();
      options[current++] = "--";
      System.arraycopy(evaluatorOptions, 0, options, current, 
                       evaluatorOptions.length);
      current += evaluatorOptions.length;
    }

    
    while (current < options.length) {
      options[current++] = "";
    }

    return  options;
  }




  /**
   * Searches the attribute subset space by racing cross validation
   * errors of competing subsets
   *
   * @param ASEval the attribute evaluator to guide the search
   * @param data the training instances.
   * @return an array (not necessarily ordered) of selected attribute indexes
   * @throws Exception if the search can't be completed
   */
00824   public int[] search (ASEvaluation ASEval, Instances data)
    throws Exception {
    if (!(ASEval instanceof SubsetEvaluator)) {
      throw  new Exception(ASEval.getClass().getName() 
                           + " is not a " 
                           + "Subset evaluator! (RaceSearch)");
    }

    if (ASEval instanceof UnsupervisedSubsetEvaluator) {
      throw new Exception("Can't use an unsupervised subset evaluator "
                          +"(RaceSearch).");
    }

    if (!(ASEval instanceof HoldOutSubsetEvaluator)) {
      throw new Exception("Must use a HoldOutSubsetEvaluator, eg. "
                          +"weka.attributeSelection.ClassifierSubsetEval "
                          +"(RaceSearch)");
    }

    if (!(ASEval instanceof ErrorBasedMeritEvaluator)) {
      throw new Exception("Only error based subset evaluators can be used, "
                          +"eg. weka.attributeSelection.ClassifierSubsetEval "
                          +"(RaceSearch)");
    }

    m_Instances = new Instances(data);
    m_Instances.deleteWithMissingClass();
    if (m_Instances.numInstances() == 0) {
      throw new Exception("All train instances have missing class! (RaceSearch)");
    }
    if (m_rankingRequested && m_numToSelect > m_Instances.numAttributes()-1) {
      throw new Exception("More attributes requested than exist in the data "
                          +"(RaceSearch).");
    }
    m_theEvaluator = (HoldOutSubsetEvaluator)ASEval;
    m_numAttribs = m_Instances.numAttributes();
    m_classIndex = m_Instances.classIndex();

    if (m_rankingRequested) {
      m_rankedAtts = new double[m_numAttribs-1][2];
      m_rankedSoFar = 0;
    }

    if (m_xvalType == LEAVE_ONE_OUT) {
      m_numFolds = m_Instances.numInstances();
    } else {
      m_numFolds = 10;
    }

    Random random = new Random(1); // I guess this should really be a parameter?
    m_Instances.randomize(random);
    int [] bestSubset=null;

    switch (m_raceType) {
    case FORWARD_RACE:
    case BACKWARD_RACE: 
      bestSubset = hillclimbRace(m_Instances, random);
      break;
    case SCHEMATA_RACE:
      bestSubset = schemataRace(m_Instances, random);
      break;
    case RANK_RACE:
      bestSubset = rankRace(m_Instances, random);
      break;
    }

    return bestSubset;
  }

00893   public double [][] rankedAttributes() throws Exception {
    if (!m_rankingRequested) {
      throw new Exception("Need to request a ranked list of attributes "
                          +"before attributes can be ranked (RaceSearch).");
    }
    if (m_rankedAtts == null) {
      throw new Exception("Search must be performed before attributes "
                          +"can be ranked (RaceSearch).");
    }
    
    double [][] final_rank = new double [m_rankedSoFar][2];
    for (int i=0;i<m_rankedSoFar;i++) {
      final_rank[i][0] = m_rankedAtts[i][0];
      final_rank[i][1] = m_rankedAtts[i][1];
    }

    if (m_numToSelect <= 0) {
      if (m_threshold == -Double.MAX_VALUE) {
        m_calculatedNumToSelect = final_rank.length;
      } else {
        determineNumToSelectFromThreshold(final_rank);
      }
    }

    return final_rank;
  }

  private void determineNumToSelectFromThreshold(double [][] ranking) {
    int count = 0;
    for (int i = 0; i < ranking.length; i++) {
      if (ranking[i][1] > m_threshold) {
        count++;
      }
    }
    m_calculatedNumToSelect = count;
  }

  /**
   * Print an attribute set.
   */
00933   private String printSets(char [][]raceSets) {
    StringBuffer temp = new StringBuffer();
    for (int i=0;i<raceSets.length;i++) {
      for (int j=0;j<m_numAttribs;j++) {
        temp.append(raceSets[i][j]);
      }
      temp.append('\n');
    }
    return temp.toString();
  }

  /**
   * Performs a schemata race---a series of races in parallel.
   * @param data the instances to estimate accuracy over.
   * @param random a random number generator
   * @return an array of selected attribute indices.
   */
00950   private int [] schemataRace(Instances data, Random random) throws Exception {
    // # races, 2 (competitors in each race), # attributes
    char [][][] parallelRaces;
    int numRaces = m_numAttribs-1;
    Random r = new Random(42);
    int numInstances = data.numInstances();
    Instances trainCV; Instances testCV;
    Instance testInstance;

    // statistics on the racers
    Stats [][] raceStats = new Stats[numRaces][2];
    
    parallelRaces = new char [numRaces][2][m_numAttribs-1];
    char [] base = new char [m_numAttribs];
    for (int i=0;i<m_numAttribs;i++) {
      base[i] = '*';
    }

    int count=0;
    // set up initial races
    for (int i=0;i<m_numAttribs;i++) {
      if (i != m_classIndex) {
        parallelRaces[count][0] = (char [])base.clone();
        parallelRaces[count][1] = (char [])base.clone();
        parallelRaces[count][0][i] = '1';
        parallelRaces[count++][1][i] = '0';
      }
    }
    
    if (m_debug) {
      System.err.println("Initial sets:\n");
      for (int i=0;i<numRaces;i++) {
        System.err.print(printSets(parallelRaces[i])+"--------------\n");
      }
    }
    
    BitSet randomB = new BitSet(m_numAttribs);
    char [] randomBC = new char [m_numAttribs];

    // notes which bit positions have been decided
    boolean [] attributeConstraints = new boolean[m_numAttribs];
    double error;
    int evaluationCount = 0;
    raceSet: while (numRaces > 0) {
      boolean won = false;
      for (int i=0;i<numRaces;i++) {
        raceStats[i][0] = new Stats();
        raceStats[i][1] = new Stats();
      }

      // keep an eye on how many test instances have been randomly sampled
      int sampleCount = 0;
      // run the current set of races
      while (!won) {
        // generate a random binary string
        for (int i=0;i<m_numAttribs;i++) {
          if (i != m_classIndex) {
            if (!attributeConstraints[i]) {
              if (r.nextDouble() < 0.5) {
                randomB.set(i);
              } else {
                randomB.clear(i);
              }
            } else { // this position has been decided from previous races
              if (base[i] == '1') { 
                randomB.set(i);
              } else {
                randomB.clear(i);
              }
            }
          }
        }
        
        // randomly select an instance to test on
        int testIndex = Math.abs(r.nextInt() % numInstances);


        // We want to randomize the data the same way for every 
        // learning scheme.
        trainCV = data.trainCV(numInstances, testIndex, new Random (1));
        testCV = data.testCV(numInstances, testIndex);
        testInstance = testCV.instance(0);
        sampleCount++;
        /*      if (sampleCount > numInstances) {
          throw new Exception("raceSchemata: No clear winner after sampling "
                              +sampleCount+" instances.");
                              } */
        
        m_theEvaluator.buildEvaluator(trainCV);
        
        // the evaluator must retrain for every test point
        error = -((HoldOutSubsetEvaluator)m_theEvaluator).
          evaluateSubset(randomB, 
                         testInstance,
                         true);
        evaluationCount++;
        
        // see which racers match this random subset
        for (int i=0;i<m_numAttribs;i++) {
          if (randomB.get(i)) {
            randomBC[i] = '1';
          } else {
            randomBC[i] = '0';
          }
        }
        //      System.err.println("Random subset: "+(new String(randomBC)));

        checkRaces: for (int i=0;i<numRaces;i++) {
          // if a pair of racers has evaluated more than num instances
          // then bail out---unlikely that having any more atts is any
          // better than the current base set.
          if (((raceStats[i][0].count + raceStats[i][1].count) / 2) > 
              (numInstances)) {
            break raceSet;
          }
          for (int j=0;j<2;j++) {
            boolean matched = true;
            for (int k =0;k<m_numAttribs;k++) {
              if (parallelRaces[i][j][k] != '*') {
                if (parallelRaces[i][j][k] != randomBC[k]) {
                  matched = false;
                  break;
                }
              }
            }
            if (matched) { // update the stats for this racer
              //              System.err.println("Matched "+i+" "+j);
              raceStats[i][j].add(error);

                // does this race have a clear winner, meaning we can
                // terminate the whole set of parallel races?
                if (raceStats[i][0].count > m_samples &&
                    raceStats[i][1].count > m_samples) {
                  raceStats[i][0].calculateDerived();
                  raceStats[i][1].calculateDerived();
                  //              System.err.println(j+" : "+(new String(parallelRaces[i][j])));
                  //              System.err.println(raceStats[i][0]);
                  //              System.err.println(raceStats[i][1]);
                  // check the ttest
                  double prob = ttest(raceStats[i][0], raceStats[i][1]);
                  //              System.err.println("Prob :"+prob);
                  if (prob < m_sigLevel) { // stop the races we have a winner!
                    if (raceStats[i][0].mean < raceStats[i][1].mean) {
                      base = (char [])parallelRaces[i][0].clone();
                      m_bestMerit = raceStats[i][0].mean;
                      if (m_debug) {
                        System.err.println("contender 0 won ");
                      }
                    } else {
                      base = (char [])parallelRaces[i][1].clone();
                      m_bestMerit = raceStats[i][1].mean;
                      if (m_debug) {
                        System.err.println("contender 1 won");
                      }
                    }
                    if (m_debug) {
                      System.err.println((new String(parallelRaces[i][0]))
                                 +" "+(new String(parallelRaces[i][1])));
                      System.err.println("Means : "+raceStats[i][0].mean
                                         +" vs"+raceStats[i][1].mean);
                      System.err.println("Evaluations so far : "
                                         +evaluationCount);
                    }
                    won = true;
                    break checkRaces;
                  }
                }
             
            }
          }
        }
      }

      numRaces--;
      // set up the next set of races if necessary
      if (numRaces > 0 && won) {
        parallelRaces = new char [numRaces][2][m_numAttribs-1];
        raceStats = new Stats[numRaces][2];
        // update the attribute constraints
        for (int i=0;i<m_numAttribs;i++) {
          if (i != m_classIndex && !attributeConstraints[i] &&
              base[i] != '*') {
            attributeConstraints[i] = true;
            break;
          }
        }
        count=0;
        for (int i=0;i<numRaces;i++) {
          parallelRaces[i][0] = (char [])base.clone();
          parallelRaces[i][1] = (char [])base.clone();
          for (int j=count;j<m_numAttribs;j++) {
            if (j != m_classIndex && parallelRaces[i][0][j] == '*') {
              parallelRaces[i][0][j] = '1';
              parallelRaces[i][1][j] = '0';
              count = j+1;
              break;
            }
          }
        }
        
        if (m_debug) {
          System.err.println("Next sets:\n");
          for (int i=0;i<numRaces;i++) {
            System.err.print(printSets(parallelRaces[i])+"--------------\n");
          }
        }
      }
    }

    if (m_debug) {
      System.err.println("Total evaluations : "
                         +evaluationCount);
    }
    return attributeList(base);
  }

  /** 
   * t-test for unequal sample sizes and same variance. Returns probability
   * that observed difference in means is due to chance.
   */
01170   private double ttest(Stats c1, Stats c2) throws Exception {
    double n1 = c1.count; double n2 = c2.count;
    double v1 = c1.stdDev * c1.stdDev;
    double v2 = c2.stdDev * c2.stdDev;
    double av1 = c1.mean;
    double av2 = c2.mean;
    
    double df = n1 + n2 - 2;
    double cv = (((n1 - 1) * v1) + ((n2 - 1) * v2)) /df;
    double t = (av1 - av2) / Math.sqrt(cv * ((1.0 / n1) + (1.0 / n2)));
    
    return Statistics.incompleteBeta(df / 2.0, 0.5,
                                     df / (df + (t * t)));
  }
    
  /**
   * Performs a rank race---race consisting of no attributes, the top
   * ranked attribute, the top two attributes etc. The initial ranking
   * is determined by an attribute evaluator.
   * @param data the instances to estimate accuracy over
   * @param random a random number generator
   * @return an array of selected attribute indices.
   */
01193   private int [] rankRace(Instances data, Random random) throws Exception {
    char [] baseSet = new char [m_numAttribs];
    char [] bestSet;
    double bestSetError;
    for (int i=0;i<m_numAttribs;i++) {
      if (i == m_classIndex) {
        baseSet[i] = '-';
      } else {
        baseSet[i] = '0';
      }
    }

    int numCompetitors = m_numAttribs-1;
    char [][] raceSets = new char [numCompetitors+1][m_numAttribs];
    
    if (m_ASEval instanceof AttributeEvaluator) {
      // generate the attribute ranking first
      Ranker ranker = new Ranker();
      m_ASEval.buildEvaluator(data);
      m_Ranking = ranker.search(m_ASEval,data);
    } else {
      GreedyStepwise fs = new GreedyStepwise();
      double [][]rankres; 
      fs.setGenerateRanking(true);
      ((ASEvaluation)m_ASEval).buildEvaluator(data);
      fs.search(m_ASEval, data);
      rankres = fs.rankedAttributes();
      m_Ranking = new int[rankres.length];
      for (int i=0;i<rankres.length;i++) {
        m_Ranking[i] = (int)rankres[i][0];
      }
    }

    // set up the race
    raceSets[0] = (char [])baseSet.clone();
    for (int i=0;i<m_Ranking.length;i++) {
      raceSets[i+1] = (char [])raceSets[i].clone();
      raceSets[i+1][m_Ranking[i]] = '1';
    }
    
    if (m_debug) {
      System.err.println("Initial sets:\n"+printSets(raceSets));
    }
    
    // run the race
    double [] winnerInfo = raceSubsets(raceSets, data, true, random);
    bestSetError = winnerInfo[1];
    bestSet = (char [])raceSets[(int)winnerInfo[0]].clone();
    m_bestMerit = bestSetError;
    return attributeList(bestSet);
  }
  
  /**
   * Performs a hill climbing race---all single attribute changes to a
   * base subset are raced in parallel. The winner is chosen and becomes
   * the new base subset and the process is repeated until there is no
   * improvement in error over the base subset.
   * @param data the instances to estimate accuracy over
   * @param random a random number generator
   * @return an array of selected attribute indices.
   * @throws Exception if something goes wrong
   */
01255   private int [] hillclimbRace(Instances data, Random random) throws Exception {
    double baseSetError;
    char [] baseSet = new char [m_numAttribs];

    for (int i=0;i<m_numAttribs;i++) {
      if (i != m_classIndex) {
        if (m_raceType == FORWARD_RACE) {
          baseSet[i] = '0';
        } else {
          baseSet[i] = '1';
        } 
      } else {
        baseSet[i] = '-';
      }
    }

    int numCompetitors = m_numAttribs-1;
    char [][] raceSets = new char [numCompetitors+1][m_numAttribs];

    raceSets[0] = (char [])baseSet.clone();
    int count = 1;
    // initialize each race set to 1 attribute
    for (int i=0;i<m_numAttribs;i++) {
      if (i != m_classIndex) {
        raceSets[count] = (char [])baseSet.clone();
        if (m_raceType == BACKWARD_RACE) {
          raceSets[count++][i] = '0';
        } else {
          raceSets[count++][i] = '1';
        }
      }
    }

    if (m_debug) {
      System.err.println("Initial sets:\n"+printSets(raceSets));
    }
    
    // race the initial sets (base set either no or all features)
    double [] winnerInfo = raceSubsets(raceSets, data, true, random);
    baseSetError = winnerInfo[1];
    m_bestMerit = baseSetError;
    baseSet = (char [])raceSets[(int)winnerInfo[0]].clone();
    if (m_rankingRequested) {
      m_rankedAtts[m_rankedSoFar][0] = (int)(winnerInfo[0]-1);
      m_rankedAtts[m_rankedSoFar][1] = winnerInfo[1];
      m_rankedSoFar++;
    }

    boolean improved = true;
    int j;
    // now race until there is no improvement over the base set or only
    // one competitor remains
    while (improved) {
      // generate the next set of competitors
      numCompetitors--;
      if (numCompetitors == 0) { //race finished!
        break;
      }
      j=0;
      // +1. we'll race against the base set---might be able to bail out
      // of the race if none from the new set are statistically better
      // than the base set. Base set is stored in loc 0.
      raceSets = new char [numCompetitors+1][m_numAttribs];
      for (int i=0;i<numCompetitors+1;i++) {
        raceSets[i] = (char [])baseSet.clone();
        if (i > 0) {
          for (int k=j;k<m_numAttribs;k++) {
            if (m_raceType == 1) {
              if (k != m_classIndex && raceSets[i][k] != '0') {
                raceSets[i][k] = '0';
                j = k+1;
                break;
              }
            } else {
              if (k != m_classIndex && raceSets[i][k] != '1') {
                raceSets[i][k] = '1';
                j = k+1;
                break;
              }
            }
          }
        }
      }
      
      if (m_debug) {
        System.err.println("Next set : \n"+printSets(raceSets));
      }
      improved = false;
      winnerInfo = raceSubsets(raceSets, data, true, random);
      String bs = new String(baseSet); 
      String win = new String(raceSets[(int)winnerInfo[0]]);
      if (bs.compareTo(win) == 0) {
        // race finished
      } else {
        if (winnerInfo[1] < baseSetError || m_rankingRequested) {
          improved = true;
          baseSetError = winnerInfo[1];
          m_bestMerit = baseSetError;
          // find which att is different
          if (m_rankingRequested) {
            for (int i = 0; i < baseSet.length; i++) {
              if (win.charAt(i) != bs.charAt(i)) {
                m_rankedAtts[m_rankedSoFar][0] = i;
                m_rankedAtts[m_rankedSoFar][1] = winnerInfo[1];
                m_rankedSoFar++;
              }
            }
          }
          baseSet = (char [])raceSets[(int)winnerInfo[0]].clone();
        } else {
          // Will get here for a subset whose error is outside the delta
          // threshold but is not *significantly* worse than the base
          // subset
          //throw new Exception("RaceSearch: problem in hillClimbRace");
        }
      }
    }
    return attributeList(baseSet);
  }

  /**
   * Convert an attribute set to an array of indices
   */
01378   private int [] attributeList(char [] list) {
    int count = 0;

    for (int i=0;i<m_numAttribs;i++) {
      if (list[i] == '1') {
        count++;
      }
    }

    int [] rlist = new int[count];
    count = 0;
     for (int i=0;i<m_numAttribs;i++) {
       if (list[i] == '1') {
         rlist[count++] = i;
       }
     }

     return rlist;
  }

  /**
   * Races the leave-one-out cross validation errors of a set of
   * attribute subsets on a set of instances.
   * @param raceSets a set of attribute subset specifications
   * @param data the instances to use when cross validating
   * @param baseSetIncluded true if the first attribute set is a
   * base set generated from the previous race
   * @param random a random number generator
   * @return the index of the winning subset
   * @throws Exception if an error occurs during cross validation
   */
01409   private double [] raceSubsets(char [][]raceSets, Instances data,
                                boolean baseSetIncluded, Random random) 
    throws Exception {
    // the evaluators --- one for each subset
    ASEvaluation [] evaluators = 
      ASEvaluation.makeCopies(m_theEvaluator, raceSets.length);

    // array of subsets eliminated from the race
    boolean [] eliminated = new boolean [raceSets.length];

    // individual statistics
    Stats [] individualStats = new Stats [raceSets.length];

    // pairwise statistics
    PairedStats [][] testers = 
      new PairedStats[raceSets.length][raceSets.length];

    /** do we ignore the base set or not? */
    int startPt = m_rankingRequested ? 1 : 0;

    for (int i=0;i<raceSets.length;i++) {
      individualStats[i] = new Stats();
      for (int j=i+1;j<raceSets.length;j++) {
        testers[i][j] = new PairedStats(m_sigLevel);
      }
    }
    
    BitSet [] raceBitSets = new BitSet[raceSets.length];
    for (int i=0;i<raceSets.length;i++) {
      raceBitSets[i] = new BitSet(m_numAttribs);
      for (int j=0;j<m_numAttribs;j++) {
        if (raceSets[i][j] == '1') {
          raceBitSets[i].set(j);
        }
      }
    }

    // now loop over the data points collecting leave-one-out errors for
    // each attribute set
    Instances trainCV;
    Instances testCV;
    Instance testInst;
    double [] errors = new double [raceSets.length];
    int eliminatedCount = 0;
    int processedCount = 0;
    // if there is one set left in the race then we need to continue to
    // evaluate it for the remaining instances in order to get an
    // accurate error estimate
    processedCount = 0;
    race: for (int i=0;i<m_numFolds;i++) {

      // We want to randomize the data the same way for every 
      // learning scheme.
      trainCV = data.trainCV(m_numFolds, i, new Random (1));
      testCV = data.testCV(m_numFolds, i);
      
      // loop over the surviving attribute sets building classifiers for this
      // training set
      for (int j=startPt;j<raceSets.length;j++) {
        if (!eliminated[j]) {
          evaluators[j].buildEvaluator(trainCV);
        }
      }

      for (int z=0;z<testCV.numInstances();z++) {
        testInst = testCV.instance(z);
        processedCount++;

        // loop over surviving attribute sets computing errors for this
        // test point
        for (int zz=startPt;zz<raceSets.length;zz++) {
          if (!eliminated[zz]) {
            if (z == 0) {// first test instance---make sure classifier is built
              errors[zz] = -((HoldOutSubsetEvaluator)evaluators[zz]).
                evaluateSubset(raceBitSets[zz], 
                               testInst,
                               true);
            } else { // must be k fold rather than leave one out
              errors[zz] = -((HoldOutSubsetEvaluator)evaluators[zz]).
                evaluateSubset(raceBitSets[zz], 
                               testInst,
                               false);
            }
          }
        }

        // now update the stats
        for (int j=startPt;j<raceSets.length;j++) {
          if (!eliminated[j]) {
            individualStats[j].add(errors[j]);
            for (int k=j+1;k<raceSets.length;k++) {
              if (!eliminated[k]) {
                testers[j][k].add(errors[j], errors[k]);
              }
            }
          }
        }
      
        // test for near identical models and models that are significantly
        // worse than some other model
        if (processedCount > m_samples-1 && 
            (eliminatedCount < raceSets.length-1)) {
          for (int j=0;j<raceSets.length;j++) {
            if (!eliminated[j]) {
              for (int k=j+1;k<raceSets.length;k++) {
                if (!eliminated[k]) {
                  testers[j][k].calculateDerived();
                  // near identical ?
                  if ((testers[j][k].differencesSignificance == 0) && 
                      (Utils.eq(testers[j][k].differencesStats.mean, 0.0) ||
                      (Utils.gr(m_delta, Math.abs(testers[j][k].
                                                  differencesStats.mean))))) {
                    // if they're exactly the same and there is a base set
                    // in this race, make sure that the base set is NOT the
                    // one eliminated.
                    if (Utils.eq(testers[j][k].differencesStats.mean, 0.0)) {

                      if (baseSetIncluded) { 
                        if (j != 0) {
                          eliminated[j] = true;
                        } else {
                          eliminated[k] = true;
                        }
                        eliminatedCount++;
                      } else {
                        eliminated[j] = true;
                      }
                      if (m_debug) {
                        System.err.println("Eliminating (identical) "
                                           +j+" "+raceBitSets[j].toString()
                                           +" vs "+k+" "
                                           +raceBitSets[k].toString()
                                           +" after "
                                           +processedCount
                                           +" evaluations\n"
                                           +"\nerror "+j+" : "
                                           +testers[j][k].xStats.mean
                                           +" vs "+k+" : "
                                           +testers[j][k].yStats.mean
                                           +" diff : "
                                           +testers[j][k].differencesStats
                                           .mean);
                      }
                    } else {
                      // eliminate the one with the higer error
                      if (testers[j][k].xStats.mean > 
                          testers[j][k].yStats.mean) {
                        eliminated[j] = true;
                        eliminatedCount++;
                        if (m_debug) {
                          System.err.println("Eliminating (near identical) "
                                           +j+" "+raceBitSets[j].toString()
                                           +" vs "+k+" "
                                           +raceBitSets[k].toString()
                                           +" after "
                                           +processedCount
                                           +" evaluations\n"
                                           +"\nerror "+j+" : "
                                           +testers[j][k].xStats.mean
                                           +" vs "+k+" : "
                                           +testers[j][k].yStats.mean
                                           +" diff : "
                                           +testers[j][k].differencesStats
                                           .mean);
                        }
                        break;
                      } else {
                        eliminated[k] = true;
                        eliminatedCount++;
                        if (m_debug) {
                          System.err.println("Eliminating (near identical) "
                                           +k+" "+raceBitSets[k].toString()
                                           +" vs "+j+" "
                                           +raceBitSets[j].toString()
                                           +" after "
                                           +processedCount
                                           +" evaluations\n"
                                           +"\nerror "+k+" : "
                                           +testers[j][k].yStats.mean
                                           +" vs "+j+" : "
                                           +testers[j][k].xStats.mean
                                           +" diff : "
                                           +testers[j][k].differencesStats
                                             .mean);
                        }
                      }
                    }
                  } else {
                    // significantly worse ?
                    if (testers[j][k].differencesSignificance != 0) {
                      if (testers[j][k].differencesSignificance > 0) {
                        eliminated[j] = true;
                        eliminatedCount++;
                        if (m_debug) {
                          System.err.println("Eliminating (-worse) "
                                           +j+" "+raceBitSets[j].toString()
                                           +" vs "+k+" "
                                           +raceBitSets[k].toString()
                                           +" after "
                                           +processedCount
                                           +" evaluations"
                                           +"\nerror "+j+" : "
                                           +testers[j][k].xStats.mean
                                           +" vs "+k+" : "
                                           +testers[j][k].yStats.mean);
                        }
                        break;
                      } else {
                        eliminated[k] = true;
                        eliminatedCount++;
                        if (m_debug) {
                          System.err.println("Eliminating (worse) "
                                           +k+" "+raceBitSets[k].toString()
                                           +" vs "+j+" "
                                           +raceBitSets[j].toString()
                                           +" after "
                                           +processedCount
                                           +" evaluations"
                                           +"\nerror "+k+" : "
                                           +testers[j][k].yStats.mean
                                           +" vs "+j+" : "
                                           +testers[j][k].xStats.mean);
                        }
                      }
                    }
                  }
                }    
              }
            }
          }
        }
        // if there is a base set from the previous race and it's the
        // only remaining subset then terminate the race.
        if (eliminatedCount == raceSets.length-1 && baseSetIncluded &&
            !eliminated[0] && !m_rankingRequested) {
          break race;
        }
      }
    }

    if (m_debug) {
      System.err.println("*****eliminated count: "+eliminatedCount);
    }
    double bestError = Double.MAX_VALUE;
    int bestIndex=0;
    // return the index of the winner
    for (int i=startPt;i<raceSets.length;i++) {
      if (!eliminated[i]) {
        individualStats[i].calculateDerived();
        if (m_debug) {
          System.err.println("Remaining error: "+raceBitSets[i].toString()
                             +" "+individualStats[i].mean);
        }
        if (individualStats[i].mean < bestError) {
          bestError = individualStats[i].mean;
          bestIndex = i;
        }
      }
    }

    double [] retInfo = new double[2];
    retInfo[0] = bestIndex;
    retInfo[1] = bestError;
    
    if (m_debug) {
      System.err.print("Best set from race : ");
      
      for (int i=0;i<m_numAttribs;i++) {
        if (raceSets[bestIndex][i] == '1') {
          System.err.print('1');
        } else {
          System.err.print('0');
        }
      }
      System.err.println(" :"+bestError+" Processed : "+(processedCount)
                         +"\n"+individualStats[bestIndex].toString());
    }
    return retInfo;
  }

  /**
   * Returns a string represenation
   * 
   * @return a string representation
   */
01694   public String toString() {
    StringBuffer text = new StringBuffer();
    
    text.append("\tRaceSearch.\n\tRace type : ");
    switch (m_raceType) {
    case FORWARD_RACE: 
      text.append("forward selection race\n\tBase set : no attributes");
      break;
    case BACKWARD_RACE:
      text.append("backward elimination race\n\tBase set : all attributes");
      break;
    case SCHEMATA_RACE:
      text.append("schemata race\n\tBase set : no attributes");
      break;
    case RANK_RACE:
      text.append("rank race\n\tBase set : no attributes\n\t");
      text.append("Attribute evaluator : "
                  + getAttributeEvaluator().getClass().getName() +" ");
      if (m_ASEval instanceof OptionHandler) {
        String[] evaluatorOptions = new String[0];
        evaluatorOptions = ((OptionHandler)m_ASEval).getOptions();
        for (int i=0;i<evaluatorOptions.length;i++) {
          text.append(evaluatorOptions[i]+' ');
        }
      }
      text.append("\n");
      text.append("\tAttribute ranking : \n");
      int rlength = (int)(Math.log(m_Ranking.length) / Math.log(10) + 1);
      for (int i=0;i<m_Ranking.length;i++) {
        text.append("\t "+Utils.doubleToString((double)(m_Ranking[i]+1),
                                               rlength,0)
                    +" "+m_Instances.attribute(m_Ranking[i]).name()+'\n');
      }
      break;
    }
    text.append("\n\tCross validation mode : ");
    if (m_xvalType == TEN_FOLD) {
      text.append("10 fold");
    } else {
      text.append("Leave-one-out");
    }

    text.append("\n\tMerit of best subset found : ");
    int fieldwidth = 3;
    double precision = (m_bestMerit - (int)m_bestMerit);
    if (Math.abs(m_bestMerit) > 0) {
      fieldwidth = (int)Math.abs((Math.log(Math.abs(m_bestMerit)) / 
                                  Math.log(10)))+2;
    }
    if (Math.abs(precision) > 0) {
      precision = Math.abs((Math.log(Math.abs(precision)) / Math.log(10)))+3;
    } else {
      precision = 2;
    }

    text.append(Utils.doubleToString(Math.abs(m_bestMerit),
                                     fieldwidth+(int)precision,
                                     (int)precision)+"\n");
    return text.toString();
    
  }

  /**
   * Reset the search method.
   */
01759   protected void resetOptions () {
    m_sigLevel = 0.001;
    m_delta = 0.001;
    m_ASEval = new GainRatioAttributeEval();
    m_Ranking = null;
    m_raceType = FORWARD_RACE;
    m_debug = false;
    m_theEvaluator = null;
    m_bestMerit = -Double.MAX_VALUE;
    m_numFolds = 10;
  }
  
  /**
   * Returns the revision string.
   * 
   * @return            the revision
   */
01776   public String getRevision() {
    return RevisionUtils.extract("$Revision: 1.26 $");
  }
}

Generated by  Doxygen 1.6.0   Back to index