[jboss-svn-commits] JBL Code SVN: r19317 - in labs/jbossrules/contrib/machinelearning/decisiontree/src: dt and 3 other directories.
jboss-svn-commits at lists.jboss.org
jboss-svn-commits at lists.jboss.org
Sat Mar 29 22:50:49 EDT 2008
Author: gizil
Date: 2008-03-29 22:50:49 -0400 (Sat, 29 Mar 2008)
New Revision: 19317
Added:
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/DecisionTree.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/LeafNode.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/TreeNode.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/DecisionTreeBuilderMT.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/IDTreeBuilder.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/BooleanDomain.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/DBFactSet.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/Domain.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/DomainFactory.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/DomainSpec.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FSFactSet.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/Fact.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FactSet.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FactSetFactory.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/LiteralDomain.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/NumericDomain.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/OOFactSet.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/WorkingMemory.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/ObjectReader.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/RulePrinter.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/Util.java
Log:
new file system
Copied: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/DecisionTree.java (from rev 19315, labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/DecisionTree.java)
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/DecisionTree.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/DecisionTree.java 2008-03-30 02:50:49 UTC (rev 19317)
@@ -0,0 +1,606 @@
+package dt;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Hashtable;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+import dt.memory.Domain;
+import dt.memory.Fact;
+import dt.tools.Util;
+
+public class DecisionTree {
+
+ public long FACTS_READ = 0;
+
+ /* set of the attributes, their types */
+ private Hashtable<String, Domain<?>> domainSet;
+
+ /* the class of the objects */
+ private String className;
+
+ /* the target attribute */
+ private String target;
+
+ private TreeNode root;
+
+ /* all attributes that can be used during classification */
+ private ArrayList<String> attrsToClassify;
+
+ /**
+  * Creates a tree description without any domain, target or root yet.
+  *
+  * @param klass name of the class whose objects are classified; returned
+  *              unchanged by getName()
+  */
+ public DecisionTree(String klass) {
+     attrsToClassify = new ArrayList<String>();
+     domainSet = new Hashtable<String, Domain<?>>();
+     className = klass;
+ }
+
+ /**
+  * Finds the target value that occurs most often among the given facts.
+  * Ties are resolved in favour of the value listed first in the target
+  * domain.
+  *
+  * @param facts facts to tally
+  * @return the most frequent target value, or null when every count is zero
+  */
+ private Object getConsensus(List<Fact> facts) {
+     List<?> candidates = getPossibleValues(this.target);
+     Hashtable<Object, Integer> tally = getStatistics(facts, target);
+
+     Object best = null;
+     int bestCount = 0;
+     for (Object candidate : candidates) {
+         int count = tally.get(candidate).intValue();
+         if (count <= bestCount) {
+             continue;
+         }
+         bestCount = count;
+         best = candidate;
+     }
+     return best;
+ }
+
+ // *OPT* public double calculateGain(List<FactSet> facts, String
+ // attributeName) {
+ // I dont use
+ /**
+  * Information gain of splitting on an attribute: the information of the
+  * class distribution minus the value returned by getGain for that attribute.
+  *
+  * @param facts          facts under consideration
+  * @param facts_in_class number of facts per target value
+  * @param attributeName  attribute to evaluate
+  * @return the difference described above
+  */
+ public double calculateGain(List<Fact> facts,
+         Hashtable<Object, Integer> facts_in_class, String attributeName) {
+
+     double informationBefore = getInformation(facts_in_class, facts.size());
+     double informationAfter = getGain(facts, attributeName);
+     return informationBefore - informationAfter;
+ }
+
+ // *OPT* public double getGain(List<FactSet> facts, String attributeToSplit)
+ // {
+ /**
+  * Computes the weighted information of the target attribute after the facts
+  * are partitioned by the value of attributeToSplit. Despite the name this is
+  * the remaining information, not the gain: calculateGain() subtracts it from
+  * the information of the unsplit facts.
+  *
+  * Side effects: prints the attribute name and adds facts.size() to
+  * FACTS_READ.
+  *
+  * @param facts            facts to partition
+  * @param attributeToSplit name of a registered domain to partition on
+  * @return the value computed by getAttrInformation for the partition counts
+  */
+ public double getGain(List<Fact> facts, String attributeToSplit) {
+     System.out.println("What is the attributeToSplit? " + attributeToSplit);
+     List<?> attributeValues = getPossibleValues(attributeToSplit);
+     List<?> targetValues = getPossibleValues(getTarget());
+
+     // The per-value total has to be stored under the very key that
+     // getAttrInformation() reads it from, hence no local "sum" literal.
+     String attr_sum = Util.getSum();
+
+     /* one counter table per attribute value: a count per target value plus the total */
+     Hashtable<Object, Hashtable<Object, Integer>> facts_of_attribute = new Hashtable<Object, Hashtable<Object, Integer>>(
+             attributeValues.size());
+     for (Object attr : attributeValues) {
+         Hashtable<Object, Integer> counts = new Hashtable<Object, Integer>(
+                 targetValues.size() + 1);
+         for (Object t : targetValues) {
+             counts.put(t, 0);
+         }
+         counts.put(attr_sum, 0);
+         facts_of_attribute.put(attr, counts);
+     }
+
+     for (Fact f : facts) {
+         Object targetKey = f.getFieldValue(target);
+         Hashtable<Object, Integer> counts = facts_of_attribute.get(f
+                 .getFieldValue(attributeToSplit));
+         counts.put(targetKey, counts.get(targetKey).intValue() + 1);
+         counts.put(attr_sum, counts.get(attr_sum).intValue() + 1);
+     }
+     FACTS_READ += facts.size();
+
+     return getAttrInformation(facts_of_attribute, facts.size());
+ }
+
+ /*
+  * GLOBAL DISCRETIZATION, example:
+  *   target values     : a a b a b b b b b
+  *   attribute c       : 1 2 3 4 5 6 7 8 9
+  *   discretized value : 0 0 0 0 1 1 1 1 1
+  * i.e. the two categories "<5" and ">=5" (or "true" and "false").
+  */
+ /*
+ * The algorithm is basically (per attribute):
+ *
+ * 1. Sort the instances on the attribute of interest
+ *
+ * 2. Look for potential cut-points. Cut points are points in the sorted
+ * list above where the class labels change. Eg. if I had five instances
+ * with values for the attribute of interest and labels (1.0,A), (1.4,A),
+ * (1.7, A), (2.0,B), (3.0, B), (7.0, A), then there are only two cutpoints
+ * of interest: 1.85 and 5 (mid-way between the points where the classes
+ * change from A to B or vice versa).
+ *
+ * 3. Evaluate your favourite disparity measure (info gain, gain ratio, gini
+ * coefficient, chi-squared test) on each of the cutpoints, and choose the
+ * one with the maximum value (I think Fayyad and Irani used info gain).
+ *
+ * 4. Repeat recursively in both subsets (the ones less than and greater
+ * than the cutpoint) until either (a) the subset is pure i.e. only contains
+ * instances of a single class or (b) some stopping criterion is reached. I
+ * can't remember what stopping criteria they used.
+ */
+
+ // *OPT* public double getGain(List<FactSet> facts, String attributeToSplit)
+ /**
+  * Unfinished scan over the candidate cut points of a numeric attribute.
+  *
+  * The facts are walked pairwise; wherever two neighbours carry different
+  * target values the midpoint of their attribute values is a candidate cut
+  * point and the information of the resulting two-way partition is evaluated.
+  * Assumes the facts are sorted on attributeToSplit - TODO confirm with the
+  * caller.
+  *
+  * NOTE(review): the evaluated information is not used yet and the method
+  * still returns the placeholder 1.0; begin_index and end_index are unused.
+  *
+  * @param facts            facts to scan, at least two
+  * @param split_facts      must not be empty; otherwise unused here
+  * @param begin_index      unused
+  * @param end_index        unused
+  * @param facts_in_class   number of facts per target value over all facts
+  * @param attributeToSplit name of the numeric attribute
+  * @return always 1.0
+  * @throws IllegalArgumentException when facts has fewer than two elements or
+  *                                  split_facts is empty
+  */
+ public double getContinuousGain(List<Fact> facts,
+         List<Integer> split_facts, int begin_index, int end_index,
+         Hashtable<Object, Integer> facts_in_class, String attributeToSplit) {
+
+     System.out.println("What is the attributeToSplit? " + attributeToSplit);
+
+     // A library method must not terminate the JVM; report bad input instead.
+     if (facts.size() <= 1) {
+         throw new IllegalArgumentException(
+                 "at least two facts are needed to look for a cut point, got "
+                         + facts.size());
+     }
+     if (split_facts.size() < 1) {
+         throw new IllegalArgumentException("the list of splits is empty");
+     }
+
+     String targetAttr = getTarget();
+     List<?> targetValues = getPossibleValues(targetAttr);
+     String attr_sum = Util.getSum();
+
+     /*
+      * Two partitions: 0 = below the cut (initially empty), 1 = above the cut
+      * (initially holding every fact).
+      */
+     Hashtable<Object, Integer> below = new Hashtable<Object, Integer>(
+             targetValues.size() + 1);
+     Hashtable<Object, Integer> above = new Hashtable<Object, Integer>(
+             targetValues.size() + 1);
+     for (Object t : targetValues) {
+         below.put(t, 0);
+         above.put(t, facts_in_class.get(t));
+     }
+     below.put(attr_sum, 0);
+     above.put(attr_sum, facts.size());
+
+     Hashtable<Object, Hashtable<Object, Integer>> facts_of_attribute =
+             new Hashtable<Object, Hashtable<Object, Integer>>(Util.getDividingSize());
+     facts_of_attribute.put(Integer.valueOf(0), below);
+     facts_of_attribute.put(Integer.valueOf(1), above);
+
+     Iterator<Fact> f_ite = facts.iterator();
+     Fact f1 = f_ite.next();
+     while (f_ite.hasNext()) {
+         Fact f2 = f_ite.next();
+
+         // A cut between f1 and f2 leaves f1 below it, so f1 (not f2) moves
+         // from the upper to the lower partition; both totals follow.
+         Object movedClass = f1.getFieldValue(targetAttr);
+         below.put(movedClass, below.get(movedClass).intValue() + 1);
+         below.put(attr_sum, below.get(attr_sum).intValue() + 1);
+         above.put(movedClass, above.get(movedClass).intValue() - 1);
+         above.put(attr_sum, above.get(attr_sum).intValue() - 1);
+
+         // Class labels are compared by value; identity would treat equal
+         // labels held in distinct objects as a class change.
+         if (!movedClass.equals(f2.getFieldValue(targetAttr))) {
+             Number cp_i = (Number) f1.getFieldValue(attributeToSplit);
+             Number cp_i_next = (Number) f2.getFieldValue(attributeToSplit);
+             double cut_point = (cp_i.doubleValue() + cp_i_next.doubleValue()) / 2;
+
+             // TODO: keep the cut point with the best value; both results are
+             // currently discarded.
+             double info = getAttrInformation(facts_of_attribute, facts.size());
+         }
+
+         f1 = f2;
+     }
+
+     return 1.0;
+ }
+
+ /**
+  * Unfinished variant of getContinuousGain that is driven by a precomputed
+  * list of split positions. Each entry of split_facts is read as the index of
+  * a fact whose successor is expected to carry a different target value; the
+  * midpoint of the two attribute values is the candidate cut point.
+  *
+  * NOTE(review): the cut point is not evaluated yet, the last entry of
+  * split_facts is never visited by the loop, and the method returns the
+  * placeholder 1.0. begin_index, end_index and facts_in_class are unused.
+  *
+  * @param facts            facts to scan, at least two
+  * @param split_facts      indices into facts, at least one
+  * @param begin_index      unused
+  * @param end_index        unused
+  * @param facts_in_class   unused
+  * @param attributeToSplit name of the numeric attribute
+  * @return always 1.0
+  * @throws IllegalArgumentException when facts has fewer than two elements or
+  *                                  split_facts is empty
+  * @throws IllegalStateException    when a split index does not separate two
+  *                                  different target values
+  */
+ public double getContinuousGain_(List<Fact> facts,
+         List<Integer> split_facts, int begin_index, int end_index,
+         Hashtable<Object, Integer> facts_in_class, String attributeToSplit) {
+     System.out.println("What is the attributeToSplit? " + attributeToSplit);
+
+     // A library method must not terminate the JVM; report bad input instead.
+     if (facts.size() <= 1) {
+         throw new IllegalArgumentException(
+                 "at least two facts are needed to look for a cut point, got "
+                         + facts.size());
+     }
+     if (split_facts.size() < 1) {
+         throw new IllegalArgumentException("the list of splits is empty");
+     }
+
+     String targetAttr = getTarget();
+
+     Iterator<Integer> split_ite = split_facts.iterator();
+     int f1_index = split_ite.next().intValue();
+     while (split_ite.hasNext()) {
+         Fact f1 = facts.get(f1_index);
+         Fact f2 = facts.get(f1_index + 1);
+
+         // Compared by value: identity would let equal labels held in
+         // distinct objects slip through this sanity check.
+         if (f1.getFieldValue(targetAttr).equals(f2.getFieldValue(targetAttr))) {
+             throw new IllegalStateException("split index " + f1_index
+                     + " does not separate two classes, f1:" + f1 + " f2:" + f2);
+         }
+
+         Number cp_i = (Number) f1.getFieldValue(attributeToSplit);
+         Number cp_i_next = (Number) f2.getFieldValue(attributeToSplit);
+
+         // TODO: evaluate a disparity measure on the cut point; the value is
+         // currently discarded.
+         double cut_point = (cp_i.doubleValue() + cp_i_next.doubleValue()) / 2;
+
+         f1_index = split_ite.next().intValue();
+     }
+
+     return 1.0;
+ }
+
+ // *OPT* public double getInformation(List<FactSet> facts) {
+ /**
+  * Counts how many of the given facts carry each possible value of an
+  * attribute. Every value of the attribute's domain gets an entry, zero
+  * included. Adds facts.size() to FACTS_READ.
+  *
+  * @param facts  facts to count
+  * @param target name of the registered attribute to count by
+  * @return table from attribute value to number of facts
+  * @throws IllegalStateException when a fact carries a value that is not part
+  *                               of the attribute's domain
+  */
+ public Hashtable<Object, Integer> getStatistics(List<Fact> facts, String target) {
+
+     // The table is initialised from the same attribute the facts are keyed
+     // by; mixing the parameter with the field would break for any other
+     // attribute than the tree's own target.
+     List<?> targetValues = getPossibleValues(target);
+     Hashtable<Object, Integer> facts_in_class = new Hashtable<Object, Integer>(
+             targetValues.size());
+
+     for (Object t : targetValues) {
+         facts_in_class.put(t, 0);
+     }
+
+     for (Fact f : facts) {
+         Object key = f.getFieldValue(target);
+         Integer seen = facts_in_class.get(key);
+         if (seen == null) {
+             throw new IllegalStateException("value " + key
+                     + " is not in the domain of " + target);
+         }
+         facts_in_class.put(key, seen.intValue() + 1);
+     }
+     FACTS_READ += facts.size();
+     return facts_in_class;
+ }
+
+ // *OPT* public double getInformation(List<FactSet> facts) {
+ /**
+  * Returns the information value (entropy) of the facts with respect to the
+  * target attribute, a measure of the (im)purity of a collection of examples.
+  * The class counts are gathered with getStatistics on every call.
+  *
+  * @param facts
+  *            list of facts
+  * @return sum of -p * log2(p) over the target values, p being the share of
+  *         facts with that value; values with p == 0 contribute nothing
+  */
+ public double getInformation_old(List<Fact> facts) {
+
+     List<?> classes = getPossibleValues(this.target);
+     Hashtable<Object, Integer> classCounts = getStatistics(facts,
+             getTarget());
+     int total = facts.size();
+
+     double entropy = 0;
+     for (Object klass : classes) {
+         double p = (double) classCounts.get(klass).intValue() / (double) total;
+         if (p != 0.0) {
+             entropy += -1 * p * Util.log2(p);
+         }
+     }
+     return entropy;
+ }
+
+ /**
+  * Weighted information of a partition. Each inner table holds the class
+  * counts of one partition plus its total under the key Util.getSum(); every
+  * non-empty partition contributes its information weighted by its share of
+  * fact_size.
+  *
+  * @param facts_of_attribute partition key to counter table
+  * @param fact_size          number of facts over all partitions
+  * @return the weighted sum
+  */
+ public double getAttrInformation( Hashtable<Object, Hashtable<Object, Integer>> facts_of_attribute, int fact_size) {
+
+     Collection<Hashtable<Object, Integer>> partitions = facts_of_attribute.values();
+     String totalKey = Util.getSum();
+
+     double weighted = 0.0;
+     for (Hashtable<Object, Integer> partition : partitions) {
+         int partitionSize = partition.get(totalKey).intValue();
+         if (partitionSize <= 0) {
+             continue;
+         }
+         double weight = (double) partitionSize / (double) fact_size;
+         weighted += weight * getInformation(partition, partitionSize);
+     }
+     return weighted;
+ }
+
+ /**
+  * Information value of a count table: the sum of -p * log2(p) over all
+  * entries, where p is the entry's count divided by total_num_facts. Entries
+  * with p == 0 are skipped. Every entry of the table is treated as a count,
+  * whatever its key.
+  *
+  * @param facts_in_class  count per key
+  * @param total_num_facts divisor turning counts into shares
+  * @return the information value
+  */
+ public double getInformation(Hashtable<Object, Integer> facts_in_class, int total_num_facts) {
+
+     double entropy = 0;
+     for (Integer count : facts_in_class.values()) {
+         double p = (double) count.intValue() / (double) total_num_facts;
+         if (p != 0.0) {
+             entropy += -1 * p * Util.log2(p);
+         }
+     }
+     return entropy;
+ }
+
+ /**
+  * Declares the attribute to predict and takes it out of the list of
+  * attributes usable for classification.
+  *
+  * @param targetField name of the target attribute
+  */
+ public void setTarget(String targetField) {
+     this.target = targetField;
+     this.attrsToClassify.remove(targetField);
+ }
+
+ public void addDomain(Domain<?> domain) {
+ domainSet.put(domain.getName(), domain);
+ if (!domain.getName().equals(this.target))
+ attrsToClassify.add(domain.getName());
+
+ }
+
+ public List<?> getPossibleValues(String fieldName) {
+ return domainSet.get(fieldName).getValues();
+ }
+
+ /**
+  * @return the internal list of attribute names usable for classification
+  *         (not a copy)
+  */
+ public List<String> getAttributes() {
+     return this.attrsToClassify;
+ }
+
+ /** @return name of the target attribute, as given to setTarget */
+ public String getTarget() {
+     return this.target;
+ }
+
+ /** @return the class name this tree was created with */
+ public String getName() {
+     return this.className;
+ }
+
+ public Domain<?> getDomain(String key) {
+ return domainSet.get(key);
+ }
+
+ /** @return the root node handed to setRoot, or null when none was set */
+ public TreeNode getRoot() {
+     return this.root;
+ }
+
+ public void setRoot(TreeNode root) {
+ this.root = root;
+
+ }
+
+ /** @return the current value of the FACTS_READ counter */
+ public long getNumRead() {
+     return this.FACTS_READ;
+ }
+
+ @Override
+ public String toString() {
+ return "Facts scanned " + FACTS_READ + "\n" + root.toString();
+ }
+
+ /*
+ * **OPT int getTotalSize(List<FactSet> facts) {
+ *
+ * int num = 0; for(FactSet fs : facts) { num += fs.getSize(); }
+ *
+ * return num; }
+ */
+
+}
Copied: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/LeafNode.java (from rev 19045, labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/LeafNode.java)
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/LeafNode.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/LeafNode.java 2008-03-30 02:50:49 UTC (rev 19317)
@@ -0,0 +1,46 @@
+package dt;
+
+import dt.memory.Domain;
+import dt.tools.Util;
+
+
+/**
+ * Terminal node of a decision tree: holds the decided target value and a
+ * rank assigned by the builder. Children cannot be attached.
+ */
+public class LeafNode extends TreeNode {
+    /* the target value this leaf decides on */
+    private Object targetValue;
+    /* score set through setRank; 0.0 until then */
+    private double rank;
+
+    /**
+     * @param targetDomain domain of the target attribute
+     * @param value        decided target value
+     */
+    public LeafNode(Domain<?> targetDomain, Object value) {
+        super(targetDomain);
+        targetValue = value;
+    }
+
+    /** Always fails: a leaf has no children. */
+    public void addNode(Object attributeValue, TreeNode node) {
+        throw new RuntimeException("cannot add Node to a leaf node");
+    }
+
+    /** Always fails: a leaf has no children. */
+    public void addLeaf(Object attributeValue, String target, Boolean targetValue) {
+        throw new RuntimeException("cannot add Leaf to a final node");
+    }
+
+    /** @return the decided target value */
+    public Object getValue() {
+        return this.targetValue;
+    }
+
+    public double getRank() {
+        return this.rank;
+    }
+
+    public void setRank(double rank) {
+        this.rank = rank;
+    }
+
+    public String toString() {
+        return "DECISION -> " + targetValue.toString();
+    }
+
+    /**
+     * Appends this leaf, indented by depth + 1 tabs, to buf.
+     *
+     * @return the whole content of buf after appending
+     */
+    public String toString(int depth, StringBuffer buf) {
+        buf.append(Util.ntimes("\t", depth + 1))
+                .append("DECISION -> " + targetValue.toString() + "\n");
+        return buf.toString();
+    }
+}
Copied: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/TreeNode.java (from rev 19045, labs/jbossrules/contrib/machinelearning/decisiontree/src/id3/TreeNode.java)
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/TreeNode.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/TreeNode.java 2008-03-30 02:50:49 UTC (rev 19317)
@@ -0,0 +1,61 @@
+package dt;
+import java.util.Hashtable;
+
+import dt.memory.Domain;
+import dt.tools.Util;
+
+
+public class TreeNode {
+
+ private Domain<?> domain;
+ private Hashtable<Object, TreeNode> children;
+
+
+ public TreeNode(Domain<?> domain)
+ {
+ this.domain = domain;
+ this.children = new Hashtable<Object, TreeNode>();
+ }
+
+
+ public void addNode(Object attributeValue, TreeNode node) {
+ children.put(attributeValue, node);
+ }
+
+ public Domain<?> getDomain() {
+ return domain;
+ }
+
+ public void setDomain(Domain<?> domain) {
+ this.domain = domain;
+ }
+
+ public Hashtable<Object, TreeNode> getChildren() {
+ return children;
+ }
+
+ public void setChildren(Hashtable<Object, TreeNode> children) {
+ this.children = children;
+ }
+
+ public String toString() {
+ return toString(1, new StringBuffer());
+ }
+
+ public String toString(int depth, StringBuffer buf) {
+ if (domain != null) {
+ buf.append(Util.ntimes("\t", depth));
+ buf.append(Util.ntimes("***",1));
+ buf.append( domain.getName() + " \n");
+ for (Object attributeValue : children.keySet()) {
+ buf.append(Util.ntimes("\t", depth + 1));
+ buf.append("+" + attributeValue );
+ buf.append("\n");
+ TreeNode child = children.get(attributeValue);
+ buf.append(child.toString(depth + 1, new StringBuffer()));
+ }
+ }
+ return buf.toString();
+ }
+
+}
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/DecisionTreeBuilderMT.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/DecisionTreeBuilderMT.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/DecisionTreeBuilderMT.java 2008-03-30 02:50:49 UTC (rev 19317)
@@ -0,0 +1,328 @@
+package dt.builder;
+
+
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Hashtable;
+import java.util.Iterator;
+import java.util.List;
+
+import dt.DecisionTree;
+import dt.LeafNode;
+import dt.TreeNode;
+import dt.memory.Domain;
+import dt.memory.Fact;
+import dt.memory.FactSet;
+import dt.memory.OOFactSet;
+import dt.memory.WorkingMemory;
+import dt.tools.Util;
+
+public class DecisionTreeBuilderMT {
+
+ class MyThread extends Thread {
+ DecisionTreeBuilderMT builder;
+ DecisionTree dt;
+ List<Fact> facts;
+ List<String> attributeNames;
+ TreeNode currentNode = null;
+ Object value = null;
+ TreeNode result = null;
+ @Override
+ public void run() {
+ result = builder.id3(dt, facts, attributeNames);
+ currentNode.addNode(value, result);
+ }
+ }
+
+ MyThread helper;
+ private int FUNC_CALL = 0;
+ private int num_fact_processed = 0;
+
+ /*
+ * treebuilder.execute(workingmemory, classtoexecute, attributestoprocess)
+
+ foreach factset in workingmemory
+ if classtoexecute.isAssignableFrom( factset.class )
+ internaladd(factset)
+
+ internalprocess(attributestoprocess)
+ */
+
+ public DecisionTree build(WorkingMemory wm, Class<?> klass, String targetField, Collection<String> workingAttributes) {
+
+ DecisionTree dt = new DecisionTree(klass.getName());
+// **OPT List<FactSet> facts = new ArrayList<FactSet>();
+ ArrayList<Fact> facts = new ArrayList<Fact>();
+ FactSet klass_fs = null;
+ Iterator<FactSet> it_fs= wm.getFactsets();
+ while (it_fs.hasNext()) {
+ FactSet fs = it_fs.next();
+ if (fs instanceof OOFactSet) {
+ if (klass.isAssignableFrom(((OOFactSet)fs).getFactClass())) {
+// **OPT facts.add(fs);
+ ((OOFactSet)fs).assignTo(facts); // adding all facts of fs to "facts"
+
+ if (klass == ((OOFactSet)fs).getFactClass()) {
+ klass_fs = fs;
+ }
+ }
+ } else if (klass.getName()== fs.getClassName()) {
+
+ }
+
+ }
+ dt.FACTS_READ += facts.size();
+
+ num_fact_processed = facts.size();
+
+ if (workingAttributes != null)
+ for (String attr: workingAttributes) {
+ dt.addDomain(klass_fs.getDomain(attr));
+ }
+ else
+ for (Domain<?> d: klass_fs.getDomains())
+ dt.addDomain(d);
+
+ dt.setTarget(targetField);
+
+ ArrayList<String> attrs = new ArrayList<String>(dt.getAttributes());
+ Collections.sort(attrs);
+
+ helper = new MyThread();
+// System.out.println("IS ALIVE"+helper.isAlive());
+ TreeNode root = id3(dt, facts, attrs);
+ try {
+ helper.join();
+ } catch (InterruptedException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ }
+ dt.setRoot(root);
+
+ return dt;
+ }
+
+ public DecisionTree build(WorkingMemory wm, String klass, String targetField, Collection<String> workingAttributes) {
+
+ DecisionTree dt = new DecisionTree(klass);
+// **OPT List<FactSet> facts = new ArrayList<FactSet>();
+ ArrayList<Fact> facts = new ArrayList<Fact>();
+ FactSet klass_fs = null;
+ Iterator<FactSet> it_fs= wm.getFactsets();
+ while (it_fs.hasNext()) {
+ FactSet fs = it_fs.next();
+ if (klass == fs.getClassName()) {
+// **OPT facts.add(fs);
+ fs.assignTo(facts); // adding all facts of fs to "facts"
+
+ klass_fs = fs;
+ break;
+ }
+ }
+ dt.FACTS_READ += facts.size();
+ num_fact_processed = facts.size();
+
+ if (workingAttributes != null)
+ for (String attr: workingAttributes) {
+ System.out.println("Bok degil "+ attr);
+ if (attr =="aratio") {
+ System.out.println("Bok");
+ System.exit(0);
+ }
+ dt.addDomain(klass_fs.getDomain(attr));
+ }
+ else
+ for (Domain<?> d: klass_fs.getDomains())
+ dt.addDomain(d);
+
+ dt.setTarget(targetField);
+
+ ArrayList<String> attrs = new ArrayList<String>(dt.getAttributes());
+ Collections.sort(attrs);
+
+ helper = new MyThread();
+ //System.out.println("IS ALIVE"+helper.isAlive());
+ TreeNode root = id3(dt, facts, attrs);
+ try {
+ helper.join();
+ } catch (InterruptedException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ }
+ dt.setRoot(root);
+
+ return dt;
+ }
+
+ /*
+ function ID3
+ Input: (R: a set of non-target attributes,
+ C: the target attribute,
+ S: a training set) returns a decision tree;
+ begin
+ If S is empty, return a single node with
+ value Failure;
+ If S consists of records all with the same
+ value for the target attribute,
+ return a single leaf node with that value;
+ If R is empty,
+ then return a single node with the value of the most frequent of the values of the target attribute
+ that are found in records of S; [in that case there may be be errors,
+ examples that will be improperly classified];
+ Let A be the attribute with largest
+ Gain(A,S) among attributes in R;
+ Let {aj| j=1,2, .., m} be the values of attribute A;
+ Let {Sj| j=1,2, .., m} be the subsets of S consisting respectively of records with value aj for A;
+ Return a tree with root labeled A and arcs labeled a1, a2, .., am going respectively
+ to the trees (ID3(R-{A}, C, S1), ID3(R-{A}, C, S2),.....,ID3(R-{A}, C, Sm);
+ Recursively apply ID3 to subsets {Sj| j=1,2, .., m} until they are empty
+ end
+
+
+ */
+ //*OPT* private TreeNode decisionTreeLearning(List<FactSet> facts,
+ //*OPT* List<String> attributeNames) {
+ //*OPT* private TreeNode decisionTreeLearning(List<FactSet> facts,
+ //*OPT* List<String> attributeNames) {
+ /**
+  * Recursive ID3 step: returns a leaf when the facts agree on one target
+  * value or no attribute is left, otherwise a node for the attribute with the
+  * greatest gain and one child per attribute value.
+  *
+  * The first non-empty branch met while the helper thread has not been
+  * started yet is handed to the helper; every other branch is built in the
+  * calling thread. The helper attaches its subtree itself, so the caller of
+  * the outermost id3 has to join it before using the tree.
+  *
+  * NOTE(review): FUNC_CALL and dt.FACTS_READ are updated from both threads
+  * without synchronisation, so their final values may be off.
+  *
+  * @param dt             tree description (domains, target)
+  * @param facts          facts reaching this node, must not be empty
+  * @param attributeNames attributes still available for splitting
+  * @return the node built for these facts
+  * @throws RuntimeException when facts is empty
+  */
+ private TreeNode id3(DecisionTree dt, List<Fact> facts, List<String> attributeNames) {
+
+     FUNC_CALL++;
+     if (facts.isEmpty()) {
+         throw new RuntimeException("Nothing to classify, factlist is empty");
+     }
+
+     /* distribution of the target values among the facts */
+     String target = dt.getTarget();
+     List<?> targetValues = dt.getPossibleValues(target);
+     Hashtable<Object, Integer> stats = dt.getStatistics(facts, target);
+
+     int winner_vote = 0;
+     int num_supporters = 0;
+     Object winner = null;
+     for (Object key : targetValues) {
+         int num_in_class = stats.get(key).intValue();
+         if (num_in_class > 0) {
+             num_supporters++;
+         }
+         if (num_in_class > winner_vote) {
+             winner_vote = num_in_class;
+             winner = key;
+         }
+     }
+
+     /* all facts are classified to the same value */
+     if (num_supporters == 1) {
+         LeafNode classifiedNode = new LeafNode(dt.getDomain(target), winner);
+         classifiedNode.setRank((double) facts.size() / (double) num_fact_processed);
+         return classifiedNode;
+     }
+
+     /* no attribute left to split on: fall back to the majority value */
+     if (attributeNames.isEmpty()) {
+         LeafNode noAttributeLeftNode = new LeafNode(dt.getDomain(target), winner);
+         noAttributeLeftNode.setRank((double) winner_vote / (double) num_fact_processed);
+         return noAttributeLeftNode;
+     }
+
+     /* id3 starts */
+     String chosenAttribute = attributeWithGreatestGain(dt, facts, stats, attributeNames);
+
+     System.out.println(Util.ntimes("*", 20)+" 1st best attr: "+ chosenAttribute);
+
+     TreeNode currentNode = new TreeNode(dt.getDomain(chosenAttribute));
+
+     List<?> attributeValues = dt.getPossibleValues(chosenAttribute);
+     Hashtable<Object, List<Fact>> filtered_facts = splitFacts(facts, chosenAttribute, attributeValues);
+     dt.FACTS_READ += facts.size();
+
+     for (Object value : attributeValues) {
+         List<Fact> branchFacts = filtered_facts.get(value);
+
+         if (branchFacts.isEmpty()) {
+             /* no example for this value: predict the majority of the parent */
+             LeafNode majorityNode = new LeafNode(dt.getDomain(target), winner);
+             majorityNode.setRank(0.0);
+             currentNode.addNode(value, majorityNode);
+             continue;
+         }
+
+         ArrayList<String> attributeNames_copy = new ArrayList<String>(attributeNames);
+         attributeNames_copy.remove(chosenAttribute);
+
+         // A Thread can be started only once. isAlive() is false again after
+         // the helper has finished, and a second start() would throw
+         // IllegalThreadStateException, so test for the NEW state instead.
+         if (helper.getState() == Thread.State.NEW) {
+             helper.attributeNames = attributeNames_copy;
+             helper.builder = this;
+             helper.dt = dt;
+             helper.facts = branchFacts;
+             helper.value = value;
+             helper.currentNode = currentNode;
+             helper.start();
+             System.out.println("helper thread launched");
+         } else {
+             TreeNode newNode = id3(dt, branchFacts, attributeNames_copy);
+             currentNode.addNode(value, newNode);
+         }
+     }
+
+     return currentNode;
+ }
+
+ //String chooseAttribute(List<FactSet> facts, List<String> attrs) {
+ /**
+  * Evaluates every attribute and returns the one with the largest gain,
+  * printing each gain on the way. The first attribute is returned when no
+  * gain exceeds zero.
+  *
+  * @param dt             tree used for the information computations
+  * @param facts          facts under consideration
+  * @param facts_in_class number of facts per target value
+  * @param attrs          candidate attributes, must not be empty
+  * @return name of the chosen attribute
+  */
+ public String attributeWithGreatestGain(DecisionTree dt, List<Fact> facts, Hashtable<Object, Integer> facts_in_class, List<String> attrs) {
+
+     final double baseline = dt.getInformation(facts_in_class, facts.size());
+     String best = attrs.get(0);
+     double bestGain = 0.0;
+
+     Iterator<String> candidates = attrs.iterator();
+     while (candidates.hasNext()) {
+         String candidate = candidates.next();
+         double candidateGain = baseline - dt.getGain(facts, candidate);
+         System.out.println("Attribute: "+candidate +" the gain: "+candidateGain);
+         if (candidateGain > bestGain) {
+             bestGain = candidateGain;
+             best = candidate;
+         }
+     }
+
+     return best;
+ }
+
+ /**
+  * Distributes the facts over one list per attribute value. Every value in
+  * attributeValues gets a list, possibly empty.
+  *
+  * @param facts           facts to distribute
+  * @param attributeName   attribute whose value selects the list
+  * @param attributeValues all values the attribute can take
+  * @return table from attribute value to the facts carrying it
+  */
+ public Hashtable<Object, List<Fact> > splitFacts(List<Fact> facts, String attributeName,
+         List<?> attributeValues) {
+     int valueCount = attributeValues.size();
+     Hashtable<Object, List<Fact> > buckets = new Hashtable<Object, List<Fact> >(valueCount);
+     for (int i = 0; i < valueCount; i++) {
+         buckets.put(attributeValues.get(i), new ArrayList<Fact>());
+     }
+     for (Fact fact : facts) {
+         Object bucketKey = fact.getFieldValue(attributeName);
+         List<Fact> bucket = buckets.get(bucketKey);
+         bucket.add(fact);
+     }
+     return buckets;
+ }
+
+ /**
+  * Diagnostic helper: prints the information value of the facts and the
+  * attribute with the greatest gain.
+  *
+  * @param dt    tree description
+  * @param facts facts to analyse
+  */
+ public void testEntropy(DecisionTree dt, List<Fact> facts) {
+     Hashtable<Object, Integer> classCounts = dt.getStatistics(facts, dt.getTarget());
+
+     // entropy of the unsplit facts
+     double entropy = dt.getInformation(classCounts, facts.size());
+     System.out.println("initial_information: "+ entropy);
+
+     String bestAttribute = attributeWithGreatestGain(dt, facts, classCounts, dt.getAttributes());
+     System.out.println("best attr: "+ bestAttribute);
+ }
+
+ /** @return the current value of the FUNC_CALL counter, which id3 increments on each call */
+ public int getNumCall() {
+     return this.FUNC_CALL;
+ }
+
+}
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/IDTreeBuilder.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/IDTreeBuilder.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/IDTreeBuilder.java 2008-03-30 02:50:49 UTC (rev 19317)
@@ -0,0 +1,421 @@
+package dt.builder;
+
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Hashtable;
+import java.util.Iterator;
+import java.util.List;
+
+import dt.DecisionTree;
+import dt.LeafNode;
+import dt.TreeNode;
+
+import dt.memory.WorkingMemory;
+import dt.memory.Fact;
+import dt.memory.FactSet;
+import dt.memory.OOFactSet;
+import dt.memory.Domain;
+import dt.tools.Util;
+
+public class IDTreeBuilder implements DecisionTreeBuilder {
+
+ /**
+  * Thread that grows one subtree with {@code id3} and attaches it to
+  * {@code currentNode} under {@code value}. All input fields start out
+  * unset and must be assigned before the thread is started.
+  */
+ class MyThread extends Thread {
+     IDTreeBuilder builder;
+     DecisionTree dt;
+     List<Fact> facts;
+     List<String> attributeNames;
+     TreeNode currentNode;
+     Object value;
+     /** Subtree produced by {@link #run()}; null until the run completes. */
+     TreeNode result;
+
+     @Override
+     public void run() {
+         this.result = this.builder.id3(this.dt, this.facts, this.attributeNames);
+         this.currentNode.addNode(this.value, this.result);
+     }
+ }
+
+ // Slot for a helper thread; not referenced by any method of this class.
+ MyThread helper;
+ // Number of id3/c4_5 invocations so far; exposed through getNumCall().
+ private int FUNC_CALL = 0;
+ // Size of the fact list gathered by the latest build(); denominator of the leaf ranks.
+ private int num_fact_processed = 0;
+
+ /*
+ * treebuilder.execute(workingmemory, classtoexecute, attributestoprocess)
+
+ foreach factset in workingmemory
+ if classtoexecute.isAssignableFrom( factset.class )
+ internaladd(factset)
+
+ internalprocess(attributestoprocess)
+ */
+
+ public DecisionTree build(WorkingMemory wm, Class<?> klass, String targetField, Collection<String> workingAttributes) {
+
+ DecisionTree dt = new DecisionTree(klass.getName());
+// **OPT List<FactSet> facts = new ArrayList<FactSet>();
+ ArrayList<Fact> facts = new ArrayList<Fact>();
+ FactSet klass_fs = null;
+ Iterator<FactSet> it_fs= wm.getFactsets();
+ while (it_fs.hasNext()) {
+ FactSet fs = it_fs.next();
+ if (fs instanceof OOFactSet) {
+ if (klass.isAssignableFrom(((OOFactSet) fs).getFactClass())) {
+// **OPT facts.add(fs);
+ fs.assignTo(facts); // adding all facts of fs to "facts
+ }
+ }
+ if (klass.getName() == fs.getClassName()) {
+ klass_fs = fs;
+ }
+ }
+ dt.FACTS_READ += facts.size();
+
+ num_fact_processed = facts.size();
+
+ if (workingAttributes != null)
+ for (String attr: workingAttributes) {
+ dt.addDomain(klass_fs.getDomain(attr));
+ }
+ else
+ for (Domain<?> d: klass_fs.getDomains())
+ dt.addDomain(d);
+
+ dt.setTarget(targetField);
+
+ ArrayList<String> attrs = new ArrayList<String>(dt.getAttributes());
+ Collections.sort(attrs);
+
+ TreeNode root = id3(dt, facts, attrs);
+ dt.setRoot(root);
+
+ return dt;
+ }
+
+
+ public DecisionTree build(WorkingMemory wm, String klass, String targetField, Collection<String> workingAttributes) {
+
+ DecisionTree dt = new DecisionTree(klass);
+// **OPT List<FactSet> facts = new ArrayList<FactSet>();
+ ArrayList<Fact> facts = new ArrayList<Fact>();
+ FactSet klass_fs = null;
+ Iterator<FactSet> it_fs= wm.getFactsets();
+ while (it_fs.hasNext()) {
+ FactSet fs = it_fs.next();
+ if (klass == fs.getClassName()) {
+// **OPT facts.add(fs);
+ fs.assignTo(facts); // adding all facts of fs to "facts"
+
+ klass_fs = fs;
+ break;
+ }
+ }
+ dt.FACTS_READ += facts.size();
+ num_fact_processed = facts.size();
+
+ if (workingAttributes != null)
+ for (String attr: workingAttributes) {
+ System.out.println("Bok degil "+ attr);
+ dt.addDomain(klass_fs.getDomain(attr));
+ }
+ else
+ for (Domain<?> d: klass_fs.getDomains())
+ dt.addDomain(d);
+
+ dt.setTarget(targetField);
+
+ ArrayList<String> attrs = new ArrayList<String>(dt.getAttributes());
+ Collections.sort(attrs);
+
+ TreeNode root = id3(dt, facts, attrs);
+ dt.setRoot(root);
+
+ return dt;
+ }
+ //*OPT* private TreeNode decisionTreeLearning(List<FactSet> facts,
+ //*OPT* List<String> attributeNames) {
+ /**
+  * Recursively grows a subtree for the given facts.
+  * <p>
+  * A leaf is returned when all facts share one target value or when no
+  * attribute is left. Otherwise the facts are split on the attribute picked
+  * by attributeWithGreatestGain_discrete and every non-empty partition is
+  * processed recursively. A value without facts receives a leaf holding the
+  * most frequent target value of this node, with rank 0.0.
+  *
+  * @param dt tree being built; supplies target, domains and statistics
+  * @param facts facts that reach this node
+  * @param attributeNames attributes still available for splitting
+  * @return root of the subtree for these facts
+  * @throws RuntimeException if {@code facts} is empty
+  */
+ private TreeNode id3(DecisionTree dt, List<Fact> facts, List<String> attributeNames) {
+
+     FUNC_CALL++;
+     if (facts.size() == 0) {
+         throw new RuntimeException("Nothing to classify, factlist is empty");
+     }
+
+     // Count the facts per target value and remember the most frequent value.
+     Hashtable<Object, Integer> classCounts = dt.getStatistics(facts, dt.getTarget());
+     Object majorityClass = null;
+     int majorityCount = 0;
+     int classesPresent = 0;
+     for (Object candidate : classCounts.keySet()) {
+         int count = classCounts.get(candidate).intValue();
+         if (count > 0) {
+             classesPresent++;
+         }
+         if (count > majorityCount) {
+             majorityCount = count;
+             majorityClass = candidate;
+         }
+     }
+
+     // Every fact has the same target value: nothing left to decide.
+     if (classesPresent == 1) {
+         LeafNode pureLeaf = new LeafNode(dt.getDomain(dt.getTarget()), majorityClass);
+         pureLeaf.setRank((double) facts.size() / (double) num_fact_processed);
+         return pureLeaf;
+     }
+
+     // No attribute left to split on: answer with the majority value.
+     if (attributeNames.size() == 0) {
+         LeafNode exhaustedLeaf = new LeafNode(dt.getDomain(dt.getTarget()), majorityClass);
+         exhaustedLeaf.setRank((double) majorityCount / (double) num_fact_processed);
+         return exhaustedLeaf;
+     }
+
+     String splitAttribute = attributeWithGreatestGain_discrete(dt, facts, classCounts, attributeNames);
+
+     System.out.println(Util.ntimes("*", 20)+" 1st best attr: "+ splitAttribute);
+
+     TreeNode node = new TreeNode(dt.getDomain(splitAttribute));
+
+     List<?> branchValues = dt.getPossibleValues(splitAttribute);
+     Hashtable<Object, List<Fact> > partitions = splitFacts(facts, splitAttribute, branchValues);
+     dt.FACTS_READ += facts.size();
+
+     for (Object branchValue : branchValues) {
+         List<Fact> subset = partitions.get(branchValue);
+         if (subset.isEmpty()) {
+             // No example for this value: fall back to the majority of the parent.
+             LeafNode majorityLeaf = new LeafNode(dt.getDomain(dt.getTarget()), majorityClass);
+             majorityLeaf.setRank(0.0);
+             node.addNode(branchValue, majorityLeaf);
+             continue;
+         }
+         // Each child works on its own list without the attribute used here.
+         ArrayList<String> remaining = new ArrayList<String>(attributeNames);
+         remaining.remove(splitAttribute);
+         TreeNode child = id3(dt, subset, remaining);
+         node.addNode(branchValue, child);
+     }
+
+     return node;
+ }
+
+/**
+ * Recursively grows a subtree like id3, but chooses the split attribute with
+ * attributeWithGreatestGain, which also evaluates non-discrete attributes.
+ * <p>
+ * A leaf is returned when all facts share one target value or when no
+ * attribute is left. A value without facts receives a leaf holding the most
+ * frequent target value of this node, with rank 0.0.
+ * <p>
+ * NOTE(review): the facts are still partitioned by exact attribute value via
+ * splitFacts; whether that is adequate for a continuous attribute depends on
+ * what dt.getPossibleValues returns for it -- confirm.
+ *
+ * @param dt tree being built; supplies target, domains and statistics
+ * @param facts facts that reach this node
+ * @param attributeNames attributes still available for splitting
+ * @return root of the subtree for these facts
+ * @throws RuntimeException if {@code facts} is empty
+ */
+private TreeNode c4_5(DecisionTree dt, List<Fact> facts, List<String> attributeNames) {
+
+    FUNC_CALL++;
+    if (facts.size() == 0) {
+        throw new RuntimeException("Nothing to classify, factlist is empty");
+    }
+
+    /* let's get the statistics of the results */
+    Hashtable<Object, Integer> stats = dt.getStatistics(facts, dt.getTarget());
+    Collection<Object> targetValues = stats.keySet();
+    int winner_vote = 0;
+    int num_supporters = 0;
+    Object winner = null;
+    for (Object key : targetValues) {
+        int num_in_class = stats.get(key).intValue();
+        if (num_in_class > 0) {
+            num_supporters++;
+        }
+        if (num_in_class > winner_vote) {
+            winner_vote = num_in_class;
+            winner = key;
+        }
+    }
+
+    /* if all elements are classified to the same value */
+    if (num_supporters == 1) {
+        LeafNode classifiedNode = new LeafNode(dt.getDomain(dt.getTarget()), winner);
+        classifiedNode.setRank((double) facts.size() / (double) num_fact_processed);
+        return classifiedNode;
+    }
+
+    /* if there is no attribute left in order to continue */
+    if (attributeNames.size() == 0) {
+        /* a heuristic for the leaf classification: the majority value */
+        LeafNode noAttributeLeftNode = new LeafNode(dt.getDomain(dt.getTarget()), winner);
+        noAttributeLeftNode.setRank((double) winner_vote / (double) num_fact_processed);
+        return noAttributeLeftNode;
+    }
+
+    String chosenAttribute = attributeWithGreatestGain(dt, facts, stats, attributeNames);
+
+    System.out.println(Util.ntimes("*", 20)+" 1st best attr: "+ chosenAttribute);
+
+    TreeNode currentNode = new TreeNode(dt.getDomain(chosenAttribute));
+
+    List<?> attributeValues = dt.getPossibleValues(chosenAttribute);
+    Hashtable<Object, List<Fact> > filtered_facts = splitFacts(facts, chosenAttribute, attributeValues);
+    dt.FACTS_READ += facts.size();
+
+    for (int i = 0; i < attributeValues.size(); i++) {
+        Object value = attributeValues.get(i);
+
+        ArrayList<String> attributeNames_copy = new ArrayList<String>(attributeNames);
+        attributeNames_copy.remove(chosenAttribute);
+
+        if (filtered_facts.get(value).isEmpty()) {
+            /* no example for this value: use the majority of this node */
+            LeafNode majorityNode = new LeafNode(dt.getDomain(dt.getTarget()), winner);
+            majorityNode.setRank(0.0);
+            currentNode.addNode(value, majorityNode);
+        } else {
+            // Recurse with c4_5 itself; calling id3 here would drop the
+            // continuous-attribute handling below the first level.
+            TreeNode newNode = c4_5(dt, filtered_facts.get(value), attributeNames_copy);
+            currentNode.addNode(value, newNode);
+        }
+    }
+
+    return currentNode;
+}
+
+ //String chooseAttribute(List<FactSet> facts, List<String> attrs) {
+ /**
+  * Selects the attribute with the largest information gain, evaluating both
+  * discrete and non-discrete attributes. For a non-discrete attribute the
+  * facts are sorted by that attribute and the gain is computed over the
+  * split points returned by getSplitPoints. The first attribute is returned
+  * when no attribute has a strictly positive gain.
+  * <p>
+  * Side effect: {@code facts} is sorted in place for every non-discrete
+  * attribute examined.
+  *
+  * @param dt tree that supplies domains and the gain computations
+  * @param facts facts to evaluate; may be reordered by this call
+  * @param facts_in_class number of facts per target value
+  * @param attrs candidate attribute names; must contain at least one entry
+  * @return name of the attribute with the greatest gain
+  */
+ public String attributeWithGreatestGain(DecisionTree dt, List<Fact> facts,
+         Hashtable<Object, Integer> facts_in_class, List<String> attrs) {
+
+     final double baseInfo = dt.getInformation(facts_in_class, facts.size());
+     String best = attrs.get(0);
+     double bestGain = 0.0;
+
+     for (String candidate : attrs) {
+         final double gain;
+         if (!dt.getDomain(candidate).isDiscrete()) {
+             // Order the facts by the candidate, then look for target changes.
+             final int from = 0;
+             final int to = facts.size();
+             Collections.sort(facts, new FactNumericAttributeComparator(candidate));
+             List<Integer> splitPoints = getSplitPoints(facts, dt.getTarget());
+             gain = baseInfo - dt.getContinuousGain(facts, splitPoints,
+                     from, to,
+                     facts_in_class, candidate);
+         } else {
+             gain = baseInfo - dt.getGain(facts, candidate);
+         }
+
+         System.out.println("Attribute: "+candidate +" the gain: "+gain);
+         if (gain > bestGain) {
+             best = candidate;
+             bestGain = gain;
+         }
+     }
+
+     return best;
+ }
+ /*
+ * id3 uses that function because it can not classify continuous attributes
+ */
+
+ /**
+  * Selects the discrete attribute with the largest information gain.
+  * Non-discrete attributes are reported on stderr and skipped. When no
+  * discrete attribute has a strictly positive gain, the first discrete
+  * attribute is returned, so a skipped attribute is never chosen while a
+  * discrete one exists.
+  * <p>
+  * If the list holds no discrete attribute at all, the first attribute is
+  * returned, which keeps the result non-null for existing callers.
+  *
+  * @param dt tree that supplies domains and the gain computations
+  * @param facts facts to evaluate
+  * @param facts_in_class number of facts per target value
+  * @param attrs candidate attribute names; must contain at least one entry
+  * @return name of the chosen attribute
+  */
+ public String attributeWithGreatestGain_discrete(DecisionTree dt, List<Fact> facts,
+         Hashtable<Object, Integer> facts_in_class, List<String> attrs) {
+
+     double dt_info = dt.getInformation(facts_in_class, facts.size());
+     double greatestGain = 0.0;
+     String attributeWithGreatestGain = null;
+     for (String attr : attrs) {
+         if (!dt.getDomain(attr).isDiscrete()) {
+             System.err.println("Ignoring the attribute:" +attr+ " the id3 can not classify continuous attributes");
+             continue;
+         }
+         double gain = dt_info - dt.getGain(facts, attr);
+         System.out.println("Attribute: " + attr + " the gain: " + gain);
+         if (attributeWithGreatestGain == null) {
+             // The first usable attribute is the fallback for the all-zero-gain case.
+             attributeWithGreatestGain = attr;
+         }
+         if (gain > greatestGain) {
+             greatestGain = gain;
+             attributeWithGreatestGain = attr;
+         }
+     }
+
+     if (attributeWithGreatestGain == null) {
+         // No discrete attribute available: keep the historical result.
+         attributeWithGreatestGain = attrs.get(0);
+     }
+     return attributeWithGreatestGain;
+ }
+
+ /**
+  * Finds the positions in an ordered fact list where the target value
+  * changes between two neighbouring facts. The recorded index is that of
+  * the earlier fact of each differing pair.
+  *
+  * @param facts facts in the order to scan; may be empty
+  * @param target name of the target attribute
+  * @return indices after which the target value changes; empty when the
+  *         list has fewer than two facts
+  */
+ private List<Integer> getSplitPoints(List<Fact> facts, String target) {
+     List<Integer> splits = new ArrayList<Integer>();
+     Iterator<Fact> it_f = facts.iterator();
+     if (!it_f.hasNext()) {
+         return splits;
+     }
+     Object previous = it_f.next().getFieldValue(target);
+     int index = 0;
+     while (it_f.hasNext()) {
+         Object current = it_f.next().getFieldValue(target);
+         // Compare by value: equal target values are not guaranteed to be the same object.
+         boolean same = (previous == null) ? current == null : previous.equals(current);
+         if (!same) {
+             splits.add(Integer.valueOf(index));
+         }
+         previous = current;
+         index++;
+     }
+     return splits;
+ }
+
+
+ /**
+  * Partitions the facts by their value of the given attribute.
+  * One (possibly empty) list is created for every entry of
+  * {@code attributeValues}; each fact is appended to the list of its value.
+  *
+  * @param facts facts to partition
+  * @param attributeName attribute whose value selects the partition
+  * @param attributeValues all values the attribute is expected to take
+  * @return table from attribute value to the facts carrying that value
+  * @throws IllegalArgumentException if a fact has no value for the attribute
+  *         or a value that is not listed in {@code attributeValues}
+  */
+ public Hashtable<Object, List<Fact> > splitFacts(List<Fact> facts, String attributeName,
+         List<?> attributeValues) {
+     Hashtable<Object, List<Fact> > factLists = new Hashtable<Object, List<Fact> >(attributeValues.size());
+     for (Object v : attributeValues) {
+         factLists.put(v, new ArrayList<Fact>());
+     }
+     for (Fact f : facts) {
+         Object value = f.getFieldValue(attributeName);
+         // Hashtable.get rejects a null key and returns null for an unlisted
+         // value; report both cases explicitly instead of a bare NullPointerException.
+         List<Fact> bucket = (value == null) ? null : factLists.get(value);
+         if (bucket == null) {
+             throw new IllegalArgumentException("Fact [" + f + "] has value " + value
+                     + " for attribute " + attributeName
+                     + ", which is not one of the expected values " + attributeValues);
+         }
+         bucket.add(f);
+     }
+     return factLists;
+ }
+
+ /**
+  * Debugging aid: prints the information (entropy) of the fact list with
+  * respect to the tree's target and the attribute with the greatest gain.
+  * The call goes through attributeWithGreatestGain and may therefore
+  * reorder {@code facts}.
+  *
+  * @param dt tree supplying target, attributes and statistics
+  * @param facts facts to analyse
+  */
+ public void testEntropy(DecisionTree dt, List<Fact> facts) {
+     Hashtable<Object, Integer> classCounts = dt.getStatistics(facts, dt.getTarget());
+
+     // entropy of the unsplit fact list
+     double entropy = dt.getInformation(classCounts, facts.size());
+     System.out.println("initial_information: "+ entropy);
+
+     String bestAttribute = attributeWithGreatestGain(dt, facts, classCounts, dt.getAttributes());
+     System.out.println("best attr: "+ bestAttribute);
+ }
+
+ /**
+  * @return number of id3/c4_5 invocations counted so far
+  */
+ public int getNumCall() {
+     return this.FUNC_CALL;
+ }
+
+ private class FactNumericAttributeComparator implements Comparator<Fact> {
+ private String attr_name;
+
+ public FactNumericAttributeComparator(String _attr_name) {
+ attr_name = _attr_name;
+ }
+
+ public int compare(Fact f0, Fact f1) {
+ Number n0 = (Number) f0.getFieldValue(attr_name);
+ Number n1 = (Number) f1.getFieldValue(attr_name);
+ if (n0.doubleValue() < n1.doubleValue())
+ return -1;
+ else if (n0.doubleValue() > n1.doubleValue())
+ return 1;
+ else
+ return 0;
+ }
+ }
+
+}
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/BooleanDomain.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/BooleanDomain.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/BooleanDomain.java 2008-03-30 02:50:49 UTC (rev 19317)
@@ -0,0 +1,98 @@
+package dt.memory;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Domain of a boolean attribute. The value list is fixed to
+ * {@code true} and {@code false} at construction; {@link #addValue} and
+ * {@link #setDiscrete} are therefore no-ops.
+ */
+public class BooleanDomain implements Domain<Boolean> {
+
+    private String fName;
+    private ArrayList<Boolean> fValues;
+    private boolean constant;
+    private int readingSeq;
+
+    /**
+     * @param _name attribute name; surrounding whitespace is removed
+     */
+    public BooleanDomain(String _name) {
+        fName = _name.trim();
+        fValues = new ArrayList<Boolean>();
+        fValues.add(Boolean.TRUE);
+        fValues.add(Boolean.FALSE);
+    }
+
+    /** A boolean domain is always discrete. */
+    public boolean isDiscrete() {
+        return true;
+    }
+
+    public String getName() {
+        return fName;
+    }
+
+    /** Both boolean values are always part of the domain. */
+    public boolean contains(Boolean value) {
+        return true;
+    }
+
+    /** No-op: the value list already holds both boolean values. */
+    public void addValue(Boolean value) {
+    }
+
+    /** @return the list holding {@code true} and {@code false} */
+    public List<Boolean> getValues() {
+        return fValues;
+    }
+
+    public int hashCode() {
+        return fName.hashCode();
+    }
+
+    public boolean isConstant() {
+        return this.constant;
+    }
+
+    public void setConstant() {
+        this.constant = true;
+    }
+
+    /**
+     * Parses the textual form of a boolean.
+     *
+     * @param data text to parse
+     * @return the parsed {@link Boolean}, or null when {@code data} is not a
+     *         valid boolean literal according to {@link #isValid}
+     */
+    public Object readString(String data) {
+        if (isValid(data)) {
+            return Boolean.valueOf(data.trim());
+        }
+        return null;
+    }
+
+    /**
+     * Tells whether the text is a boolean literal, i.e. "true" or "false"
+     * in any letter case, ignoring surrounding whitespace.
+     * Boolean.parseBoolean cannot be used for the check: it never throws
+     * and maps every unrecognised text to {@code false}.
+     *
+     * @param string text to check; null is not valid
+     * @return true when the text denotes a boolean
+     */
+    public boolean isValid(String string) {
+        if (string == null) {
+            return false;
+        }
+        String text = string.trim();
+        return "true".equalsIgnoreCase(text) || "false".equalsIgnoreCase(text);
+    }
+
+    /** @return true when {@code value} is a {@link Boolean} */
+    public boolean isPossible(Object value) {
+        return value instanceof Boolean && fValues.contains(value);
+    }
+
+    public String toString() {
+        return fName;
+    }
+
+    public void setReadingSeq(int readingSeq) {
+        this.readingSeq = readingSeq;
+    }
+
+    public int getReadingSeq() {
+        return this.readingSeq;
+    }
+
+    /** No-op: a boolean domain cannot be made continuous. */
+    public void setDiscrete(boolean disc) {
+    }
+
+}
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/DBFactSet.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/DBFactSet.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/DBFactSet.java 2008-03-30 02:50:49 UTC (rev 19317)
@@ -0,0 +1,32 @@
+package dt.memory;
+
+import java.util.Collection;
+
+/**
+ * Placeholder FactSet implementation. Every method is still an
+ * auto-generated stub: nothing is stored, the getters return null or 0.
+ */
+public class DBFactSet implements FactSet{
+
+ /** Stub: adds nothing to {@code c}. */
+ public void assignTo(Collection<Fact> c) {
+ // TODO Auto-generated method stub
+
+ }
+
+ /** Stub: always returns null. */
+ public String getClassName() {
+ // TODO Auto-generated method stub
+ return null;
+ }
+
+ /** Stub: always returns null, whatever the attribute name. */
+ public Domain<?> getDomain(String attr) {
+ // TODO Auto-generated method stub
+ return null;
+ }
+
+ /** Stub: always returns null rather than an empty collection. */
+ public Collection<Domain<?>> getDomains() {
+ // TODO Auto-generated method stub
+ return null;
+ }
+
+ /** Stub: always returns 0. */
+ public int getSize() {
+ // TODO Auto-generated method stub
+ return 0;
+ }
+
+}
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/Domain.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/Domain.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/Domain.java 2008-03-30 02:50:49 UTC (rev 19317)
@@ -0,0 +1,71 @@
+package dt.memory;
+
+import java.util.List;
+
+/**
+ * Describes one named attribute and the values of type {@code T} it holds.
+ */
+public interface Domain<T> {
+
+ /** @return true once {@link #setConstant()} has been called (see BooleanDomain) */
+ boolean isConstant();
+ // presumably freezes the value list of the domain -- TODO confirm against the implementations
+ void setConstant();
+
+ /** @return whether the attribute is discrete; id3 skips attributes for which this is false */
+ boolean isDiscrete();
+ void setDiscrete(boolean disc);
+
+ boolean contains(T value);
+
+ /** @return the attribute name; used as the key for this domain in Fact and FSFactSet */
+ String getName();
+
+ void addValue(T value);
+
+ List<T> getValues();
+
+ /**
+  * Converts the textual form of a value into an object of this domain.
+  * Callers must handle a null result (FSFactSet.insert does).
+  */
+ Object readString(String data);
+
+ String toString();
+ /** @return whether {@code value} may be stored for this attribute; Fact.add rejects values for which this is false */
+ boolean isPossible(Object value) throws Exception;
+
+ // presumably the position of the attribute in the input records -- TODO confirm
+ void setReadingSeq(int readingSeq);
+ int getReadingSeq();
+}
+
+
+
+/*
+workingmemory.insert(object)
+
+ factset f = factsets_hashtable[object.class]
+ if f == null
+ f = createnew_factset(object.class);
+ f.insert(object)
+
+
+factset workingmemory.createnew_factset(class)
+
+ factset newfs = new newfactset(class)
+ foreach field in class
+ domain d = domainset_hashtable[field]
+ if d == null
+ d = createnew_domain(field)
+ newfs.adddomain(d)
+
+
+factset.insert(object)
+
+ fact f;
+ foreach field in object
+ domain d = domainset_hashtable[field];
+ attribute attr = d.createattribute(field.value)
+ f.add(attr)
+ addfact(f)
+
+
+treebuilder.execute(workingmemory, classtoexecute, attributestoprocess)
+
+ foreach factset in workingmemory
+ if classtoexecute.isAssignableFrom( factset.class )
+ internaladd(factset)
+
+ internalprocess(attributestoprocess)
+
+
+*/
+
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/DomainFactory.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/DomainFactory.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/DomainFactory.java 2008-03-30 02:50:49 UTC (rev 19317)
@@ -0,0 +1,61 @@
+package dt.memory;
+
+public class DomainFactory {
+ public static BooleanDomain createBooleanDomain(String name) {
+ return new BooleanDomain(name);
+
+ }
+
+ public static NumericDomain createNumericDomain(String name) {
+ return new NumericDomain(name);
+ }
+
+ public static LiteralDomain createLiteralDomain(String name) {
+ return new LiteralDomain(name);
+ }
+
+ public static Domain<?> createDomainFromClass(Class<?> c, String domainName) {
+ if (c.isPrimitive())
+ if (c.getName().equalsIgnoreCase("boolean")) {
+ System.out.println("Yuuuupiii boolean");
+ return createBooleanDomain(domainName);
+ } else if (c.getName().equalsIgnoreCase("int") ||
+ c.getName().equalsIgnoreCase("double") ||
+ c.getName().equalsIgnoreCase("float")) {
+ System.out.println("Yuuuupiii number");
+ return createNumericDomain(domainName);
+ } else
+ return createComplexDomain(c,"kicimi ye simple: "+domainName);
+ else if (c.isAssignableFrom(String.class)) {
+ System.out.println("Yuuuupiii string");
+ return createLiteralDomain(domainName);
+ } else if (c.isAssignableFrom(Integer.class) ||
+ c.isAssignableFrom(Double.class) ||
+ c.isAssignableFrom(Float.class)) {
+ return createNumericDomain(domainName);
+ } else if (c.isAssignableFrom(Boolean.class))
+ return createBooleanDomain(domainName);
+ else
+ return createComplexDomain(c,domainName);
+ }
+
+ private static Domain<?> createComplexDomain(Class<?> c, String domainName) {
+ System.out.println("Bok ye this is complex type: "+ c);
+ return null;
+ }
+
+// public static Domain<?> createDomainFromString(String data, String domainName) {
+// if (c.isNumeric()) {
+// System.out.println("Yuuuupiii string");
+// return createNumericDomain(domainName);
+// } else if (c.true/false ||
+// c.isAssignableFrom(Double.class) ||
+// c.isAssignableFrom(Float.class)) {
+// return createNumericDomain(domainName);
+// } else if (c.is literal )
+// return createLiteral(domainName);
+// else
+// return createComplexDomain(c,domainName);
+// }
+
+}
\ No newline at end of file
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/DomainSpec.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/DomainSpec.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/DomainSpec.java 2008-03-30 02:50:49 UTC (rev 19317)
@@ -0,0 +1,12 @@
+package dt.memory;
+
+import java.lang.annotation.*;
+
+/**
+ * Describes the domain of an annotated field or method.
+ * Retained at runtime so that it can be read reflectively.
+ */
+@Retention(RetentionPolicy.RUNTIME)
+@Target({ElementType.METHOD, ElementType.FIELD})
+public @interface DomainSpec {
+    // presumably the position of the attribute in the input records -- TODO confirm
+    int readingSeq();
+    /** Whether the attribute is the target; false unless stated. */
+    boolean target() default false;
+    /** Whether the attribute is discrete; true unless stated. */
+    boolean discrete() default true;
+    // The default is a single placeholder entry, not a real value list.
+    String[] values() default {"bok"};
+}
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FSFactSet.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FSFactSet.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FSFactSet.java 2008-03-30 02:50:49 UTC (rev 19317)
@@ -0,0 +1,133 @@
+package dt.memory;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Hashtable;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * FactSet whose facts are parsed from delimited text records.
+ */
+public class FSFactSet implements FactSet{
+
+    private List<Fact> facts;
+
+    /* set of attributes defining the type of the fact, keyed by domain name */
+    private Hashtable<String, Domain<?>> validDomains;
+
+    private String fs_class;
+
+
+    /**
+     * Creates an empty fact set without any domain.
+     *
+     * @param element_class name reported by {@link #getClassName()}
+     */
+    public FSFactSet(String element_class) {
+        this.facts = new ArrayList<Fact>();
+        this.validDomains = new Hashtable<String, Domain<?>>();
+        this.fs_class = element_class;
+    }
+
+
+    /**
+     * Creates an empty fact set with the given domains, keyed by their names.
+     *
+     * @param element_class name reported by {@link #getClassName()}
+     * @param domains domains of the attributes of the facts
+     */
+    public FSFactSet(String element_class, List<Domain<?>> domains) {
+        this.facts = new ArrayList<Fact>();
+        this.validDomains = new Hashtable<String, Domain<?>>(domains.size());
+        this.fs_class = element_class;
+
+        for (Domain<?> d : domains) {
+            validDomains.put(d.getName(), d);
+        }
+    }
+
+    /**
+     * Parses one text record into a fact and stores it.
+     * A trailing "." is removed, then the record is split on
+     * {@code separator} (interpreted as a regular expression by
+     * {@link String#split(String)}). The i-th field is read with the i-th
+     * domain. A field that the domain cannot read is replaced by -1.0. A
+     * field that the fact refuses is reported on stdout and left out of the
+     * fact; the fact itself is stored regardless.
+     *
+     * @param data the text record
+     * @param domains domains in the order of the fields of the record
+     * @param separator field separator
+     * @return result of adding the fact to the internal list
+     * @throws RuntimeException if the number of fields differs from the
+     *         number of domains
+     */
+    public boolean insert(String data, List<Domain<?>> domains, String separator) {
+        String record = data;
+        if (record.endsWith(".")) {
+            record = record.substring(0, record.length() - 1);
+        }
+        List<String> attributeValues = Arrays.asList(record.split(separator));
+
+        if (domains.size() != attributeValues.size()) {
+            throw new RuntimeException("Unable to construct Example from " + record);
+        }
+
+        // assume the domains are in the same order as the values
+        Fact newfact = new Fact();
+        Iterator<String> value_it = attributeValues.iterator();
+        for (Domain<?> domain : domains) {
+            // The value type of the domain is unknown here; readString of
+            // the same domain produced the value that is handed to addValue.
+            @SuppressWarnings("unchecked")
+            Domain<Object> attr_domain = (Domain<Object>) domain;
+
+            Object value = attr_domain.readString(value_it.next());
+            try {
+                if (value == null) {
+                    // marker for a field that could not be read
+                    value = Double.valueOf(-1.0);
+                } else if (attr_domain.isPossible(value)) {
+                    attr_domain.addValue(value);
+                }
+                newfact.add(attr_domain, value);
+            } catch (Exception e) {
+                // Best effort: report the rejected field and keep the rest of the fact.
+                System.out.println(e+ " the domain: "+attr_domain.getName()+ " does not accept "+ value);
+            }
+        }
+        return facts.add(newfact);
+    }
+
+
+    public void add(Fact newFact) {
+        facts.add(newFact);
+    }
+
+
+    public Fact getFact(int index) {
+        return facts.get(index);
+    }
+
+    /** Adds every fact of this set to {@code c}. */
+    public void assignTo(Collection<Fact> c) {
+        c.addAll(facts);
+    }
+
+    public int getSize() {
+        return facts.size();
+    }
+
+    /* TODO iterator */
+    public Collection<Domain<?>> getDomains() {
+        return validDomains.values();
+    }
+
+    /* TODO iterator */
+    public Collection<String> getDomainKeys() {
+        return validDomains.keySet();
+    }
+
+    /** @return the domain registered under {@code field}, or null */
+    public Domain<?> getDomain(String field) {
+        return validDomains.get(field);
+    }
+
+    public void addDomain(String field, Domain<?> fieldDomain) {
+        validDomains.put(field, fieldDomain);
+    }
+
+
+    public String getClassName() {
+        return fs_class;
+    }
+
+    /** @return all facts, one per line */
+    public String toString() {
+        StringBuilder out = new StringBuilder();
+        for (Fact f : facts) {
+            out.append(f.toString()).append("\n");
+        }
+        return out.toString();
+    }
+
+
+}
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/Fact.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/Fact.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/Fact.java 2008-03-30 02:50:49 UTC (rev 19317)
@@ -0,0 +1,77 @@
+package dt.memory;
+
+import java.util.Hashtable;
+import java.util.Set;
+
+
+/**
+ * One example: a set of attribute values, each stored under the name of its
+ * domain together with the domain itself.
+ */
+public class Fact {
+
+    /* domain of every attribute, keyed by domain name */
+    private Hashtable<String, Domain<?>> fields;
+    /* value of every attribute, keyed by domain name */
+    private Hashtable<String, Object> values;
+
+    /** Creates a fact without attributes. */
+    public Fact() {
+        this.values = new Hashtable<String, Object>();
+        this.fields = new Hashtable<String, Domain<?>>();
+    }
+
+    /**
+     * Creates a fact that knows the given domains but has no values yet.
+     *
+     * @param domains domains to register, keyed by their names
+     */
+    public Fact(Set<Domain<?>> domains) {
+        this.fields = new Hashtable<String, Domain<?>>();
+        for (Domain<?> d : domains) {
+            this.fields.put(d.getName(), d);
+        }
+        this.values = new Hashtable<String, Object>();
+    }
+
+    /**
+     * Stores a value for the attribute described by the domain, replacing
+     * any earlier value stored under the same domain name.
+     *
+     * @param its_domain domain of the attribute
+     * @param value value to store
+     * @throws Exception if the domain reports the value as not possible
+     */
+    public void add(Domain<?> its_domain, Object value) throws Exception {
+        if (!its_domain.isPossible(value))
+            throw new Exception("The value "+value +" is not possible what is going on in domain: "+ its_domain.getName());
+        fields.put(its_domain.getName(), its_domain);
+        values.put(its_domain.getName(), value);
+    }
+
+    /** @return the value stored under {@code field_name}, or null when there is none */
+    public Object getFieldValue(String field_name) {
+        return values.get(field_name);
+    }
+
+    /** @return the string form of the value, or null when there is none */
+    public String getAttributeValueAsString(String name) {
+        Object attr = getFieldValue(name);
+        return (attr != null) ? attr.toString() : null;
+    }
+
+    /**
+     * Two facts are equal when they hold the same domains and the same
+     * values. Comparing the domains alone would make all facts of one fact
+     * set equal to each other.
+     */
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if ((o == null) || (this.getClass() != o.getClass())) {
+            return false;
+        }
+        Fact other = (Fact) o;
+        return fields.equals(other.fields) && values.equals(other.values);
+    }
+
+    /** Consistent with {@link #equals(Object)}: built from domains and values. */
+    public int hashCode() {
+        return 31 * fields.hashCode() + values.hashCode();
+    }
+
+    /** @return the attributes as "domain=value," pairs */
+    public String toString() {
+        StringBuilder out = new StringBuilder();
+        for (String key : fields.keySet()) {
+            out.append(fields.get(key)).append("=").append(values.get(key)).append(",");
+        }
+        return out.toString();
+    }
+
+}
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FactSet.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FactSet.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FactSet.java 2008-03-30 02:50:49 UTC (rev 19317)
@@ -0,0 +1,19 @@
+package dt.memory;
+
+import java.util.Collection;
+
+/**
+ * A group of facts that share one class name and one set of attribute domains.
+ */
+public interface FactSet {
+
+    /** @return the class name this fact set is registered under */
+    String getClassName();
+
+    /** Adds the facts of this set to {@code c}. */
+    void assignTo(Collection<Fact> c);
+
+    /** @return the domain of the attribute named {@code attr} */
+    Domain<?> getDomain(String attr);
+
+    /* TODO iterator */
+    /** @return the domains of all attributes of this fact set */
+    Collection<Domain<?>> getDomains();
+
+    /** @return the number of facts in this set */
+    int getSize();
+
+    String toString();
+}
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FactSetFactory.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FactSetFactory.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/FactSetFactory.java 2008-03-30 02:50:49 UTC (rev 19317)
@@ -0,0 +1,268 @@
+package dt.memory;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+
+import dt.tools.ObjectReader;
+
+public class FactSetFactory {
+
+ /**
+  * Reads the nursery data resource into the given working memory under
+  * the fact-set name "Nursery".
+  *
+  * Nine literal domains are declared with fixed value lists and marked
+  * constant before the file is read:
+  * parents, has_nurs, form, children, housing, finance, social, health
+  * and the class attribute "classnursery"
+  * (not_recom, recommend, very_recom, priority, spec_prior).
+  *
+  * Any exception raised while reading is printed and otherwise ignored.
+  *
+  * @param simple working memory receiving the facts
+  * @return the fact-set name, "Nursery", whether or not loading succeeded
+  */
+ public static String insertNurserySet(WorkingMemory simple) {
+     final String klass = "Nursery";
+     final String filename = "../data/nursery/nursery.data.txt";
+     final String separator = ",";
+
+     // attribute names and their allowed values, in the order the domains are registered
+     final String[] names = { "parents", "has_nurs", "form", "children",
+             "housing", "finance", "social", "health", "classnursery" };
+     final String[][] allowed = {
+             { "usual", "pretentious", "great_pret" },
+             { "proper", "less_proper", "improper", "critical", "very_crit" },
+             { "complete", "completed", "incomplete", "foster" },
+             { "1", "2", "3", "more" },
+             { "convenient", "less_conv", "critical" },
+             { "convenient", "inconv" },
+             { "nonprob", "slightly_prob", "problematic" },
+             { "recommended", "priority", "not_recom" },
+             { "not_recom", "recommend", "very_recom", "priority", "spec_prior" } };
+
+     ArrayList<Domain<?>> domains = new ArrayList<Domain<?>>();
+     for (int i = 0; i < names.length; i++) {
+         LiteralDomain domain = new LiteralDomain(names[i], allowed[i]);
+         domain.setConstant();
+         domains.add(domain);
+     }
+
+     try {
+         fromFile(simple, filename, klass, domains, separator);
+     } catch (Exception e) {
+         // best effort: report and still return the class name
+         e.printStackTrace();
+     }
+     return klass;
+ }
+
+ /**
+  * Reads the car evaluation data resource into the given working memory
+  * under the fact-set name "Car".
+  *
+  * Seven literal domains are declared with fixed value lists and marked
+  * constant before the file is read:
+  * buying and maint (vhigh, high, med, low), doors (2, 3, 4, 5more),
+  * persons (2, 4, more), lug_boot (small, med, big),
+  * safety (low, med, high) and the class attribute "classCar"
+  * (unacc, acc, good, vgood).
+  *
+  * Any exception raised while reading is printed and otherwise ignored.
+  *
+  * @param simple working memory receiving the facts
+  * @return the fact-set name, "Car", whether or not loading succeeded
+  */
+ public static String insertCarSet(WorkingMemory simple) {
+     final String klass = "Car";
+     final String filename = "../data/car/car.data.txt";
+     final String separator = ",";
+
+     // attribute names and their allowed values, in the order the domains are registered
+     final String[] names = { "buying", "maint", "doors", "persons",
+             "lug_boot", "safety", "classCar" };
+     final String[][] allowed = {
+             { "vhigh", "high", "med", "low" },
+             { "vhigh", "high", "med", "low" },
+             { "2", "3", "4", "5more" },
+             { "2", "4", "more" },
+             { "small", "med", "big" },
+             { "low", "med", "high" },
+             { "unacc", "acc", "good", "vgood" } };
+
+     ArrayList<Domain<?>> domains = new ArrayList<Domain<?>>();
+     for (int i = 0; i < names.length; i++) {
+         LiteralDomain domain = new LiteralDomain(names[i], allowed[i]);
+         domain.setConstant();
+         domains.add(domain);
+     }
+
+     try {
+         fromFile(simple, filename, klass, domains, separator);
+     } catch (Exception e) {
+         // best effort: report and still return the class name
+         e.printStackTrace();
+     }
+     return klass;
+ }
+
+ /**
+  * Reads the advertisement data resource into the given working memory
+  * under the fact-set name "Advertisement". The domains are not hard-coded
+  * here; they are built by {@code fromFileDomain} from a separate domain
+  * description resource whose fields are separated by ":".
+  *
+  * Any exception raised while reading either resource is printed and
+  * otherwise ignored.
+  *
+  * @param simple working memory receiving the facts
+  * @return the fact-set name, "Advertisement", whether or not loading succeeded
+  */
+ public static String insertAdvertisementSet(WorkingMemory simple) {
+     final String klass = "Advertisement";
+     final String filename = "../data/advertisement/ad.data.txt";
+     final String separator = ",";
+     final String domainFileName = "../data/advertisement/data_domains.txt";
+     final String separatorDomain = ":";
+
+     try {
+         ArrayList<Domain<?>> domains = fromFileDomain(domainFileName, separatorDomain);
+         fromFile(simple, filename, klass, domains, separator);
+     } catch (Exception e1) {
+         // best effort: report and still return the class name
+         e1.printStackTrace();
+     }
+     return klass;
+ }
+
+ /*
+  * Attribute names recorded by fromFileDomain, in the order they are read,
+  * followed by "classAdvertisement". The list is static and nothing in
+  * this class clears it, so each fromFileDomain call appends to it again.
+  */
+ public static ArrayList<String> attributesOfAdvertisement = new ArrayList<String>();
+
+ /**
+  * Builds the domain list for the advertisement data set.
+  *
+  * The list starts with three non-discrete numeric domains (height, width,
+  * aratio), continues with one boolean domain per line of the domain
+  * description resource, and ends with the literal class domain
+  * "classAdvertisement" (ad, nonad). Lines starting with "|" are skipped.
+  * For every other line the text before the first separator is taken as
+  * the attribute name; the name is also appended to
+  * {@link #attributesOfAdvertisement}.
+  *
+  * All three numeric domains are switched to non-discrete; previously only
+  * "height" was, because the same variable was used three times.
+  *
+  * @param domainFileName resource path, resolved relative to this class
+  * @param separator regular expression separating the attribute name from
+  *        the rest of the line
+  * @return the domains in the order described above
+  * @throws Exception if the resource cannot be found or read
+  */
+ public static ArrayList<Domain<?>> fromFileDomain(String domainFileName,
+         String separator) throws Exception {
+
+     ArrayList<Domain<?>> domains = new ArrayList<Domain<?>>();
+     for (String numericName : new String[] { "height", "width", "aratio" }) {
+         NumericDomain numeric = new NumericDomain(numericName);
+         numeric.setDiscrete(false);
+         domains.add(numeric);
+     }
+
+     java.io.InputStream in = FactSetFactory.class.getResourceAsStream(domainFileName);
+     if (in == null) {
+         // fail with a message naming the resource instead of a bare NullPointerException
+         throw new IOException("Domain resource not found: " + domainFileName);
+     }
+     BufferedReader reader = new BufferedReader(new InputStreamReader(in));
+     try {
+         String line;
+         while ((line = reader.readLine()) != null) {
+             if (line.startsWith("|")) {
+                 continue; // comment line in the domain description
+             }
+             String attribute = line.split(separator, 2)[0];
+             attributesOfAdvertisement.add(attribute);
+             domains.add(new BooleanDomain(attribute));
+         }
+     } finally {
+         reader.close();
+     }
+
+     domains.add(new LiteralDomain("classAdvertisement", new String[] {
+             "ad", "nonad" }));
+     attributesOfAdvertisement.add("classAdvertisement");
+     System.out.println("# of domains:" + domains.size());
+
+     return domains;
+ }
+
+ /**
+  * Reads a data resource line by line and inserts every line into the
+  * working memory as a fact of the named fact set.
+  *
+  * Each line is trimmed first. Reading stops at the first empty line or at
+  * the end of the resource, whichever comes first. The reader is always
+  * closed, also when an insert fails.
+  *
+  * @param wm working memory receiving the lines
+  * @param filename resource path, resolved relative to this class
+  * @param klass name of the fact set the lines belong to
+  * @param domains domains handed to {@code wm.insert} with every line
+  * @param separator value separator handed to {@code wm.insert}
+  * @throws Exception if the resource cannot be found or read, or if an
+  *         insert fails
+  */
+ public static void fromFile(WorkingMemory wm, String filename,
+         String klass, List<Domain<?>> domains, String separator)
+         throws Exception {
+
+     java.io.InputStream in = FactSetFactory.class.getResourceAsStream(filename);
+     if (in == null) {
+         // fail with a message naming the resource instead of a bare NullPointerException
+         throw new IOException("Data resource not found: " + filename);
+     }
+     BufferedReader reader = new BufferedReader(new InputStreamReader(in));
+     try {
+         String line;
+         while ((line = reader.readLine()) != null) {
+             line = line.trim();
+             if (line.length() == 0) {
+                 break; // an empty line ends the data section
+             }
+             wm.insert(line, klass, separator, domains);
+         }
+     } finally {
+         reader.close();
+     }
+ }
+
+ public static boolean readObjectData(WorkingMemory simple, String filename,
+ String separator, Object nullObj) {
+ /*
+ * | class values
+ *
+ * unacc, acc, good, vgood
+ * | attributes
+ *
+ * buying: vhigh, high, med, low.
+ * maint: vhigh, high, med, low.
+ * doors: 2, 3, 4, 5, more.
+ * persons: 2, 4, more.
+ * lug_boot: small, med, big.
+ * safety: low, med, high.
+ *
+ */
+ // String[] attr_order = {"buying", "maint", "doors", "persons", "lug_boot", "safety"
+ // String filename = "../data/car/car.data.txt";
+ // String separator = ",";
+ // Car nullCar = new Car();
+
+
+ try {
+ FactSetFactory.fromFileAsObject(simple, nullObj.getClass(), filename, separator);
+
+ // simple.insert(facts);
+ return true;
+ } catch (Exception e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ }
+
+ return false;
+ }
+
+ /**
+  * Reads a data resource line by line, turns every line into an instance
+  * of {@code klass} with {@code ObjectReader.read} and inserts it into the
+  * fact set the working memory holds for that class.
+  *
+  * Each line is trimmed first. Reading stops at the first empty line or at
+  * the end of the resource. The reader is always closed.
+  *
+  * @param wm working memory providing (or creating) the fact set
+  * @param klass class of the objects to create; the resource is resolved
+  *        relative to it
+  * @param filename resource path
+  * @param separator value separator used within a line
+  * @return the objects read, in file order
+  * @throws IOException if the resource cannot be found or read
+  */
+ public static List<Object> fromFileAsObject(WorkingMemory wm, Class<?> klass, String filename, String separator)
+         throws IOException {
+     List<Object> obj_read = new ArrayList<Object>();
+     OOFactSet fs = wm.getFactSet(klass);
+     Collection<Domain<?>> domains = fs.getDomains();
+
+     java.io.InputStream in = klass.getResourceAsStream(filename);
+     if (in == null) {
+         // fail with the declared exception type instead of a NullPointerException
+         throw new IOException("Data resource not found: " + filename);
+     }
+     BufferedReader reader = new BufferedReader(new InputStreamReader(in));
+     try {
+         String line;
+         while ((line = reader.readLine()) != null) {
+             line = line.trim();
+             if (line.length() == 0) {
+                 break; // an empty line ends the data section
+             }
+             Object element = ObjectReader.read(klass, domains, line, separator);
+             obj_read.add(element);
+             fs.insert(element);
+         }
+     } finally {
+         reader.close();
+     }
+     return obj_read;
+ }
+
+}
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/LiteralDomain.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/LiteralDomain.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/LiteralDomain.java 2008-03-30 02:50:49 UTC (rev 19317)
@@ -0,0 +1,105 @@
+package dt.memory;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Domain of an attribute whose values are strings.
+ *
+ * A domain is discrete by default. Once {@link #setConstant()} has been
+ * called the value list is frozen: {@link #addValue(String)} becomes a
+ * no-op and {@link #isPossible(Object)} only accepts listed values.
+ */
+public class LiteralDomain implements Domain<String> {
+
+    private String fName;
+    private List<String> fValues;
+    private boolean constant;
+    private boolean discrete;
+    private int readingSeq;
+
+    /**
+     * Creates an empty domain.
+     *
+     * @param _name attribute name; surrounding whitespace is removed
+     */
+    public LiteralDomain(String _name) {
+        fName = _name.trim();
+        fValues = new ArrayList<String>();
+        discrete = true;
+    }
+
+    /**
+     * Creates a domain pre-filled with the given values.
+     *
+     * @param _name attribute name, stored as given
+     * @param possibleValues initial values; they are copied
+     */
+    public LiteralDomain(String _name, String[] possibleValues) {
+        fName = _name;
+        // Arrays.asList alone is a fixed-size view of the array; copying it
+        // keeps addValue() usable while the domain is not constant.
+        fValues = new ArrayList<String>(Arrays.asList(possibleValues));
+        discrete = true;
+    }
+
+    public void setDiscrete(boolean d) {
+        this.discrete = d;
+    }
+
+    public boolean isDiscrete() {
+        return this.discrete;
+    }
+
+    public String getName() {
+        return fName;
+    }
+
+    /**
+     * Adds a value unless the domain is constant or already lists it
+     * (exact, case-sensitive match).
+     */
+    public void addValue(String value) {
+        if (constant) {
+            return;
+        }
+        if (!fValues.contains(value)) {
+            fValues.add(value);
+        }
+    }
+
+    /**
+     * Case-insensitive membership test.
+     *
+     * @return true when some listed value equals {@code value} ignoring
+     *         case; false for {@code null}
+     */
+    public boolean contains(String value) {
+        if (value == null) {
+            return false;
+        }
+        for (String n : fValues) {
+            if (value.equalsIgnoreCase(n)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /** @return the live value list (not a copy) */
+    public List<String> getValues() {
+        return fValues;
+    }
+
+    /** Hash code of the domain name. */
+    public int hashCode() {
+        return fName.hashCode();
+    }
+
+    public boolean isConstant() {
+        return this.constant;
+    }
+
+    /** Freezes the value list; there is no way to undo this. */
+    public void setConstant() {
+        this.constant = true;
+    }
+
+    /**
+     * Converts raw text to a value of this domain.
+     *
+     * @return the trimmed text, or {@code null} for {@code null} input
+     */
+    public Object readString(String data) {
+        return (data == null) ? null : data.trim();
+    }
+
+    /**
+     * @return false when {@code value} is not a String, or when the domain
+     *         is constant and does not list it (case-sensitive); true
+     *         otherwise
+     */
+    public boolean isPossible(Object value) {
+        if (!(value instanceof String)) {
+            return false;
+        }
+        return !constant || fValues.contains(value);
+    }
+
+    public void setReadingSeq(int readingSeq) {
+        this.readingSeq = readingSeq;
+    }
+
+    public int getReadingSeq() {
+        return this.readingSeq;
+    }
+
+    /** @return the domain name */
+    public String toString() {
+        return fName;
+    }
+
+}
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/NumericDomain.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/NumericDomain.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/NumericDomain.java 2008-03-30 02:50:49 UTC (rev 19317)
@@ -0,0 +1,164 @@
+package dt.memory;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class NumericDomain implements Domain<Number> {
+
+ private String fName;
+ private ArrayList<Number> fValues;
+ private boolean constant;
+ private boolean discrete;
+ private int readingSeq;
+
+
+ public NumericDomain(String _name) {
+ fName = _name.trim();
+ fValues = new ArrayList<Number>();
+ discrete = true;
+ }
+ public void setDiscrete(boolean d) {
+ this.discrete = d;
+ }
+
+ public boolean isDiscrete() {
+ return discrete;
+ }
+
+ public String getName() {
+ return fName;
+ }
+
+ public void addValue(Number value) {
+ if (constant)
+ return;
+ if (discrete) {
+ if (!fValues.contains(value))
+ fValues.add(value);
+ } else {
+ if (fValues.isEmpty()) {
+ fValues.add(value);
+ return;
+ } else if (fValues.size()==1) {
+ if (value.doubleValue() < fValues.get(0).doubleValue()) {
+ Number first = fValues.remove(0);
+ fValues.add(value);
+ fValues.add(first);
+ } else if (value.doubleValue() > fValues.get(0).doubleValue()) {
+ fValues.add(value);
+ }
+ return;
+ } else {
+ if (value.doubleValue() > fValues.get(1).doubleValue()) {
+ fValues.remove(1);
+ fValues.add(1, value);
+ return;
+ }
+ if (value.doubleValue() < fValues.get(0).doubleValue()) {
+ fValues.remove(0);
+ fValues.add(0, value);
+ return;
+ }
+ }
+ }
+
+ }
+
+ public boolean contains(Number value) {
+ for(Number n: fValues) {
+ if (value.intValue() == n.intValue() ||
+ value.doubleValue() == n.doubleValue() ||
+ value.floatValue() == n.floatValue())
+ return true;
+ }
+ return false;
+ }
+
+ public List<Number> getValues() {
+ return fValues;
+ }
+
+ public int hashCode() {
+ return fName.hashCode();
+ }
+
+ public boolean isConstant() {
+ return this.constant;
+ }
+
+ public void setConstant() {
+ this.constant = true;
+ }
+
+ public Object readString(String data) {
+ if (isValid(data))
+ return Double.parseDouble(data);
+ else
+ return null;
+ }
+
+ public boolean isValid(String string) {
+ if (string == null)
+ return true;
+ try{
+ Double.parseDouble(string);
+ return true;
+ }
+ catch (Exception e){
+ return false;
+ }
+ }
+
+ public boolean isPossible(Object value) throws Exception {
+ //System.out.println("NumericDomain.isPossible() start "+ value+ " ?");
+
+ if (!(value instanceof Number))
+ return false;
+ //System.exit(0);
+ if (constant) {
+ //System.out.println("NumericDomain.isPossible() constant "+ value+ " ?");
+ //System.exit(0);
+
+ if (discrete) {
+ if (fValues.contains(value))
+ return true;
+
+ //System.out.println("NumericDomain.isPossible() constant && discrete "+ value+ " ?");
+ //System.exit(0);
+ } else {
+ if (fValues.isEmpty() || fValues.size()==1)
+ throw new Exception("Numerical domain "+fName+" is constant and not discrete but bounds are not set: possible values size: "+ fValues.size());
+ if (((Number)value).doubleValue() >= fValues.get(0).doubleValue() &&
+ ((Number)value).doubleValue() <= fValues.get(1).doubleValue()) {
+ return true;
+ }
+ //System.out.println("NumericDomain.isPossible() "+ value+ " ?");
+ }
+ } else {
+ return true;
+ }
+
+ //System.out.println("NumericDomain.isPossible() end "+ value+ " ?");
+ //System.exit(0);
+
+ return false;
+ }
+
+ public void setReadingSeq(int readingSeq) {
+ this.readingSeq = readingSeq;
+
+ }
+
+ public int getReadingSeq() {
+ return this.readingSeq;
+
+ }
+
+ public String toString() {
+ String out = fName;
+ return out;
+ }
+
+
+
+}
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/OOFactSet.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/OOFactSet.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/OOFactSet.java 2008-03-30 02:50:49 UTC (rev 19317)
@@ -0,0 +1,136 @@
+package dt.memory;
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Hashtable;
+import java.util.List;
+
+import dt.tools.Util;
+
+public class OOFactSet implements FactSet{
+
+ private List<Fact> facts;
+
+ /* set of attributes defining the type of the fact */
+ //private Set<T> validDomains;
+ private Hashtable<String, Domain<?>> validDomains;
+
+ private Class<?> fs_class;
+
+ public OOFactSet(Class<?> fact_class) {//Class<? extends Object>
+ this.facts = new ArrayList<Fact>();
+ this.validDomains = new Hashtable<String, Domain<?>>();
+ this.fs_class = fact_class;
+ }
+
+ /*
+ factset.insert(object)
+ fact f;
+ foreach field in object
+ domain d = domainset_hashtable[field];
+ attribute attr = d.createattribute(field.value)
+ f.add(attr)
+ addfact(f)
+ */
+ public boolean insert(Object element) {
+ Fact f = new Fact();
+
+ Class<?> element_class = element.getClass();
+ Method [] element_methods = element_class.getDeclaredMethods();
+ for (Method m: element_methods) {
+ String m_name = m.getName();
+ //Class<?> return_type_name = ;
+ Class<?>[] returns = {m.getReturnType()};
+ if (Util.isGetter(m_name) & Util.isSimpleType(returns) ) {
+// if (!Util.isSimpleType(return_type_name))
+// continue; // in the future we should support classes
+ String field = Util.getAttributeName(m_name);
+
+ /*
+ * when u first read the element
+ * if the domain specifications are already given
+ * then read from there and
+ * dont add each new value you read, just check if it is valid
+ * otherwise you create a new domain for that attribute
+ * Domain attributeSpec = dataSetSpec.getDomain(attr_name);
+ */
+ Domain fieldDomain = validDomains.get(field);
+
+ //String
+ Object field_value;
+ try {
+ field_value = m.invoke(element);
+
+ //Object attribute = fieldDomain.createAttribute(field_value);
+ if (fieldDomain.isPossible(field_value))
+ fieldDomain.addValue(field_value);
+ f.add(fieldDomain, field_value);
+ //System.out.println("FactSet.insert f "+ f + " fielddomain name "+fieldDomain.getName()+" value: "+field_value+".");
+
+ } catch (IllegalArgumentException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ } catch (IllegalAccessException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ } catch (InvocationTargetException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+
+
+
+ }
+ }
+
+ boolean result = facts.add(f);
+ //System.out.println("FactSet.insert f "+ f + " result "+result+" facts.size(): "+facts.size()+".");
+ return result;
+
+
+ }
+
+ public Fact getFact(int index) {
+ return facts.get(index);
+ }
+
+ public void assignTo(Collection<Fact> c) {
+ c.addAll(facts);
+ }
+
+ public int getSize() {
+ return facts.size();
+ }
+
+ /* TODO iterator */
+ public Collection<Domain<?>> getDomains() {
+ return validDomains.values();
+ }
+
+ /* TODO iterator */
+ public Collection<String> getDomainKeys() {
+ return validDomains.keySet();
+ }
+
+ public Domain<?> getDomain(String field) {
+ return validDomains.get(field);
+ }
+
+ public void addDomain(String field, Domain<?> fieldDomain) {
+ validDomains.put(field, fieldDomain);
+ }
+
+ public Class<?> getFactClass() {
+ return fs_class;
+ }
+
+ public String getClassName() {
+ return fs_class.getName();
+ }
+
+
+}
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/WorkingMemory.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/WorkingMemory.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/memory/WorkingMemory.java 2008-03-30 02:50:49 UTC (rev 19317)
@@ -0,0 +1,239 @@
+package dt.memory;
+
+import java.lang.annotation.Annotation;
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import java.util.Hashtable;
+import java.util.Iterator;
+import java.util.List;
+
+import dt.tools.Util;
+
+/**
+ * Holds the fact sets known to the learner together with one shared
+ * registry of attribute domains.
+ *
+ * Object fact sets are keyed by the fully qualified class name, string
+ * fact sets by the name passed to
+ * {@link #insert(String, String, String, List)}.
+ */
+public class WorkingMemory {
+
+    private Hashtable<String, FactSet> factsets;
+
+    /* all registered domains keyed by attribute name, shared by the fact sets */
+    private Hashtable<String, Domain<?>> domainset;
+
+    public WorkingMemory() {
+        factsets = new Hashtable<String, FactSet>();
+        domainset = new Hashtable<String, Domain<?>>();
+    }
+
+    /**
+     * Returns the object fact set for a class, creating it on first use.
+     *
+     * @throws ClassCastException if the name is taken by a fact set that is
+     *         not an OOFactSet
+     */
+    public OOFactSet getFactSet(Class<?> klass) {
+        OOFactSet fs = lookupOrCreate(klass);
+        System.out.println("WorkingMemory.getFactSet(objClass) inserted element new fs "+ klass.getName());
+        return fs;
+    }
+
+    /**
+     * Inserts an object into the fact set of its class, creating that fact
+     * set on first use.
+     */
+    public void insert(Object element) {
+        OOFactSet fs = lookupOrCreate(element.getClass());
+        fs.insert(element);
+        System.out.println("WorkingMemory.insert(object) inserted element fs.size() "+ fs.getSize());
+    }
+
+    /**
+     * Inserts one textual record into the named fact set.
+     *
+     * On first use of a name the fact set is created and every given domain
+     * is registered both with the fact set and with the shared registry.
+     * Nothing is registered when any of the domains collides with an
+     * already registered one.
+     *
+     * @param element the record text
+     * @param name fact-set name
+     * @param separator value separator within the record
+     * @param domains domains describing the record's values
+     * @throws IllegalStateException if a new fact set would register a
+     *         domain whose name or instance is already registered
+     * @throws ClassCastException if the name is taken by a fact set that is
+     *         not an FSFactSet
+     */
+    public void insert(String element, String name, String separator, List<Domain<?>> domains) {
+        FSFactSet fs = (FSFactSet) factsets.get(name);
+        if (fs == null) {
+            // check every domain before touching any state, so a collision leaves the memory unchanged
+            Hashtable<String, Domain<?>> pending = new Hashtable<String, Domain<?>>();
+            for (Domain<?> d : domains) {
+                String d_name = d.getName();
+                if (domainset.containsKey(d_name) || domainset.contains(d)
+                        || pending.containsKey(d_name) || pending.contains(d)) {
+                    // library code must not terminate the JVM; let the caller decide
+                    throw new IllegalStateException("Domain '" + d_name
+                            + "' is already registered; cannot create fact set '" + name + "'");
+                }
+                pending.put(d_name, d);
+            }
+
+            fs = new FSFactSet(name, domains);
+            for (Domain<?> d : domains) {
+                fs.addDomain(d.getName(), d);
+            }
+            domainset.putAll(pending);
+            factsets.put(name, fs);
+        }
+
+        fs.insert(element, domains, separator);
+    }
+
+    /* Returns the object fact set stored under the class name, creating it when missing. */
+    private OOFactSet lookupOrCreate(Class<?> klass) {
+        FactSet existing = factsets.get(klass.getName());
+        if (existing == null) {
+            return create_factset(klass);
+        }
+        return (OOFactSet) existing; //TODO should i cast
+    }
+
+    /*
+     * Creates and registers the fact set of a class from its declared fields.
+     * Every field of a simple type gets a domain: the one already registered
+     * under the field name, or a new one from DomainFactory. A DomainSpec
+     * annotation on the field, when present, sets the domain's reading
+     * sequence and discreteness.
+     */
+    private OOFactSet create_factset(Class<?> classObj) {
+        OOFactSet newfs = new OOFactSet(classObj);
+
+        for (Field f : classObj.getDeclaredFields()) {
+            String f_name = f.getName();
+            Class<?>[] f_class = { f.getType() };
+            System.out.println("WHat is this f: " +f.getType()+" the name "+f_name+" class "+ f.getClass() + " and the name"+ f.getClass().getName());
+            if (!Util.isSimpleType(f_class)) {
+                continue;
+            }
+
+            // locate the DomainSpec annotation if the field carries one
+            DomainSpec spec = null;
+            for (Annotation a : f.getAnnotations()) {
+                if (a instanceof DomainSpec) {
+                    spec = (DomainSpec) a;
+                    break;
+                }
+            }
+
+            Domain<?> fieldDomain = domainset.get(f_name);
+            if (fieldDomain == null) {
+                fieldDomain = DomainFactory.createDomainFromClass(f.getType(), f_name);
+            }
+            if (spec != null) {
+                fieldDomain.setReadingSeq(spec.readingSeq());
+                fieldDomain.setDiscrete(spec.discrete());
+            }
+            domainset.put(f_name, fieldDomain);
+            newfs.addDomain(f_name, fieldDomain);
+        }
+
+        factsets.put(classObj.getName(), newfs);
+        return newfs;
+    }
+
+    /*
+     * Getter-based variant of create_factset: derives the attributes from the
+     * declared getters returning a simple type instead of from the fields.
+     * Not called from within this class.
+     */
+    private OOFactSet create_factset_(Class<?> classObj) {
+        OOFactSet newfs = new OOFactSet(classObj);
+
+        for (Method m : classObj.getDeclaredMethods()) {
+            String m_name = m.getName();
+            Class<?>[] returns = { m.getReturnType() };
+            if (!(Util.isGetter(m_name) && Util.isSimpleType(returns))) {
+                continue;
+            }
+
+            String field = Util.getAttributeName(m_name);
+            Domain<?> fieldDomain = domainset.get(field);
+            if (fieldDomain == null) {
+                fieldDomain = DomainFactory.createDomainFromClass(m.getReturnType(), field);
+            }
+            domainset.put(field, fieldDomain);
+            newfs.addDomain(field, fieldDomain);
+        }
+
+        factsets.put(classObj.getName(), newfs);
+        return newfs;
+    }
+
+    /* TODO: is there a better way of doing this iterator? */
+    public Iterator<FactSet> getFactsets() {
+        return factsets.values().iterator();
+    }
+
+    public Domain<?> getDomain(String field) {
+        return domainset.get(field);
+    }
+
+    public boolean containsDomainKey(String field) {
+        return domainset.containsKey(field);
+    }
+
+    public void putDomain(String field, Domain<?> fieldDomain) {
+        this.domainset.put(field, fieldDomain);
+    }
+
+    public void putFactSet(String klass_name, FactSet newfs) {
+        factsets.put(klass_name, newfs);
+    }
+
+    public boolean containsFactSetKey(String field) {
+        return factsets.containsKey(field);
+    }
+}
+
+
+/*
+workingmemory.insert(object)
+
+ factset fs = factsets_hashtable[object.class]
+ if fs == null
+ fs = createnew_factset(object.class);
+ fs.insert(object)
+
+
+factset workingmemory.createnew_factset(class)
+
+ factset newfs = new newfactset(class)
+ foreach field in class
+ domain d = domainset_hashtable[field]
+ if d == null
+ d = createnew_domain(field)
+ newfs.adddomain(d)
+
+
+factset.insert(object)
+
+ fact f;
+ foreach field in object
+ domain d = domainset_hashtable[field];
+ attribute attr = d.createattribute(field.value)
+ f.add(attr)
+ addfact(f)
+
+
+treebuilder.execute(workingmemory, classtoexecute, attributestoprocess)
+
+ foreach factset in workingmemory
+ if classtoexecute.isAssignableFrom( factset.class )
+ internaladd(factset)
+
+ internalprocess(attributestoprocess)
+
+
+*/
\ No newline at end of file
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/ObjectReader.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/ObjectReader.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/ObjectReader.java 2008-03-30 02:50:49 UTC (rev 19317)
@@ -0,0 +1,444 @@
+package dt.tools;
+
+import java.io.IOException;
+import java.lang.annotation.Annotation;
+import java.lang.reflect.Field;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.lang.reflect.Modifier;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.List;
+
+import dt.memory.Domain;
+import dt.memory.BooleanDomain;
+import dt.memory.NumericDomain;
+import dt.memory.LiteralDomain;
+import dt.memory.DomainSpec;
+
+public class ObjectReader {
+
+ private static final boolean DEBUG = false;
+
+    /**
+     * Instantiates {@code element_class} and populates it from one delimited record.
+     * <p>
+     * Each declared method accepted by {@code Util.isSetter} and {@code Util.isSimpleType}
+     * is paired with the first domain whose name equals the setter's attribute name
+     * (case-insensitively). The token found at that domain's reading sequence is parsed
+     * by the domain and handed to the setter.
+     * <p>
+     * NOTE: a mismatch between the domain kind and the setter's parameter type ends the
+     * JVM via {@code System.exit(0)}; a failing setter invocation is only printed.
+     *
+     * @param element_class class to instantiate through {@code newInstance()}
+     * @param domains       domains describing the attributes to fill in
+     * @param data          the record; a single trailing '.' is removed before splitting
+     * @param separator     expression handed to {@code String.split}
+     * @return the populated object, or {@code null} if the class could not be instantiated
+     */
+    public static Object read(Class<?> element_class,
+            Collection<Domain<?>> domains, String data, String separator) {
+
+        Object element = null;
+        try {
+            element = element_class.newInstance();
+
+            Method[] declared = element_class.getDeclaredMethods();
+
+            // The record terminator is not part of the last value.
+            String record = data.endsWith(".") ? data.substring(0, data.length() - 1) : data;
+            List<String> attributeValues = Arrays.asList(record.split(separator));
+
+            for (Method setter : declared) {
+                Class<?>[] paramTypes = setter.getParameterTypes();
+                if (!Util.isSetter(setter.getName()) || !Util.isSimpleType(paramTypes)) {
+                    continue; // in the future we should support classes
+                }
+
+                // Find the first domain describing this setter's attribute.
+                String field = Util.getAttributeName(setter.getName());
+                Domain<?> matched = null;
+                for (Domain<?> candidate : domains) {
+                    if (field.equalsIgnoreCase(candidate.getName())) {
+                        matched = candidate;
+                        break;
+                    }
+                }
+                if (matched == null) {
+                    continue;
+                }
+
+                String fieldString = attributeValues.get(matched.getReadingSeq());
+                Object fieldValue = matched.readString(fieldString);
+                String paramType = paramTypes[0].getName();
+
+                // Narrow numbers to the setter's type; any other mismatch is fatal.
+                if (matched instanceof NumericDomain) {
+                    if (paramType.equalsIgnoreCase("int")) {
+                        fieldValue = ((Number) fieldValue).intValue();
+                    } else if (paramType.equalsIgnoreCase("float")) {
+                        fieldValue = ((Number) fieldValue).floatValue();
+                    } else if (!paramType.equalsIgnoreCase("double")) {
+                        System.out.println("What the hack, which type of number is this??");
+                        fieldValue = ((Number) fieldValue).doubleValue();
+                        System.exit(0);
+                    }
+                } else if (matched instanceof LiteralDomain) {
+                    if (!paramType.equalsIgnoreCase("java.lang.String")) {
+                        System.out.println("What the hack, which type of string is this?? "
+                                + fieldValue);
+                        System.exit(0);
+                    }
+                } else if (matched instanceof BooleanDomain) {
+                    if (!paramType.equalsIgnoreCase("boolean")) {
+                        System.out.println("What the hack, which type of boolean is this?? "
+                                + fieldValue);
+                        System.exit(0);
+                    }
+                } else {
+                    System.out.println("What the hack, which type of object is this?? "
+                            + fieldValue);
+                    System.exit(0);
+                }
+
+                try {
+                    if (DEBUG) {
+                        System.out.println("ObjectReader.read obj " + element.getClass()
+                                + " fielddomain name " + matched.getName()
+                                + " value: " + fieldValue);
+                        System.out.println("ObjectReader.read method " + setter
+                                + " the parameter type:" + fieldValue.getClass());
+                    }
+                    setter.invoke(element, fieldValue);
+                } catch (Exception e) {
+                    // Best effort: report the failing setter and carry on with the next one.
+                    e.printStackTrace();
+                }
+            }
+
+        } catch (InstantiationException e) {
+            e.printStackTrace();
+        } catch (IllegalAccessException e) {
+            e.printStackTrace();
+        }
+
+        return element;
+    }
+ //read(Class<?> element_class, Collection<Domain<?>> collection, String data, String separator)
+ public static Object read_(Class<?> element_class, Collection<Domain<?>> domains, String data, String separator) {
+
+ Object element= null;
+ try {
+ //element = Class.forName(element_class.getName());
+
+ element = element_class.newInstance();
+ } catch (InstantiationException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ } catch (IllegalAccessException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ }
+ Method [] element_methods = element_class.getDeclaredMethods();
+
+ if (data.endsWith("."))
+ data = data.substring(0, data.length()-1);
+ List<String> attributeValues = Arrays.asList(data.split(separator));
+
+
+ for (Method m: element_methods) {
+ String m_name = m.getName();
+ Class<?>[] param_type_name = m.getParameterTypes();
+ if (Util.isSetter(m_name) & Util.isSimpleType(param_type_name) ) {
+// if (!Util.isSimpleType(return_type_name))
+// continue; // in the future we should support classes
+ Annotation[] annotations = m.getAnnotations();
+
+ // iterate over the annotations to locate the MaxLength constraint if it exists
+ DomainSpec spec = null;
+ for (Annotation a : annotations) {
+ if (a instanceof DomainSpec) {
+ spec = (DomainSpec)a; // here it is !!!
+ break;
+ }
+ }
+ if (DEBUG) System.out.println("What annotation i found: "+ spec + " for method "+ m);
+ String fieldString = attributeValues.get(spec.readingSeq());
+ String field = Util.getAttributeName(m_name);
+
+ Iterator<Domain<?>> domain_it = domains.iterator();
+ //Iterator<String> value_it = attributeValues.iterator();
+ while(domain_it.hasNext()){
+ Domain<?> attr_domain = domain_it.next();
+ //String name = attr_domain.getName();
+ if (field.equalsIgnoreCase(attr_domain.getName())) {
+
+ Object fieldValue = attr_domain.readString(fieldString);
+
+ if (attr_domain instanceof NumericDomain) {
+ if (param_type_name[0].getName().equalsIgnoreCase("int")) {
+ fieldValue = ((Number)fieldValue).intValue();
+
+ } else if (param_type_name[0].getName().equalsIgnoreCase("float")) {
+ fieldValue = ((Number)fieldValue).floatValue();
+
+ } else if (!param_type_name[0].getName().equalsIgnoreCase("double")) {
+ System.out.println("What the hack, which type of number is this??");
+ fieldValue = ((Number)fieldValue).doubleValue();
+ System.exit(0);
+ }
+ } else if (attr_domain instanceof LiteralDomain) {
+ if (param_type_name[0].getName().equalsIgnoreCase("java.lang.String")) {
+ } else {
+ System.out.println("What the hack, which type of string is this?? " + fieldValue);
+ System.exit(0);
+ }
+ } else if (attr_domain instanceof BooleanDomain) {
+ if (param_type_name[0].getName().equalsIgnoreCase("boolean")) {
+ } else {
+ System.out.println("What the hack, which type of boolean is this?? " + fieldValue);
+ System.exit(0);
+ }
+ } else {
+ System.out.println("What the hack, which type of object is this?? " + fieldValue);
+ System.exit(0);
+ }
+
+
+ // String fieldValue = fieldString;
+
+ try {
+
+ if (DEBUG) System.out.println("ObjectReader.read obj "+ element.getClass() + " fielddomain name "+attr_domain.getName()+" value: "+fieldValue);
+ if (DEBUG) System.out.println("ObjectReader.read method "+ m + " the parameter type:"+ fieldValue.getClass());
+ m.invoke(element, fieldValue);
+
+ } catch (IllegalArgumentException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ } catch (IllegalAccessException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ } catch (InvocationTargetException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ break;
+ }
+
+
+ }
+ }
+ }
+
+ return element;
+
+
+ }
+
+
+ // ------------------------------------------------------------------
+    /**
+     * Walks the non-static declared fields of a class and passes every primitive field,
+     * and every field whose type accepts a String, to {@code handle_instance_field}.
+     * <p>
+     * NOTE: the value handed over is always {@code null}, and
+     * {@code handle_instance_field} returns immediately for a null value, so nothing is
+     * assigned yet. Array fields and fields of other classes are skipped.
+     *
+     * @param obj      object whose fields would be assigned
+     * @param classobj either a {@code Class} or an instance whose class is inspected
+     * @throws Exception an IOException for a void or unknown primitive field type, or
+     *                   whatever {@code handle_instance_field} throws
+     */
+    void read_fields(Object obj, Object classobj) throws Exception {
+        // Get a handle to the class of the object.
+        Class<?> cl = (classobj instanceof Class) ? (Class<?>) classobj : classobj.getClass();
+
+        // Stop once the limit is reached, in particular at the top of the superclass chain.
+        if ((cl == null) || (cl.isInstance(new Object()))) {
+            return;
+        }
+
+        for (Field field : cl.getDeclaredFields()) {
+            if (Modifier.isStatic(field.getModifiers())) {
+                continue;
+            }
+
+            Class<?> ctype = field.getType();
+            Object varValue = null;
+
+            if (ctype.isPrimitive()) {
+                // primitive types are handled directly.
+                boolean supported = ctype.equals(Boolean.TYPE) || ctype.equals(Character.TYPE)
+                        || ctype.equals(Byte.TYPE) || ctype.equals(Short.TYPE)
+                        || ctype.equals(Integer.TYPE) || ctype.equals(Long.TYPE)
+                        || ctype.equals(Float.TYPE) || ctype.equals(Double.TYPE);
+                if (supported) {
+                    handle_instance_field(ctype.getName(), field.getName(), obj, field, varValue);
+                } else if (ctype.equals(Void.TYPE)) {
+                    throw new IOException("read_fields: VOID type field found!");
+                } else {
+                    throw new IOException("read_fields: unknown primitive type");
+                }
+            } else if (ctype.isInstance(new String())) {
+                // Strings need special care
+                handle_instance_field(ctype.getName(), field.getName(), obj, field, varValue);
+            }
+            // everything else (arrays, other classes) is not supported yet and is skipped
+        }
+    }
+
+    // ------------------------------------------------------------------
+    // pull in a single field which isn't an array or a non-string class
+    /**
+     * Assigns one value to a field through reflection.
+     * <p>
+     * A {@code null} value and the literal text {@code "null"} leave the field untouched.
+     * A String value is parsed according to the field's primitive type (the first
+     * character is used for a {@code char} field). Any other value is handed to
+     * {@code Field.set} unchanged, so an already boxed value such as an {@code Integer}
+     * can be stored in an {@code int} field.
+     *
+     * @param tname type name, used only in the error report
+     * @param fname field name, used only in the error report
+     * @param obj   object that owns the field
+     * @param fl    field to assign
+     * @param value value to assign; may be {@code null}
+     * @throws IOException if parsing or assignment fails; the original failure is
+     *                     attached as the cause
+     */
+    public static void handle_instance_field(String tname, String fname,
+            Object obj, Field fl, Object value) throws IOException {
+
+        // Some sanity and 'do nothing' tests
+        if (value == null || "null".equals(value)) {
+            return;
+        }
+
+        // now try to run the assignments
+        try {
+            if (!(value instanceof String)) {
+                // Already a typed value: reflection unboxes and widens as needed.
+                fl.set(obj, value);
+                return;
+            }
+
+            String svalue = (String) value;
+            Class<?> type = fl.getType();
+            if (type.equals(Boolean.TYPE))
+                fl.set(obj, Boolean.valueOf(svalue));
+            else if (type.equals(Character.TYPE))
+                fl.set(obj, Character.valueOf(svalue.charAt(0)));
+            else if (type.equals(Byte.TYPE))
+                fl.set(obj, Byte.valueOf(svalue));
+            else if (type.equals(Short.TYPE))
+                fl.set(obj, Short.valueOf(svalue));
+            else if (type.equals(Integer.TYPE))
+                fl.set(obj, Integer.valueOf(svalue));
+            else if (type.equals(Long.TYPE))
+                fl.set(obj, Long.valueOf(svalue));
+            else if (type.equals(Float.TYPE))
+                fl.set(obj, Float.valueOf(svalue));
+            else if (type.equals(Double.TYPE))
+                fl.set(obj, Double.valueOf(svalue));
+            else
+                fl.set(obj, value);
+        } catch (Exception e) {
+            System.err.println("ERROR: assigning to " + fl + "\n" + "\tread: "
+                    + tname + " " + fname + " = " + value);
+            IOException failure = new IOException("field assignment failure:" + e);
+            failure.initCause(e); // keep the root cause for the caller
+            throw failure;
+        }
+    }
+
+
+
+}
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/RulePrinter.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/RulePrinter.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/RulePrinter.java 2008-03-30 02:50:49 UTC (rev 19317)
@@ -0,0 +1,330 @@
+package dt.tools;
+
+import java.io.*;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Hashtable;
+import java.util.Iterator;
+import java.util.Stack;
+
+import dt.DecisionTree;
+import dt.LeafNode;
+import dt.TreeNode;
+import dt.memory.LiteralDomain;
+
+public class RulePrinter {
+
+ private ArrayList<String> ruleText;
+ //private ArrayList<ArrayList<NodeValue>> rule_list;
+ private ArrayList<Rule> rules;
+
+ private Stack<NodeValue> nodes;
+
+ private Object ruleObject;
+ //private RuleComparator rule_comp = new RuleComparator();
+
+    /**
+     * Creates a printer with empty rule text, an empty rule list and an empty node stack.
+     */
+    public RulePrinter() {
+        /* most important: the path from the root to the node being visited */
+        this.nodes = new Stack<NodeValue>();
+
+        this.rules = new ArrayList<Rule>();
+        this.ruleText = new ArrayList<String>();
+    }
+
+ public void printer(DecisionTree dt, String packageName, String outputFile) {//, PrintStream object
+ ruleObject = dt.getName();
+ dfs(dt.getRoot());
+
+// int j = 0;
+// for( String rule: ruleText) {
+// j++;
+// System.out.println("Rule " +j + " suggests that \n"+ rule +".\n");
+// }
+
+ //String outputFile = new String("src/id3/rules"+".drl");
+ if (outputFile!=null) {
+ if (packageName != null)
+ write("package " + packageName +";\n\n", false, outputFile);
+ else
+ try {
+ throw new Exception("The package is not specified");
+ } catch (Exception e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ }
+// write("/* \n", false, outputFile);
+// write(" * Spitting the rules= \n", true, outputFile);
+// write(" */ \n", true, outputFile);
+ }
+
+ int i = 0;
+ Collections.sort(rules, Rule.getRankComparator());
+ for( Rule rule: rules) {
+ i++;
+ System.out.println("//rule " +i + " write to drl \n"+ rule +"\n");
+ if (outputFile!=null) {
+ write(rule.toString(), true, outputFile);
+ write("\n", true, outputFile);
+ }
+ }
+ }
+    /**
+     * @return the rule object recorded by {@code printer} (the name of the tree),
+     *         or {@code null} before {@code printer} has been called
+     */
+    public Object getRuleObject() {
+        return this.ruleObject;
+    }
+
+    /**
+     * Depth-first walk of the tree. The node stack holds the path from the root to the
+     * node being visited; each leaf reached produces one rule text and one {@code Rule}.
+     * The caller pops a child once it has been visited; the node on which the walk
+     * started stays on the stack.
+     *
+     * @param my_node node to visit
+     */
+    private void dfs(TreeNode my_node) {
+        NodeValue node_value = new NodeValue(my_node);
+        nodes.push(node_value);
+
+        if (my_node instanceof LeafNode) {
+            LeafNode leaf = (LeafNode) my_node;
+            node_value.setNodeValue(leaf.getValue());
+            ruleText.add(print(nodes));
+            // what if more than one condition (more than one leafNode)
+
+            Rule newRule = spitRule(nodes);
+            newRule.setId(rules.size());
+            rules.add(newRule);
+            return;
+        }
+
+        // Record the branch value on this node before descending into that branch.
+        Hashtable<Object, TreeNode> children = my_node.getChildren();
+        for (java.util.Map.Entry<Object, TreeNode> branch : children.entrySet()) {
+            node_value.setNodeValue(branch.getKey());
+            dfs(branch.getValue());
+            nodes.pop();
+        }
+    }
+
+ private Rule spitRule(Stack<NodeValue> nodes) {
+ //, Stack<NodeValue> leaves // if more than one leaf
+ Rule newRule = new Rule(nodes.size());// (nodes, leaves) //if more than one leaf
+ newRule.setObject(getRuleObject().toString());
+ Iterator<NodeValue> it = nodes.iterator();
+
+ while (it.hasNext()) {
+
+ NodeValue current = it.next();
+ if (it.hasNext()) {
+ newRule.addCondition(current);
+ } else {
+ newRule.addAction(current);
+ }
+ }
+ return newRule;
+ }
+
+ private String print(Stack<NodeValue> nodes) {
+ Iterator<NodeValue> it = nodes.iterator();
+
+ String out = "rule \"1 rank:\" \n";
+ out += "\t when";
+ out += "\t\t "+ruleObject+"Object("+ "";
+ while (it.hasNext()) {
+
+ NodeValue current = it.next();
+ if (it.hasNext()) {
+ out += "" + current.getDomain() + " == "+ current.getNodeValue() +" & " ;
+ } else {
+ out = out.substring(0, out.length()-2) + ")\n";
+ out += "\n\t then ";
+ out += "\n\t\t System.out.println(\"Decision (\"" + current.getDomain() + "\") = \""+ current.getNodeValue()+");";
+ }
+ }
+
+ /*
+
+ rule "Good Bye"
+ dialect "java"
+ when
+ Message( status == Message.GOODBYE, message : message )
+ then
+ System.out.println( "Goodbye: " + message );
+ end
+ */
+ return out;
+ }
+
+ // --------------------------------------------------------------------------------
+ // Saves the string
+ //--------------------------------------------------------------------------------
+ public void write(String toWrite, boolean append, String data) {
+ //String data = new String("data/results_"+System.currentTimeMillis()+".m");
+ File file =new File(data);
+ if (append)
+ {
+ if(!file.exists())
+ System.out.println("File doesnot exit, creating...");
+ try {
+ BufferedWriter out = new BufferedWriter(new FileWriter(data, true));
+ out.write(toWrite);
+ out.close();
+ //System.out.println("I wrote "+ toWrite);
+ } catch (IOException e) {
+ System.out.println("No I cannot write to the file");
+ System.exit(0);
+ }
+
+ } else {
+ if(file.exists()&& (file.length()>0))
+ file.delete();
+ try {
+ BufferedWriter out = new BufferedWriter(new FileWriter(data));
+ out.write(toWrite);
+ out.close();
+ System.out.println("I wrote "+ toWrite);
+ } catch (IOException e) {
+ System.out.println("No I cannot write to the file");
+ }
+ }
+ }
+}
+
+/**
+ * One rule derived from a root-to-leaf path of the tree: a list of conditions, a list
+ * of actions (normally the single leaf) and the rank taken from the last leaf added.
+ */
+class Rule {
+    private int id;
+    private String attr_obj;
+    private double rank;
+    private ArrayList<NodeValue> conditions;
+    private ArrayList<NodeValue> actions;
+
+    /**
+     * @param numCond initial capacity of the condition list
+     */
+    Rule(int numCond) {
+        conditions = new ArrayList<NodeValue>(numCond);
+        actions = new ArrayList<NodeValue>(1);
+    }
+
+    /** @return the rank copied from the leaf of the last action added */
+    public double getRank() {
+        return rank;
+    }
+
+    /** Adds a copy of the given node/value pair to the conditions. */
+    public void addCondition(NodeValue current) {
+        conditions.add(new NodeValue(current.getNode(), current.getNodeValue()));
+    }
+
+    /**
+     * Adds a copy of the given node/value pair to the actions and takes over the rank
+     * of its node, which must be a {@code LeafNode}.
+     */
+    public void addAction(NodeValue current) {
+        actions.add(new NodeValue(current.getNode(), current.getNodeValue()));
+        rank = ((LeafNode) current.getNode()).getRank();
+    }
+
+    public void setObject(String obj) {
+        attr_obj = obj;
+    }
+
+    public String getObject() {
+        return attr_obj;
+    }
+
+    private int getId() {
+        return id;
+    }
+
+    public void setId(int id) {
+        this.id = id;
+    }
+
+    /**
+     * Renders the rule in the layout
+     * <pre>
+     * rule "#id decision = action with rank:r"
+     *      when
+     *          Object(cond1, cond2, decision : decision)
+     *      then
+     *          System.out.println("Decision on decision = "+decision +": (action)");
+     * end
+     * </pre>
+     * A rule without actions, which used to fail with a StringIndexOutOfBoundsException,
+     * is rendered with an empty decision and action.
+     */
+    @Override
+    public String toString() {
+        StringBuilder pattern = new StringBuilder();
+        for (NodeValue cond : conditions) {
+            pattern.append(cond).append(", ");
+        }
+
+        StringBuilder action = new StringBuilder();
+        StringBuilder decision = new StringBuilder();
+        for (NodeValue act : actions) {
+            pattern.append(act.getDomain()).append(" : ").append(act.getDomain()).append(" , ");
+            action.append(act.getNodeValue()).append(" , ");
+            decision.append(act.getDomain()).append(" ");
+        }
+
+        // Remove the trailing separator: " , " after an action, ", " after a condition.
+        if (!actions.isEmpty()) {
+            pattern.setLength(pattern.length() - 3);
+            action.setLength(action.length() - 3);
+        } else if (!conditions.isEmpty()) {
+            pattern.setLength(pattern.length() - 2);
+        }
+
+        StringBuilder out = new StringBuilder();
+        out.append("rule \"#").append(getId()).append(" ").append(decision).append("= ")
+                .append(action).append(" with rank:").append(rank).append("\" \n");
+        out.append("\t when");
+        out.append("\n\t\t ").append(getObject()).append("(").append(pattern).append(")\n");
+        out.append("\t then ");
+        out.append("\n\t\t System.out.println(\"Decision on ").append(decision).append("= \"+")
+                .append(decision).append("+\": (").append(action).append(")\");\n");
+        out.append("end\n");
+        return out.toString();
+    }
+
+    /** @return a comparator ordering rules by ascending rank */
+    public static Comparator<Rule> getRankComparator() {
+        return new RuleComparator();
+    }
+
+    /**
+     * Orders rules by ascending rank. Double.compare gives a total order even for NaN,
+     * which the plain &lt; and &gt; comparisons did not.
+     */
+    private static class RuleComparator implements Comparator<Rule> {
+        public int compare(Rule r1, Rule r2) {
+            return Double.compare(r1.getRank(), r2.getRank());
+        }
+    }
+}
+
+
+/**
+ * Pairs a tree node with a value: the branch value chosen at an inner node, or the
+ * value of a leaf.
+ */
+class NodeValue {
+
+    private TreeNode node;
+    private Object nodeValue;
+
+    /** Creates a pair whose value is not set yet. */
+    NodeValue(TreeNode n) {
+        this(n, null);
+    }
+
+    NodeValue(TreeNode n, Object value) {
+        this.node = n;
+        this.nodeValue = value;
+    }
+
+    /** @return the name of the node's domain */
+    public String getDomain() {
+        return node.getDomain().getName();
+    }
+
+    public TreeNode getNode() {
+        return node;
+    }
+
+    public void setNode(TreeNode node) {
+        this.node = node;
+    }
+
+    public Object getNodeValue() {
+        return nodeValue;
+    }
+
+    public void setNodeValue(Object nodeValue) {
+        this.nodeValue = nodeValue;
+    }
+
+    /**
+     * Renders the pair as {@code domain == value}; the value is wrapped in double quotes
+     * when the node's domain is a {@code LiteralDomain}.
+     */
+    public String toString() {
+        final boolean quoted = node.getDomain() instanceof LiteralDomain;
+        final String prefix = node.getDomain() + " == ";
+        if (quoted) {
+            return prefix + "\"" + nodeValue + "\"";
+        }
+        return prefix + nodeValue;
+    }
+
+}
+
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/Util.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/Util.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/Util.java 2008-03-30 02:50:49 UTC (rev 19317)
@@ -0,0 +1,109 @@
+package dt.tools;
+
+import java.lang.annotation.Annotation;
+import java.lang.reflect.Field;
+import java.util.Hashtable;
+import java.util.List;
+
+import dt.memory.DomainSpec;
+
+public class Util {
+
+ public static String ntimes(String s,int n){
+ StringBuffer buf = new StringBuffer();
+ for (int i = 0; i < n; i++) {
+ buf.append(s);
+ }
+ return buf.toString();
+ }
+
+ //private static HashSet<String> simpletype = new HashSet<String>(0);
+ public static boolean isSimpleType(Class<?>[] type_name) {
+// simpletype.contains(type_name)
+ if (type_name.length==1 && (type_name[0].getName().equalsIgnoreCase("boolean") ||
+ type_name[0].getName().equalsIgnoreCase("int") ||
+ type_name[0].getName().equalsIgnoreCase("double") ||
+ type_name[0].getName().equalsIgnoreCase("float") ||
+ type_name[0].getName().equalsIgnoreCase("java.lang.String")))
+ return true;
+ else
+ return false;
+ }
+
+// public static boolean castType(Object value, Class<?>[] type_name) {
+//// simpletype.contains(type_name)
+// if (type_name.length!=1)
+// return false;
+//
+// if (type_name[0].getName().equalsIgnoreCase("boolean")) {
+//
+// } else if (type_name[0].getName().equalsIgnoreCase("int")) {
+//
+// } else if (type_name[0].getName().equalsIgnoreCase("double")) {
+// } else if (type_name[0].getName().equalsIgnoreCase("float")){
+//
+// } else if (type_name[0].getName().equalsIgnoreCase("java.lang.String")){
+// return true;
+// }else
+// return false;
+// }
+
+    /**
+     * @param method_name name of a method
+     * @return {@code true} when the name starts with {@code get} or {@code is}
+     */
+    public static boolean isGetter(String method_name) {
+        return method_name.startsWith("get") || method_name.startsWith("is");
+    }
+
+    /**
+     * @param m_name name of a method
+     * @return {@code true} when the name starts with {@code set}
+     */
+    public static boolean isSetter(String m_name) {
+        return m_name.startsWith("set");
+    }
+
+ public static String getAttributeName(String method_name) {
+ if (method_name.startsWith("get") || method_name.startsWith("set"))
+ return method_name.substring(3, method_name.length()).toLowerCase();
+ else if (method_name.startsWith("is"))
+ return method_name.substring(2, method_name.length()).toLowerCase();
+ return null;
+ }
+
+    /**
+     * Base-2 logarithm, computed as the natural logarithm divided by {@code ln 2}.
+     *
+     * @param prob value to take the logarithm of
+     * @return {@code log2(prob)}; follows {@code Math.log} for zero, negative and NaN input
+     */
+    public static double log2(double prob) {
+        final double natural = Math.log(prob);
+        return natural / Math.log(2);
+    }
+
+    /**
+     * @return the fixed dividing size, currently 2
+     */
+    public static int getDividingSize() {
+        final int dividingSize = 2;
+        return dividingSize;
+    }
+
+    /**
+     * Finds the first declared field of simple type whose {@code DomainSpec} annotation
+     * marks it as the target.
+     *
+     * @param classObj class whose declared fields are inspected
+     * @return the name of that field, or {@code null} when no such field exists
+     */
+    public static String getTargetAnnotation(Class<? extends Object> classObj) {
+        for (Field candidate : classObj.getDeclaredFields()) {
+            Class<?>[] fieldType = { candidate.getType() };
+            if (!Util.isSimpleType(fieldType)) {
+                continue;
+            }
+            // A field carries at most one DomainSpec, so it can be fetched directly.
+            DomainSpec spec = candidate.getAnnotation(DomainSpec.class);
+            if (spec != null && spec.target()) {
+                return candidate.getName();
+            }
+        }
+        return null;
+    }
+
+
+    /**
+     * @return the fixed literal {@code "sum"}
+     */
+    public static String getSum() {
+        final String sumName = "sum";
+        return sumName;
+    }
+
+
+
+}
\ No newline at end of file
More information about the jboss-svn-commits
mailing list