[jboss-svn-commits] JBL Code SVN: r19318 - in labs/jbossrules/contrib/machinelearning/decisiontree/src/dt: tools and 1 other directory.
jboss-svn-commits at lists.jboss.org
jboss-svn-commits at lists.jboss.org
Sat Mar 29 22:52:49 EDT 2008
Author: gizil
Date: 2008-03-29 22:52:49 -0400 (Sat, 29 Mar 2008)
New Revision: 19318
Added:
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/C45TreeBuilder.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/DecisionTreeBuilder.java
labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/FileProcessor.java
Log:
new file system
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/C45TreeBuilder.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/C45TreeBuilder.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/C45TreeBuilder.java 2008-03-30 02:52:49 UTC (rev 19318)
@@ -0,0 +1,360 @@
+package dt.builder;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Hashtable;
+import java.util.Iterator;
+import java.util.List;
+
+import dt.DecisionTree;
+import dt.LeafNode;
+import dt.TreeNode;
+
+import dt.memory.WorkingMemory;
+import dt.memory.Fact;
+import dt.memory.FactSet;
+import dt.memory.OOFactSet;
+import dt.memory.Domain;
+import dt.tools.Util;
+
+public class C45TreeBuilder implements DecisionTreeBuilder {
+
+ class MyThread extends Thread {
+ C45TreeBuilder builder;
+ DecisionTree dt;
+ List<Fact> facts;
+ List<String> attributeNames;
+ TreeNode currentNode = null;
+ Object value = null;
+ TreeNode result = null;
+
+ @Override
+ public void run() {
+ result = builder.c45(dt, facts, attributeNames);
+ currentNode.addNode(value, result);
+ }
+ }
+
+ MyThread helper;
+ private int FUNC_CALL = 0;
+ private int num_fact_processed = 0;
+
+ /*
+ * treebuilder.execute(workingmemory, classtoexecute, attributestoprocess)
+ *
+ * foreach factset in workingmemory if classtoexecute.isAssignableFrom(
+ * factset.class ) internaladd(factset)
+ *
+ * internalprocess(attributestoprocess)
+ */
+
+ public DecisionTree build(WorkingMemory wm, Class<?> klass,
+ String targetField, Collection<String> workingAttributes) {
+
+ DecisionTree dt = new DecisionTree(klass.getName());
+ // **OPT List<FactSet> facts = new ArrayList<FactSet>();
+ ArrayList<Fact> facts = new ArrayList<Fact>();
+ FactSet klass_fs = null;
+ Iterator<FactSet> it_fs = wm.getFactsets();
+ while (it_fs.hasNext()) {
+ FactSet fs = it_fs.next();
+ if (fs instanceof OOFactSet) {
+ if (klass.isAssignableFrom(((OOFactSet) fs).getFactClass())) {
+ // **OPT facts.add(fs);
+ fs.assignTo(facts); // adding all facts of fs to "facts
+ }
+ }
+ if (klass.getName() == fs.getClassName()) {
+ klass_fs = fs;
+ }
+ }
+ dt.FACTS_READ += facts.size();
+
+ num_fact_processed = facts.size();
+
+ if (workingAttributes != null)
+ for (String attr : workingAttributes) {
+ dt.addDomain(klass_fs.getDomain(attr));
+ }
+ else
+ for (Domain<?> d : klass_fs.getDomains())
+ dt.addDomain(d);
+
+ dt.setTarget(targetField);
+
+ ArrayList<String> attrs = new ArrayList<String>(dt.getAttributes());
+ Collections.sort(attrs);
+
+ TreeNode root = c45(dt, facts, attrs);
+ dt.setRoot(root);
+
+ return dt;
+ }
+
+ public DecisionTree build(WorkingMemory wm, String klass,
+ String targetField, Collection<String> workingAttributes) {
+
+ DecisionTree dt = new DecisionTree(klass);
+ // **OPT List<FactSet> facts = new ArrayList<FactSet>();
+ ArrayList<Fact> facts = new ArrayList<Fact>();
+ FactSet klass_fs = null;
+ Iterator<FactSet> it_fs = wm.getFactsets();
+ while (it_fs.hasNext()) {
+ FactSet fs = it_fs.next();
+ if (klass == fs.getClassName()) {
+ // **OPT facts.add(fs);
+ fs.assignTo(facts); // adding all facts of fs to "facts"
+
+ klass_fs = fs;
+ break;
+ }
+ }
+ dt.FACTS_READ += facts.size();
+ num_fact_processed = facts.size();
+
+ if (workingAttributes != null)
+ for (String attr : workingAttributes) {
+ System.out.println("Bok degil " + attr);
+ dt.addDomain(klass_fs.getDomain(attr));
+ }
+ else
+ for (Domain<?> d : klass_fs.getDomains())
+ dt.addDomain(d);
+
+ dt.setTarget(targetField);
+
+ ArrayList<String> attrs = new ArrayList<String>(dt.getAttributes());
+ Collections.sort(attrs);
+
+ TreeNode root = c45(dt, facts, attrs);
+ dt.setRoot(root);
+
+ return dt;
+ }
+
+ private TreeNode c45(DecisionTree dt, List<Fact> facts,
+ List<String> attributeNames) {
+
+ FUNC_CALL++;
+ if (facts.size() == 0) {
+ throw new RuntimeException("Nothing to classify, factlist is empty");
+ }
+ /* let's get the statistics of the results */
+ // List<?> targetValues = dt.getPossibleValues(dt.getTarget());
+ Hashtable<Object, Integer> stats = dt.getStatistics(facts, dt
+ .getTarget());// targetValues
+ Collection<Object> targetValues = stats.keySet();
+ int winner_vote = 0;
+ int num_supporters = 0;
+ Object winner = null;
+ for (Object key : targetValues) {
+
+ int num_in_class = stats.get(key).intValue();
+ if (num_in_class > 0)
+ num_supporters++;
+ if (num_in_class > winner_vote) {
+ winner_vote = num_in_class;
+ winner = key;
+ }
+ }
+
+ /* if all elements are classified to the same value */
+ if (num_supporters == 1) {
+ // *OPT* return new
+ // LeafNode(facts.get(0).getFact(0).getFieldValue(target));
+ LeafNode classifiedNode = new LeafNode(
+ dt.getDomain(dt.getTarget()), winner);
+ classifiedNode.setRank((double) facts.size()
+ / (double) num_fact_processed);
+ return classifiedNode;
+ }
+
+ /* if there is no attribute left in order to continue */
+ if (attributeNames.size() == 0) {
+ /* an heuristic of the leaf classification */
+ LeafNode noAttributeLeftNode = new LeafNode(dt.getDomain(dt
+ .getTarget()), winner);
+ noAttributeLeftNode.setRank((double) winner_vote
+ / (double) num_fact_processed);
+ return noAttributeLeftNode;
+ }
+
+ /* id3 starts */
+ String chosenAttribute = attributeWithGreatestGain(dt, facts, stats,
+ attributeNames);
+
+ System.out.println(Util.ntimes("*", 20) + " 1st best attr: "
+ + chosenAttribute);
+
+ TreeNode currentNode = new TreeNode(dt.getDomain(chosenAttribute));
+ // ConstantDecisionTree m = majorityValue(ds);
+ /* the majority */
+
+ List<?> attributeValues = dt.getPossibleValues(chosenAttribute);
+ Hashtable<Object, List<Fact>> filtered_facts = splitFacts(facts,
+ chosenAttribute, attributeValues);
+ dt.FACTS_READ += facts.size();
+
+ // if (FUNC_CALL ==5) {
+ // System.out.println("FUNC_CALL:" +FUNC_CALL);
+ // System.exit(0);
+ // }
+ for (int i = 0; i < attributeValues.size(); i++) {
+ /* split the last two class at the same time */
+ Object value = attributeValues.get(i);
+
+ ArrayList<String> attributeNames_copy = new ArrayList<String>(
+ attributeNames);
+ attributeNames_copy.remove(chosenAttribute);
+
+ if (filtered_facts.get(value).isEmpty()) {
+ /* majority !!!! */
+ LeafNode majorityNode = new LeafNode(dt.getDomain(dt
+ .getTarget()), winner);
+ majorityNode.setRank(0.0);
+ currentNode.addNode(value, majorityNode);
+ } else {
+ TreeNode newNode = c45(dt, filtered_facts.get(value),
+ attributeNames_copy);
+ currentNode.addNode(value, newNode);
+ }
+ }
+
+ return currentNode;
+ }
+
+ // String chooseAttribute(List<FactSet> facts, List<String> attrs) {
+ public String attributeWithGreatestGain(DecisionTree dt, List<Fact> facts,
+ Hashtable<Object, Integer> facts_in_class, List<String> attrs) {
+
+ double dt_info = dt.getInformation(facts_in_class, facts.size());
+ double greatestGain = 0.0;
+ String attributeWithGreatestGain = attrs.get(0);
+ for (String attr : attrs) {
+ double gain = 0;
+ if (dt.getDomain(attr).isDiscrete()) {
+ gain = dt_info - dt.getGain(facts, attr);
+ } else {
+ /* 1. sort the values */
+ int begin_index = 0;
+ int end_index = facts.size();
+ Collections.sort(facts,
+ new FactNumericAttributeComparator(attr));
+ List<Integer> splits = getSplitPoints(facts, dt.getTarget());
+ gain = dt_info
+ - dt.getContinuousGain(facts, splits, begin_index,
+ end_index, facts_in_class, attr);
+ // gain = dt_info - dt.getContinuousGain(facts, facts_in_class,
+ // attr);
+ }
+
+ System.out.println("Attribute: " + attr + " the gain: " + gain);
+ if (gain > greatestGain) {
+ greatestGain = gain;
+ attributeWithGreatestGain = attr;
+ }
+ }
+
+ return attributeWithGreatestGain;
+ }
+
+ /*
+ * id3 uses that function because it can not classify continuous attributes
+ */
+
+ public String attributeWithGreatestGain_discrete(DecisionTree dt,
+ List<Fact> facts, Hashtable<Object, Integer> facts_in_class,
+ List<String> attrs) {
+
+ double dt_info = dt.getInformation(facts_in_class, facts.size());
+ double greatestGain = 0.0;
+ String attributeWithGreatestGain = attrs.get(0);
+ for (String attr : attrs) {
+ double gain = 0;
+ if (!dt.getDomain(attr).isDiscrete()) {
+ System.err.println("Ignoring the attribute:" + attr
+ + " the id3 can not classify continuous attributes");
+ continue;
+ } else {
+ gain = dt_info - dt.getGain(facts, attr);
+ }
+ System.out.println("Attribute: " + attr + " the gain: " + gain);
+ if (gain > greatestGain) {
+ greatestGain = gain;
+ attributeWithGreatestGain = attr;
+ }
+
+ }
+
+ return attributeWithGreatestGain;
+ }
+
+ private List<Integer> getSplitPoints(List<Fact> facts, String target) {
+ List<Integer> splits = new ArrayList<Integer>();
+ Iterator<Fact> it_f = facts.iterator();
+ Fact f1 = it_f.next();
+ int index = 0;
+ while (it_f.hasNext()) {
+ Fact f2 = it_f.next();
+ if (f1.getFieldValue(target) != f2.getFieldValue(target))
+ splits.add(Integer.valueOf(index));
+
+ f1 = f2;
+ index++;
+ }
+ return splits;
+ }
+
+ public Hashtable<Object, List<Fact>> splitFacts(List<Fact> facts,
+ String attributeName, List<?> attributeValues) {
+ Hashtable<Object, List<Fact>> factLists = new Hashtable<Object, List<Fact>>(
+ attributeValues.size());
+ for (Object v : attributeValues) {
+ factLists.put(v, new ArrayList<Fact>());
+ }
+ for (Fact f : facts) {
+ factLists.get(f.getFieldValue(attributeName)).add(f);
+ }
+ return factLists;
+ }
+
+ public void testEntropy(DecisionTree dt, List<Fact> facts) {
+ Hashtable<Object, Integer> facts_in_class = dt.getStatistics(facts, dt
+ .getTarget());// , targetValues
+ double initial_info = dt.getInformation(facts_in_class, facts.size()); // entropy
+ // value
+
+ System.out.println("initial_information: " + initial_info);
+
+ String first_attr = attributeWithGreatestGain(dt, facts,
+ facts_in_class, dt.getAttributes());
+
+ System.out.println("best attr: " + first_attr);
+ }
+
+ public int getNumCall() {
+ return FUNC_CALL;
+ }
+
+ private class FactNumericAttributeComparator implements Comparator<Fact> {
+ private String attr_name;
+
+ public FactNumericAttributeComparator(String _attr_name) {
+ attr_name = _attr_name;
+ }
+
+ public int compare(Fact f0, Fact f1) {
+ Number n0 = (Number) f0.getFieldValue(attr_name);
+ Number n1 = (Number) f1.getFieldValue(attr_name);
+ if (n0.doubleValue() < n1.doubleValue())
+ return -1;
+ else if (n0.doubleValue() > n1.doubleValue())
+ return 1;
+ else
+ return 0;
+ }
+ }
+
+}
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/DecisionTreeBuilder.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/DecisionTreeBuilder.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/builder/DecisionTreeBuilder.java 2008-03-30 02:52:49 UTC (rev 19318)
@@ -0,0 +1,15 @@
+package dt.builder;
+
+import java.util.Collection;
+
+import dt.DecisionTree;
+import dt.memory.WorkingMemory;
+
+public interface DecisionTreeBuilder {
+
+
+ DecisionTree build(WorkingMemory wm, Class<?> klass, String targetField, Collection<String> workingAttributes);
+
+ DecisionTree build(WorkingMemory simple, String klass_name, String target_attr,Collection<String> workingAttributes);
+
+}
Added: labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/FileProcessor.java
===================================================================
--- labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/FileProcessor.java (rev 0)
+++ labs/jbossrules/contrib/machinelearning/decisiontree/src/dt/tools/FileProcessor.java 2008-03-30 02:52:49 UTC (rev 19318)
@@ -0,0 +1,66 @@
+package dt.tools;
+
+import java.util.List;
+
+import dt.DecisionTree;
+import dt.builder.C45TreeBuilder;
+import dt.builder.DecisionTreeBuilder;
+import dt.builder.IDTreeBuilder;
+import dt.memory.FactSetFactory;
+import dt.memory.WorkingMemory;
+
+public class FileProcessor {
+ public static List<Object> processFileExmID3(WorkingMemory simple, Object emptyObject, String drlfile, String datafile, String separator) {
+
+ try {
+ List<Object> obj_read=FactSetFactory.fromFileAsObject(simple, emptyObject.getClass(), datafile, separator);
+ DecisionTreeBuilder bocuk = new IDTreeBuilder();
+
+ long dt = System.currentTimeMillis();
+ String target_attr = Util.getTargetAnnotation(emptyObject.getClass());
+
+ DecisionTree bocuksTree = bocuk.build(simple, emptyObject.getClass().getName(), target_attr, null);
+ dt = System.currentTimeMillis() - dt;
+ System.out.println("Time" + dt + "\n" + bocuksTree);
+
+ RulePrinter my_printer = new RulePrinter();
+ my_printer.printer(bocuksTree, "examples", "src/rules/examples/"+drlfile);
+
+ return obj_read;
+
+ } catch (Exception e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ }
+ return null;
+
+
+ }
+
+ public static List<Object> processFileExmC45(WorkingMemory simple, Object emptyObject, String drlfile, String datafile, String separator) {
+
+ try {
+ List<Object> obj_read=FactSetFactory.fromFileAsObject(simple, emptyObject.getClass(), datafile, separator);
+ C45TreeBuilder bocuk = new C45TreeBuilder();
+
+ long dt = System.currentTimeMillis();
+ String target_attr = Util.getTargetAnnotation(emptyObject.getClass());
+
+ DecisionTree bocuksTree = bocuk.build(simple, emptyObject.getClass().getName(), target_attr, null);
+ dt = System.currentTimeMillis() - dt;
+ System.out.println("Time" + dt + "\n" + bocuksTree);
+
+ RulePrinter my_printer = new RulePrinter();
+ my_printer.printer(bocuksTree, "examples", "src/rules/examples/"+drlfile);
+
+ return obj_read;
+
+ } catch (Exception e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ }
+ return null;
+
+
+ }
+}
More information about the jboss-svn-commits
mailing list