C45算法建立決策樹JAVA練習_第1頁
C45算法建立決策樹JAVA練習_第2頁
C45算法建立決策樹JAVA練習_第3頁
C45算法建立決策樹JAVA練習_第4頁
C45算法建立決策樹JAVA練習_第5頁
已閱讀5頁,還剩6頁未讀, 繼續免費閱讀

下載本文檔

版權說明:本文檔由用戶提供并上傳,收益歸屬內容提供方,若內容存在侵權,請進行舉報或認領

文檔簡介

1、【決策樹】 C4.5算法建立決策樹JAVA練習以下程序是我練習寫的,不一定正確也沒做存儲優化。有問題請留言交流。轉載請掛連接。當前的屬性為:age income student credit_rating當前的數據集為(最后一列是TARGET_VALUE):-youth high no fair noyouth high no excellent nomiddle_aged high no fair yessenior low yes fair yessenior low yes excellent nomiddle_aged low yes excellent yesyouth medium

2、 no fair noyouth low yes fair yessenior medium yes fair yesyouth mediumyes excellent yesmiddle_aged high yes fair yessenior medium no excellent no-C4.5建立樹類package C45Test;import java.util.ArrayList;import java.util.List;import java.util.Map;public class DecisionTree public TreeNode createDT(ListArra

3、yList data,List attributeList) System.out.println(當前的DATA為); for(int i=0;idata.size();i+) ArrayList temp = data.get(i); for(int j=0;jtemp.size();j+) System.out.print(temp.get(j)+ ); System.out.println(); System.out.println(-); System.out.println(當前的ATTR為); for(int i=0;iattributeList.size();i+) Syste

4、m.out.print(attributeList.get(i)+ ); System.out.println(); System.out.println(-); TreeNode node = new TreeNode(); String result = InfoGain.IsPure(InfoGain.getTarget(data); if(result != null) node.setNodeName(leafNode); node.setTargetFunValue(result); return node; if(attributeList.size() = 0) node.se

5、tTargetFunValue(result); return node; else InfoGain gain = new InfoGain(data,attributeList); double maxGain = 0.0; int attrIndex = -1; for(int i=0;iattributeList.size();i+) double tempGain = gain.getGainRatio(i); if(maxGain tempGain) maxGain = tempGain; attrIndex = i; System.out.println(選擇出的最大增益率屬性為

6、: + attributeList.get(attrIndex); node.setAttributeValue(attributeList.get(attrIndex); ListArrayList resultData = null; Map attrvalueMap = gain.getAttributeValue(attrIndex); for(Map.Entry entry : attrvalueMap.entrySet() resultData = gain.getData4Value(entry.getKey(), attrIndex); TreeNode leafNode =

7、null; System.out.println(當前為+attributeList.get(attrIndex)+的+entry.getKey()+分支。); if(resultData.size() = 0) leafNode = new TreeNode(); leafNode.setNodeName(attributeList.get(attrIndex); leafNode.setTargetFunValue(result); leafNode.setAttributeValue(entry.getKey(); else for (int j = 0; j resultData.si

8、ze(); j+) resultData.get(j).remove(attrIndex); ArrayList resultAttr = new ArrayList(attributeList); resultAttr.remove(attrIndex); leafNode = createDT(resultData,resultAttr); node.getChildTreeNode().add(leafNode); node.getPathName().add(entry.getKey(); return node; class TreeNode private String attri

9、buteValue; private List childTreeNode; private List pathName; private String targetFunValue; private String nodeName; public TreeNode(String nodeName) this.nodeName = nodeName; this.childTreeNode = new ArrayList(); this.pathName = new ArrayList(); public TreeNode() this.childTreeNode = new ArrayList

10、(); this.pathName = new ArrayList(); public String getAttributeValue() return attributeValue; public void setAttributeValue(String attributeValue) this.attributeValue = attributeValue; public List getChildTreeNode() return childTreeNode; public void setChildTreeNode(List childTreeNode) this.childTre

11、eNode = childTreeNode; public String getTargetFunValue() return targetFunValue; public void setTargetFunValue(String targetFunValue) this.targetFunValue = targetFunValue; public String getNodeName() return nodeName; public void setNodeName(String nodeName) this.nodeName = nodeName; public List getPa

12、thName() return pathName; public void setPathName(List pathName) this.pathName = pathName; 增益率計算類(取log的時候底用的是e,沒用2)package C45Test;import java.util.ArrayList;import java.util.HashMap;import java.util.HashSet;import java.util.Iterator;import java.util.List;import java.util.Map;import java.util.Set;/C

13、 4.5 實現(xiàn)public class InfoGain private ListArrayList data; private List attribute; public InfoGain(ListArrayList data,List attribute) this.data = new ArrayListArrayList(); for(int i=0;idata.size();i+) List temp = data.get(i); ArrayList t = new ArrayList(); for(int j=0;jtemp.size();j+) t.add(temp.get(j

14、); this.data.add(t); this.attribute = new ArrayList(); for(int k=0;kattribute.size();k+) this.attribute.add(attribute.get(k); /*this.data = data; this.attribute = attribute;*/ /獲得熵 public double getEntropy() Map targetValueMap = getTargetValue(); Set targetkey = targetValueMap.keySet(); double entro

15、py = 0.0; for(String key : targetkey) double p = MathUtils.div(double)targetValueMap.get(key), (double)data.size(); entropy += (-1) * p * Math.log(p); return entropy; /獲得InfoA public double getInfoAttribute(int attributeIndex) Map attributeValueMap = getAttributeValue(attributeIndex); double infoA =

16、 0.0; for(Map.Entry entry : attributeValueMap.entrySet() int size = data.size(); double attributeP = MathUtils.div(double)entry.getValue() , (double) size); Map targetValueMap = getAttributeValueTargetValue(entry.getKey(),attributeIndex); long totalCount = 0L; for(Map.Entry entryValue :targetValueMa

17、p.entrySet() totalCount += entryValue.getValue(); double valueSum = 0.0; for(Map.Entry entryTargetValue : targetValueMap.entrySet() double p = MathUtils.div(double)entryTargetValue.getValue(), (double)totalCount); valueSum += Math.log(p) * p; infoA += (-1) * attributeP * valueSum; return infoA; /得到屬

18、性值在決策空間的比例 public Map getAttributeValueTargetValue(String attributeName,int attributeIndex) Map targetValueMap = new HashMap(); IteratorArrayList iterator = data.iterator(); while(iterator.hasNext() List tempList = iterator.next(); if(attributeName.equalsIgnoreCase(tempList.get(attributeIndex) int s

19、ize = tempList.size(); String key = tempList.get(size - 1); Long value = targetValueMap.get(key); targetValueMap.put(key, value != null ? +value :1L); return targetValueMap; /得到屬性在決策空間上的數(shù)量 public Map getAttributeValue(int attributeIndex) Map attributeValueMap = new HashMap(); for(ArrayList note : da

20、ta) String key = note.get(attributeIndex); Long value = attributeValueMap.get(key); attributeValueMap.put(key, value != null ? +value :1L); return attributeValueMap; public ListArrayList getData4Value(String attrValue,int attrIndex) ListArrayList resultData = new ArrayListArrayList(); IteratorArrayL

21、ist iterator = data.iterator(); for(;iterator.hasNext();) ArrayList templist = iterator.next(); if(templist.get(attrIndex).equalsIgnoreCase(attrValue) ArrayList temp = (ArrayList) templist.clone(); resultData.add(temp); return resultData; /獲得增益率 public double getGainRatio(int attributeIndex) return

22、MathUtils.div(getGain(attributeIndex), getSplitInfo(attributeIndex); /獲得增益量 public double getGain(int attributeIndex) return getEntropy() - getInfoAttribute(attributeIndex); /得到懲罰因子 public double getSplitInfo(int attributeIndex) Map attributeValueMap = getAttributeValue(attributeIndex); double split

23、A = 0.0; for(Map.Entry entry : attributeValueMap.entrySet() int size = data.size(); double attributeP = MathUtils.div(double)entry.getValue() , (double) size); splitA += attributeP * Math.log(attributeP) * (-1); return splitA; /得到目標函數(shù)在當前集合范圍內(nèi)的離散的值 public Map getTargetValue() Map targetValueMap = new

24、 HashMap(); IteratorArrayList iterator = data.iterator(); while(iterator.hasNext() List tempList = iterator.next(); String key = tempList.get(tempList.size() - 1); Long value = targetValueMap.get(key); targetValueMap.put(key, value != null ? +value : 1L); return targetValueMap; /獲得TARGET值 public sta

25、tic List getTarget(ListArrayList data) List list = new ArrayList(); for(ArrayList temp : data) int index = temp.size() -1; String value = temp.get(index); list.add(value); return list; /判斷當前純度是否100% public static String IsPure(List list) Set set = new HashSet(); for(String name :list) set.add(name);

26、 if(set.size() 1) return null; Iterator iterator = set.iterator(); return iterator.next(); 測試類,數(shù)據(jù)集讀取以上的分別放到2個List中。package C45Test;import java.util.ArrayList;import java.util.List;import C45Test.DecisionTree.TreeNode;public class MainC45 private static final ListArrayList dataList = new ArrayListArr

27、ayList(); private static final List attributeList = new ArrayList(); public static void main(String args) DecisionTree dt = new DecisionTree(); TreeNode node = dt.createDT(configData(),configAttribute(); System.out.println(); 大數(shù)運算工具類package C45Test;import java.math.BigDecimal;public abstract class M

28、athUtils /默認余數(shù)長度 private static final int DIV_SCALE = 10; /受限于DOUBLE長度 public static double add(double value1,double value2) BigDecimal big1 = new BigDecimal(String.valueOf(value1); BigDecimal big2 = new BigDecimal(String.valueOf(value2); return big1.add(big2).doubleValue(); /大數(shù)加法 public static double add(String value1,String value2) BigDecimal big1 = new BigD

溫馨提示

  • 1. 本站所有資源如無特殊說明,都需要本地電腦安裝OFFICE2007和PDF閱讀器。圖紙軟件為CAD,CAXA,PROE,UG,SolidWorks等.壓縮文件請下載最新的WinRAR軟件解壓。
  • 2. 本站的文檔不包含任何第三方提供的附件圖紙等,如果需要附件,請聯系上傳者。文件的所有權益歸上傳用戶所有。
  • 3. 本站RAR壓縮包中若帶圖紙,網頁內容里面會有圖紙預覽,若沒有圖紙預覽就沒有圖紙。
  • 4. 未經權益所有人同意不得將文件中的內容挪作商業或盈利用途。
  • 5. 人人文庫網僅提供信息存儲空間,僅對用戶上傳內容的表現方式做保護處理,對用戶上傳分享的文檔內容本身不做任何修改或編輯,并不能對任何下載內容負責。
  • 6. 下載文件中如有侵權或不適當內容,請與我們聯系,我們立即糾正。
  • 7. 本站不保證下載資源的準確性、安全性和完整性, 同時也不承擔用戶因使用這些下載資源對自己和他人造成任何形式的傷害或損失。

評論

0/150

提交評論