漢字字頻統計_第1頁
漢字字頻統計_第2頁
漢字字頻統計_第3頁
漢字字頻統計_第4頁
漢字字頻統計_第5頁
已閱讀5頁,還剩2頁未讀,繼續免費閱讀

付費下載

下載本文檔

版權說明:本文檔由用戶提供并上傳,收益歸屬內容提供方,若內容存在侵權,請進行舉報或認領

文檔簡介

1、import java.awt.List;import java.io.*;import jxl.*;import jxl.write.*;import java.text.DecimalFormat;import java.util.ArrayList;public class statistics public static void main(String args) / 讀字表ArrayList chtable = readFromTable("CHTable.txt");System.out.println("字表大小為:" + chtable

2、.size();/ 讀文件ArrayList numlist = readFromFile("10.txt", chtable);/ 排序ArrayList chlist = sort(chtable, numlist);/ 計(jì)算漢字的總數(shù)int sum = 0;for (int i = 0; i < numlist.size(); i+) sum = sum + (Integer) numlist.get(i);System.out.println("-顯示結(jié)果-");/ 返回指定個(gè)數(shù)的漢字頻率統(tǒng)計(jì)結(jié)果ArrayList freqlist = f

3、requency(chlist, numlist, sum, 100);/ 計(jì)算熵值float sh = entropy(freqlist);/ 計(jì)算指定個(gè)漢字的字頻總和float fre1 = freqSum(freqlist, 1);float fre2 = freqSum(freqlist, 20);float fre3 = freqSum(freqlist, 100);float fre4 = freqSum(freqlist, 600);float fre5 = freqSum(freqlist, 2000);float fre6 = freqSum(freqlist, 3000);

4、float fre7 = freqSum(freqlist, 6000);ArrayList freal = new ArrayList();freal.add(fre1);freal.add(fre2);freal.add(fre3);freal.add(fre4);freal.add(fre5);freal.add(fre6);freal.add(fre7);ArrayList nal = new ArrayList();nal.add(1);nal.add(20);nal.add(100);nal.add(600);nal.add(2000);nal.add(3000);nal.add(

5、6000);System.out.println("-程序結(jié)束-");/ 生成Excel的類 try / 打開文件 WritableWorkbook book = Workbook.createWorkbook(new File("統(tǒng)計(jì)結(jié)果.xls");/ 生成工作表,參數(shù)0表示這是第一頁(yè) WritableSheet sheet = book.createSheet(sum+"字", 0);/* * 生成一個(gè)保存數(shù)字的單元格 必須使用Number的完整包路徑,否則有語(yǔ)法歧義 */表頭Label label1 = new Label(0

6、, 0, "字符");sheet.addCell(label1);Label label2 = new Label(1, 0, "頻率");sheet.addCell(label2);for(int i=0;i<100;i+)/ 中文字符 Label label = new Label(0, i+1, chlist.get(i).toString();sheet.addCell(label);/ 出現(xiàn)的頻率 jxl.write.Number number = new jxl.write.Number(1, i+1, (Float)freqlist.

7、get(i);sheet.addCell(number);/寫入熵值Label lsh = new Label(0, 101, "熵值");sheet.addCell(lsh);jxl.write.Number nsh = new jxl.write.Number(1, 101, sh);sheet.addCell(nsh);/寫入字頻總和for(int i=0;i<freal.size();i+)if(Float)freal.get(i) != 0f)Label lfreq = new Label(0, 102+i, "前"+nal.get(i)

8、.toString()+"個(gè)漢字字頻總和");sheet.addCell(lfreq);jxl.write.Number nfreq = new jxl.write.Number(1, 102+i, (Float)freal.get(i);sheet.addCell(nfreq);/寫入數(shù)據(jù)book.write();/ 并關(guān)閉文件 book.close(); catch (Exception e) System.out.println(e);public static ArrayList readFromTable(String filename) ArrayList ch

9、list = new ArrayList();File file = new File(filename);Reader reader = null;try / 一次讀一個(gè)字符reader = new InputStreamReader(new FileInputStream(file);int tempint;while (tempint = reader.read() != -1) / 判斷讀到的字符是否是中文if (tempint >= 'u4e00' && tempint <= 'u9fa5')| (tempint >=

10、 'uf900' && tempint <= 'ufa2d') char tempchar = (char) tempint;/ System.out.println(tempchar);/ System.out.println("list.size:" + chlist.size();/ 判斷該字符是否出現(xiàn)過(guò)int i = 0;for (i = 0; i < chlist.size(); i+) / 一旦重復(fù),跳出循環(huán)char c = ' 'Object ob = chlist.get(i);if

11、 (ob instanceof Character) c = (Character) ob;/ System.out.println("c:" + c);if (tempchar = c) / System.out.println("重復(fù)!");break;/ 字符從未出現(xiàn)過(guò)if (i = chlist.size() / System.out.println("新字符!");chlist.add(tempchar);reader.close(); catch (Exception e) e.printStackTrace();retu

12、rn chlist;/* * 該函數(shù)用于從文件中讀取中文字符,并返回它出現(xiàn)的次數(shù) * * param filename * return */public static ArrayList readFromFile(String filename, ArrayList chtable) File file = new File(filename);Reader reader = null;ArrayList numlist = new ArrayList();/ 初始化字符出現(xiàn)的次數(shù)集合for (int i = 0; i < chtable.size(); i+) numlist.add

13、(0);try / 一次讀一個(gè)字符reader = new InputStreamReader(new FileInputStream(file);int tempint;int sum = 0;while (tempint = reader.read() != -1) / 判斷讀到的字符是否是中文if (tempint >= 'u4e00' && tempint <= 'u9fa5')| (tempint >= 'uf900' && tempint <= 'ufa2d')

14、char tempchar = (char) tempint;/ System.out.println(tempchar);/ System.out.println("list.size:" + chlist.size();/ 判斷該字符是否在字表里int i = 0;for (i = 0; i < chtable.size(); i+) / 在字表里,統(tǒng)計(jì)重復(fù)次數(shù)并跳出循環(huán)char c = ' 'Object ob = chtable.get(i);if (ob instanceof Character) c = (Character) ob;/ S

15、ystem.out.println("c:" + c);if (tempchar = c) int num = (Integer) numlist.get(i) + 1;numlist.set(i, num);break;reader.close(); catch (Exception e) e.printStackTrace();return numlist;/* * 該函數(shù)用來(lái)對(duì)漢字出現(xiàn)的次數(shù)進(jìn)行從大到小的排序,返回排序結(jié)果 * * param chlist * param numlist */public static ArrayList sort(ArrayList

16、 chtable, ArrayList numlist) ArrayList chlist = chtable;for (int i = 0; i < numlist.size(); i+) for (int j = i + 1; j < numlist.size(); j+) int listi = (Integer) numlist.get(i);int listj = (Integer) numlist.get(j);if (listi < listj) numlist.set(i, listj);numlist.set(j, listi);char chi = (Ch

17、aracter) chlist.get(i);char chj = (Character) chlist.get(j);chlist.set(i, chj);chlist.set(j, chi);return chlist;/* * 該函數(shù)用來(lái)計(jì)算各個(gè)漢字出現(xiàn)的頻率,并且顯示出指定個(gè)數(shù)的結(jié)果 * * param chlist * param numlist * param sum * param count */public static ArrayList frequency(ArrayList chlist, ArrayList numlist, int sum,int count) Ar

18、rayList freqlist = new ArrayList();/ 計(jì)算頻率for (int j = 0; j < chlist.size(); j+) float freq = (Integer) numlist.get(j) / (float) sum;freqlist.add(freq);/ 按指定格式輸出(保留6位有效數(shù)字)for (int j = 0; j < freqlist.size() && j < count; j+) System.out.println("字符:" + chlist.get(j);System.o

19、ut.println("出現(xiàn)次數(shù):" + numlist.get(j);System.out.println("頻率:" + freqlist.get(j);System.out.println("-");System.out.println("中文字符總數(shù):" + sum);return freqlist;/* * 該函數(shù)用來(lái)計(jì)算熵值 * * param freqlist */public static float entropy(ArrayList freqlist) float sum = 0f;for (int i = 0; i < freqlist.size(); i+) float freq = (Float) freqlist.

溫馨提示

  • 1. 本站所有資源如無特殊說明,都需要本地電腦安裝OFFICE2007和PDF閱讀器。圖紙軟件為CAD,CAXA,PROE,UG,SolidWorks等。壓縮文件請下載最新的WinRAR軟件解壓。
  • 2. 本站的文檔不包含任何第三方提供的附件圖紙等,如果需要附件,請聯系上傳者。文件的所有權益歸上傳用戶所有。
  • 3. 本站RAR壓縮包中若帶圖紙,網頁內容里面會有圖紙預覽,若沒有圖紙預覽就沒有圖紙。
  • 4. 未經權益所有人同意不得將文件中的內容挪作商業或盈利用途。
  • 5. 人人文庫網僅提供信息存儲空間,僅對用戶上傳內容的表現方式做保護處理,對用戶上傳分享的文檔內容本身不做任何修改或編輯,并不能對任何下載內容負責。
  • 6. 下載文件中如有侵權或不適當內容,請與我們聯系,我們立即糾正。
  • 7. 本站不保證下載資源的準確性、安全性和完整性,同時也不承擔用戶因使用這些下載資源對自己和他人造成任何形式的傷害或損失。

評論

0/150

提交評論