Commit 53d6f7ef authored by 刘天航's avatar 刘天航
Browse files

first step of the project

parents
Loading
Loading
Loading
Loading

src/frty/123.txt

0 → 100644
+4 −0
Original line number Diff line number Diff line
你好,世界,
今天是美好的一天
你好吗世界,世界很美好
 No newline at end of file

src/frty/onr.java

0 → 100644
+58 −0
Original line number Diff line number Diff line
package frty;
import java.io.*;
import java.util.*;

import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.tokenizer.StandardTokenizer;
/**
 * Small word-frequency demo: reads a UTF-8 text file, strips punctuation,
 * segments the text into words with HanLP's {@code StandardTokenizer}, and
 * counts how often each distinct word occurs.
 */
public class onr {
	/**
	 * Character class of everything {@link #read(File)} removes from the text:
	 * ASCII and full-width punctuation plus the plain space character.
	 * The original pattern contained the mangled HTML entity "& amp;", which
	 * made it strip the letters 'a', 'm' and 'p' as well; those letters are no
	 * longer part of the class.
	 */
	private static final String STRIP_REGEX =
			"[`~!@#$%^&*()+=|{}':;',\\[\\].<>/?~!@#¥%……& ;*()——+|{}【】‘;:”“’。,、?|-]";

	/**
	 * Reads the sample file, prints its words, then prints the frequency of
	 * each distinct word.
	 *
	 * @param args unused
	 * @throws IOException if the input file cannot be read
	 */
	public static void main(String[] args) throws IOException {
		// Path of the file to analyse, resolved against the working directory.
		File file = new File("../123.txt");
		// Read and segment once; fre() below collapses this list in place,
		// so the full word list has to be printed before it is called.
		ArrayList<String> words = devideword(read(file));
		System.out.println(words);
		System.out.println(fre(words));
	}

	/**
	 * Reads a whole file as UTF-8 and returns its content as one string with
	 * line breaks, spaces and punctuation removed.
	 *
	 * @param file the file to read
	 * @return the concatenated lines of the file with every character matched
	 *         by {@link #STRIP_REGEX} removed
	 * @throws IOException if the file cannot be opened or read
	 */
	public static String read(File file) throws IOException {
		StringBuilder text = new StringBuilder();
		// try-with-resources closes the reader (and the underlying stream)
		// even when readLine() throws.
		try (BufferedReader br = new BufferedReader(
				new InputStreamReader(new FileInputStream(file), "UTF-8"))) {
			String line;
			while ((line = br.readLine()) != null) {
				// Lines are joined without a separator on purpose.
				text.append(line);
			}
		}
		return text.toString().replaceAll(STRIP_REGEX, "");
	}

	/**
	 * Segments a string into words.
	 *
	 * @param read the text to segment
	 * @return the words of the text, in order of appearance
	 */
	public static ArrayList<String> devideword(String read) {
		List<Term> terms = StandardTokenizer.segment(read);
		ArrayList<String> words = new ArrayList<>(terms.size());
		for (Term term : terms) {
			// Take the word straight from the term instead of cutting
			// Term.toString() at '/': that text only has the "word/nature"
			// form when HanLP is configured to show the nature, and a word
			// that itself contains '/' would be cut in the wrong place.
			words.add(term.word);
		}
		return words;
	}

	/**
	 * Counts the occurrences of each distinct word.
	 *
	 * <p>Side effect: {@code s} is modified in place so that, on return, it
	 * holds each distinct word exactly once, in order of first appearance.
	 * The returned list is index-aligned with it.
	 *
	 * @param s the words to count; reduced to its distinct elements
	 * @return for each distinct word left in {@code s}, how many times it
	 *         occurred in the original list
	 */
	public static ArrayList<Integer> fre(ArrayList<String> s) {
		// LinkedHashMap keeps first-occurrence order, matching the order the
		// distinct words end up in s.
		Map<String, Integer> counts = new LinkedHashMap<>();
		for (String word : s) {
			Integer seen = counts.get(word);
			counts.put(word, seen == null ? 1 : seen + 1);
		}
		s.clear();
		s.addAll(counts.keySet());
		return new ArrayList<>(counts.values());
	}
}