正则表达式,模拟网络爬虫小例子

来源:互联网 发布:喜马拉雅电台 知乎 编辑:程序博客网 时间:2024/06/11 02:10
package cn.zhengze;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Minimal "crawler"-style demo: scans a local HTML file line by line and
 * prints every substring that matches an e-mail regular expression.
 */
public class netbug {

    /**
     * Reads the file at the relative path {@code mail.html} and prints each
     * matched e-mail address on its own line, in order of appearance.
     *
     * @param args command-line arguments; not used
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        File file = new File("mail.html");

        // Word characters, '@', an alphanumeric host label, then one to three
        // dot-separated labels of two or three ASCII letters each.
        String regex = "\\w+@[a-zA-Z0-9]+(\\.[a-zA-Z]{2,3}){1,3}";

        List<String> mailList = getMails(file, regex);
        for (String mail : mailList) {
            System.out.println(mail);
        }
    }

    /**
     * Collects every match of {@code regex} found in {@code file}.
     *
     * <p>The file is read one line at a time, so a match can never span a
     * line break. Duplicates are kept.
     *
     * @param file  text file to scan
     * @param regex regular expression to search for on each line
     * @return all matches in the order they occur; empty if there are none
     * @throws IOException if the file cannot be opened or read
     */
    private static List<String> getMails(File file, String regex)
            throws IOException {
        // Compile before opening the file so that an invalid pattern cannot
        // leave an open file handle behind.
        Pattern pattern = Pattern.compile(regex);
        List<String> matches = new ArrayList<String>();

        BufferedReader reader = new BufferedReader(new FileReader(file));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                Matcher matcher = pattern.matcher(line);
                while (matcher.find()) {
                    matches.add(matcher.group());
                }
            }
        } finally {
            // Always release the file handle, including when readLine() fails.
            reader.close();
        }
        return matches;
    }
}

0 0
原创粉丝点击