斯坦福词性标注Demo
来源:互联网 发布:java url 编码 编辑:程序博客网 时间:2024/06/11 19:49
第一种:针对单个单词进行词性标注
import java.io.IOException;import edu.stanford.nlp.tagger.maxent.MaxentTagger; public class tagger { public static void main(String[] args) throws IOException,ClassNotFoundException { // Initialize the tagger MaxentTagger tagger = new MaxentTagger("F:/stanford-postagger-2014-06-16/models/english-left3words-distsim.tagger"); // The sample string String sample = "text"; // The tagged string String tagged = tagger.tagString(sample); // Output the result System.out.println(tagged); }}
第二种:对单句话进行词性标注
/**
 * Splits a text file into sentences, tags every sentence and prints each tagged sentence to
 * standard output.
 *
 * <p>The model and input file locations are hard-coded in {@link #main}. NOTE(review): the
 * program still insists on exactly two command-line arguments, as the usage message describes,
 * but it never reads them; confirm whether they were meant to replace the hard-coded paths.
 */
class TaggerDemo {

  private TaggerDemo() {}

  /**
   * Runs the demo.
   *
   * @param args must contain exactly two elements, otherwise a usage message is printed to
   *     standard error and the program returns; the values themselves are not used
   * @throws Exception if loading the model, reading the input file or tagging fails
   */
  public static void main(String[] args) throws Exception {
    if (args.length != 2) {
      System.err.println("usage: java TaggerDemo modelFile fileToTag");
      return;
    }

    MaxentTagger tagger = new MaxentTagger(
        "F:\\stanford-postagger-2014-06-16\\models\\english-bidirectional-distsim.tagger");

    List<List<HasWord>> sentences;
    // The backslash must be escaped: an unescaped "\t" is a TAB character, which made the
    // program look for "F:<TAB>rigger.txt" instead of F:\trigger.txt.
    // try-with-resources closes the file once tokenization has produced the sentence list.
    try (BufferedReader reader = new BufferedReader(new FileReader("F:\\trigger.txt"))) {
      sentences = MaxentTagger.tokenizeText(reader);
    }

    for (List<HasWord> sentence : sentences) {
      List<TaggedWord> tSentence = tagger.tagSentence(sentence);
      System.out.println(Sentence.listToString(tSentence, false));
    }
  }
}
第三种:读取文本文件,对文件进行词性标注
import java.io.BufferedReader;import java.io.FileInputStream;import java.io.InputStreamReader;import java.io.OutputStreamWriter;import java.io.PrintWriter;import java.util.List;import edu.stanford.nlp.ling.Sentence;import edu.stanford.nlp.ling.TaggedWord;import edu.stanford.nlp.ling.HasWord;import edu.stanford.nlp.ling.CoreLabel;import edu.stanford.nlp.process.CoreLabelTokenFactory;import edu.stanford.nlp.process.DocumentPreprocessor;import edu.stanford.nlp.process.PTBTokenizer;import edu.stanford.nlp.process.TokenizerFactory;import edu.stanford.nlp.tagger.maxent.MaxentTagger;/** This demo shows user-provided sentences (i.e., {@code List<HasWord>}) * being tagged by the tagger. The sentences are generated by direct use * of the DocumentPreprocessor class. * * @author Christopher Manning */class TaggerDemo2 {private TaggerDemo2() {} public static void main(String[] args) throws Exception { if (args.length != 2) { System.err.println("usage: java TaggerDemo2 modelFile fileToTag"); return; } MaxentTagger tagger = new MaxentTagger(args[0]); TokenizerFactory<CoreLabel> ptbTokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "untokenizable=noneKeep"); BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(args[1]), "utf-8")); PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, "utf-8")); DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(r); documentPreprocessor.setTokenizerFactory(ptbTokenizerFactory); for (List<HasWord> sentence : documentPreprocessor) { List<TaggedWord> tSentence = tagger.tagSentence(sentence); pw.println(Sentence.listToString(tSentence, false)); } // print the adjectives in one more sentence. This shows how to get at words and tags in a tagged sentence. 
List<HasWord> sent = Sentence.toWordList("The", "slimy", "slug", "crawled", "over", "the", "long", ",", "green", "grass", "."); List<TaggedWord> taggedSent = tagger.tagSentence(sent); for (TaggedWord tw : taggedSent) { if (tw.tag().startsWith("JJ")) { pw.println(tw.word()); } } pw.close(); }}
0 0
- 斯坦福词性标注Demo
- 斯坦福Stanford coreNLP宾州树库的词性标注规范
- 词性标注
- 词性标注
- 词性标注
- 词性标注
- 词性标注
- 如何使用斯坦福pos tagger进行词性标注[转—英文]
- 词性标注(1)
- 一阶HMM词性标注
- 词性标注类函数
- 词性标注说明
- 计算所词性标注集
- 词性标注POS tagging
- 北大词性标注集
- 中文词性标注
- Stanford Tagger 词性标注
- 结巴分词--词性标注
- delphi的取整函数round、trunc、ceil和floor
- FireWRT(MT7621)远程视频监控-1
- 学习的开始
- 我是如何学习用C#和MapInfo,MapX制作GIS系统软件的
- 天声人語 20150326
- 斯坦福词性标注Demo
- iOS开发网络篇—GET请求和POST请求
- android usb挂载分析---MountService启动
- arm交叉编译器的区分
- FP Growth算法
- Android自定义属性 类型详解!
- CALayer 详细介绍
- npapi插件调试
- 310实验室一点补充