斯坦福词性标注Demo

来源：互联网 | 发布：java url 编码 | 编辑：程序博客网 | 时间：2024/06/11 19:49

第一种：针对单个单词进行词性标注

import java.io.IOException;import edu.stanford.nlp.tagger.maxent.MaxentTagger; public class tagger {    public static void main(String[] args) throws IOException,ClassNotFoundException {       // Initialize the tagger        MaxentTagger tagger = new MaxentTagger("F:/stanford-postagger-2014-06-16/models/english-left3words-distsim.tagger");         // The sample string        String sample = "text";         // The tagged string        String tagged = tagger.tagString(sample); // Output the result        System.out.println(tagged);       }}

第二种：对单句话进行词性标注

class TaggerDemo {private TaggerDemo() {}public static void main(String[] args) throws Exception     {if (args.length != 2)     {System.err.println("usage: java TaggerDemo modelFile fileToTag");    return;        }        MaxentTagger tagger = new MaxentTagger("F:\\stanford-postagger-2014-06-16\\models\\english-bidirectional-distsim.tagger");              List<List<HasWord>> sentences = MaxentTagger.tokenizeText(new BufferedReader(new FileReader("F:\trigger.txt")));                for (List<HasWord> sentence : sentences)         {        List<TaggedWord> tSentence = tagger.tagSentence(sentence);              System.out.println(Sentence.listToString(tSentence, false));        }    }}

第三种：读取文本文件，对文件进行词性标注

import java.io.BufferedReader;import java.io.FileInputStream;import java.io.InputStreamReader;import java.io.OutputStreamWriter;import java.io.PrintWriter;import java.util.List;import edu.stanford.nlp.ling.Sentence;import edu.stanford.nlp.ling.TaggedWord;import edu.stanford.nlp.ling.HasWord;import edu.stanford.nlp.ling.CoreLabel;import edu.stanford.nlp.process.CoreLabelTokenFactory;import edu.stanford.nlp.process.DocumentPreprocessor;import edu.stanford.nlp.process.PTBTokenizer;import edu.stanford.nlp.process.TokenizerFactory;import edu.stanford.nlp.tagger.maxent.MaxentTagger;/** This demo shows user-provided sentences (i.e., {@code List<HasWord>}) *  being tagged by the tagger. The sentences are generated by direct use *  of the DocumentPreprocessor class. * *  @author Christopher Manning */class TaggerDemo2 {private TaggerDemo2() {}    public static void main(String[] args) throws Exception     {    if (args.length != 2)     {    System.err.println("usage: java TaggerDemo2 modelFile fileToTag");                return;        }        MaxentTagger tagger = new MaxentTagger(args[0]);        TokenizerFactory<CoreLabel> ptbTokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(),   "untokenizable=noneKeep");        BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(args[1]), "utf-8"));        PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, "utf-8"));        DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(r);        documentPreprocessor.setTokenizerFactory(ptbTokenizerFactory);        for (List<HasWord> sentence : documentPreprocessor)    {    List<TaggedWord> tSentence = tagger.tagSentence(sentence);        pw.println(Sentence.listToString(tSentence, false));    }    // print the adjectives in one more sentence. This shows how to get at words and tags in a tagged sentence.    
List<HasWord> sent = Sentence.toWordList("The", "slimy", "slug", "crawled", "over", "the", "long", ",", "green", "grass", ".");        List<TaggedWord> taggedSent = tagger.tagSentence(sent);        for (TaggedWord tw : taggedSent)     {    if (tw.tag().startsWith("JJ"))     {    pw.println(tw.word());    }    }    pw.close();    }}

0 0