- BrillTagger class is a transformation-based tagger. It is not a subclass of SequentialBackoffTagger.
- Moreover, it uses a series of rules to correct the results of an initial tagger.
- Each rule is scored: its score equals the number of errors it corrects minus the number of new errors it produces.
Code #1 : Training a BrillTagger class
# Loading Libraries
from nltk.tag import brill, brill_trainer
def train_brill_tagger(initial_tagger, train_sents, **kwargs):
    """Train a Brill tagger on top of ``initial_tagger``.

    Builds a fixed set of rule templates that look at the tags
    (``brill.Pos``) and the words (``brill.Word``) up to three positions
    before and after the current token, then hands them to a
    ``BrillTaggerTrainer`` together with ``initial_tagger``.

    Args:
        initial_tagger: Tagger whose output the learned rules correct.
        train_sents: Tagged sentences to learn the rules from.
        **kwargs: Forwarded unchanged to ``trainer.train``.

    Returns:
        Whatever ``BrillTaggerTrainer.train`` returns (the trained tagger).
    """
    templates = [
        # Templates conditioned on neighbouring tags.
        brill.Template(brill.Pos([-1])),
        brill.Template(brill.Pos([1])),
        brill.Template(brill.Pos([-2])),
        brill.Template(brill.Pos([2])),
        brill.Template(brill.Pos([-2, -1])),
        brill.Template(brill.Pos([1, 2])),
        brill.Template(brill.Pos([-3, -2, -1])),
        brill.Template(brill.Pos([1, 2, 3])),
        brill.Template(brill.Pos([-1]), brill.Pos([1])),
        # Templates conditioned on neighbouring words.
        brill.Template(brill.Word([-1])),
        brill.Template(brill.Word([1])),
        brill.Template(brill.Word([-2])),
        brill.Template(brill.Word([2])),
        brill.Template(brill.Word([-2, -1])),
        brill.Template(brill.Word([1, 2])),
        brill.Template(brill.Word([-3, -2, -1])),
        brill.Template(brill.Word([1, 2, 3])),
        brill.Template(brill.Word([-1]), brill.Word([1])),
    ]

    # Using BrillTaggerTrainer to train
    trainer = brill_trainer.BrillTaggerTrainer(
        initial_tagger, templates, deterministic=True
    )
    return trainer.train(train_sents, **kwargs)
|
Code #2 : Let’s build and evaluate the initial tagger that the BrillTagger will correct
from nltk.tag import brill, brill_trainer
from nltk.tag import DefaultTagger
from nltk.corpus import treebank
from tag_util import train_brill_tagger
# The n-gram tagger classes chained below are not imported anywhere else in
# this snippet.
from nltk.tag import BigramTagger, TrigramTagger, UnigramTagger
# NOTE(review): backoff_tagger is not defined in this snippet; presumably it
# lives in tag_util next to train_brill_tagger -- confirm before relying on it.
from tag_util import backoff_tagger

# Initializing the last-resort tagger used as the end of the backoff chain
default_tagger = DefaultTagger('NN')

# Initializing training and testing set
train_data = treebank.tagged_sents()[:3000]
test_data = treebank.tagged_sents()[3000:]

initial_tag = backoff_tagger(
    train_data,
    [UnigramTagger, BigramTagger, TrigramTagger],
    backoff=default_tagger,
)

# NOTE(review): recent NLTK releases deprecate evaluate() in favour of
# accuracy(); kept as-is to match the NLTK version this example targets.
a = initial_tag.evaluate(test_data)
print("Accuracy of Initial Tag : ", a)
|
Output :
Accuracy of Initial Tag : 0.8806820634578028
Code #3 : Training the BrillTagger on top of the initial tagger and evaluating it
# Learn transformation rules that correct initial_tag's output on the
# training sentences.
brill_tag = train_brill_tagger(initial_tag, train_data)

# Score the rule-corrected tagger on the held-out sentences.
b = brill_tag.evaluate(test_data)
print("Accuracy of brill_tag : ", b)
|
Output :
Accuracy of brill_tag : 0.8827541549751781