Skip to content
This repository was archived by the owner on Jan 7, 2019. It is now read-only.

bureaucratic-labs/crfsuite

Folders and files

Name
Last commit message
Last commit date
Jul 21, 2017
Jul 21, 2017
Jul 21, 2017
Jun 3, 2017
Jul 21, 2017
Jul 21, 2017
Jun 3, 2017
Jul 26, 2017
Jul 29, 2017
Jul 22, 2017
Jul 22, 2017

Repository files navigation

CRFSuite Build Status

Go bindings for CRFSuite

Things to be done:

  • Training support
  • Evaluation support (?)
  • Tagging support

Tagging example

package main

import (
	"fmt"
	"strings"
	"github.com/bureaucratic-labs/crfsuite"
)

// getFeatures is the user-supplied callback that builds the feature list for
// the item found at position within items. Its shape closely follows the
// feature-extraction interface of python-crfsuite, on which it is based.
func getFeatures(items []string, position int) []crfsuite.Feature {
	// The single feature emitted here is the lowercased item (in this example
	// each item is just one character).
	lowered := strings.ToLower(items[position])
	// Depending on the task, more features can be added to this slice.
	return []crfsuite.Feature{
		{Key: fmt.Sprintf("lower=%v", lowered), Value: 1.0},
	}
}

func main() {
	// Load pre-trained model for tokenization (see b-labs/models repo)
	model := NewModelFromFile("test_data/tokenization-model.crfsuite")
	tagger := model.GetTagger()
	// Input data must be an array of strings, but that can be changed in future
	input := []string{"т", "е", "с", "т", "."}
	ids := tagger.Tag(input, getFeatures)
	labels := tagger.IDsToLabels(ids)
	fmt.Println(labels) // will output some BIO labels
}