Skip to content
Permalink

Comparing changes

This is a direct comparison between two commits made in this repository or its related repositories. View the default comparison for this range or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: src-d/enry
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 9e2a176ac6635f634dd9a68fd9b812fdab554c10
Choose a base ref
..
head repository: src-d/enry
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 0a267e9168d155aac82a963820f74174a6fc4a4f
Choose a head ref
Showing with 479 additions and 259 deletions.
  1. +14 −7 cli/enry/main.go
  2. +17 −13 common.go
  3. +0 −29 common_test.go
  4. +251 −0 gitattributes.go
  5. +197 −0 gitattributes_test.go
  6. +0 −193 utils.go
  7. +0 −17 utils_test.go
21 changes: 14 additions & 7 deletions cli/enry/main.go
Original file line number Diff line number Diff line change
@@ -24,7 +24,11 @@ func main() {
log.Fatal(err)
}

enry.LoadGitAttributes(".gitattributes")
gitAttributes := enry.NewGitAttributes()
reader, err := os.Open(".gitattributes")
if err == nil {
gitAttributes.LoadGitAttributes("", reader)
}

errors := false
out := make(map[string][]string, 0)
@@ -50,8 +54,9 @@ func main() {
relativePath = relativePath + "/"
}

if enry.IsVendor(relativePath) || enry.IsDotFile(relativePath) ||
enry.IsDocumentation(relativePath) || enry.IsConfiguration(relativePath) {
if gitAttributes.IsVendor(relativePath) || enry.IsDotFile(relativePath) ||
gitAttributes.IsDocumentation(relativePath) || enry.IsConfiguration(relativePath) ||
gitAttributes.IsGenerated(path) {
if f.IsDir() {
return filepath.SkipDir
}
@@ -69,10 +74,12 @@ func main() {
log.Println(err)
return nil
}

language := enry.GetLanguage(filepath.Base(path), content)
if language == enry.OtherLanguage {
return nil
language := gitAttributes.GetLanguage(filepath.Base(path))
if len(language) == 0 {
language = enry.GetLanguage(filepath.Base(path), content)
if language == enry.OtherLanguage {
return nil
}
}

out[language] = append(out[language], relativePath)
30 changes: 17 additions & 13 deletions common.go
Original file line number Diff line number Diff line change
@@ -3,6 +3,7 @@ package enry
import (
"bufio"
"bytes"
"os"
"path/filepath"
"regexp"
"strings"
@@ -18,7 +19,6 @@ type Strategy func(filename string, content []byte, candidates []string) (langua

// DefaultStrategies is the strategies' sequence GetLanguage uses to detect languages.
var DefaultStrategies = []Strategy{
GetLanguagesByGitAttributes,
GetLanguagesByModeline,
GetLanguagesByFilename,
GetLanguagesByShebang,
@@ -96,9 +96,9 @@ func GetLanguageByClassifier(content []byte, candidates []string) (language stri
return getLanguageByStrategy(GetLanguagesByClassifier, "", content, candidates)
}

// GetLanguageByGitAttributes returns the language assigned to a file for a given regular expresion in .gitattributes.
// This strategy needs to be initialized calling LoadGitAttributes
func GetLanguageByGitAttributes(filename string) (language string, safe bool) {
// GetLanguageByGitattributes returns the language assigned to a file for a given regular expression in .gitattributes.
// This strategy needs to be initialized calling LoadGitattributes
func GetLanguageByGitattributes(filename string) (language string, safe bool) {
return getLanguageByStrategy(GetLanguagesByGitAttributes, filename, nil, nil)
}

@@ -450,16 +450,20 @@ func GetLanguageByAlias(alias string) (lang string, ok bool) {
return
}

// GetLanguagesByGitAttributes returns either a string slice with the lenguage if the filename match with a regExp in .gitattributes
//or return nil in case of none regexp matchs the filename . It complies with the signature to be a Strategy type.
// GetLanguagesByGitAttributes returns either a string slice with the language if the filename matches a regExp in .gitattributes
// or an empty slice in case no regexp matches the filename. It complies with the signature to be a Strategy type.
func GetLanguagesByGitAttributes(filename string, content []byte, candidates []string) []string {
if loadedGitAttributes != nil {
for regExp, language := range loadedGitAttributes.regExpAttributes[language].attributes {
if regExp.MatchString(filename) {
return []string{language}
}
}
gitAttributes := NewGitAttributes()
reader, err := os.Open(".gitattributes")
if err != nil {
return nil
}

return nil
gitAttributes.LoadGitAttributes("", reader)
lang := gitAttributes.GetLanguage(filename)
if len(lang) == 0 {
return []string{}
}

return []string{lang}
}
29 changes: 0 additions & 29 deletions common_test.go
Original file line number Diff line number Diff line change
@@ -368,35 +368,6 @@ func (s *EnryTestSuite) TestGetLanguageByAlias() {
}
}

// TestGetLanguageByGitAttributes writes a temporary gitattributes file, loads
// it with LoadGitAttributes and checks the language reported for each sample
// filename. An empty expected language means no rule matches the file.
func (s *EnryTestSuite) TestGetLanguageByGitAttributes() {
	tests := []struct {
		name         string
		filename     string
		expectedLang string
	}{
		{name: "TestGetLanguageByGitAttributes_1", filename: "test.go", expectedLang: "Go"},
		{name: "TestGetLanguageByGitAttributes_2", filename: "test.java", expectedLang: "notJava"},
		{name: "TestGetLanguageByGitAttributes_3", filename: "test.md", expectedLang: ""},
	}

	// An empty dir makes TempFile use the platform's default temp directory
	// instead of assuming that /tmp exists.
	tmpGitAttributes, err := ioutil.TempFile("", "gitattributes")
	if !assert.NoError(s.T(), err) {
		return
	}

	// Deferred so the temp file and the package-level state are cleaned up
	// even when the body below panics.
	defer func() {
		loadedGitAttributes = nil
		assert.NoError(s.T(), os.RemoveAll(tmpGitAttributes.Name()))
	}()

	data := []byte(".*\\.java linguist-language=notJava\n.*\\.go linguist-language=go\n")
	_, err = tmpGitAttributes.Write(data)
	assert.NoError(s.T(), err)
	assert.NoError(s.T(), tmpGitAttributes.Close())
	LoadGitAttributes(tmpGitAttributes.Name())

	for _, test := range tests {
		langs, _ := GetLanguageByGitAttributes(test.filename)
		assert.Equal(s.T(), test.expectedLang, langs, fmt.Sprintf("%v: lang = %v, expected: %v", test.name, langs, test.expectedLang))
	}
}

func (s *EnryTestSuite) TestLinguistCorpus() {
const filenamesDir = "filenames"
var samplesDir = filepath.Join(s.repoLinguist, "samples")
251 changes: 251 additions & 0 deletions gitattributes.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,251 @@
package enry

import (
"errors"
"fmt"
"io"
"io/ioutil"
"regexp"
"strings"

"gopkg.in/src-d/enry.v1/data"
)

// attrType identifies the kind of linguist override carried by a
// .gitattributes entry.
type attrType int

// The supported attribute kinds. String uses the numeric value as an index
// into _attrType_index, so the order here must match the name table below.
const (
	vendor attrType = iota
	documentation
	generated
	language
)

// _attrType_name holds every kind name back to back; _attrType_index stores
// the offset at which each name starts, followed by the final end offset.
const _attrType_name = "vendordocumentationgeneratedlanguage"

var _attrType_index = [...]uint8{0, 6, 19, 28, 36}

// String returns the lowercase name of the kind, or "attrType(N)" when the
// value is not one of the declared kinds.
func (i attrType) String() string {
	pos := int(i)
	if pos >= 0 && pos < len(_attrType_index)-1 {
		start, end := _attrType_index[pos], _attrType_index[pos+1]
		return _attrType_name[start:end]
	}

	return fmt.Sprintf("attrType(%d)", i)
}

// boolAttribute groups the per-path overrides of one on/off attribute kind.
type boolAttribute struct {
// kind is the attribute kind these overrides belong to.
kind attrType
// matchers lists the accepted attribute spellings: index 0 switches the
// attribute on for a path and index 1 switches it off.
matchers []string
// attributes maps a path to the boolean value assigned to it.
attributes map[string]bool
}

// regExpAttribute groups the overrides of one attribute kind whose entries are
// selected by matching a regular expression.
type regExpAttribute struct {
// kind is the attribute kind these overrides belong to.
kind attrType
// matchers lists the attribute prefixes recognised for this kind; only
// index 0 is consulted when matching.
matchers []string
// attributes maps a compiled pattern to the value assigned to the names
// that match it.
attributes map[*regexp.Regexp]string
}

// GitAttributes holds the linguist overrides loaded from .gitattributes data, grouped by attribute kind.
// boolAttributes contains all the attributes that work like a boolean condition assigned to a path,
// regExpAttributes contains all the attributes that match a regExp to decide whether an attribute applies.
type GitAttributes struct {
boolAttributes map[attrType]boolAttribute
regExpAttributes map[attrType]regExpAttribute
}

// overrideError reports that an attribute was assigned to a path that already
// had a value for the same attribute kind.
type overrideError struct {
// attribute is the kind of the attribute that is being assigned again.
attribute attrType
// path is the path whose earlier value is being replaced.
path string
}

// Error implements the error interface. The message names the attribute kind
// and the path whose earlier value is being replaced.
func (e *overrideError) Error() string {
	const format = "gitattributes: You are overriding a %v attribute of one of your previous lines %s\n"

	return fmt.Sprintf(format, e.attribute, e.path)
}

// IsVendor reports whether path is a vendor path. A value loaded for exactly
// this path wins; otherwise the default vendor matchers decide.
func (gitAttrs *GitAttributes) IsVendor(path string) bool {
	override, found := gitAttrs.boolAttributes[vendor].attributes[path]
	if !found {
		return data.VendorMatchers.Match(path)
	}

	return override
}

// IsDocumentation reports whether path is a documentation path. A value loaded
// for exactly this path wins; otherwise the default documentation matchers
// decide.
func (gitAttrs *GitAttributes) IsDocumentation(path string) bool {
	override, found := gitAttrs.boolAttributes[documentation].attributes[path]
	if !found {
		return data.DocumentationMatchers.Match(path)
	}

	return override
}

// IsGenerated reports whether path was marked as generated. Only a value
// loaded for exactly this path is considered; paths without one are never
// reported as generated.
func (gitAttrs *GitAttributes) IsGenerated(path string) bool {
	override, found := gitAttrs.boolAttributes[generated].attributes[path]

	return found && override
}

// GetLanguage returns the language assigned to filename by the loaded language
// attributes, or an empty string when no pattern matches.
//
// The patterns live in a map, so when several of them match the same filename
// the one that is returned depends on Go's unspecified map iteration order.
func (gitAttrs *GitAttributes) GetLanguage(filename string) string {
	overrides := gitAttrs.regExpAttributes[language].attributes
	for pattern, lang := range overrides {
		if !pattern.MatchString(filename) {
			continue
		}

		return lang
	}

	return ""
}

// NewGitAttributes returns a GitAttributes with an empty attribute table for
// every supported kind: vendor, documentation and generated as boolean
// attributes, language as a regExp attribute.
func NewGitAttributes() *GitAttributes {
	// newBool builds the empty table of a boolean kind from its "on" and
	// "off" attribute spellings.
	newBool := func(kind attrType, on, off string) boolAttribute {
		return boolAttribute{
			kind:       kind,
			matchers:   []string{on, off},
			attributes: map[string]bool{},
		}
	}

	return &GitAttributes{
		boolAttributes: map[attrType]boolAttribute{
			vendor:        newBool(vendor, "linguist-vendored", "linguist-vendored=false"),
			documentation: newBool(documentation, "linguist-documentation", "linguist-documentation=false"),
			generated:     newBool(generated, "linguist-generated", "linguist-generated=false"),
		},
		regExpAttributes: map[attrType]regExpAttribute{
			language: {
				kind:       language,
				matchers:   []string{"linguist-language="},
				attributes: map[*regexp.Regexp]string{},
			},
		},
	}
}

// LoadGitAttributes reads .gitattributes content from reader and records the
// attributes it declares, which override the standard strategies.
//
// path is prepended verbatim to every pattern found in the content before it
// is stored. The returned slice is never nil and contains every error found
// while reading, splitting and parsing the content; it is empty when the load
// succeeded.
func (gitAttrs *GitAttributes) LoadGitAttributes(path string, reader io.Reader) []error {
	rawAttributes, loadErrs := loadRawGitAttributes(reader)
	if len(rawAttributes) == 0 {
		// There is nothing to parse, but a failed read or malformed lines
		// must still reach the caller instead of being silently dropped.
		return append([]error{}, loadErrs...)
	}

	return append(gitAttrs.parseAttributes(path, rawAttributes), loadErrs...)
}

// String renders one "Type: ... Attributes: ..." line per attribute kind,
// boolean kinds first and regExp kinds after them. Within each group the line
// order follows map iteration and is therefore not fixed.
func (gitAttrs *GitAttributes) String() string {
	const lineFormat = "Type: %s Attributes: %v\n"

	lines := make([]string, 0, len(gitAttrs.boolAttributes)+len(gitAttrs.regExpAttributes))
	for kind, attr := range gitAttrs.boolAttributes {
		lines = append(lines, fmt.Sprintf(lineFormat, kind, attr.attributes))
	}
	for kind, attr := range gitAttrs.regExpAttributes {
		lines = append(lines, fmt.Sprintf(lineFormat, kind, attr.attributes))
	}

	return strings.Join(lines, "")
}

// loadRawGitAttributes reads everything from reader and splits it into lines,
// collecting for each pattern the attributes assigned to it.
//
// When reading fails it returns a nil map and a slice holding the read error.
// Otherwise it returns the collected map together with one error per line that
// loadLine rejected; that slice is nil when every line was accepted.
func loadRawGitAttributes(reader io.Reader) (map[string][]string, []error) {
	content, readErr := ioutil.ReadAll(reader)
	if readErr != nil {
		return nil, []error{readErr}
	}

	var lineErrs []error
	collected := map[string][]string{}
	// Splitting empty content yields a single empty line, which loadLine
	// accepts without recording anything.
	for _, rawLine := range strings.Split(string(content), "\n") {
		if lineErr := loadLine(rawLine, collected); lineErr != nil {
			lineErrs = append(lineErrs, lineErr)
		}
	}

	return collected, lineErrs
}

// loadLine parses a single .gitattributes line of the form
// "pattern attribute" and appends the attribute to gitattributes[pattern].
//
// Blank lines and comment lines (first non-blank character is '#') are
// skipped without error, as git itself ignores them. Any other line that does
// not consist of exactly two whitespace-separated fields is rejected with an
// error and leaves gitattributes untouched.
func loadLine(line string, gitattributes map[string][]string) error {
	tokens := strings.Fields(line)
	if len(tokens) == 0 || strings.HasPrefix(tokens[0], "#") {
		return nil
	}

	if len(tokens) != 2 {
		return errors.New("gitattributes: Each line only can have a pair of elements E.g. path/to/file attribute")
	}

	pattern, attribute := tokens[0], tokens[1]
	gitattributes[pattern] = append(gitattributes[pattern], attribute)

	return nil
}

// parseAttributes records every raw attribute in gitAttrs. Each pattern is
// prefixed with path by plain string concatenation before it is handed to
// parseAttribute. The returned slice is never nil and holds one entry per
// attribute that could not be recorded cleanly.
func (gitAttrs *GitAttributes) parseAttributes(path string, attributes map[string][]string) []error {
	failures := []error{}
	for pattern, assigned := range attributes {
		fullKey := path + pattern
		for i := range assigned {
			if parseErr := gitAttrs.parseAttribute(fullKey, assigned[i]); parseErr != nil {
				failures = append(failures, parseErr)
			}
		}
	}

	return failures
}

// matches reports whether str contains the first matcher registered for kind,
// looking first among the boolean kinds and then among the regExp kinds. It is
// a substring test, so it does not require str to be exactly the matcher.
func (gitAttrs *GitAttributes) matches(kind attrType, str string) bool {
	if boolAttr, found := gitAttrs.boolAttributes[kind]; found && strings.Contains(str, boolAttr.matchers[0]) {
		return true
	}

	regExpAttr, found := gitAttrs.regExpAttributes[kind]

	return found && strings.Contains(str, regExpAttr.matchers[0])
}

// parseAttribute records attribute for key under every attribute kind whose
// matcher it contains: boolean kinds go through processBoolAttr and the
// language kind through processRegExpAttr. It returns the error of the last
// kind that was processed, or a "matcher doesn't exists" error when no kind
// recognises the attribute.
func (gitAttrs *GitAttributes) parseAttribute(key string, attribute string) error {
	var (
		result     error
		recognised bool
	)

	for _, kind := range []attrType{vendor, documentation, generated, language} {
		if !gitAttrs.matches(kind, attribute) {
			continue
		}

		recognised = true
		switch kind {
		case language:
			result = gitAttrs.processRegExpAttr(kind, key, attribute)
		default:
			result = gitAttrs.processBoolAttr(kind, key, attribute)
		}
	}

	if !recognised {
		result = fmt.Errorf("gitattributes: The matcher %s doesn't exists\n", attribute)
	}

	return result
}

// processBoolAttr stores the boolean value that attribute assigns to key for
// the given kind: true when attribute equals the kind's first matcher, false
// when it equals the second.
//
// An overrideError is returned when key already had a value for this kind; the
// new value is stored anyway. Any other spelling of attribute stores nothing
// and yields a "matcher doesn't exists" error.
func (gitAttrs *GitAttributes) processBoolAttr(kind attrType, key string, attribute string) error {
	// The struct is copied out of the map, but its attributes map is shared,
	// so the writes below land in gitAttrs.
	attr := gitAttrs.boolAttributes[kind]

	var result error
	if _, seen := attr.attributes[key]; seen {
		result = &overrideError{attribute: kind, path: key}
	}

	switch attribute {
	case attr.matchers[0]:
		attr.attributes[key] = true
	case attr.matchers[1]:
		attr.attributes[key] = false
	default:
		result = fmt.Errorf("gitattributes: The matcher %s doesn't exists\n", attribute)
	}

	return result
}

// processRegExpAttr compiles regExpString and associates it with the value
// that follows the first "=" in attribute. When GetLanguageByAlias resolves
// that value to something other than OtherLanguage, the resolved name is
// stored; otherwise the raw value is stored as written.
//
// It returns the compilation error when regExpString is not a valid regular
// expression, in which case nothing is stored.
func (gitAttrs *GitAttributes) processRegExpAttr(kind attrType, regExpString string, attribute string) error {
	pattern, compileErr := regexp.Compile(regExpString)
	if compileErr != nil {
		return compileErr
	}

	// attribute is expected to look like "name=value"; keep only the value.
	value := strings.SplitN(attribute, "=", 2)[1]
	if canonical, _ := GetLanguageByAlias(value); canonical != OtherLanguage {
		value = canonical
	}
	gitAttrs.regExpAttributes[kind].attributes[pattern] = value

	return nil
}
Loading