From 5de1f4fd4544e7189bd7b719ca551d57adfb81b4 Mon Sep 17 00:00:00 2001 From: David Paz Date: Thu, 22 Jun 2017 11:04:58 +0200 Subject: [PATCH 1/9] Added support for vendor and documentation attributes --- utils.go | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/utils.go b/utils.go index 1569eae2..dc22b4df 100644 --- a/utils.go +++ b/utils.go @@ -2,6 +2,7 @@ package enry import ( "bytes" + "io/ioutil" "path/filepath" "strings" @@ -23,6 +24,9 @@ var ( configurationLanguages = map[string]bool{ "XML": true, "JSON": true, "TOML": true, "YAML": true, "INI": true, "SQL": true, } + + gitAttributes = map[string]bool{} + languageAttributes = map[string]string{} ) // IsAuxiliaryLanguage returns whether or not lang is an auxiliary language. @@ -45,12 +49,30 @@ func IsDotFile(path string) bool { // IsVendor returns whether or not path is a vendor path. func IsVendor(path string) bool { +<<<<<<< HEAD return data.VendorMatchers.Match(path) +======= + val, ok := gitAttributes[path] + if ok { + return val + } + + return vendorMatchers.Match(path) +>>>>>>> 132a9bb... Added support for vendor and documentation attributes } // IsDocumentation returns whether or not path is a documentation path. func IsDocumentation(path string) bool { +<<<<<<< HEAD return data.DocumentationMatchers.Match(path) +======= + val, ok := gitAttributes[path] + if ok { + return val + } + + return documentationMatchers.Match(path) +>>>>>>> 132a9bb... 
Added support for vendor and documentation attributes } const sniffLen = 8000 @@ -68,3 +90,43 @@ func IsBinary(data []byte) bool { return true } + +func loadGitattributes() (map[string]string, error) { + gitAttributes := map[string]string{} + data, err := ioutil.ReadFile(".gitattributes") + if err != nil { + return nil, err + } + + if data != nil { + tokens := strings.Fields(string(data)) + for i := 0; i < len(tokens); i = i + 2 { + gitAttributes[tokens[i]] = tokens[i+1] + } + } + + return gitAttributes, nil +} + +func parseAttributes(attributes map[string]string) { + for key, val := range attributes { + switch { + case val == "enry-vendored" || val == "enry-documentation": + gitAttributes[key] = true + case val == "enry-vendored=false" || val == "enry-documentation=false": + gitAttributes[key] = false + case strings.Contains(val, "enry-language="): + tokens := strings.Split(val, "=") + if len(tokens) == 2 { + languageAttributes[key] = tokens[1] + } + } + } +} + +func init() { + rawAttributes, err := loadGitattributes() + if err == nil && len(rawAttributes) > 0 { + parseAttributes(rawAttributes) + } +} From c3ca375dabb43a857ebfc0d09ad9bd5df9768310 Mon Sep 17 00:00:00 2001 From: David Paz Date: Thu, 22 Jun 2017 12:09:13 +0200 Subject: [PATCH 2/9] Added support for language attribute --- README.md | 33 +++++++++++++++- cli/enry/main.go | 2 + common.go | 24 ++++++++++++ utils.go | 97 ++++++++++++++++++++++++++++++++---------------- 4 files changed, 122 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index 8e23bccc..06934fc3 100644 --- a/README.md +++ b/README.md @@ -118,7 +118,7 @@ Note that even if enry's CLI is compatible with linguist's, its main point is th Development ------------ -*enry* re-uses parts of original [linguist](https://github.com/github/linguist) especially data in `languages.yml` to generate internal data structures. 
In oreder to update to latest upstream run +*enry* re-uses parts of original [linguist](https://github.com/github/linguist) especially data in `languages.yml` to generate internal data structures. In order to update to latest upstream run make clean code-generate @@ -172,6 +172,37 @@ to get time averages for main detection function and strategies for the whole sa if you want see measures by sample file +.gitAttributes +-------------- + +Like in linguist you can override the strategies via `.gitattributes` file. +Add a `.gitattributes` file to the directory and use the enry matchers `enry-documentation`,`enry-language` or `enry-vendored` to do the override. + +#### Vendored code + + +Use the `enry-vendored` attribute to vendor or un-vendor paths. + +``` +$cat .gitattributes +this-is-a-vendor-directory/ enry-vendored +this-is-not/ enry-vendored=false +``` +#### Documentation + +Documentation works the same way as vendored code but using `enry-documentation` and `enry-documentation=false`. + +#### Language assignation + +If you want some files to be classified according to certain language use `enry-language=[language]`. + +``` +$cat .gitattributes +.*\.go enry-language=MyFavouriteLanguage +``` +Note, that the regular expression that match the file name should be one compatible with go, see: [Golang regexp](https://golang.org/pkg/regexp/). + + Why Enry? 
------------ diff --git a/cli/enry/main.go b/cli/enry/main.go index b48b27c8..57f7912a 100644 --- a/cli/enry/main.go +++ b/cli/enry/main.go @@ -29,6 +29,8 @@ func main() { log.Fatal(err) } + enry.LoadGitAttributes() + errors := false out := make(map[string][]string, 0) err = filepath.Walk(root, func(path string, f os.FileInfo, err error) error { diff --git a/common.go b/common.go index b299d364..c0878ff2 100644 --- a/common.go +++ b/common.go @@ -18,6 +18,7 @@ type Strategy func(filename string, content []byte, candidates []string) (langua // DefaultStrategies is the strategies' sequence GetLanguage uses to detect languages. var DefaultStrategies = []Strategy{ + GetLanguagesByGitattributes, GetLanguagesByModeline, GetLanguagesByFilename, GetLanguagesByShebang, @@ -95,6 +96,12 @@ func GetLanguageByClassifier(content []byte, candidates []string) (language stri return getLanguageByStrategy(GetLanguagesByClassifier, "", content, candidates) } +// GetLanguageByGitattributes returns the language assigned for a given regular expresion in .gitattributes. 
+// This strategy needs to be initialized calling LoadGitattributes +func GetLanguageByGitattributes(filename string) (language string, safe bool) { + return getLanguageByStrategy(GetLanguagesByGitattributes, filename, nil, nil) +} + func getLanguageByStrategy(strategy Strategy, filename string, content []byte, candidates []string) (string, bool) { languages := strategy(filename, content, candidates) return getFirstLanguageAndSafe(languages) @@ -446,3 +453,20 @@ func GetLanguageByAlias(alias string) (lang string, ok bool) { return } + +// GetLanguagesByGitattributes returns the language assigned in .gitattributes if the regular expresion +// matchs with the filename +func GetLanguagesByGitattributes(filename string, content []byte, candidates []string) []string { + return languageByGitattribute(filename) +} + +func languageByGitattribute(filename string) []string { + languages := []string{} + for regExp, language := range languageGitattributes { + if regExp.MatchString(filename) { + return append(languages, language) + } + } + + return languages +} diff --git a/utils.go b/utils.go index dc22b4df..70686eb5 100644 --- a/utils.go +++ b/utils.go @@ -3,10 +3,11 @@ package enry import ( "bytes" "io/ioutil" + "log" + "os" "path/filepath" + "regexp" "strings" - - "gopkg.in/src-d/enry.v1/data" ) var ( @@ -25,8 +26,8 @@ var ( "XML": true, "JSON": true, "TOML": true, "YAML": true, "INI": true, "SQL": true, } - gitAttributes = map[string]bool{} - languageAttributes = map[string]string{} + gitattributes = map[string]bool{} + languageGitattributes = map[*regexp.Regexp]string{} ) // IsAuxiliaryLanguage returns whether or not lang is an auxiliary language. @@ -49,30 +50,20 @@ func IsDotFile(path string) bool { // IsVendor returns whether or not path is a vendor path. 
func IsVendor(path string) bool { -<<<<<<< HEAD - return data.VendorMatchers.Match(path) -======= - val, ok := gitAttributes[path] - if ok { + if val, ok := gitattributes[path]; ok { return val } return vendorMatchers.Match(path) ->>>>>>> 132a9bb... Added support for vendor and documentation attributes } // IsDocumentation returns whether or not path is a documentation path. func IsDocumentation(path string) bool { -<<<<<<< HEAD - return data.DocumentationMatchers.Match(path) -======= - val, ok := gitAttributes[path] - if ok { + if val, ok := gitattributes[path]; ok { return val } return documentationMatchers.Match(path) ->>>>>>> 132a9bb... Added support for vendor and documentation attributes } const sniffLen = 8000 @@ -91,42 +82,82 @@ func IsBinary(data []byte) bool { return true } +// LoadGitattributes reads and parse the file .gitattributes wich overrides the standards strategies +func LoadGitattributes() { + rawAttributes, err := loadGitattributes() + if err == nil && len(rawAttributes) > 0 { + parseAttributes(rawAttributes) + } +} + func loadGitattributes() (map[string]string, error) { - gitAttributes := map[string]string{} + gitattributes := map[string]string{} data, err := ioutil.ReadFile(".gitattributes") if err != nil { + if err != os.ErrNotExist { + log.Println(".gitattributes: " + err.Error()) + } + return nil, err } - - if data != nil { - tokens := strings.Fields(string(data)) - for i := 0; i < len(tokens); i = i + 2 { - gitAttributes[tokens[i]] = tokens[i+1] + if len(data) > 0 { + lines := strings.Split(string(data), "\n") + for _, line := range lines { + loadLine(line, gitattributes) } } - return gitAttributes, nil + return gitattributes, nil +} + +func loadLine(line string, gitattributes map[string]string) { + tokens := strings.Fields(line) + if len(tokens) == 2 { + gitattributes[tokens[0]] = tokens[1] + } else { + log.Println(".gitattributes: Each line only can have a pair of elements\nE.g. 
/path/to/file attribute") + } } func parseAttributes(attributes map[string]string) { for key, val := range attributes { + if isInGitattributes(key) { + log.Printf("You are overriding one of your previous lines %s", key) + } switch { case val == "enry-vendored" || val == "enry-documentation": - gitAttributes[key] = true + gitattributes[key] = true case val == "enry-vendored=false" || val == "enry-documentation=false": - gitAttributes[key] = false + gitattributes[key] = false case strings.Contains(val, "enry-language="): - tokens := strings.Split(val, "=") - if len(tokens) == 2 { - languageAttributes[key] = tokens[1] - } + processLanguageAttr(key, val) + default: + log.Printf("The matcher %s doesn't exists\n", val) } } } -func init() { - rawAttributes, err := loadGitattributes() - if err == nil && len(rawAttributes) > 0 { - parseAttributes(rawAttributes) +func isInGitattributes(key string) bool { + if _, ok := gitattributes[key]; ok { + return ok + } + + regExp, err := regexp.Compile(key) + if err == nil { + if _, ok := languageGitattributes[regExp]; ok { + return ok + } + } + + return false +} + +func processLanguageAttr(regExpString string, attribute string) { + tokens := strings.Split(attribute, "=") + if len(tokens) == 2 { + regExp, err := regexp.Compile(regExpString) + if err == nil { + languageGitattributes[regExp] = tokens[1] + } } } From 60df02267f796d31f1b366dd8e6ca1e934f20990 Mon Sep 17 00:00:00 2001 From: David Paz Date: Mon, 26 Jun 2017 11:52:58 +0200 Subject: [PATCH 3/9] Refactored to be more easily tested --- utils.go | 65 +++++++++++++++++++++++++++++++++----------------------- 1 file changed, 39 insertions(+), 26 deletions(-) diff --git a/utils.go b/utils.go index 70686eb5..ff62a410 100644 --- a/utils.go +++ b/utils.go @@ -2,6 +2,8 @@ package enry import ( "bytes" + "errors" + "fmt" "io/ioutil" "log" "os" @@ -84,15 +86,15 @@ func IsBinary(data []byte) bool { // LoadGitattributes reads and parse the file .gitattributes wich overrides the standards 
strategies func LoadGitattributes() { - rawAttributes, err := loadGitattributes() + rawAttributes, err := loadGitattributes(".gitattributes") if err == nil && len(rawAttributes) > 0 { parseAttributes(rawAttributes) } } -func loadGitattributes() (map[string]string, error) { +func loadGitattributes(name string) (map[string]string, error) { gitattributes := map[string]string{} - data, err := ioutil.ReadFile(".gitattributes") + data, err := ioutil.ReadFile(name) if err != nil { if err != os.ErrNotExist { log.Println(".gitattributes: " + err.Error()) @@ -100,6 +102,7 @@ func loadGitattributes() (map[string]string, error) { return nil, err } + if len(data) > 0 { lines := strings.Split(string(data), "\n") for _, line := range lines { @@ -110,54 +113,64 @@ func loadGitattributes() (map[string]string, error) { return gitattributes, nil } -func loadLine(line string, gitattributes map[string]string) { +func loadLine(line string, gitattributes map[string]string) error { tokens := strings.Fields(line) if len(tokens) == 2 { + var err error + if isInside(tokens[0], gitattributes) { + err = errors.New(fmt.Sprintf("You are overriding one of your previous lines %s\n", tokens[0])) + log.Printf(err.Error()) + } + gitattributes[tokens[0]] = tokens[1] + return err } else { - log.Println(".gitattributes: Each line only can have a pair of elements\nE.g. /path/to/file attribute") + err := errors.New(".gitattributes: Each line only can have a pair of elements E.g. 
path/to/file attribute") + log.Println(err.Error()) + + return err } } -func parseAttributes(attributes map[string]string) { +func parseAttributes(attributes map[string]string) []error { + var errArray []error for key, val := range attributes { - if isInGitattributes(key) { - log.Printf("You are overriding one of your previous lines %s", key) - } switch { case val == "enry-vendored" || val == "enry-documentation": gitattributes[key] = true case val == "enry-vendored=false" || val == "enry-documentation=false": gitattributes[key] = false case strings.Contains(val, "enry-language="): - processLanguageAttr(key, val) + err := processLanguageAttr(key, val) + if err != nil { + errArray = append(errArray, err) + } default: - log.Printf("The matcher %s doesn't exists\n", val) + err := errors.New(fmt.Sprintf("The matcher %s doesn't exists\n", val)) + errArray = append(errArray, err) + log.Printf(err.Error()) } } + + return errArray } -func isInGitattributes(key string) bool { +func isInside(key string, gitattributes map[string]string) bool { if _, ok := gitattributes[key]; ok { return ok } - regExp, err := regexp.Compile(key) - if err == nil { - if _, ok := languageGitattributes[regExp]; ok { - return ok - } - } - return false } -func processLanguageAttr(regExpString string, attribute string) { - tokens := strings.Split(attribute, "=") - if len(tokens) == 2 { - regExp, err := regexp.Compile(regExpString) - if err == nil { - languageGitattributes[regExp] = tokens[1] - } +func processLanguageAttr(regExpString string, attribute string) error { + tokens := strings.SplitN(attribute, "=", 2) + regExp, err := regexp.Compile(regExpString) + if err != nil { + log.Printf(err.Error()) + return err } + + languageGitattributes[regExp] = tokens[1] + return nil } From 04528e0b407a74caa9f6243ae6b43b1f2d600dbd Mon Sep 17 00:00:00 2001 From: David Paz Date: Mon, 26 Jun 2017 12:42:18 +0200 Subject: [PATCH 4/9] Now looks if the language is known by enry --- Makefile | 2 +- cli/enry/main.go | 2 
+- common.go | 10 +++------- utils.go | 15 ++++++++++----- 4 files changed, 15 insertions(+), 14 deletions(-) diff --git a/Makefile b/Makefile index 238674f7..91fd5b83 100644 --- a/Makefile +++ b/Makefile @@ -26,7 +26,7 @@ test-coverage: $(LINGUIST_PATH) tail -n +2 $(COVERAGE_PROFILE) >> $(COVERAGE_REPORT); \ rm $(COVERAGE_PROFILE); \ fi; \ - done; + done; code-generate: $(LINGUIST_PATH) mkdir -p data diff --git a/cli/enry/main.go b/cli/enry/main.go index 57f7912a..316976f7 100644 --- a/cli/enry/main.go +++ b/cli/enry/main.go @@ -29,7 +29,7 @@ func main() { log.Fatal(err) } - enry.LoadGitAttributes() + enry.LoadGitattributes() errors := false out := make(map[string][]string, 0) diff --git a/common.go b/common.go index c0878ff2..616bb63f 100644 --- a/common.go +++ b/common.go @@ -96,7 +96,7 @@ func GetLanguageByClassifier(content []byte, candidates []string) (language stri return getLanguageByStrategy(GetLanguagesByClassifier, "", content, candidates) } -// GetLanguageByGitattributes returns the language assigned for a given regular expresion in .gitattributes. +// GetLanguageByGitattributes returns the language assigned to a file for a given regular expresion in .gitattributes. // This strategy needs to be initialized calling LoadGitattributes func GetLanguageByGitattributes(filename string) (language string, safe bool) { return getLanguageByStrategy(GetLanguagesByGitattributes, filename, nil, nil) @@ -454,13 +454,9 @@ func GetLanguageByAlias(alias string) (lang string, ok bool) { return } -// GetLanguagesByGitattributes returns the language assigned in .gitattributes if the regular expresion -// matchs with the filename +// GetLanguagesByGitattributes returns a length 1 slice with the language assigned in .gitattributes if the regular expresion +// matchs with the filename. It is comply with the signature to be a Strategy type. 
func GetLanguagesByGitattributes(filename string, content []byte, candidates []string) []string { - return languageByGitattribute(filename) -} - -func languageByGitattribute(filename string) []string { languages := []string{} for regExp, language := range languageGitattributes { if regExp.MatchString(filename) { diff --git a/utils.go b/utils.go index ff62a410..bfb9c3bd 100644 --- a/utils.go +++ b/utils.go @@ -84,20 +84,20 @@ func IsBinary(data []byte) bool { return true } -// LoadGitattributes reads and parse the file .gitattributes wich overrides the standards strategies +// LoadGitattributes reads and parses the file .gitattributes which overrides the standard strategies func LoadGitattributes() { - rawAttributes, err := loadGitattributes(".gitattributes") + rawAttributes, err := loadRawGitattributes(".gitattributes") if err == nil && len(rawAttributes) > 0 { parseAttributes(rawAttributes) } } -func loadGitattributes(name string) (map[string]string, error) { +func loadRawGitattributes(name string) (map[string]string, error) { gitattributes := map[string]string{} data, err := ioutil.ReadFile(name) if err != nil { if err != os.ErrNotExist { - log.Println(".gitattributes: " + err.Error()) + log.Println(name + ": " + err.Error()) } return nil, err @@ -170,7 +170,12 @@ func processLanguageAttr(regExpString string, attribute string) error { log.Printf(err.Error()) return err } + lang, _ := GetLanguageByAlias(tokens[1]) + if lang != OtherLanguage { + languageGitattributes[regExp] = lang + } else { + languageGitattributes[regExp] = tokens[1] + } - languageGitattributes[regExp] = tokens[1] return nil } From 325a91e2c8ad6205ddfc6492684dee09efbd347d Mon Sep 17 00:00:00 2001 From: David Paz Date: Tue, 27 Jun 2017 11:59:37 +0200 Subject: [PATCH 5/9] Changed Matchers from enry- to linguist- --- README.md | 14 +++++++------- utils.go | 10 +++++----- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 06934fc3..52505256 100644 --- 
a/README.md +++ b/README.md @@ -176,29 +176,29 @@ if you want see measures by sample file -------------- Like in linguist you can override the strategies via `.gitattributes` file. -Add a `.gitattributes` file to the directory and use the enry matchers `enry-documentation`,`enry-language` or `enry-vendored` to do the override. +Add a `.gitattributes` file to the directory and use the same matchers that you would uses in linguist `linguist-documentation`,`linguist-language` or `linguist-vendored` to do the override. #### Vendored code -Use the `enry-vendored` attribute to vendor or un-vendor paths. +Use the `linguist-vendored` attribute to vendor or un-vendor paths. ``` $cat .gitattributes -this-is-a-vendor-directory/ enry-vendored -this-is-not/ enry-vendored=false +this-is-a-vendor-directory/ linguist-vendored +this-is-not/ linguist-vendored=false ``` #### Documentation -Documentation works the same way as vendored code but using `enry-documentation` and `enry-documentation=false`. +Documentation works the same way as vendored code but using `linguist-documentation` and `linguist-documentation=false`. #### Language assignation -If you want some files to be classified according to certain language use `enry-language=[language]`. +If you want some files to be classified according to certain language use `linguist-language=[language]`. ``` $cat .gitattributes -.*\.go enry-language=MyFavouriteLanguage +.*\.go linguist-language=MyFavouriteLanguage ``` Note, that the regular expression that match the file name should be one compatible with go, see: [Golang regexp](https://golang.org/pkg/regexp/). 
diff --git a/utils.go b/utils.go index bfb9c3bd..050f7106 100644 --- a/utils.go +++ b/utils.go @@ -118,7 +118,7 @@ func loadLine(line string, gitattributes map[string]string) error { if len(tokens) == 2 { var err error if isInside(tokens[0], gitattributes) { - err = errors.New(fmt.Sprintf("You are overriding one of your previous lines %s\n", tokens[0])) + err = errors.New(fmt.Sprintf(".gitattributes: You are overriding one of your previous lines %s\n", tokens[0])) log.Printf(err.Error()) } @@ -136,17 +136,17 @@ func parseAttributes(attributes map[string]string) []error { var errArray []error for key, val := range attributes { switch { - case val == "enry-vendored" || val == "enry-documentation": + case val == "linguist-vendored" || val == "linguist-documentation": gitattributes[key] = true - case val == "enry-vendored=false" || val == "enry-documentation=false": + case val == "linguist-vendored=false" || val == "linguist-documentation=false": gitattributes[key] = false - case strings.Contains(val, "enry-language="): + case strings.Contains(val, "linguist-language="): err := processLanguageAttr(key, val) if err != nil { errArray = append(errArray, err) } default: - err := errors.New(fmt.Sprintf("The matcher %s doesn't exists\n", val)) + err := errors.New(fmt.Sprintf("gitattributes: The matcher %s doesn't exists\n", val)) errArray = append(errArray, err) log.Printf(err.Error()) } From 33bb15797e09f18b6d5daeac62e2d301fccc7f78 Mon Sep 17 00:00:00 2001 From: David Paz Date: Tue, 27 Jun 2017 13:19:33 +0200 Subject: [PATCH 6/9] Now a path support multiple types of attributes --- utils.go | 110 +++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 74 insertions(+), 36 deletions(-) diff --git a/utils.go b/utils.go index 050f7106..387585bf 100644 --- a/utils.go +++ b/utils.go @@ -10,6 +10,8 @@ import ( "path/filepath" "regexp" "strings" + + "gopkg.in/src-d/enry.v1/data" ) var ( @@ -28,10 +30,20 @@ var ( "XML": true, "JSON": true, "TOML": true, "YAML": true, 
"INI": true, "SQL": true, } - gitattributes = map[string]bool{} - languageGitattributes = map[*regexp.Regexp]string{} + vendorGitattributes = map[string]bool{} + documentationGitattributes = map[string]bool{} + languageGitattributes = map[*regexp.Regexp]string{} ) +type OverrideError struct { + attribute string + path string +} + +func (e *OverrideError) Error() string { + return fmt.Sprintf(".gitattributes: You are overriding a %s attribute of one of your previous lines %s\n", e.attribute, e.path) +} + // IsAuxiliaryLanguage returns whether or not lang is an auxiliary language. func IsAuxiliaryLanguage(lang string) bool { _, ok := auxiliaryLanguages[lang] @@ -52,20 +64,20 @@ func IsDotFile(path string) bool { // IsVendor returns whether or not path is a vendor path. func IsVendor(path string) bool { - if val, ok := gitattributes[path]; ok { + if val, ok := vendorGitattributes[path]; ok { return val } - return vendorMatchers.Match(path) + return data.VendorMatchers.Match(path) } // IsDocumentation returns whether or not path is a documentation path. 
func IsDocumentation(path string) bool { - if val, ok := gitattributes[path]; ok { + if val, ok := documentationGitattributes[path]; ok { return val } - return documentationMatchers.Match(path) + return data.DocumentationMatchers.Match(path) } const sniffLen = 8000 @@ -92,8 +104,8 @@ func LoadGitattributes() { } } -func loadRawGitattributes(name string) (map[string]string, error) { - gitattributes := map[string]string{} +func loadRawGitattributes(name string) (map[string][]string, error) { + gitattributes := map[string][]string{} data, err := ioutil.ReadFile(name) if err != nil { if err != os.ErrNotExist { @@ -113,54 +125,80 @@ func loadRawGitattributes(name string) (map[string]string, error) { return gitattributes, nil } -func loadLine(line string, gitattributes map[string]string) error { +func loadLine(line string, gitattributes map[string][]string) error { tokens := strings.Fields(line) if len(tokens) == 2 { - var err error - if isInside(tokens[0], gitattributes) { - err = errors.New(fmt.Sprintf(".gitattributes: You are overriding one of your previous lines %s\n", tokens[0])) - log.Printf(err.Error()) - } - gitattributes[tokens[0]] = tokens[1] - return err - } else { + gitattributes[tokens[0]] = append(gitattributes[tokens[0]], tokens[1]) + return nil + } else if len(tokens) != 0 { err := errors.New(".gitattributes: Each line only can have a pair of elements E.g. 
path/to/file attribute") log.Println(err.Error()) - return err } + return nil } -func parseAttributes(attributes map[string]string) []error { - var errArray []error - for key, val := range attributes { - switch { - case val == "linguist-vendored" || val == "linguist-documentation": - gitattributes[key] = true - case val == "linguist-vendored=false" || val == "linguist-documentation=false": - gitattributes[key] = false - case strings.Contains(val, "linguist-language="): - err := processLanguageAttr(key, val) +func parseAttributes(attributes map[string][]string) []error { + errArray := []error{} + for key, values := range attributes { + for _, val := range values { + err := parseAttribute(key, val) if err != nil { errArray = append(errArray, err) } - default: - err := errors.New(fmt.Sprintf("gitattributes: The matcher %s doesn't exists\n", val)) - errArray = append(errArray, err) - log.Printf(err.Error()) } } return errArray } -func isInside(key string, gitattributes map[string]string) bool { - if _, ok := gitattributes[key]; ok { - return ok +func parseAttribute(key string, attribute string) error { + var err error + switch { + case strings.Contains(attribute, "linguist-vendored"): + err = processVendorAttr(key, attribute) + case strings.Contains(attribute, "linguist-documentation"): + err = processDocumentationAttr(key, attribute) + case strings.Contains(attribute, "linguist-language="): + err = processLanguageAttr(key, attribute) + default: + err = errors.New(fmt.Sprintf("gitattributes: The matcher %s doesn't exists\n", attribute)) + log.Printf(err.Error()) + } + return err +} + +func processVendorAttr(key string, attribute string) error { + var err error + if _, ok := vendorGitattributes[key]; ok { + err = &OverrideError{attribute: "vendor", path: key} + } + + switch { + case attribute == "linguist-vendored": + vendorGitattributes[key] = true + case attribute == "linguist-vendored=false": + vendorGitattributes[key] = false + } + + return err +} + +func 
processDocumentationAttr(key string, attribute string) error { + var err error + if _, ok := documentationGitattributes[key]; ok { + err = &OverrideError{attribute: "documentation", path: key} + } + + switch { + case attribute == "linguist-documentation": + documentationGitattributes[key] = true + case attribute == "linguist-documentation=false": + documentationGitattributes[key] = false } - return false + return err } func processLanguageAttr(regExpString string, attribute string) error { From 29dc81f1b729cff06af26672b7eb6e06cc38c5d1 Mon Sep 17 00:00:00 2001 From: David Paz Date: Mon, 3 Jul 2017 11:46:29 +0200 Subject: [PATCH 7/9] Updated to the new interface --- README.md | 1 - cli/enry/main.go | 37 ++++--- common.go | 36 ++++--- gitattributes.go | 234 ++++++++++++++++++++++++++++++++++++++++++ gitattributes_test.go | 196 +++++++++++++++++++++++++++++++++++ utils.go | 149 --------------------------- 6 files changed, 471 insertions(+), 182 deletions(-) create mode 100644 gitattributes.go create mode 100644 gitattributes_test.go diff --git a/README.md b/README.md index 52505256..29b96f2c 100644 --- a/README.md +++ b/README.md @@ -180,7 +180,6 @@ Add a `.gitattributes` file to the directory and use the same matchers that you #### Vendored code - Use the `linguist-vendored` attribute to vendor or un-vendor paths. 
``` diff --git a/cli/enry/main.go b/cli/enry/main.go index 316976f7..e8d6a2ac 100644 --- a/cli/enry/main.go +++ b/cli/enry/main.go @@ -29,7 +29,11 @@ func main() { log.Fatal(err) } - enry.LoadGitattributes() + gitAttributes := enry.NewGitAttributes() + reader, err := os.Open(".gitattributes") + if err == nil { + gitAttributes.LoadGitAttributes("", reader) + } errors := false out := make(map[string][]string, 0) @@ -55,8 +59,9 @@ func main() { relativePath = relativePath + "/" } - if enry.IsVendor(relativePath) || enry.IsDotFile(relativePath) || - enry.IsDocumentation(relativePath) || enry.IsConfiguration(relativePath) { + if gitAttributes.IsVendor(relativePath) || enry.IsDotFile(relativePath) || + gitAttributes.IsDocumentation(relativePath) || enry.IsConfiguration(relativePath) || + gitAttributes.IsGenerated(path) { if f.IsDir() { return filepath.SkipDir } @@ -68,20 +73,18 @@ func main() { return nil } - language, ok := enry.GetLanguageByExtension(path) - if !ok { - if language, ok = enry.GetLanguageByFilename(path); !ok { - content, err := ioutil.ReadFile(path) - if err != nil { - errors = true - log.Println(err) - return nil - } - - language = enry.GetLanguage(filepath.Base(path), content) - if language == enry.OtherLanguage { - return nil - } + content, err := ioutil.ReadFile(path) + if err != nil { + errors = true + log.Println(err) + return nil + } + + language := gitAttributes.GetLanguage(filepath.Base(path)) + if len(language) == 0 { + language = enry.GetLanguage(filepath.Base(path), content) + if language == enry.OtherLanguage { + return nil } } diff --git a/common.go b/common.go index 616bb63f..a8079769 100644 --- a/common.go +++ b/common.go @@ -3,6 +3,7 @@ package enry import ( "bufio" "bytes" + "os" "path/filepath" "regexp" "strings" @@ -18,7 +19,6 @@ type Strategy func(filename string, content []byte, candidates []string) (langua // DefaultStrategies is the strategies' sequence GetLanguage uses to detect languages. 
var DefaultStrategies = []Strategy{ - GetLanguagesByGitattributes, GetLanguagesByModeline, GetLanguagesByFilename, GetLanguagesByShebang, @@ -99,7 +99,7 @@ func GetLanguageByClassifier(content []byte, candidates []string) (language stri // GetLanguageByGitattributes returns the language assigned to a file for a given regular expresion in .gitattributes. // This strategy needs to be initialized calling LoadGitattributes func GetLanguageByGitattributes(filename string) (language string, safe bool) { - return getLanguageByStrategy(GetLanguagesByGitattributes, filename, nil, nil) + return getLanguageByStrategy(GetLanguagesByGitAttributes, filename, nil, nil) } func getLanguageByStrategy(strategy Strategy, filename string, content []byte, candidates []string) (string, bool) { @@ -414,6 +414,25 @@ func GetLanguagesBySpecificClassifier(content []byte, candidates []string, class return classifier.Classify(content, mapCandidates) } +// GetLanguagesByGitAttributes returns either a string slice with the language +// if the filename matches with a regExp in .gitattributes or returns a empty slice +// in case no regExp matches the filename. It complies with the signature to be a Strategy type. +func GetLanguagesByGitAttributes(filename string, content []byte, candidates []string) []string { + gitAttributes := NewGitAttributes() + reader, err := os.Open(".gitattributes") + if err != nil { + return nil + } + + gitAttributes.LoadGitAttributes("", reader) + lang := gitAttributes.GetLanguage(filename) + if lang != OtherLanguage { + return []string{} + } + + return []string{lang} +} + // GetLanguageExtensions returns the different extensions being used by the language. 
func GetLanguageExtensions(language string) []string { return data.ExtensionsByLanguage[language] @@ -453,16 +472,3 @@ func GetLanguageByAlias(alias string) (lang string, ok bool) { return } - -// GetLanguagesByGitattributes returns a length 1 slice with the language assigned in .gitattributes if the regular expresion -// matchs with the filename. It is comply with the signature to be a Strategy type. -func GetLanguagesByGitattributes(filename string, content []byte, candidates []string) []string { - languages := []string{} - for regExp, language := range languageGitattributes { - if regExp.MatchString(filename) { - return append(languages, language) - } - } - - return languages -} diff --git a/gitattributes.go b/gitattributes.go new file mode 100644 index 00000000..2b9867c1 --- /dev/null +++ b/gitattributes.go @@ -0,0 +1,234 @@ +package enry + +import ( + "errors" + "fmt" + "io" + "io/ioutil" + "regexp" + "strings" +) + +type attrType int + +const ( + vendor attrType = iota + documentation + generated + language +) + +const attrTypeName = "vendordocumentationgeneratedlanguage" + +var attrTypeIndex = [...]uint8{0, 6, 19, 28, 36} + +func (i attrType) String() string { + if i < 0 || i >= attrType(len(attrTypeIndex)-1) { + return fmt.Sprintf("attrType(%d)", i) + } + + return attrTypeName[attrTypeIndex[i]:attrTypeIndex[i+1]] +} + +type boolAttribute struct { + kind attrType + matchers []string + attributes map[string]bool +} + +type regExpAttribute struct { + matchers []string + attributes map[*regexp.Regexp]string +} + +// GitAttributes is a struct that contains two maps: +// boolAttributes contains all the attributes that works like a boolean condition, +// regExpAttributes contains all the attributes that match a regExp to choose if an attribute is applied or not +type GitAttributes struct { + boolAttributes map[attrType]boolAttribute + regExpAttributes map[attrType]regExpAttribute +} + +type overrideError struct { + attribute attrType + path string +} + +func (e 
*overrideError) Error() string { + return fmt.Sprintf("gitattributes: You are overriding a %v attribute of one of your previous lines %s\n", e.attribute, e.path) +} + +// IsVendor returns whether or not path is a vendor path. +func (gitAttrs *GitAttributes) IsVendor(path string) bool { + if val, ok := gitAttrs.boolAttributes[vendor].attributes[path]; ok { + return val + } + + return IsVendor(path) +} + +// IsDocumentation returns whether or not path is a documentation path. +func (gitAttrs *GitAttributes) IsDocumentation(path string) bool { + if val, ok := gitAttrs.boolAttributes[documentation].attributes[path]; ok { + return val + } + + return IsDocumentation(path) +} + +// IsGenerated returns whether or not path is a generated path. +func (gitAttrs *GitAttributes) IsGenerated(path string) bool { + if val, ok := gitAttrs.boolAttributes[generated].attributes[path]; ok { + return val + } + + return false +} + +// GetLanguage gets the language of a file matching the language attributes given. +// Returns either OtherLanguage or the language if the regExp matches. +func (gitAttrs *GitAttributes) GetLanguage(filename string) string { + for regExp, language := range gitAttrs.regExpAttributes[language].attributes { + if regExp.MatchString(filename) { + return language + } + } + + return OtherLanguage +} + +// NewGitAttributes initializes a GitAttributes object. +func NewGitAttributes() *GitAttributes { + gitAttrs := GitAttributes{ + boolAttributes: map[attrType]boolAttribute{ + vendor: boolAttribute{matchers: []string{"linguist-vendored", "linguist-vendored=false"}, attributes: map[string]bool{}}, + documentation: boolAttribute{matchers: []string{"linguist-documentation", "linguist-documentation=false"}, attributes: map[string]bool{}}, + generated: boolAttribute{matchers: []string{"linguist-generated", "linguist-generated=false"}, attributes: map[string]bool{}}, + }, + regExpAttributes: map[attrType]regExpAttribute{ + language: regExpAttribute{matchers: []string{"linguist-language="}, attributes: 
map[*regexp.Regexp]string{}}, + }, + } + + return &gitAttrs +} + +// LoadGitAttributes reads and parses the file .gitattributes which overrides the standard strategies. +// Returns a slice of the errors that may have occurred during the load. +func (gitAttrs *GitAttributes) LoadGitAttributes(path string, reader io.Reader) []error { + rawAttributes, errArr := loadRawGitAttributes(reader) + if len(rawAttributes) == 0 { + return []error{} + } + + return append(gitAttrs.parseAttributes(path, rawAttributes), errArr...) +} + +func loadRawGitAttributes(reader io.Reader) (map[string][]string, []error) { + rawAttributes := map[string][]string{} + var errArr []error + data, err := ioutil.ReadAll(reader) + if err != nil { + errArr = append(errArr, err) + return nil, errArr + } + + if len(data) > 0 { + lines := strings.Split(string(data), "\n") + for _, line := range lines { + err := loadLine(line, rawAttributes) + if err != nil { + errArr = append(errArr, err) + } + } + } + + return rawAttributes, errArr +} + +func loadLine(line string, gitattributes map[string][]string) error { + tokens := strings.Fields(line) + if len(tokens) == 2 { + gitattributes[tokens[0]] = append(gitattributes[tokens[0]], tokens[1]) + return nil + } else if len(tokens) != 0 { + err := errors.New("gitattributes: Each line only can have a pair of elements E.g. 
path/to/file attribute") + return err + } + + return nil +} + +func (gitAttrs *GitAttributes) parseAttributes(path string, attributes map[string][]string) []error { + errArray := []error{} + for key, values := range attributes { + for _, val := range values { + err := gitAttrs.parseAttribute(path+key, val) + if err != nil { + errArray = append(errArray, err) + } + } + } + + return errArray +} + +func (gitAttrs *GitAttributes) matches(kind attrType, str string) bool { + if bollAttrs, ok := gitAttrs.boolAttributes[kind]; ok && strings.Contains(str, bollAttrs.matchers[0]) { + return true + } else if regExpAttrs, ok := gitAttrs.regExpAttributes[kind]; ok && strings.Contains(str, regExpAttrs.matchers[0]) { + return true + } + + return false +} + +func (gitAttrs *GitAttributes) parseAttribute(key string, attribute string) error { + var err error + for kind := vendor; kind <= language; kind++ { + if gitAttrs.matches(kind, attribute) { + if kind < language { + err = gitAttrs.processBoolAttr(kind, key, attribute) + } else { + err = gitAttrs.processRegExpAttr(kind, key, attribute) + } + } + } + + return err +} + +func (gitAttrs *GitAttributes) processBoolAttr(kind attrType, key string, attribute string) error { + var err error + if _, ok := gitAttrs.boolAttributes[kind].attributes[key]; ok { + err = &overrideError{attribute: kind, path: key} + } + + switch { + case attribute == gitAttrs.boolAttributes[kind].matchers[0]: + gitAttrs.boolAttributes[kind].attributes[key] = true + case attribute == gitAttrs.boolAttributes[kind].matchers[1]: + gitAttrs.boolAttributes[kind].attributes[key] = false + default: + err = errors.New(fmt.Sprintf("gitattributes: The matcher %s doesn't exists\n", attribute)) + } + + return err +} + +func (gitAttrs *GitAttributes) processRegExpAttr(kind attrType, regExpString string, attribute string) error { + tokens := strings.SplitN(attribute, "=", 2) + regExp, err := regexp.Compile(regExpString) + if err != nil { + return err + } + + lang, _ := 
GetLanguageByAlias(tokens[1]) + if lang != OtherLanguage { + gitAttrs.regExpAttributes[kind].attributes[regExp] = lang + } else { + gitAttrs.regExpAttributes[kind].attributes[regExp] = tokens[1] + } + + return nil +} diff --git a/gitattributes_test.go b/gitattributes_test.go new file mode 100644 index 00000000..e55469d9 --- /dev/null +++ b/gitattributes_test.go @@ -0,0 +1,196 @@ +package enry + +import ( + "fmt" + "io/ioutil" + "os" + + "github.com/stretchr/testify/assert" +) + +func (s *EnryTestSuite) TestLoadGitAttributes() { + gitAttrs := NewGitAttributes() + tmpGitAttributes, err := ioutil.TempFile("/tmp", "gitattributes") + assert.NoError(s.T(), err) + data := []byte("path linguist-vendored\n path/foo linguist-vendored=false\n path/vendor linguist-vendored=false \n path/foo linguist-documentation\n path/generated linguist-generated\n" + + "path/bar linguist-vendored=fail\n path/foo linguist-documentation=false\n path/bar not-a-matcher\n path/a linguist-documentation linguist-vendored") + tmpGitAttributes.Write(data) + tmpGitAttributes.Close() + reader, err := os.Open(tmpGitAttributes.Name()) + assert.NoError(s.T(), err) + errArr := gitAttrs.LoadGitAttributes("test/", reader) + if len(errArr) != 3 { + fmt.Println(errArr) + s.Fail("The error length it's not the expected") + } + + tests := []struct { + name string + expected int + }{ + {name: "TestLoadGitAttributes_1", expected: 3}, + {name: "TestLoadGitAttributes_2", expected: 1}, + {name: "TestLoadGitAttributes_3", expected: 1}, + {name: "TestLoadGitAttributes_4", expected: 0}, + } + + for i, test := range tests { + if attrType(i) < language { + assert.Equal(s.T(), len(gitAttrs.boolAttributes[attrType(i)].attributes), test.expected, fmt.Sprintf("%v: is = %v, expected: %v", test.name, len(gitAttrs.boolAttributes[attrType(i)].attributes), test.expected)) + } else { + assert.Equal(s.T(), len(gitAttrs.regExpAttributes[attrType(i)].attributes), test.expected, fmt.Sprintf("%v: is = %v, expected: %v", test.name, 
len(gitAttrs.regExpAttributes[attrType(i)].attributes), test.expected)) + } + } + + err = os.RemoveAll(tmpGitAttributes.Name()) + assert.NoError(s.T(), err) +} + +func (s *EnryTestSuite) TestLoadGitAttributesEmpty() { + gitAttrs := NewGitAttributes() + tmpGitAttributes, err := ioutil.TempFile("/tmp", "gitattributes") + assert.NoError(s.T(), err) + reader, err := os.Open(tmpGitAttributes.Name()) + assert.NoError(s.T(), err) + errArr := gitAttrs.LoadGitAttributes("test/", reader) + if len(errArr) != 0 { + fmt.Println(errArr) + s.Fail("The error length it's not the expected") + } +} + +func (s *EnryTestSuite) TestIsVendorGitAttributes() { + gitAttrs := NewGitAttributes() + tmpGitAttributes, err := ioutil.TempFile("/tmp", "gitattributes") + assert.NoError(s.T(), err) + data := []byte("path linguist-vendored\n path/foo linguist-vendored=false\n path/vendor linguist-vendored=false") + tmpGitAttributes.Write(data) + tmpGitAttributes.Close() + reader, err := os.Open(tmpGitAttributes.Name()) + assert.NoError(s.T(), err) + errArr := gitAttrs.LoadGitAttributes("", reader) + if len(errArr) != 0 { + fmt.Println(errArr) + s.Fail("The error length it's not the expected") + } + + tests := []struct { + name string + path string + expected bool + }{ + {name: "TestIsVendorGitAttributes_1", path: "path", expected: true}, + {name: "TestIsVendorGitAttributes_2", path: "path/foo", expected: false}, + {name: "TestIsVendorGitAttributes_3", path: "path/vendor", expected: false}, + {name: "TestIsVendorGitAttributes_4", path: "vendor/", expected: true}, + {name: "TestIsVendorGitAttributes_5", path: "dir/", expected: false}, + } + for _, test := range tests { + is := gitAttrs.IsVendor(test.path) + assert.Equal(s.T(), is, test.expected, fmt.Sprintf("%v: is = %v, expected: %v", test.name, is, test.expected)) + } + + err = os.RemoveAll(tmpGitAttributes.Name()) + assert.NoError(s.T(), err) +} + +func (s *EnryTestSuite) TestIsDocumentationGitAttributes() { + gitAttrs := NewGitAttributes() + 
tmpGitAttributes, err := ioutil.TempFile("/tmp", "gitattributes") + assert.NoError(s.T(), err) + data := []byte("path linguist-documentation\n path/foo linguist-documentation=false\n path/documentation linguist-vendored=false") + tmpGitAttributes.Write(data) + tmpGitAttributes.Close() + reader, err := os.Open(tmpGitAttributes.Name()) + assert.NoError(s.T(), err) + errArr := gitAttrs.LoadGitAttributes("", reader) + if len(errArr) != 0 { + fmt.Println(errArr) + s.Fail("The error length it's not the expected") + } + + tests := []struct { + name string + path string + expected bool + }{ + {name: "TestIsDocumentationGitAttributes_1", path: "path", expected: true}, + {name: "TestIsDocumentationGitAttributes_2", path: "path/foo", expected: false}, + {name: "TestIsDocumentationGitAttributes_3", path: "path/documentation", expected: false}, + {name: "TestIsDocumentationGitAttributes_4", path: "README", expected: true}, + {name: "TestIsDocumentationGitAttributes_5", path: "dir/", expected: false}, + } + for _, test := range tests { + is := gitAttrs.IsDocumentation(test.path) + assert.Equal(s.T(), is, test.expected, fmt.Sprintf("%v: is = %v, expected: %v", test.name, is, test.expected)) + } + + err = os.RemoveAll(tmpGitAttributes.Name()) + assert.NoError(s.T(), err) +} + +func (s *EnryTestSuite) TestIsGeneratedGitAttributes() { + gitAttrs := NewGitAttributes() + tmpGitAttributes, err := ioutil.TempFile("/tmp", "gitattributes") + assert.NoError(s.T(), err) + data := []byte("path linguist-generated\n path/foo linguist-generated=false\n path/generated linguist-generated=false") + tmpGitAttributes.Write(data) + tmpGitAttributes.Close() + reader, err := os.Open(tmpGitAttributes.Name()) + assert.NoError(s.T(), err) + errArr := gitAttrs.LoadGitAttributes("", reader) + if len(errArr) != 0 { + fmt.Println(errArr) + s.Fail("The error length it's not the expected") + } + + tests := []struct { + name string + path string + expected bool + }{ + {name: "TestIsGeneratedGitAttributes_1", 
path: "path", expected: true}, + {name: "TestIsGeneratedGitAttributes_2", path: "path/foo", expected: false}, + {name: "TestIsGeneratedGitAttributes_3", path: "path/generated", expected: false}, + {name: "TestIsGeneratedGitAttributes_4", path: "path2", expected: false}, + } + for _, test := range tests { + is := gitAttrs.IsGenerated(test.path) + assert.Equal(s.T(), is, test.expected, fmt.Sprintf("%v: is = %v, expected: %v", test.name, is, test.expected)) + } + + err = os.RemoveAll(tmpGitAttributes.Name()) + assert.NoError(s.T(), err) +} + +func (s *EnryTestSuite) TestGetLanguageGitAttributes() { + gitAttrs := NewGitAttributes() + tmpGitAttributes, err := ioutil.TempFile("/tmp", "gitattributes") + assert.NoError(s.T(), err) + data := []byte(".*\\.go linguist-language=GO\n path/not-java/.*\\.java linguist-language=notJava\n") + tmpGitAttributes.Write(data) + tmpGitAttributes.Close() + reader, err := os.Open(tmpGitAttributes.Name()) + assert.NoError(s.T(), err) + errArr := gitAttrs.LoadGitAttributes("", reader) + if len(errArr) != 0 { + fmt.Println(errArr) + s.Fail("The error length it's not the expected") + } + + tests := []struct { + name string + path string + expected string + }{ + {name: "TestGetLanguageGitAttributes_1", path: "path/files/a.go", expected: "Go"}, + {name: "TestGetLanguageGitAttributes_2", path: "path/files/subdir/b.go", expected: "Go"}, + {name: "TestGetLanguageGitAttributes_3", path: "path/not-java/c.java", expected: "notJava"}, + {name: "TestGetLanguageGitAttributes_4", path: "path/d.py", expected: ""}, + } + + for _, test := range tests { + is := gitAttrs.GetLanguage(test.path) + assert.Equal(s.T(), is, test.expected, fmt.Sprintf("%v: is = %v, expected: %v", test.name, is, test.expected)) + } +} diff --git a/utils.go b/utils.go index 387585bf..1569eae2 100644 --- a/utils.go +++ b/utils.go @@ -2,13 +2,7 @@ package enry import ( "bytes" - "errors" - "fmt" - "io/ioutil" - "log" - "os" "path/filepath" - "regexp" "strings" 
"gopkg.in/src-d/enry.v1/data" @@ -29,21 +23,8 @@ var ( configurationLanguages = map[string]bool{ "XML": true, "JSON": true, "TOML": true, "YAML": true, "INI": true, "SQL": true, } - - vendorGitattributes = map[string]bool{} - documentationGitattributes = map[string]bool{} - languageGitattributes = map[*regexp.Regexp]string{} ) -type OverrideError struct { - attribute string - path string -} - -func (e *OverrideError) Error() string { - return fmt.Sprintf(".gitattributes: You are overriding a %s attribute of one of your previous lines %s\n", e.attribute, e.path) -} - // IsAuxiliaryLanguage returns whether or not lang is an auxiliary language. func IsAuxiliaryLanguage(lang string) bool { _, ok := auxiliaryLanguages[lang] @@ -64,19 +45,11 @@ func IsDotFile(path string) bool { // IsVendor returns whether or not path is a vendor path. func IsVendor(path string) bool { - if val, ok := vendorGitattributes[path]; ok { - return val - } - return data.VendorMatchers.Match(path) } // IsDocumentation returns whether or not path is a documentation path. 
func IsDocumentation(path string) bool { - if val, ok := documentationGitattributes[path]; ok { - return val - } - return data.DocumentationMatchers.Match(path) } @@ -95,125 +68,3 @@ func IsBinary(data []byte) bool { return true } - -// LoadGitattributes reads and parses the file .gitattributes which overrides the standard strategies -func LoadGitattributes() { - rawAttributes, err := loadRawGitattributes(".gitattributes") - if err == nil && len(rawAttributes) > 0 { - parseAttributes(rawAttributes) - } -} - -func loadRawGitattributes(name string) (map[string][]string, error) { - gitattributes := map[string][]string{} - data, err := ioutil.ReadFile(name) - if err != nil { - if err != os.ErrNotExist { - log.Println(name + ": " + err.Error()) - } - - return nil, err - } - - if len(data) > 0 { - lines := strings.Split(string(data), "\n") - for _, line := range lines { - loadLine(line, gitattributes) - } - } - - return gitattributes, nil -} - -func loadLine(line string, gitattributes map[string][]string) error { - tokens := strings.Fields(line) - if len(tokens) == 2 { - - gitattributes[tokens[0]] = append(gitattributes[tokens[0]], tokens[1]) - return nil - } else if len(tokens) != 0 { - err := errors.New(".gitattributes: Each line only can have a pair of elements E.g. 
path/to/file attribute") - log.Println(err.Error()) - return err - } - return nil -} - -func parseAttributes(attributes map[string][]string) []error { - errArray := []error{} - for key, values := range attributes { - for _, val := range values { - err := parseAttribute(key, val) - if err != nil { - errArray = append(errArray, err) - } - } - } - - return errArray -} - -func parseAttribute(key string, attribute string) error { - var err error - switch { - case strings.Contains(attribute, "linguist-vendored"): - err = processVendorAttr(key, attribute) - case strings.Contains(attribute, "linguist-documentation"): - err = processDocumentationAttr(key, attribute) - case strings.Contains(attribute, "linguist-language="): - err = processLanguageAttr(key, attribute) - default: - err = errors.New(fmt.Sprintf("gitattributes: The matcher %s doesn't exists\n", attribute)) - log.Printf(err.Error()) - } - return err -} - -func processVendorAttr(key string, attribute string) error { - var err error - if _, ok := vendorGitattributes[key]; ok { - err = &OverrideError{attribute: "vendor", path: key} - } - - switch { - case attribute == "linguist-vendored": - vendorGitattributes[key] = true - case attribute == "linguist-vendored=false": - vendorGitattributes[key] = false - } - - return err -} - -func processDocumentationAttr(key string, attribute string) error { - var err error - if _, ok := documentationGitattributes[key]; ok { - err = &OverrideError{attribute: "documentation", path: key} - } - - switch { - case attribute == "linguist-documentation": - documentationGitattributes[key] = true - case attribute == "linguist-documentation=false": - documentationGitattributes[key] = false - } - - return err -} - -func processLanguageAttr(regExpString string, attribute string) error { - tokens := strings.SplitN(attribute, "=", 2) - regExp, err := regexp.Compile(regExpString) - if err != nil { - log.Printf(err.Error()) - return err - } - lang, _ := GetLanguageByAlias(tokens[1]) - if lang != 
OtherLanguage { - languageGitattributes[regExp] = lang - } else { - languageGitattributes[regExp] = tokens[1] - } - - return nil -} From 8d2a353012e8b78a81aa35e35a40184eb4d2874a Mon Sep 17 00:00:00 2001 From: David Paz Date: Tue, 18 Jul 2017 11:15:25 +0200 Subject: [PATCH 8/9] Changes in the README --- README.md | 11 +++++++---- cli/enry/main.go | 2 +- utils_test.go | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 29b96f2c..f731597f 100644 --- a/README.md +++ b/README.md @@ -140,6 +140,7 @@ Using [linguist/samples](https://github.com/github/linguist/tree/master/samples) * all files for SQL language fall to the classifier because we don't parse this [disambiguator expresion](https://github.com/github/linguist/blob/master/lib/linguist/heuristics.rb#L433) for `*.sql` files right. This expression doesn't comply with the pattern for the rest of [heuristics.rb](https://github.com/github/linguist/blob/master/lib/linguist/heuristics.rb) file. + Benchmarks ------------ @@ -172,7 +173,8 @@ to get time averages for main detection function and strategies for the whole sa if you want see measures by sample file -.gitAttributes + +.gitattributes -------------- Like in linguist you can override the strategies via `.gitattributes` file. @@ -183,7 +185,7 @@ Add a `.gitattributes` file to the directory and use the same matchers that you Use the `linguist-vendored` attribute to vendor or un-vendor paths. ``` -$cat .gitattributes +$ cat .gitattributes this-is-a-vendor-directory/ linguist-vendored this-is-not/ linguist-vendored=false ``` @@ -196,10 +198,11 @@ Documentation works the same way as vendored code but using `linguist-documentat If you want some files to be classified according to certain language use `linguist-language=[language]`. 
``` -$cat .gitattributes +$ cat .gitattributes .*\.go linguist-language=MyFavouriteLanguage ``` -Note, that the regular expression that match the file name should be one compatible with go, see: [Golang regexp](https://golang.org/pkg/regexp/). + +Note that the regular expression that matches the file name should be compatible with go, see: [Golang regexp](https://golang.org/pkg/regexp/). Why Enry? diff --git a/cli/enry/main.go b/cli/enry/main.go index e8d6a2ac..c2402db2 100644 --- a/cli/enry/main.go +++ b/cli/enry/main.go @@ -81,7 +81,7 @@ func main() { } language := gitAttributes.GetLanguage(filepath.Base(path)) - if len(language) == 0 { + if language == enry.OtherLanguage { language = enry.GetLanguage(filepath.Base(path), content) if language == enry.OtherLanguage { return nil diff --git a/utils_test.go b/utils_test.go index b1d1e1f5..db9d95d6 100644 --- a/utils_test.go +++ b/utils_test.go @@ -79,3 +79,35 @@ func (s *EnryTestSuite) TestIsBinary() { assert.Equal(s.T(), is, test.expected, fmt.Sprintf("%v: is = %v, expected: %v", test.name, is, test.expected)) } } + +func (s *EnryTestSuite) TestIdDot() { + tests := []struct { + name string + path string + expected bool + }{ + {name: "TestIsDot_1", path: "foo/var/.dotfile", expected: true}, + {name: "TestIsDot_2", path: "foo/var/file", expected: false}, + {name: "TestIsDot_3", path: "foo/var/file.dot", expected: false}, + } + for _, test := range tests { + is := IsDotFile(test.path) + assert.Equal(s.T(), is, test.expected, fmt.Sprintf("%v: is = %v, expected: %v", test.name, is, test.expected)) + } +} + +func (s *EnryTestSuite) TestIsAuxiliaryLanguage() { + tests := []struct { + name string + lang string + expected bool + }{ + {name: "TestIsAuxilaryLang_1", lang: "YAML", expected: true}, + {name: "TestIsAuxilaryLang_2", lang: "Go", expected: false}, + {name: "TestIsAuxilaryLang_3", lang: "JSON", expected: true}, + } + for _, test := range tests { + is := IsAuxiliaryLanguage(test.lang) + assert.Equal(s.T(), is, 
test.expected, fmt.Sprintf("%v: is = %v, expected: %v", test.name, is, test.expected)) + } +} From 11ef5edae52c1e922d162abadaaab474e7679039 Mon Sep 17 00:00:00 2001 From: David Paz Date: Wed, 19 Jul 2017 10:35:38 +0200 Subject: [PATCH 9/9] Pending changes --- README.md | 2 +- common.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index f731597f..d982e059 100644 --- a/README.md +++ b/README.md @@ -178,7 +178,7 @@ if you want see measures by sample file -------------- Like in linguist you can override the strategies via `.gitattributes` file. -Add a `.gitattributes` file to the directory and use the same matchers that you would uses in linguist `linguist-documentation`,`linguist-language` or `linguist-vendored` to do the override. +Add a `.gitattributes` file to the directory and use the same matchers that you would use in linguist `linguist-documentation`,`linguist-language` or `linguist-vendored` to do the override. #### Vendored code diff --git a/common.go b/common.go index a8079769..83ca5f9f 100644 --- a/common.go +++ b/common.go @@ -415,7 +415,7 @@ func GetLanguagesBySpecificClassifier(content []byte, candidates []string, class } // GetLanguagesByGitAttributes returns either a string slice with the language -// if the filename matches with a regExp in .gitattributes or returns a empty slice +// if the filename matches with a regExp in .gitattributes or returns an empty slice // in case no regExp matches the filename. It complies with the signature to be a Strategy type. func GetLanguagesByGitAttributes(filename string, content []byte, candidates []string) []string { gitAttributes := NewGitAttributes()