From e095afed92ea86da4d0b7053468237f538218ba3 Mon Sep 17 00:00:00 2001 From: burntcarrot Date: Mon, 6 Jun 2022 16:33:18 +0530 Subject: [PATCH 01/24] feat(*): add SDK core * support staticcheck * save report to JSON * generate TOML files for each issue * use a markdown parser for parsing issue descriptions --- .github/workflows/lint.yml | 18 ++++ go.mod | 8 ++ go.sum | 4 + sdk/sdk.go | 90 ++++++++++++++++++ sdk/types/types.go | 48 ++++++++++ sdk/utils/processors/staticcheck.go | 71 ++++++++++++++ sdk/utils/utils.go | 138 ++++++++++++++++++++++++++++ 7 files changed, 377 insertions(+) create mode 100644 .github/workflows/lint.yml create mode 100644 go.mod create mode 100644 go.sum create mode 100644 sdk/sdk.go create mode 100644 sdk/types/types.go create mode 100644 sdk/utils/processors/staticcheck.go create mode 100644 sdk/utils/utils.go diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..3de8d66 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,18 @@ +name: Lint +on: [push, pull_request] +jobs: + golangci: + name: lint + runs-on: ubuntu-latest + strategy: + matrix: + go-version: [1.17.x] + steps: + - uses: actions/setup-go@v3 + with: + go-version: ${{ matrix.go-version }} + - uses: actions/checkout@v3 + - name: golangci-lint + uses: golangci/golangci-lint-action@v3 + with: + version: v1.43 diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..263e579 --- /dev/null +++ b/go.mod @@ -0,0 +1,8 @@ +module github.com/deepsourcelabs/deepsource-go + +go 1.17 + +require ( + github.com/BurntSushi/toml v1.1.0 + github.com/yuin/goldmark v1.4.12 +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..26dae45 --- /dev/null +++ b/go.sum @@ -0,0 +1,4 @@ +github.com/BurntSushi/toml v1.1.0 h1:ksErzDEI1khOiGPgpwuI7x2ebx/uXQNw7xJpn9Eq1+I= +github.com/BurntSushi/toml v1.1.0/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= +github.com/yuin/goldmark v1.4.12 h1:6hffw6vALvEDqJ19dOJvJKOoAOKe4NDaTqvd2sktGN0= +github.com/yuin/goldmark v1.4.12/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= diff --git a/sdk/sdk.go b/sdk/sdk.go new file mode 100644 index 0000000..242ddc0 --- /dev/null +++ b/sdk/sdk.go @@ -0,0 +1,90 @@ +package sdk + +import ( + "bytes" + "log" + "os/exec" + + "github.com/deepsourcelabs/deepsource-go/sdk/types" + "github.com/deepsourcelabs/deepsource-go/sdk/utils" + "github.com/deepsourcelabs/deepsource-go/sdk/utils/processors" +) + +// The main analyzer interface. Analyzers must implement Run and Processor. +type Analyzer interface { + Run() error + Processor(result interface{}) (types.AnalysisReport, error) +} + +// CLIAnalyzer is used for creating an analyzer. +type CLIAnalyzer struct { + Name string + Command string + Args []string + ExportOpts ExportOpts +} + +type ExportOpts struct { + Path string + Type string +} + +// Run executes the analyzer and streams the output to the processor. +func (a *CLIAnalyzer) Run() error { + cmd := exec.Command(a.Command, a.Args...) + + // store the process's standard output in a buffer + var out bytes.Buffer + cmd.Stdout = &out + + // TODO: handle exit status 1 + _ = cmd.Run() + + // fetch report from processor + report, err := a.Processor(out.String()) + if err != nil { + return err + } + + // save report to file + err = utils.SaveReport(report, a.ExportOpts.Path, a.ExportOpts.Type) + if err != nil { + return err + } + + return nil +} + +// Processor takes the analyzer output and generates a report. 
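+// A built-in processor is selected based on the analyzer's Name (currently only
+// "staticcheck"); for any other name an empty report is returned and a log
+// message asks the user to implement a custom processor.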
+func (a *CLIAnalyzer) Processor(result interface{}) (types.AnalysisReport, error) { + var report types.AnalysisReport + var err error + + // use custom processors for each major linter/analyzer + switch a.Name { + case "staticcheck": + report, err = processors.StaticCheck(result) + default: + // if a match is not found, the user needs to implement a processor + log.Printf("custom processor needs to be implemented for %s.\n", a.Name) + } + + return report, err +} + +// GenerateTOML helps in generating TOML files for each issue from a JSON file. +func (a *CLIAnalyzer) GenerateTOML(filename string, rootDir string) error { + // fetch parsed issues + issues, err := utils.ParseIssues(filename) + if err != nil { + return err + } + + // generate TOML files + err = utils.BuildTOML(issues, rootDir) + if err != nil { + return err + } + + return nil +} diff --git a/sdk/types/types.go b/sdk/types/types.go new file mode 100644 index 0000000..50370da --- /dev/null +++ b/sdk/types/types.go @@ -0,0 +1,48 @@ +package types + +type Coordinate struct { + Line int `json:"line"` + Column int `json:"column"` +} + +type Position struct { + Begin Coordinate `json:"begin"` + End Coordinate `json:"end"` +} + +type Location struct { + Path string `json:"path"` + Position Position `json:"position"` +} + +type SourceCode struct { + Rendered []byte `json:"rendered"` +} + +type ProcessedData struct { + SourceCode SourceCode `json:"source_code,omitempty"` +} + +type Issue struct { + IssueCode string `json:"issue_code"` + IssueText string `json:"issue_text"` + Location Location `json:"location"` + ProcessedData ProcessedData `json:"processed_data,omitempty"` +} + +// Location of an issue +type IssueLocation struct { + Path string `json:"path"` + Position Position `json:"position"` +} + +type AnalysisError struct { + HMessage string `json:"hmessage"` + Level int `json:"level"` +} + +type AnalysisReport struct { + Issues []Issue `json:"issues"` + Errors []AnalysisError `json:"errors"` + ExtraData interface{} `json:"extra_data"` +} diff --git a/sdk/utils/processors/staticcheck.go b/sdk/utils/processors/staticcheck.go new file mode 100644 index 0000000..19db6d9 --- /dev/null +++ b/sdk/utils/processors/staticcheck.go @@ -0,0 +1,71 @@ +package processors + +import ( + "encoding/json" + "strings" + + "github.com/deepsourcelabs/deepsource-go/sdk/types" +) + +// sccIssue represents a staticcheck issue. +type sccIssue struct { + Code string `json:"code"` + Severity string `json:"severity"` + Location sccIssueLocation `json:"location"` + Message string `json:"message"` +} + +type sccIssueLocation struct { + File string `json:"file"` + Line int `json:"line"` + Column int `json:"column"` +} + +// StaticCheck processor returns a DeepSource compatible analysis report from staticcheck's results. +func StaticCheck(result interface{}) (types.AnalysisReport, error) { + var issue sccIssue + var issues []types.Issue + + // trim newline from stdout + jsonStr := strings.TrimSuffix(result.(string), "\n") + + // parse output and generate issues + lines := strings.Split(jsonStr, "\n") + for _, l := range lines { + err := json.Unmarshal([]byte(l), &issue) + if err != nil { + return types.AnalysisReport{}, err + } + + // convert to a DeepSource issue + dsIssue := convertIssue(issue) + + issues = append(issues, dsIssue) + } + + // populate report + report := types.AnalysisReport{ + Issues: issues, + } + + // return report + return report, nil +} + +// convertIssue is a helper utility for converting a staticcheck issue to a DeepSource issue. 
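+// Only the begin coordinate of the position is populated; the end coordinate is
+// left at its zero value.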
+func convertIssue(issue sccIssue) types.Issue { + convertedIssue := types.Issue{ + IssueCode: issue.Code, + IssueText: issue.Message, + Location: types.Location{ + Path: issue.Location.File, + Position: types.Position{ + Begin: types.Coordinate{ + Line: issue.Location.Line, + Column: issue.Location.Column, + }, + }, + }, + } + return convertedIssue +} diff --git a/sdk/utils/utils.go b/sdk/utils/utils.go new file mode 100644 index 0000000..00ab73e --- /dev/null +++ b/sdk/utils/utils.go @@ -0,0 +1,138 @@ +package utils + +import ( + "bytes" + "encoding/json" + "errors" + "fmt" + "io/ioutil" + "os" + "path" + + "github.com/BurntSushi/toml" + "github.com/deepsourcelabs/deepsource-go/sdk/types" + "github.com/yuin/goldmark" + "github.com/yuin/goldmark/extension" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/renderer/html" +) + +type IssueMeta struct { + Code string `json:"code"` + Text string `json:"text"` + ShortDescription string `json:"short_desc"` + Description string `json:"desc"` +} + +// SaveReport saves the analysis report to the local filesystem. +func SaveReport(report types.AnalysisReport, filename string, exportType string) error { + var err error + + switch exportType { + case "json": + err = exportJSON(report, filename) + default: + return errors.New("export type not supported. supported types include: json") + } + + return err +} + +// exportJSON is a helper utility for saving the analysis report in a JSON format. +func exportJSON(report types.AnalysisReport, filename string) error { + data, err := json.MarshalIndent(report, "", " ") + if err != nil { + return err + } + + if err = ioutil.WriteFile(filename, data, 0644); err != nil { + return err + } + + return nil +} + +// ParseIssues reads a JSON file containing all issues, and returns all issues. +func ParseIssues(filename string) ([]IssueMeta, error) { + content, err := ioutil.ReadFile(filename) + if err != nil { + return nil, err + } + + var issues []IssueMeta + var parsedIssues []IssueMeta + + err = json.Unmarshal(content, &issues) + if err != nil { + return nil, err + } + + for _, issue := range issues { + // read description from a markdown file + desc, err := readMarkdown(issue.Description) + if err != nil { + return nil, err + } + + issue.Description = desc + parsedIssues = append(parsedIssues, issue) + } + + return parsedIssues, nil +} + +// readMarkdown is a helper utility used for parsing a markdown file. +func readMarkdown(filename string) (string, error) { + content, err := ioutil.ReadFile(filename) + if err != nil { + return "", err + } + + // TODO: sanitize HTML + md := goldmark.New( + goldmark.WithExtensions(extension.GFM), + goldmark.WithParserOptions( + parser.WithAutoHeadingID(), + ), + goldmark.WithRendererOptions( + html.WithHardWraps(), + html.WithXHTML(), + ), + ) + + var buf bytes.Buffer + if err := md.Convert(content, &buf); err != nil { + return "", err + } + + return buf.String(), nil +} + +// BuildTOML uses issues to generate TOML files to a directory. +func BuildTOML(issues []IssueMeta, rootDir string) error { + for _, issue := range issues { + // The unique identifier (filename) is based on the issue code. TOML files cannot be generated for issues having an invalid/empty code. + if issue.Code == "" { + return errors.New("invalid issue code. 
cannot generate toml") + } + + // generate file path based on root directory and filename + filename := fmt.Sprintf("%s.toml", issue.Code) + tomlPath := path.Join(rootDir, filename) + + f, err := os.Create(tomlPath) + if err != nil { + return err + } + + if err := toml.NewEncoder(f).Encode(issue); err != nil { + return err + } + + if err := f.Close(); err != nil { + return err + } + } + + return nil +} From d76804799bb6b2bc7c5dd088b01e496ab9e73396 Mon Sep 17 00:00:00 2001 From: burntcarrot Date: Tue, 7 Jun 2022 09:39:12 +0530 Subject: [PATCH 02/24] tests(*): integration tests using tree-sitter * integration tests for analyzers * parse triggers using tree-sitter * utilities for working with tree-sitter * staticcheck trigger for tree-sitter --- go.mod | 1 + go.sum | 12 +++ sdk/sdk_test.go | 53 +++++++++++ sdk/triggers/common.go | 119 ++++++++++++++++++++++++ sdk/triggers/staticcheck/staticcheck.go | 7 ++ 5 files changed, 192 insertions(+) create mode 100644 sdk/sdk_test.go create mode 100644 sdk/triggers/common.go create mode 100644 sdk/triggers/staticcheck/staticcheck.go diff --git a/go.mod b/go.mod index 263e579..451f5fe 100644 --- a/go.mod +++ b/go.mod @@ -4,5 +4,6 @@ go 1.17 require ( github.com/BurntSushi/toml v1.1.0 + github.com/smacker/go-tree-sitter v0.0.0-20220421092837-ec55f7cfeaf4 github.com/yuin/goldmark v1.4.12 ) diff --git a/go.sum b/go.sum index 26dae45..54771e5 100644 --- a/go.sum +++ b/go.sum @@ -1,4 +1,16 @@ github.com/BurntSushi/toml v1.1.0 h1:ksErzDEI1khOiGPgpwuI7x2ebx/uXQNw7xJpn9Eq1+I= github.com/BurntSushi/toml v1.1.0/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= +github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/smacker/go-tree-sitter v0.0.0-20220421092837-ec55f7cfeaf4 h1:UFOHRX5nrxNCVORhicjy31nzSVt9rEjf/YRcx2Dc3MM= +github.com/smacker/go-tree-sitter v0.0.0-20220421092837-ec55f7cfeaf4/go.mod h1:EiUuVMUfLQj8Sul+S8aKWJwQy7FRYnJCO2EWzf8F5hk= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/yuin/goldmark v1.4.12 h1:6hffw6vALvEDqJ19dOJvJKOoAOKe4NDaTqvd2sktGN0= github.com/yuin/goldmark v1.4.12/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= diff --git a/sdk/sdk_test.go b/sdk/sdk_test.go new file mode 100644 index 0000000..b88df26 --- /dev/null +++ b/sdk/sdk_test.go @@ -0,0 +1,53 @@ +package sdk + +import ( + "encoding/json" + "os" + "testing" + + "github.com/deepsourcelabs/deepsource-go/sdk/triggers" + "github.com/deepsourcelabs/deepsource-go/sdk/types" +) + +func TestStaticCheck(t *testing.T) { + t.Run("verify staticcheck", func(t *testing.T) { + a := CLIAnalyzer{ + Name: "staticcheck", + Command: "staticcheck", + Args: []string{"-f", "json", "./triggers/staticcheck/..."}, + ExportOpts: ExportOpts{ + Path: "triggers/staticcheck/issues.json", + Type: "json", + }, + } + + err 
:= a.Run() + if err != nil { + t.Error(err) + } + + // read the generated report + reportContent, err := os.ReadFile("triggers/staticcheck/issues.json") + if err != nil { + t.Error(err) + } + + var report types.AnalysisReport + err = json.Unmarshal(reportContent, &report) + if err != nil { + t.Error(err) + } + + // do a verification check for the generated report + err = triggers.Verify(report, "triggers/staticcheck/staticcheck.go") + if err != nil { + t.Error(err) + } + + // cleanup after test + err = os.Remove("triggers/staticcheck/issues.json") + if err != nil { + t.Error(err) + } + }) +} diff --git a/sdk/triggers/common.go b/sdk/triggers/common.go new file mode 100644 index 0000000..a7e70cd --- /dev/null +++ b/sdk/triggers/common.go @@ -0,0 +1,119 @@ +package triggers + +import ( + "context" + "errors" + "os" + "path/filepath" + "regexp" + "strings" + + sitter "github.com/smacker/go-tree-sitter" + "github.com/smacker/go-tree-sitter/golang" + + "github.com/deepsourcelabs/deepsource-go/sdk/types" +) + +// ParsedIssue represents an issue parsed using tree-sitter. +type ParsedIssue struct { + IssueCode string + Line int +} + +// Verify compares the generated report and parsed issues using tree-sitter. +func Verify(report types.AnalysisReport, filename string) error { + parser := sitter.NewParser() + + // get language + lang, err := getLanguage(filename) + if err != nil { + return err + } + parser.SetLanguage(lang) + + // read report + content, err := os.ReadFile(filename) + if err != nil { + return err + } + + // generate tree + ctx := context.Background() + tree, err := parser.ParseCtx(ctx, nil, content) + if err != nil { + return err + } + + // create a query for fetching comments + queryStr := "(comment) @comment" + query, err := sitter.NewQuery([]byte(queryStr), lang) + if err != nil { + return err + } + + // execute query on root node + qc := sitter.NewQueryCursor() + n := tree.RootNode() + qc.Exec(query, n) + defer qc.Close() + + var parsedIssues []ParsedIssue + + // iterate over matches + for { + m, ok := qc.NextMatch() + if !ok { + break + } + + for _, c := range m.Captures { + node := c.Node + nodeContent := node.Content(content) + + // check if the comment contains raise annotation + if strings.Contains(nodeContent, "raise") { + // find match using expression + exp := regexp.MustCompile(`.+ raise: `) + submatches := exp.FindStringSubmatch(nodeContent) + + if len(submatches) != 0 { + substrings := exp.Split(nodeContent, -1) + if len(substrings) > 1 { + issueCodes := strings.Split(substrings[1], ",") + // add issue to parsedIssues + for _, issueCode := range issueCodes { + parsedIssue := ParsedIssue{IssueCode: strings.TrimSpace(issueCode), Line: int(node.StartPoint().Row) + 1} + parsedIssues = append(parsedIssues, parsedIssue) + } + } + } + } + } + } + + // if number of issues don't match, exit early. + if len(parsedIssues) != len(report.Issues) { + return errors.New("mismatch between the number of reported issues and parsed issues") + } + + // compare the report's issues and parsed issues + for i, issue := range report.Issues { + if (parsedIssues[i].Line != issue.Location.Position.Begin.Line) && (parsedIssues[i].IssueCode != issue.IssueCode) { + return errors.New("mismatch between parsed issue and report issue") + } + } + + return nil +} + +// getLanguage is a helper for fetching tree-sitter language based on the file's extension. 
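+// Only Go files (".go") are supported at the moment; any other extension returns an error.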
+func getLanguage(filename string) (*sitter.Language, error) { + extension := filepath.Ext(filename) + + switch extension { + case ".go": + return golang.GetLanguage(), nil + default: + return nil, errors.New("language not supported") + } +} diff --git a/sdk/triggers/staticcheck/staticcheck.go b/sdk/triggers/staticcheck/staticcheck.go new file mode 100644 index 0000000..d94ac7d --- /dev/null +++ b/sdk/triggers/staticcheck/staticcheck.go @@ -0,0 +1,7 @@ +package pkg + +import "fmt" + +func trigger() { // raise: U1000 + fmt.Sprint("trigger") // raise: SA4017, S1039 +} From 5ad1b10358d32af78068f1508c725cd69b2549a1 Mon Sep 17 00:00:00 2001 From: burntcarrot Date: Tue, 7 Jun 2022 09:59:29 +0530 Subject: [PATCH 03/24] ci: add golangci-lint configuration --- .golangci.yml | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .golangci.yml diff --git a/.golangci.yml b/.golangci.yml new file mode 100644 index 0000000..a51f2a8 --- /dev/null +++ b/.golangci.yml @@ -0,0 +1,3 @@ +run: + skip-dirs: + - sdk/triggers From ceef64243c05d6e3e19488fd1c7bb396c1dd4c8a Mon Sep 17 00:00:00 2001 From: burntcarrot Date: Tue, 7 Jun 2022 10:55:43 +0530 Subject: [PATCH 04/24] fix(sdk/utils): sanitize markdown --- go.mod | 7 +++++++ go.sum | 13 +++++++++++++ sdk/utils/utils.go | 19 ++++++++----------- 3 files changed, 28 insertions(+), 11 deletions(-) diff --git a/go.mod b/go.mod index 451f5fe..d8fc1b5 100644 --- a/go.mod +++ b/go.mod @@ -4,6 +4,13 @@ go 1.17 require ( github.com/BurntSushi/toml v1.1.0 + github.com/microcosm-cc/bluemonday v1.0.18 github.com/smacker/go-tree-sitter v0.0.0-20220421092837-ec55f7cfeaf4 github.com/yuin/goldmark v1.4.12 ) + +require ( + github.com/aymerick/douceur v0.2.0 // indirect + github.com/gorilla/css v1.0.0 // indirect + golang.org/x/net v0.0.0-20210614182718-04defd469f4e // indirect +) diff --git a/go.sum b/go.sum index 54771e5..5b8fcaf 100644 --- a/go.sum +++ b/go.sum @@ -1,7 +1,13 @@ github.com/BurntSushi/toml v1.1.0 h1:ksErzDEI1khOiGPgpwuI7x2ebx/uXQNw7xJpn9Eq1+I= github.com/BurntSushi/toml v1.1.0/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= +github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk= +github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4= github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/gorilla/css v1.0.0 h1:BQqNyPTi50JCFMTw/b67hByjMVXZRwGha6wxVGkeihY= +github.com/gorilla/css v1.0.0/go.mod h1:Dn721qIggHpt4+EFCcTLTU/vk5ySda2ReITrtgBl60c= +github.com/microcosm-cc/bluemonday v1.0.18 h1:6HcxvXDAi3ARt3slx6nTesbvorIc3QeTzBNRvWktHBo= +github.com/microcosm-cc/bluemonday v1.0.18/go.mod h1:Z0r70sCuXHig8YpBzCc5eGHAap2K7e/u082ZUpDRRqM= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/smacker/go-tree-sitter v0.0.0-20220421092837-ec55f7cfeaf4 h1:UFOHRX5nrxNCVORhicjy31nzSVt9rEjf/YRcx2Dc3MM= @@ -11,6 +17,13 @@ github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJy github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/yuin/goldmark v1.4.12 h1:6hffw6vALvEDqJ19dOJvJKOoAOKe4NDaTqvd2sktGN0= github.com/yuin/goldmark v1.4.12/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +golang.org/x/net v0.0.0-20210614182718-04defd469f4e 
h1:XpT3nA5TvE525Ne3hInMh6+GETgn27Zfm9dxsThnX2Q= +golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= diff --git a/sdk/utils/utils.go b/sdk/utils/utils.go index 00ab73e..a212fc7 100644 --- a/sdk/utils/utils.go +++ b/sdk/utils/utils.go @@ -11,10 +11,9 @@ import ( "github.com/BurntSushi/toml" "github.com/deepsourcelabs/deepsource-go/sdk/types" + "github.com/microcosm-cc/bluemonday" "github.com/yuin/goldmark" "github.com/yuin/goldmark/extension" - "github.com/yuin/goldmark/parser" - "github.com/yuin/goldmark/renderer/html" ) type IssueMeta struct { @@ -88,16 +87,9 @@ func readMarkdown(filename string) (string, error) { return "", err } - // TODO: sanitize HTML + // use the Github-flavored Markdown extension md := goldmark.New( goldmark.WithExtensions(extension.GFM), - goldmark.WithParserOptions( - parser.WithAutoHeadingID(), - ), - goldmark.WithRendererOptions( - html.WithHardWraps(), - html.WithXHTML(), - ), ) var buf bytes.Buffer @@ -105,7 +97,12 @@ func readMarkdown(filename string) (string, error) { return "", err } - return buf.String(), nil + // sanitize markdown body + body := buf.String() + p := bluemonday.UGCPolicy() + sanitizedBody := p.Sanitize(body) + + return sanitizedBody, nil } // BuildTOML uses issues to generate TOML files to a directory. 
From bd4a0feb5bc410bcc9aab930348cba264752fd12 Mon Sep 17 00:00:00 2001 From: burntcarrot Date: Sat, 11 Jun 2022 18:29:57 +0530 Subject: [PATCH 05/24] refactor(*): refactor SDK and add tests * add unit tests * update SDK structure * clean analyzer fields * add unix processor * add utilities --- .golangci.yml | 2 +- analyzers/sdk.go | 101 +++++++ analyzers/sdk_test.go | 167 +++++++++++ analyzers/testdata/issues.toml | 17 ++ .../triggers/staticcheck/staticcheck.go | 0 {sdk => analyzers}/types/types.go | 0 analyzers/utils/processors/unix.go | 84 ++++++ analyzers/utils/utils.go | 282 ++++++++++++++++++ sdk/sdk.go | 90 ------ sdk/sdk_test.go | 53 ---- sdk/triggers/common.go | 119 -------- sdk/utils/processors/staticcheck.go | 71 ----- sdk/utils/utils.go | 135 --------- 13 files changed, 652 insertions(+), 469 deletions(-) create mode 100644 analyzers/sdk.go create mode 100644 analyzers/sdk_test.go create mode 100644 analyzers/testdata/issues.toml rename {sdk => analyzers/testdata}/triggers/staticcheck/staticcheck.go (100%) rename {sdk => analyzers}/types/types.go (100%) create mode 100644 analyzers/utils/processors/unix.go create mode 100644 analyzers/utils/utils.go delete mode 100644 sdk/sdk.go delete mode 100644 sdk/sdk_test.go delete mode 100644 sdk/triggers/common.go delete mode 100644 sdk/utils/processors/staticcheck.go delete mode 100644 sdk/utils/utils.go diff --git a/.golangci.yml b/.golangci.yml index a51f2a8..02b2438 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -1,3 +1,3 @@ run: skip-dirs: - - sdk/triggers + - analyzers/triggers diff --git a/analyzers/sdk.go b/analyzers/sdk.go new file mode 100644 index 0000000..39b7068 --- /dev/null +++ b/analyzers/sdk.go @@ -0,0 +1,101 @@ +package analyzers + +import ( + "bytes" + "os/exec" + + "github.com/deepsourcelabs/deepsource-go/analyzers/types" + "github.com/deepsourcelabs/deepsource-go/analyzers/utils" +) + +type Processor interface { + Process(bytes.Buffer) (types.AnalysisReport, error) +} + +// CLIAnalyzer is used for creating an analyzer. +type CLIAnalyzer struct { + Name string + Command string + Args []string + AllowedExitCodes []int + Processor Processor + stdout *bytes.Buffer + stderr *bytes.Buffer + exitCode int +} + +// Stdout returns the stdout buffer. +func (a *CLIAnalyzer) Stdout() bytes.Buffer { + return *a.stdout +} + +// Stderr returns the stderr buffer. +func (a *CLIAnalyzer) Stderr() bytes.Buffer { + return *a.stderr +} + +// Run executes the analyzer and streams the output to the processor. +func (a *CLIAnalyzer) Run() error { + outBuf, errBuf, exitCode, err := runCmd(a.Command, a.Args, a.AllowedExitCodes) + if err != nil { + return err + } + + a.stdout = &outBuf + a.stderr = &errBuf + a.exitCode = exitCode + + return nil +} + +// runCmd returns the stdout and stderr streams, along with an exit code and error after running the command. +func runCmd(command string, args []string, allowedExitCodes []int) (bytes.Buffer, bytes.Buffer, int, error) { + cmd := exec.Command(command, args...) 
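+	// the command is executed directly (no shell), inheriting the caller's
+	// environment and working directory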
+ + // store stdout and stderr in buffers + var outBuf, errBuf bytes.Buffer + cmd.Stdout = &outBuf + cmd.Stderr = &errBuf + + err := cmd.Start() + if err != nil { + return bytes.Buffer{}, bytes.Buffer{}, -1, err + } + + // wait for the command to exit + err = cmd.Wait() + if err != nil { + if exitErr, ok := err.(*exec.ExitError); ok { + exitCode := exitErr.ExitCode() + // if exit code is allowed, return the buffers with no error + for _, v := range allowedExitCodes { + if v == exitCode { + return outBuf, errBuf, exitCode, nil + } + } + } else { + // in case of errors, exit code is -1 + return outBuf, errBuf, -1, err + } + } + + // default exit code is 0 + return outBuf, errBuf, 0, nil +} + +// GenerateTOML helps in generating TOML files for each issue from a JSON file. +func (a *CLIAnalyzer) GenerateTOML(filename string, rootDir string) error { + // fetch parsed issues + issues, err := utils.ParseIssues(filename) + if err != nil { + return err + } + + // generate TOML files + err = utils.BuildTOML(issues, rootDir) + if err != nil { + return err + } + + return nil +} diff --git a/analyzers/sdk_test.go b/analyzers/sdk_test.go new file mode 100644 index 0000000..cb899c7 --- /dev/null +++ b/analyzers/sdk_test.go @@ -0,0 +1,167 @@ +package analyzers + +import ( + "bytes" + "encoding/json" + "errors" + "os" + "regexp" + "strconv" + "strings" + "testing" + + "github.com/deepsourcelabs/deepsource-go/analyzers/types" + "github.com/deepsourcelabs/deepsource-go/analyzers/utils" +) + +type StaticCheckProcessor struct{} + +// StaticCheck processor returns a DeepSource-compatible analysis report from staticcheck's results. +func (s *StaticCheckProcessor) Process(buf bytes.Buffer) (types.AnalysisReport, error) { + var issues []types.Issue + + // trim newline from buffer output + lines := strings.Split(buf.String(), "\n") + + for _, line := range lines { + // trim spaces + line = strings.TrimSpace(line) + if line == "" { + break + } + + // compile regular expression for parsing unix format + + // group descriptions: + // 0: complete string + // 1: path + // 2: line number + // 3: column number + // 4: message + exp, err := regexp.Compile("(.+):(.):(.): (.+)") + if err != nil { + return types.AnalysisReport{}, err + } + + // get groups + groups := exp.FindAllStringSubmatch(strings.TrimSuffix(line, "\n"), -1) + if len(groups) == 0 { + return types.AnalysisReport{}, errors.New("failed to parse output string") + } + + // convert line and column numbers to int + line, err := strconv.Atoi(groups[0][2]) + if err != nil { + return types.AnalysisReport{}, err + } + + col, err := strconv.Atoi(groups[0][3]) + if err != nil { + return types.AnalysisReport{}, err + } + + // compile regular expression for parsing staticcheck message + + // group descriptions: + // 0: complete string + // 1: partial message string + // 2: issue code + // 3: parentheses + messageExp, err := regexp.Compile("(.+)[(](.+)(.+)") + if err != nil { + return types.AnalysisReport{}, err + } + messageGroups := messageExp.FindAllStringSubmatch(groups[0][4], -1) + if len(messageGroups) == 0 { + return types.AnalysisReport{}, errors.New("failed to parse message") + } + + // populate issue + issue := types.Issue{ + IssueCode: messageGroups[0][2], + IssueText: groups[0][4], + Location: types.Location{ + Path: groups[0][1], + Position: types.Position{ + Begin: types.Coordinate{ + Line: line, + Column: col, + }, + }, + }, + } + + issues = append(issues, issue) + } + + // populate report + report := types.AnalysisReport{ + Issues: issues, + } + + // 
return report + return report, nil +} + +func TestStaticCheck(t *testing.T) { + t.Run("staticcheck analyzer", func(t *testing.T) { + a := CLIAnalyzer{ + Name: "staticcheck", + Command: "staticcheck", + Args: []string{"-f", "text", "./testdata/triggers/staticcheck/..."}, + Processor: &StaticCheckProcessor{}, + } + + err := a.Run() + if err != nil { + t.Fatal(err) + } + + processedReport, err := a.Processor.Process(a.Stdout()) + if err != nil { + t.Fatal(err) + } + + // save report + err = utils.SaveReport(processedReport, "testdata/triggers/staticcheck/issues.json", "json") + if err != nil { + t.Fatal(err) + } + + // read the generated report + reportContent, err := os.ReadFile("testdata/triggers/staticcheck/issues.json") + if err != nil { + t.Fatal(err) + } + + var report types.AnalysisReport + err = json.Unmarshal(reportContent, &report) + if err != nil { + t.Fatal(err) + } + + // do a verification check for the generated report + err = utils.Verify(report, "testdata/triggers/staticcheck/staticcheck.go") + if err != nil { + t.Fatal(err) + } + + // cleanup after test + err = os.Remove("testdata/triggers/staticcheck/issues.json") + if err != nil { + t.Fatal(err) + } + + // test TOML generation + err = a.GenerateTOML("testdata/issues.toml", "testdata/toml") + if err != nil { + t.Fatal(err) + } + + // cleanup TOMLs + err = os.RemoveAll("testdata/toml") + if err != nil { + t.Fatal(err) + } + }) +} diff --git a/analyzers/testdata/issues.toml b/analyzers/testdata/issues.toml new file mode 100644 index 0000000..0867b1c --- /dev/null +++ b/analyzers/testdata/issues.toml @@ -0,0 +1,17 @@ +[[issue]] + +Code = "SA4017" +Text = "Sprint is a pure function but its return value is ignored" +ShortDescription = "Sprint is a pure function but its return value is ignored" +Description = """ +## hello +""" + +[[issue]] + +Code = "S1039" +Text = "unnecessary use of fmt.Sprint" +ShortDescription = "unnecessary use of fmt.Sprint" +Description = """ +## Example +""" diff --git a/sdk/triggers/staticcheck/staticcheck.go b/analyzers/testdata/triggers/staticcheck/staticcheck.go similarity index 100% rename from sdk/triggers/staticcheck/staticcheck.go rename to analyzers/testdata/triggers/staticcheck/staticcheck.go diff --git a/sdk/types/types.go b/analyzers/types/types.go similarity index 100% rename from sdk/types/types.go rename to analyzers/types/types.go diff --git a/analyzers/utils/processors/unix.go b/analyzers/utils/processors/unix.go new file mode 100644 index 0000000..f88636f --- /dev/null +++ b/analyzers/utils/processors/unix.go @@ -0,0 +1,84 @@ +package processors + +import ( + "bytes" + "errors" + "regexp" + "strconv" + "strings" + + "github.com/deepsourcelabs/deepsource-go/analyzers/types" +) + +// UnixProcessor is a processor for unix-formatted strings. 
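+// It expects one issue per line in the form "path:line:column: message" and does
+// not attach an issue code. Note that the current pattern only matches
+// single-character line and column fields.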
+type UnixProcessor struct{} + +func (u *UnixProcessor) Unix(buf bytes.Buffer) (types.AnalysisReport, error) { + var issues []types.Issue + + // trim newline from buffer output + lines := strings.Split(buf.String(), "\n") + + for _, line := range lines { + // trim spaces + line = strings.TrimSpace(line) + if line == "" { + break + } + + // compile regular expression for parsing unix format + + // group descriptions: + // 0: complete string + // 1: path + // 2: line number + // 3: column number + // 4: message + exp, err := regexp.Compile("(.+):(.):(.): (.+)") + if err != nil { + return types.AnalysisReport{}, err + } + + // get groups + groups := exp.FindAllStringSubmatch(strings.TrimSuffix(line, "\n"), -1) + if len(groups) == 0 { + return types.AnalysisReport{}, errors.New("failed to parse message") + } + + // convert line and column numbers to int + line, err := strconv.Atoi(groups[0][2]) + if err != nil { + return types.AnalysisReport{}, err + } + + col, err := strconv.Atoi(groups[0][3]) + if err != nil { + return types.AnalysisReport{}, err + } + + // populate issue + issue := types.Issue{ + IssueCode: "", + IssueText: groups[0][4], + Location: types.Location{ + Path: groups[0][1], + Position: types.Position{ + Begin: types.Coordinate{ + Line: line, + Column: col, + }, + }, + }, + } + + issues = append(issues, issue) + } + + // populate report + report := types.AnalysisReport{ + Issues: issues, + } + + // return report + return report, nil +} diff --git a/analyzers/utils/utils.go b/analyzers/utils/utils.go new file mode 100644 index 0000000..ec7aa54 --- /dev/null +++ b/analyzers/utils/utils.go @@ -0,0 +1,282 @@ +package utils + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io/ioutil" + "os" + "path" + "path/filepath" + "regexp" + "sort" + "strings" + + "github.com/BurntSushi/toml" + "github.com/deepsourcelabs/deepsource-go/analyzers/types" + "github.com/microcosm-cc/bluemonday" + sitter "github.com/smacker/go-tree-sitter" + "github.com/smacker/go-tree-sitter/golang" + "github.com/yuin/goldmark" + "github.com/yuin/goldmark/extension" +) + +type IssueMeta struct { + Code string `json:"code"` + Text string `json:"text"` + ShortDescription string `json:"short_desc"` + Description string `json:"desc"` +} + +type IssueTOML struct { + Issue []map[string]interface{} +} + +// SaveReport saves the analysis report to the local filesystem. +func SaveReport(report types.AnalysisReport, filename string, exportType string) error { + var err error + + switch exportType { + case "json": + err = exportJSON(report, filename) + default: + return errors.New("export type not supported. supported types include: json") + } + + return err +} + +// exportJSON is a helper utility for saving the analysis report in a JSON format. +func exportJSON(report types.AnalysisReport, filename string) error { + data, err := json.MarshalIndent(report, "", " ") + if err != nil { + return err + } + + if err = ioutil.WriteFile(filename, data, 0644); err != nil { + return err + } + + return nil +} + +// ParseIssues reads a JSON file containing all issues, and returns all issues. 
+func ParseIssues(filename string) ([]IssueMeta, error) { + content, err := ioutil.ReadFile(filename) + if err != nil { + return nil, err + } + + var issues []IssueMeta + var parsedIssues []IssueMeta + + var issuesTOML IssueTOML + err = toml.Unmarshal(content, &issuesTOML) + if err != nil { + return nil, err + } + + for _, issueTOML := range issuesTOML.Issue { + is := IssueMeta{ + Code: issueTOML["Code"].(string), + Text: issueTOML["Text"].(string), + ShortDescription: issueTOML["ShortDescription"].(string), + Description: issueTOML["Description"].(string), + } + + issues = append(issues, is) + } + + for _, issue := range issues { + // parse markdown content + desc, err := readMarkdown(issue.Description) + if err != nil { + return nil, err + } + + issue.Description = desc + parsedIssues = append(parsedIssues, issue) + } + + return parsedIssues, nil +} + +// readMarkdown is a helper utility used for parsing markdown content. +func readMarkdown(content string) (string, error) { + // use the Github-flavored Markdown extension + md := goldmark.New( + goldmark.WithExtensions(extension.GFM), + ) + + var buf bytes.Buffer + if err := md.Convert([]byte(content), &buf); err != nil { + return "", err + } + + // sanitize markdown body + body := buf.String() + p := bluemonday.UGCPolicy() + sanitizedBody := p.Sanitize(body) + + return sanitizedBody, nil +} + +// BuildTOML uses issues to generate TOML files to a directory. +func BuildTOML(issues []IssueMeta, rootDir string) error { + for _, issue := range issues { + // The unique identifier (filename) is based on the issue code. TOML files cannot be generated for issues having an invalid/empty code. + if issue.Code == "" { + return errors.New("invalid issue code. cannot generate toml") + } + + // if rootDir doesn't exist, create one + if _, err := os.Stat(rootDir); err != nil { + os.Mkdir(rootDir, 0700) + } + + // generate file path based on root directory and filename + filename := fmt.Sprintf("%s.toml", issue.Code) + tomlPath := path.Join(rootDir, filename) + + f, err := os.Create(tomlPath) + if err != nil { + return err + } + + if err := toml.NewEncoder(f).Encode(issue); err != nil { + return err + } + + if err := f.Close(); err != nil { + return err + } + } + + return nil +} + +// ParsedIssue represents an issue parsed using tree-sitter. +type ParsedIssue struct { + IssueCode string + Line int +} + +// Verify compares the generated report and parsed issues using tree-sitter. 
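+// Trigger files annotate expected issues with comments of the form
+// "// raise: CODE1, CODE2"; each annotation's line and issue code are checked
+// against the issues present in the report.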
+func Verify(report types.AnalysisReport, filename string) error { + parser := sitter.NewParser() + + // get language + lang, err := getLanguage(filename) + if err != nil { + return err + } + parser.SetLanguage(lang) + + // read report + content, err := os.ReadFile(filename) + if err != nil { + return err + } + + // generate tree + ctx := context.Background() + tree, err := parser.ParseCtx(ctx, nil, content) + if err != nil { + return err + } + + // create a query for fetching comments + queryStr := "(comment) @comment" + query, err := sitter.NewQuery([]byte(queryStr), lang) + if err != nil { + return err + } + + // execute query on root node + qc := sitter.NewQueryCursor() + n := tree.RootNode() + qc.Exec(query, n) + defer qc.Close() + + var parsedIssues []ParsedIssue + + // iterate over matches + for { + m, ok := qc.NextMatch() + if !ok { + break + } + + for _, c := range m.Captures { + // get node content + node := c.Node + nodeContent := node.Content(content) + + // check if the comment contains raise annotation + if strings.Contains(nodeContent, "raise") { + // find match using expression + exp := regexp.MustCompile(`.+ raise: `) + submatches := exp.FindStringSubmatch(nodeContent) + + if len(submatches) != 0 { + substrings := exp.Split(nodeContent, -1) + if len(substrings) > 1 { + issueCodes := strings.Split(substrings[1], ",") + // add issue to parsedIssues + for _, issueCode := range issueCodes { + parsedIssue := ParsedIssue{IssueCode: strings.TrimSpace(issueCode), Line: int(node.StartPoint().Row) + 1} + parsedIssues = append(parsedIssues, parsedIssue) + } + } + } + } + } + } + + // if number of issues don't match, exit early. + if len(parsedIssues) != len(report.Issues) { + return errors.New("mismatch between the number of reported issues and parsed issues") + } + + // compare the report's issues and parsed issues + match := compareReport(parsedIssues, report) + if !match { + return errors.New("mismatch between parsed issue and report issue") + } + + return nil +} + +// getLanguage is a helper for fetching a tree-sitter language based on the file's extension. +func getLanguage(filename string) (*sitter.Language, error) { + extension := filepath.Ext(filename) + + switch extension { + case ".go": + return golang.GetLanguage(), nil + default: + return nil, errors.New("language not supported") + } +} + +// compareReport is a helper which checks if the parsed issues are identical to the issues present in the report. +func compareReport(parsedIssues []ParsedIssue, report types.AnalysisReport) bool { + // sort report and parsedIssues by line number + sort.Slice(parsedIssues, func(i, j int) bool { + return parsedIssues[i].Line < parsedIssues[j].Line + }) + + sort.Slice(report.Issues, func(i, j int) bool { + return report.Issues[i].Location.Position.Begin.Line < report.Issues[j].Location.Position.Begin.Line + }) + + for i, issue := range report.Issues { + if (parsedIssues[i].Line != issue.Location.Position.Begin.Line) && (parsedIssues[i].IssueCode != issue.IssueCode) { + return false + } + } + + return true +} diff --git a/sdk/sdk.go b/sdk/sdk.go deleted file mode 100644 index 242ddc0..0000000 --- a/sdk/sdk.go +++ /dev/null @@ -1,90 +0,0 @@ -package sdk - -import ( - "bytes" - "log" - "os/exec" - - "github.com/deepsourcelabs/deepsource-go/sdk/types" - "github.com/deepsourcelabs/deepsource-go/sdk/utils" - "github.com/deepsourcelabs/deepsource-go/sdk/utils/processors" -) - -// The main analyzer interface. Analyzers must implement Run and Processor. 
-type Analyzer interface { - Run() error - Processor(result interface{}) (types.AnalysisReport, error) -} - -// CLIAnalyzer is used for creating an analyzer. -type CLIAnalyzer struct { - Name string - Command string - Args []string - ExportOpts ExportOpts -} - -type ExportOpts struct { - Path string - Type string -} - -// Run executes the analyzer and streams the output to the processor. -func (a *CLIAnalyzer) Run() error { - cmd := exec.Command(a.Command, a.Args...) - - // store the process's standard output in a buffer - var out bytes.Buffer - cmd.Stdout = &out - - // TODO: handle exit status 1 - _ = cmd.Run() - - // fetch report from processor - report, err := a.Processor(out.String()) - if err != nil { - return err - } - - // save report to file - err = utils.SaveReport(report, a.ExportOpts.Path, a.ExportOpts.Type) - if err != nil { - return err - } - - return nil -} - -// Processor takes the analyzer output and generates a report. -func (a *CLIAnalyzer) Processor(result interface{}) (types.AnalysisReport, error) { - var report types.AnalysisReport - var err error - - // use custom processors for each major linter/analyzer - switch a.Name { - case "staticcheck": - report, err = processors.StaticCheck(result) - default: - // if a match is not found, the user needs to implement a processor - log.Printf("custom processor needs to be implemented for %s.\n", a.Name) - } - - return report, err -} - -// GenerateTOML helps in generating TOML files for each issue from a JSON file. -func (a *CLIAnalyzer) GenerateTOML(filename string, rootDir string) error { - // fetch parsed issues - issues, err := utils.ParseIssues(filename) - if err != nil { - return err - } - - // generate TOML files - err = utils.BuildTOML(issues, rootDir) - if err != nil { - return err - } - - return nil -} diff --git a/sdk/sdk_test.go b/sdk/sdk_test.go deleted file mode 100644 index b88df26..0000000 --- a/sdk/sdk_test.go +++ /dev/null @@ -1,53 +0,0 @@ -package sdk - -import ( - "encoding/json" - "os" - "testing" - - "github.com/deepsourcelabs/deepsource-go/sdk/triggers" - "github.com/deepsourcelabs/deepsource-go/sdk/types" -) - -func TestStaticCheck(t *testing.T) { - t.Run("verify staticcheck", func(t *testing.T) { - a := CLIAnalyzer{ - Name: "staticcheck", - Command: "staticcheck", - Args: []string{"-f", "json", "./triggers/staticcheck/..."}, - ExportOpts: ExportOpts{ - Path: "triggers/staticcheck/issues.json", - Type: "json", - }, - } - - err := a.Run() - if err != nil { - t.Error(err) - } - - // read the generated report - reportContent, err := os.ReadFile("triggers/staticcheck/issues.json") - if err != nil { - t.Error(err) - } - - var report types.AnalysisReport - err = json.Unmarshal(reportContent, &report) - if err != nil { - t.Error(err) - } - - // do a verification check for the generated report - err = triggers.Verify(report, "triggers/staticcheck/staticcheck.go") - if err != nil { - t.Error(err) - } - - // cleanup after test - err = os.Remove("triggers/staticcheck/issues.json") - if err != nil { - t.Error(err) - } - }) -} diff --git a/sdk/triggers/common.go b/sdk/triggers/common.go deleted file mode 100644 index a7e70cd..0000000 --- a/sdk/triggers/common.go +++ /dev/null @@ -1,119 +0,0 @@ -package triggers - -import ( - "context" - "errors" - "os" - "path/filepath" - "regexp" - "strings" - - sitter "github.com/smacker/go-tree-sitter" - "github.com/smacker/go-tree-sitter/golang" - - "github.com/deepsourcelabs/deepsource-go/sdk/types" -) - -// ParsedIssue represents an issue parsed using tree-sitter. 
-type ParsedIssue struct { - IssueCode string - Line int -} - -// Verify compares the generated report and parsed issues using tree-sitter. -func Verify(report types.AnalysisReport, filename string) error { - parser := sitter.NewParser() - - // get language - lang, err := getLanguage(filename) - if err != nil { - return err - } - parser.SetLanguage(lang) - - // read report - content, err := os.ReadFile(filename) - if err != nil { - return err - } - - // generate tree - ctx := context.Background() - tree, err := parser.ParseCtx(ctx, nil, content) - if err != nil { - return err - } - - // create a query for fetching comments - queryStr := "(comment) @comment" - query, err := sitter.NewQuery([]byte(queryStr), lang) - if err != nil { - return err - } - - // execute query on root node - qc := sitter.NewQueryCursor() - n := tree.RootNode() - qc.Exec(query, n) - defer qc.Close() - - var parsedIssues []ParsedIssue - - // iterate over matches - for { - m, ok := qc.NextMatch() - if !ok { - break - } - - for _, c := range m.Captures { - node := c.Node - nodeContent := node.Content(content) - - // check if the comment contains raise annotation - if strings.Contains(nodeContent, "raise") { - // find match using expression - exp := regexp.MustCompile(`.+ raise: `) - submatches := exp.FindStringSubmatch(nodeContent) - - if len(submatches) != 0 { - substrings := exp.Split(nodeContent, -1) - if len(substrings) > 1 { - issueCodes := strings.Split(substrings[1], ",") - // add issue to parsedIssues - for _, issueCode := range issueCodes { - parsedIssue := ParsedIssue{IssueCode: strings.TrimSpace(issueCode), Line: int(node.StartPoint().Row) + 1} - parsedIssues = append(parsedIssues, parsedIssue) - } - } - } - } - } - } - - // if number of issues don't match, exit early. - if len(parsedIssues) != len(report.Issues) { - return errors.New("mismatch between the number of reported issues and parsed issues") - } - - // compare the report's issues and parsed issues - for i, issue := range report.Issues { - if (parsedIssues[i].Line != issue.Location.Position.Begin.Line) && (parsedIssues[i].IssueCode != issue.IssueCode) { - return errors.New("mismatch between parsed issue and report issue") - } - } - - return nil -} - -// getLanguage is a helper for fetching tree-sitter language based on the file's extension. -func getLanguage(filename string) (*sitter.Language, error) { - extension := filepath.Ext(filename) - - switch extension { - case ".go": - return golang.GetLanguage(), nil - default: - return nil, errors.New("language not supported") - } -} diff --git a/sdk/utils/processors/staticcheck.go b/sdk/utils/processors/staticcheck.go deleted file mode 100644 index 19db6d9..0000000 --- a/sdk/utils/processors/staticcheck.go +++ /dev/null @@ -1,71 +0,0 @@ -package processors - -import ( - "encoding/json" - "strings" - - "github.com/deepsourcelabs/deepsource-go/sdk/types" -) - -// sccIssue represents a staticcheck issue. -type sccIssue struct { - Code string `json:"code"` - Severity string `json:"severity"` - Location sccIssueLocation `json:"location"` - Message string `json:"message"` -} - -type sccIssueLocation struct { - File string `json:"file"` - Line int `json:"line"` - Column int `json:"column"` -} - -// StaticCheck processor returns a DeepSource compatible analysis report from staticcheck's results. 
-func StaticCheck(result interface{}) (types.AnalysisReport, error) { - var issue sccIssue - var issues []types.Issue - - // trim newline from stdout - jsonStr := strings.TrimSuffix(result.(string), "\n") - - // parse output and generate issues - lines := strings.Split(jsonStr, "\n") - for _, l := range lines { - err := json.Unmarshal([]byte(l), &issue) - if err != nil { - return types.AnalysisReport{}, err - } - - // convert to a DeepSource issue - dsIssue := convertIssue(issue) - - issues = append(issues, dsIssue) - } - - // populate report - report := types.AnalysisReport{ - Issues: issues, - } - - // return report - return report, nil -} - -// convertIssue is a helper utility for converting a staticcheck issue to a DeepSource issue. -func convertIssue(issue sccIssue) types.Issue { - convertedIssue := types.Issue{ - IssueCode: issue.Code, - IssueText: issue.Message, - Location: types.Location{ - Path: issue.Location.File, - Position: types.Position{ - Begin: types.Coordinate{ - Line: issue.Location.Line, - Column: issue.Location.Column, - }, - }, - }, - } - return convertedIssue -} diff --git a/sdk/utils/utils.go b/sdk/utils/utils.go deleted file mode 100644 index a212fc7..0000000 --- a/sdk/utils/utils.go +++ /dev/null @@ -1,135 +0,0 @@ -package utils - -import ( - "bytes" - "encoding/json" - "errors" - "fmt" - "io/ioutil" - "os" - "path" - - "github.com/BurntSushi/toml" - "github.com/deepsourcelabs/deepsource-go/sdk/types" - "github.com/microcosm-cc/bluemonday" - "github.com/yuin/goldmark" - "github.com/yuin/goldmark/extension" -) - -type IssueMeta struct { - Code string `json:"code"` - Text string `json:"text"` - ShortDescription string `json:"short_desc"` - Description string `json:"desc"` -} - -// SaveReport saves the analysis report to the local filesystem. -func SaveReport(report types.AnalysisReport, filename string, exportType string) error { - var err error - - switch exportType { - case "json": - err = exportJSON(report, filename) - default: - return errors.New("export type not supported. supported types include: json") - } - - return err -} - -// exportJSON is a helper utility for saving the analysis report in a JSON format. -func exportJSON(report types.AnalysisReport, filename string) error { - data, err := json.MarshalIndent(report, "", " ") - if err != nil { - return err - } - - if err = ioutil.WriteFile(filename, data, 0644); err != nil { - return err - } - - return nil -} - -// ParseIssues reads a JSON file containing all issues, and returns all issues. -func ParseIssues(filename string) ([]IssueMeta, error) { - content, err := ioutil.ReadFile(filename) - if err != nil { - return nil, err - } - - var issues []IssueMeta - var parsedIssues []IssueMeta - - err = json.Unmarshal(content, &issues) - if err != nil { - return nil, err - } - - for _, issue := range issues { - // read description from a markdown file - desc, err := readMarkdown(issue.Description) - if err != nil { - return nil, err - } - - issue.Description = desc - parsedIssues = append(parsedIssues, issue) - } - - return parsedIssues, nil -} - -// readMarkdown is a helper utility used for parsing a markdown file. 
-func readMarkdown(filename string) (string, error) { - content, err := ioutil.ReadFile(filename) - if err != nil { - return "", err - } - - // use the Github-flavored Markdown extension - md := goldmark.New( - goldmark.WithExtensions(extension.GFM), - ) - - var buf bytes.Buffer - if err := md.Convert(content, &buf); err != nil { - return "", err - } - - // sanitize markdown body - body := buf.String() - p := bluemonday.UGCPolicy() - sanitizedBody := p.Sanitize(body) - - return sanitizedBody, nil -} - -// BuildTOML uses issues to generate TOML files to a directory. -func BuildTOML(issues []IssueMeta, rootDir string) error { - for _, issue := range issues { - // The unique identifier (filename) is based on the issue code. TOML files cannot be generated for issues having an invalid/empty code. - if issue.Code == "" { - return errors.New("invalid issue code. cannot generate toml") - } - - // generate file path based on root directory and filename - filename := fmt.Sprintf("%s.toml", issue.Code) - tomlPath := path.Join(rootDir, filename) - - f, err := os.Create(tomlPath) - if err != nil { - return err - } - - if err := toml.NewEncoder(f).Encode(issue); err != nil { - return err - } - - if err := f.Close(); err != nil { - return err - } - } - - return nil -} From 318ccaa15fd56f92eb95212ae547f3f9246e2e10 Mon Sep 17 00:00:00 2001 From: burntcarrot Date: Sun, 12 Jun 2022 09:44:15 +0530 Subject: [PATCH 06/24] tests(*): test TOML generation * test TOML generation and verify content * add workflow for running tests --- .github/workflows/tests.yml | 20 ++++++++++++++++++ analyzers/sdk_test.go | 41 ++++++++++++++++++++++++++++++++++--- analyzers/utils/utils.go | 9 +++++++- 3 files changed, 66 insertions(+), 4 deletions(-) create mode 100644 .github/workflows/tests.yml diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..9f7628d --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,20 @@ +name: Tests +on: [push, pull_request] +jobs: + tests: + name: tests + runs-on: ubuntu-latest + strategy: + matrix: + go-version: [1.17.x] + steps: + - name: Install Go + uses: actions/setup-go@v3 + with: + go-version: ${{ matrix.go-version }} + - name: Checkout code + uses: actions/checkout@v3 + - name: Install analyzers + run: go install honnef.co/go/tools/cmd/staticcheck@latest + - name: Run tests + run: go test -v ./... 
diff --git a/analyzers/sdk_test.go b/analyzers/sdk_test.go index cb899c7..d1dacfc 100644 --- a/analyzers/sdk_test.go +++ b/analyzers/sdk_test.go @@ -5,11 +5,14 @@ import ( "encoding/json" "errors" "os" + "path" + "reflect" "regexp" "strconv" "strings" "testing" + "github.com/BurntSushi/toml" "github.com/deepsourcelabs/deepsource-go/analyzers/types" "github.com/deepsourcelabs/deepsource-go/analyzers/utils" ) @@ -103,8 +106,8 @@ func (s *StaticCheckProcessor) Process(buf bytes.Buffer) (types.AnalysisReport, return report, nil } -func TestStaticCheck(t *testing.T) { - t.Run("staticcheck analyzer", func(t *testing.T) { +func TestAnalyzer(t *testing.T) { + t.Run("wet run", func(t *testing.T) { a := CLIAnalyzer{ Name: "staticcheck", Command: "staticcheck", @@ -153,11 +156,43 @@ func TestStaticCheck(t *testing.T) { } // test TOML generation - err = a.GenerateTOML("testdata/issues.toml", "testdata/toml") + + // fetch parsed issues + issues, err := utils.ParseIssues("testdata/issues.toml") + if err != nil { + t.Fatal(err) + } + + // generate TOML files + err = utils.BuildTOML(issues, "testdata/toml") + if err != nil { + t.Fatal(err) + } + + // traverse directory + files, err := os.ReadDir("testdata/toml") if err != nil { t.Fatal(err) } + // parse issues from each TOML file + var parsedIssue utils.IssueMeta + var parsedIssues []utils.IssueMeta + + for _, f := range files { + filePath := path.Join("testdata/toml", f.Name()) + _, err = toml.DecodeFile(filePath, &parsedIssue) + if err != nil { + t.Fatal(err) + } + parsedIssues = append(parsedIssues, parsedIssue) + } + + // check if the parsed issues and the issues present in the parent TOML are equal + if !reflect.DeepEqual(issues, parsedIssues) { + t.Fatal(errors.New("mismatch between issues in parent TOML file and parsed issues")) + } + // cleanup TOMLs err = os.RemoveAll("testdata/toml") if err != nil { diff --git a/analyzers/utils/utils.go b/analyzers/utils/utils.go index ec7aa54..fec9bb4 100644 --- a/analyzers/utils/utils.go +++ b/analyzers/utils/utils.go @@ -23,6 +23,7 @@ import ( "github.com/yuin/goldmark/extension" ) +// IssueMeta represents the issue present in a TOML file. type IssueMeta struct { Code string `json:"code"` Text string `json:"text"` @@ -30,6 +31,7 @@ type IssueMeta struct { Description string `json:"desc"` } +// IssueTOML is used for decoding issues from a TOML file. type IssueTOML struct { Issue []map[string]interface{} } @@ -62,7 +64,7 @@ func exportJSON(report types.AnalysisReport, filename string) error { return nil } -// ParseIssues reads a JSON file containing all issues, and returns all issues. +// ParseIssues reads a TOML file containing all issues, and returns all issues as []IssueMeta. 
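+// The returned issues are sorted by issue code so that TOML generation is deterministic.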
func ParseIssues(filename string) ([]IssueMeta, error) { content, err := ioutil.ReadFile(filename) if err != nil { @@ -100,6 +102,11 @@ func ParseIssues(filename string) ([]IssueMeta, error) { parsedIssues = append(parsedIssues, issue) } + // sort issues (based on issue code) before returning + sort.Slice(parsedIssues, func(i, j int) bool { + return parsedIssues[i].Code < parsedIssues[j].Code + }) + return parsedIssues, nil } From a0928fae82e419e8c779d0a5d4acb6069d94ddfd Mon Sep 17 00:00:00 2001 From: burntcarrot Date: Sun, 12 Jun 2022 09:56:18 +0530 Subject: [PATCH 07/24] fix(utils): check return value while creating directory --- analyzers/utils/utils.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/analyzers/utils/utils.go b/analyzers/utils/utils.go index fec9bb4..054c16f 100644 --- a/analyzers/utils/utils.go +++ b/analyzers/utils/utils.go @@ -140,7 +140,10 @@ func BuildTOML(issues []IssueMeta, rootDir string) error { // if rootDir doesn't exist, create one if _, err := os.Stat(rootDir); err != nil { - os.Mkdir(rootDir, 0700) + err = os.Mkdir(rootDir, 0700) + if err != nil { + return err + } } // generate file path based on root directory and filename From 7ce40b72e6bc3816fe904f6e39847b5f33e7b21d Mon Sep 17 00:00:00 2001 From: burntcarrot Date: Sun, 12 Jun 2022 12:09:47 +0530 Subject: [PATCH 08/24] docs(*): add guide for writing an analyzer --- guides/writing-analyzers.md | 210 ++++++++++++++++++++++++++++++++++++ 1 file changed, 210 insertions(+) create mode 100644 guides/writing-analyzers.md diff --git a/guides/writing-analyzers.md b/guides/writing-analyzers.md new file mode 100644 index 0000000..8aa4bff --- /dev/null +++ b/guides/writing-analyzers.md @@ -0,0 +1,210 @@ +# Writing Custom Analyzers + +In this example, we will be writing a custom analyzer for [staticcheck](https://staticcheck.io/). + +## How to a write a custom analyzer? + +The flow of writing a custom analyzer using the SDK, is as follows: +- Create an analyzer (`CLIAnalyzer`) +- Run the analyzer (`Run()`) +- Use the processor to fetch a DeepSource-compatible report (`Processor.Process()`) +- Persist the report to the local filesystem using `SaveReport` + +## Setting up + +Let's add this to our `main.go`: + +```go +package main + +import ( + "log" + + "github.com/deepsourcelabs/deepsource-go/analyzers" + "github.com/deepsourcelabs/deepsource-go/analyzers/utils" +) + +func main() { + // create a CLI analyzer + a := analyzers.CLIAnalyzer{ + Name: "staticcheck", // name of the analyzer + Command: "staticcheck", // main command + Args: []string{"-f", "text", "./..."}, // args + Processor: &StaticCheckProcessor{}, // processor + } + + // run the analyzer + err := a.Run() + if err != nil { + log.Fatalln(err) + } + + // process the output from staticcheck using the stdout stream + report, err := a.Processor.Process(a.Stdout()) + if err != nil { + log.Fatalln(err) + } + + // save report to a JSON file + err = utils.SaveReport(report, "issues.json", "json") + if err != nil { + log.Fatalln(err) + } +} +``` + +## Implementing our custom processor + +A processor is used for converting the result returned by the custom analyzer into a DeepSource-compatible report. The processor must implement `Process()`. + +If the analyzer's output format is common in nature (unix-style, etc.), the SDK provides pre-built processors for usage. + +Since `staticcheck`'s output format is not common in nature, we need to implement the processor for our `staticcheck` analyzer. 
+ +```go +type StaticCheckProcessor struct{} + +// StaticCheck processor returns a DeepSource-compatible analysis report from staticcheck's results. +func (s *StaticCheckProcessor) Process(buf bytes.Buffer) (types.AnalysisReport, error) { + var issues []types.Issue + + // trim newline from buffer output + lines := strings.Split(buf.String(), "\n") + + for _, line := range lines { + // trim spaces + line = strings.TrimSpace(line) + if line == "" { + break + } + + // compile regular expression for parsing unix format + + // group descriptions: + // 0: complete string + // 1: path + // 2: line number + // 3: column number + // 4: message + exp, err := regexp.Compile("(.+):(.):(.): (.+)") + if err != nil { + return types.AnalysisReport{}, err + } + + // get groups + groups := exp.FindAllStringSubmatch(strings.TrimSuffix(line, "\n"), -1) + if len(groups) == 0 { + return types.AnalysisReport{}, errors.New("failed to parse output string") + } + + // convert line and column numbers to int + line, err := strconv.Atoi(groups[0][2]) + if err != nil { + return types.AnalysisReport{}, err + } + + col, err := strconv.Atoi(groups[0][3]) + if err != nil { + return types.AnalysisReport{}, err + } + + // compile regular expression for parsing staticcheck message + + // group descriptions: + // 0: complete string + // 1: partial message string + // 2: issue code + // 3: parentheses + messageExp, err := regexp.Compile("(.+)[(](.+)(.+)") + if err != nil { + return types.AnalysisReport{}, err + } + messageGroups := messageExp.FindAllStringSubmatch(groups[0][4], -1) + if len(messageGroups) == 0 { + return types.AnalysisReport{}, errors.New("failed to parse message") + } + + // populate issue + issue := types.Issue{ + IssueCode: messageGroups[0][2], + IssueText: groups[0][4], + Location: types.Location{ + Path: groups[0][1], + Position: types.Position{ + Begin: types.Coordinate{ + Line: line, + Column: col, + }, + }, + }, + } + + issues = append(issues, issue) + } + + // populate report + report := types.AnalysisReport{ + Issues: issues, + } + + // return report + return report, nil +} +``` + +## Running our analyzer + +Wow! We just implemented our own custom analyzer! + +On running the analyzer, we must see the report saved as a JSON file: + +```json +[ + { + "code": "SA4017", + "text": "Sprint is a pure function but its return value is ignored", + "short_desc": "Sprint is a pure function but its return value is ignored", + "desc": "/home/aadhav/analyzer-go-sdk/playground/sa4017.md" + } +] +``` + +## Generating TOML files for issues + +The developer can define all issues in a single TOML file. This file acts as a single point of truth for generating TOML files for each issue. + +This is helpful for developers who wish to define custom issues for their analyzers. 
+ +For example, we have `issues.toml` as the file containing details for all issues: + +```toml +[[issue]] + +Code = "SA4017" +Text = "Sprint is a pure function but its return value is ignored" +ShortDescription = "Sprint is a pure function but its return value is ignored" +Description = """ +## Sample +""" +``` + +`GenerateTOML` reads `issues.toml`, and generates TOML files for each issue, where the filename is the issue code: + +```go + // previous code + + // generate TOML files for each issue from a parent TOML file + err = a.GenerateTOML("issues.toml", "toml") + if err != nil { + log.Fatalln(err) + } +``` + +On inspecting `toml/SA4017.toml`, we can see the following contents: + +```toml +Code = "SA4017" +Text = "Sprint is a pure function but its return value is ignored" +ShortDescription = "Sprint is a pure function but its return value is ignored" +Description = "

<h2>Sample</h2>
\n" +``` From 3a0c99ab318defc6013e0439be1dbf709c9d94c4 Mon Sep 17 00:00:00 2001 From: burntcarrot Date: Sun, 12 Jun 2022 12:10:41 +0530 Subject: [PATCH 09/24] chore: add README --- README.md | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..45bcb95 --- /dev/null +++ b/README.md @@ -0,0 +1,35 @@ +

+<p align="center">
+  Documentation |
+  Get Started |
+  Discuss
+</p>
+
+<p align="center">
+  DeepSource helps you ship good quality code.
+</p>
+
+ +--- + +# DeepSource Go SDK + + + +Go SDK for [DeepSource](https://deepsource.io/). + +The Go SDK makes it easier for developers to integrate an existing analyzer with DeepSource. + +## Guides + +Here are some extensive guides on working with the SDK: + +- [Writing your own analyzer](guides/writing-analyzers.md) + +## Community + +Interested in DeepSource and want to chat with the community? Feel free to join our [Discord server](http://deepsource.io/discord). From f7d3b7e4f16fb517af3dff101a4bef13cc414837 Mon Sep 17 00:00:00 2001 From: burntcarrot Date: Mon, 13 Jun 2022 22:00:58 +0530 Subject: [PATCH 10/24] refactor(*): refactor SDK and cleanup --- .deepsource.toml | 14 ++ .github/workflows/lint.yml | 18 -- .golangci.yml | 3 - analyzers/analysistest/analysistest.go | 139 +++++++++++++ .../unix.go => processors/regex.go} | 12 +- analyzers/sdk.go | 24 +-- analyzers/sdk_test.go | 15 +- analyzers/testdata/issues.toml | 16 +- .../staticcheck/staticcheck.go | 0 analyzers/types/types.go | 15 +- analyzers/utils/utils.go | 190 +++--------------- guides/writing-analyzers.md | 4 +- 12 files changed, 218 insertions(+), 232 deletions(-) create mode 100644 .deepsource.toml delete mode 100644 .github/workflows/lint.yml delete mode 100644 .golangci.yml create mode 100644 analyzers/analysistest/analysistest.go rename analyzers/{utils/processors/unix.go => processors/regex.go} (83%) rename analyzers/testdata/{triggers => src}/staticcheck/staticcheck.go (100%) diff --git a/.deepsource.toml b/.deepsource.toml new file mode 100644 index 0000000..7899478 --- /dev/null +++ b/.deepsource.toml @@ -0,0 +1,14 @@ +version = 1 + +exclude_patterns = ["**/testdata/src/**"] + +[[analyzers]] +name = "go" +enabled = true + + [analyzers.meta] + import_root = "github.com/deepsourcelabs/deepsource-go" + +[[transformers]] +name = "gofumpt" +enabled = true diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml deleted file mode 100644 index 3de8d66..0000000 --- a/.github/workflows/lint.yml +++ /dev/null @@ -1,18 +0,0 @@ -name: Lint -on: [push, pull_request] -jobs: - golangci: - name: lint - runs-on: ubuntu-latest - strategy: - matrix: - go-version: [1.17.x] - steps: - - uses: actions/setup-go@v3 - with: - go-version: ${{ matrix.go-version }} - - uses: actions/checkout@v3 - - name: golangci-lint - uses: golangci/golangci-lint-action@v3 - with: - version: v1.43 diff --git a/.golangci.yml b/.golangci.yml deleted file mode 100644 index 02b2438..0000000 --- a/.golangci.yml +++ /dev/null @@ -1,3 +0,0 @@ -run: - skip-dirs: - - analyzers/triggers diff --git a/analyzers/analysistest/analysistest.go b/analyzers/analysistest/analysistest.go new file mode 100644 index 0000000..2f66f19 --- /dev/null +++ b/analyzers/analysistest/analysistest.go @@ -0,0 +1,139 @@ +package analysistest + +import ( + "context" + "errors" + "os" + "path/filepath" + "regexp" + "sort" + "strings" + + "github.com/deepsourcelabs/deepsource-go/analyzers/types" + sitter "github.com/smacker/go-tree-sitter" + "github.com/smacker/go-tree-sitter/golang" +) + +// ParsedIssue represents an issue parsed using tree-sitter. +type ParsedIssue struct { + IssueCode string + Line int +} + +// Verify compares the generated report and parsed issues using tree-sitter. 
+func Verify(report types.AnalysisReport, filename string) error { + parser := sitter.NewParser() + + // get language + lang, err := getLanguage(filename) + if err != nil { + return err + } + parser.SetLanguage(lang) + + // read report + content, err := os.ReadFile(filename) + if err != nil { + return err + } + + // generate tree + ctx := context.Background() + tree, err := parser.ParseCtx(ctx, nil, content) + if err != nil { + return err + } + + // create a query for fetching comments + queryStr := "(comment) @comment" + query, err := sitter.NewQuery([]byte(queryStr), lang) + if err != nil { + return err + } + + // execute query on root node + qc := sitter.NewQueryCursor() + n := tree.RootNode() + qc.Exec(query, n) + defer qc.Close() + + var parsedIssues []ParsedIssue + + // iterate over matches + for { + m, ok := qc.NextMatch() + if !ok { + break + } + + for _, c := range m.Captures { + // get node content + node := c.Node + nodeContent := node.Content(content) + + // check if the comment contains raise annotation + if strings.Contains(nodeContent, "raise") { + // find match using expression + exp := regexp.MustCompile(`.+ raise: `) + submatches := exp.FindStringSubmatch(nodeContent) + + if len(submatches) != 0 { + substrings := exp.Split(nodeContent, -1) + if len(substrings) > 1 { + issueCodes := strings.Split(substrings[1], ",") + // add issue to parsedIssues + for _, issueCode := range issueCodes { + parsedIssue := ParsedIssue{IssueCode: strings.TrimSpace(issueCode), Line: int(node.StartPoint().Row) + 1} + parsedIssues = append(parsedIssues, parsedIssue) + } + } + } + } + } + } + + // if number of issues don't match, exit early. + if len(parsedIssues) != len(report.Issues) { + return errors.New("mismatch between the number of reported issues and parsed issues") + } + + // compare the report's issues and parsed issues + match := compareReport(parsedIssues, report) + if !match { + return errors.New("mismatch between parsed issue and report issue") + } + + return nil +} + +// getLanguage is a helper for fetching a tree-sitter language based on the file's extension. +func getLanguage(filename string) (*sitter.Language, error) { + extension := filepath.Ext(filename) + + switch extension { + case ".go": + return golang.GetLanguage(), nil + default: + return nil, errors.New("language not supported") + } +} + +// compareReport is a helper which checks if the parsed issues are identical to the issues present in the report. +func compareReport(parsedIssues []ParsedIssue, report types.AnalysisReport) bool { + // sort report and parsedIssues by line number + sort.Slice(parsedIssues, func(i, j int) bool { + return parsedIssues[i].Line < parsedIssues[j].Line + }) + + sort.Slice(report.Issues, func(i, j int) bool { + return report.Issues[i].Location.Position.Begin.Line < report.Issues[j].Location.Position.Begin.Line + }) + + for i, issue := range report.Issues { + if (parsedIssues[i].Line != issue.Location.Position.Begin.Line) && (parsedIssues[i].IssueCode != issue.IssueCode) { + return false + } + } + + return true +} diff --git a/analyzers/utils/processors/unix.go b/analyzers/processors/regex.go similarity index 83% rename from analyzers/utils/processors/unix.go rename to analyzers/processors/regex.go index f88636f..cbd5bc0 100644 --- a/analyzers/utils/processors/unix.go +++ b/analyzers/processors/regex.go @@ -10,10 +10,12 @@ import ( "github.com/deepsourcelabs/deepsource-go/analyzers/types" ) -// UnixProcessor is a processor for unix-formatted strings. 
-type UnixProcessor struct{} +// RegexProcessor utilizes regular expressions for processing. +type RegexProcessor struct { + Pattern string +} -func (u *UnixProcessor) Unix(buf bytes.Buffer) (types.AnalysisReport, error) { +func (r *RegexProcessor) Process(buf bytes.Buffer) (types.AnalysisReport, error) { var issues []types.Issue // trim newline from buffer output @@ -26,7 +28,7 @@ func (u *UnixProcessor) Unix(buf bytes.Buffer) (types.AnalysisReport, error) { break } - // compile regular expression for parsing unix format + // TODO: test processor // group descriptions: // 0: complete string @@ -34,7 +36,7 @@ func (u *UnixProcessor) Unix(buf bytes.Buffer) (types.AnalysisReport, error) { // 2: line number // 3: column number // 4: message - exp, err := regexp.Compile("(.+):(.):(.): (.+)") + exp, err := regexp.Compile(r.Pattern) if err != nil { return types.AnalysisReport{}, err } diff --git a/analyzers/sdk.go b/analyzers/sdk.go index 39b7068..974df92 100644 --- a/analyzers/sdk.go +++ b/analyzers/sdk.go @@ -2,18 +2,19 @@ package analyzers import ( "bytes" + "encoding/json" + "io/ioutil" "os/exec" "github.com/deepsourcelabs/deepsource-go/analyzers/types" - "github.com/deepsourcelabs/deepsource-go/analyzers/utils" ) type Processor interface { Process(bytes.Buffer) (types.AnalysisReport, error) } -// CLIAnalyzer is used for creating an analyzer. -type CLIAnalyzer struct { +// CLIRunner is used for creating an analyzer. +type CLIRunner struct { Name string Command string Args []string @@ -25,17 +26,17 @@ type CLIAnalyzer struct { } // Stdout returns the stdout buffer. -func (a *CLIAnalyzer) Stdout() bytes.Buffer { +func (a *CLIRunner) Stdout() bytes.Buffer { return *a.stdout } // Stderr returns the stderr buffer. -func (a *CLIAnalyzer) Stderr() bytes.Buffer { +func (a *CLIRunner) Stderr() bytes.Buffer { return *a.stderr } // Run executes the analyzer and streams the output to the processor. -func (a *CLIAnalyzer) Run() error { +func (a *CLIRunner) Run() error { outBuf, errBuf, exitCode, err := runCmd(a.Command, a.Args, a.AllowedExitCodes) if err != nil { return err @@ -83,17 +84,14 @@ func runCmd(command string, args []string, allowedExitCodes []int) (bytes.Buffer return outBuf, errBuf, 0, nil } -// GenerateTOML helps in generating TOML files for each issue from a JSON file. -func (a *CLIAnalyzer) GenerateTOML(filename string, rootDir string) error { - // fetch parsed issues - issues, err := utils.ParseIssues(filename) +// SaveReport saves the analysis report to the local filesystem. 
+func (a *CLIRunner) SaveReport(report types.AnalysisReport, filename string) error { + data, err := json.MarshalIndent(report, "", " ") if err != nil { return err } - // generate TOML files - err = utils.BuildTOML(issues, rootDir) - if err != nil { + if err = ioutil.WriteFile(filename, data, 0644); err != nil { return err } diff --git a/analyzers/sdk_test.go b/analyzers/sdk_test.go index d1dacfc..64143aa 100644 --- a/analyzers/sdk_test.go +++ b/analyzers/sdk_test.go @@ -13,6 +13,7 @@ import ( "testing" "github.com/BurntSushi/toml" + "github.com/deepsourcelabs/deepsource-go/analyzers/analysistest" "github.com/deepsourcelabs/deepsource-go/analyzers/types" "github.com/deepsourcelabs/deepsource-go/analyzers/utils" ) @@ -107,11 +108,11 @@ func (s *StaticCheckProcessor) Process(buf bytes.Buffer) (types.AnalysisReport, } func TestAnalyzer(t *testing.T) { - t.Run("wet run", func(t *testing.T) { - a := CLIAnalyzer{ + t.Run("Run staticcheck as DeepSource Analyzer", func(t *testing.T) { + a := CLIRunner{ Name: "staticcheck", Command: "staticcheck", - Args: []string{"-f", "text", "./testdata/triggers/staticcheck/..."}, + Args: []string{"-f", "text", "./testdata/src/staticcheck/..."}, Processor: &StaticCheckProcessor{}, } @@ -126,13 +127,13 @@ func TestAnalyzer(t *testing.T) { } // save report - err = utils.SaveReport(processedReport, "testdata/triggers/staticcheck/issues.json", "json") + err = a.SaveReport(processedReport, "testdata/src/staticcheck/issues.json") if err != nil { t.Fatal(err) } // read the generated report - reportContent, err := os.ReadFile("testdata/triggers/staticcheck/issues.json") + reportContent, err := os.ReadFile("testdata/src/staticcheck/issues.json") if err != nil { t.Fatal(err) } @@ -144,13 +145,13 @@ func TestAnalyzer(t *testing.T) { } // do a verification check for the generated report - err = utils.Verify(report, "testdata/triggers/staticcheck/staticcheck.go") + err = analysistest.Verify(report, "testdata/src/staticcheck/staticcheck.go") if err != nil { t.Fatal(err) } // cleanup after test - err = os.Remove("testdata/triggers/staticcheck/issues.json") + err = os.Remove("testdata/src/staticcheck/issues.json") if err != nil { t.Fatal(err) } diff --git a/analyzers/testdata/issues.toml b/analyzers/testdata/issues.toml index 0867b1c..2f409b1 100644 --- a/analyzers/testdata/issues.toml +++ b/analyzers/testdata/issues.toml @@ -1,17 +1,17 @@ -[[issue]] +[[issues]] -Code = "SA4017" -Text = "Sprint is a pure function but its return value is ignored" -ShortDescription = "Sprint is a pure function but its return value is ignored" +IssueCode = "SA4017" +Category = "bug risk" +Title = "Sprint is a pure function but its return value is ignored" Description = """ ## hello """ -[[issue]] +[[issues]] -Code = "S1039" -Text = "unnecessary use of fmt.Sprint" -ShortDescription = "unnecessary use of fmt.Sprint" +IssueCode = "S1039" +Category = "style" +Title = "unnecessary use of fmt.Sprint" Description = """ ## Example """ diff --git a/analyzers/testdata/triggers/staticcheck/staticcheck.go b/analyzers/testdata/src/staticcheck/staticcheck.go similarity index 100% rename from analyzers/testdata/triggers/staticcheck/staticcheck.go rename to analyzers/testdata/src/staticcheck/staticcheck.go diff --git a/analyzers/types/types.go b/analyzers/types/types.go index 50370da..3ea2f95 100644 --- a/analyzers/types/types.go +++ b/analyzers/types/types.go @@ -15,19 +15,10 @@ type Location struct { Position Position `json:"position"` } -type SourceCode struct { - Rendered []byte `json:"rendered"` -} - -type 
ProcessedData struct { - SourceCode SourceCode `json:"source_code,omitempty"` -} - type Issue struct { - IssueCode string `json:"issue_code"` - IssueText string `json:"issue_text"` - Location Location `json:"location"` - ProcessedData ProcessedData `json:"processed_data,omitempty"` + IssueCode string `json:"issue_code"` + IssueText string `json:"issue_text"` + Location Location `json:"location"` } // Location of an issue diff --git a/analyzers/utils/utils.go b/analyzers/utils/utils.go index 054c16f..7f1785c 100644 --- a/analyzers/utils/utils.go +++ b/analyzers/utils/utils.go @@ -2,62 +2,44 @@ package utils import ( "bytes" - "context" - "encoding/json" "errors" "fmt" "io/ioutil" "os" "path" - "path/filepath" - "regexp" "sort" - "strings" "github.com/BurntSushi/toml" - "github.com/deepsourcelabs/deepsource-go/analyzers/types" "github.com/microcosm-cc/bluemonday" - sitter "github.com/smacker/go-tree-sitter" - "github.com/smacker/go-tree-sitter/golang" "github.com/yuin/goldmark" "github.com/yuin/goldmark/extension" ) // IssueMeta represents the issue present in a TOML file. type IssueMeta struct { - Code string `json:"code"` - Text string `json:"text"` - ShortDescription string `json:"short_desc"` - Description string `json:"desc"` + IssueCode string `toml:"code"` + Category string `toml:"category"` + Title string `toml:"title"` + Description string `toml:"description"` } // IssueTOML is used for decoding issues from a TOML file. type IssueTOML struct { - Issue []map[string]interface{} + Issues []map[string]interface{} } -// SaveReport saves the analysis report to the local filesystem. -func SaveReport(report types.AnalysisReport, filename string, exportType string) error { - var err error - - switch exportType { - case "json": - err = exportJSON(report, filename) - default: - return errors.New("export type not supported. supported types include: json") - } - - return err -} - -// exportJSON is a helper utility for saving the analysis report in a JSON format. -func exportJSON(report types.AnalysisReport, filename string) error { - data, err := json.MarshalIndent(report, "", " ") +// GenerateTOML helps in generating TOML files for each issue from a TOML file. 
+func GenerateTOML(filename string, rootDir string) error { + // fetch parsed issues + // TODO: move parse issues + issues, err := ParseIssues(filename) if err != nil { return err } - if err = ioutil.WriteFile(filename, data, 0644); err != nil { + // generate TOML files + err = BuildTOML(issues, rootDir) + if err != nil { return err } @@ -80,12 +62,12 @@ func ParseIssues(filename string) ([]IssueMeta, error) { return nil, err } - for _, issueTOML := range issuesTOML.Issue { + for _, issueTOML := range issuesTOML.Issues { is := IssueMeta{ - Code: issueTOML["Code"].(string), - Text: issueTOML["Text"].(string), - ShortDescription: issueTOML["ShortDescription"].(string), - Description: issueTOML["Description"].(string), + IssueCode: issueTOML["IssueCode"].(string), + Category: issueTOML["Category"].(string), + Title: issueTOML["Title"].(string), + Description: issueTOML["Description"].(string), } issues = append(issues, is) @@ -104,7 +86,7 @@ func ParseIssues(filename string) ([]IssueMeta, error) { // sort issues (based on issue code) before returning sort.Slice(parsedIssues, func(i, j int) bool { - return parsedIssues[i].Code < parsedIssues[j].Code + return parsedIssues[i].IssueCode < parsedIssues[j].IssueCode }) return parsedIssues, nil @@ -132,22 +114,26 @@ func readMarkdown(content string) (string, error) { // BuildTOML uses issues to generate TOML files to a directory. func BuildTOML(issues []IssueMeta, rootDir string) error { + if len(issues) == 0 { + return errors.New("no issues found") + } + for _, issue := range issues { // The unique identifier (filename) is based on the issue code. TOML files cannot be generated for issues having an invalid/empty code. - if issue.Code == "" { + if issue.IssueCode == "" { return errors.New("invalid issue code. cannot generate toml") } // if rootDir doesn't exist, create one if _, err := os.Stat(rootDir); err != nil { - err = os.Mkdir(rootDir, 0700) + err := os.Mkdir(rootDir, 0700) if err != nil { return err } } // generate file path based on root directory and filename - filename := fmt.Sprintf("%s.toml", issue.Code) + filename := fmt.Sprintf("%s.toml", issue.IssueCode) tomlPath := path.Join(rootDir, filename) f, err := os.Create(tomlPath) @@ -166,127 +152,3 @@ func BuildTOML(issues []IssueMeta, rootDir string) error { return nil } - -// ParsedIssue represents an issue parsed using tree-sitter. -type ParsedIssue struct { - IssueCode string - Line int -} - -// Verify compares the generated report and parsed issues using tree-sitter. 
-func Verify(report types.AnalysisReport, filename string) error { - parser := sitter.NewParser() - - // get language - lang, err := getLanguage(filename) - if err != nil { - return err - } - parser.SetLanguage(lang) - - // read report - content, err := os.ReadFile(filename) - if err != nil { - return err - } - - // generate tree - ctx := context.Background() - tree, err := parser.ParseCtx(ctx, nil, content) - if err != nil { - return err - } - - // create a query for fetching comments - queryStr := "(comment) @comment" - query, err := sitter.NewQuery([]byte(queryStr), lang) - if err != nil { - return err - } - - // execute query on root node - qc := sitter.NewQueryCursor() - n := tree.RootNode() - qc.Exec(query, n) - defer qc.Close() - - var parsedIssues []ParsedIssue - - // iterate over matches - for { - m, ok := qc.NextMatch() - if !ok { - break - } - - for _, c := range m.Captures { - // get node content - node := c.Node - nodeContent := node.Content(content) - - // check if the comment contains raise annotation - if strings.Contains(nodeContent, "raise") { - // find match using expression - exp := regexp.MustCompile(`.+ raise: `) - submatches := exp.FindStringSubmatch(nodeContent) - - if len(submatches) != 0 { - substrings := exp.Split(nodeContent, -1) - if len(substrings) > 1 { - issueCodes := strings.Split(substrings[1], ",") - // add issue to parsedIssues - for _, issueCode := range issueCodes { - parsedIssue := ParsedIssue{IssueCode: strings.TrimSpace(issueCode), Line: int(node.StartPoint().Row) + 1} - parsedIssues = append(parsedIssues, parsedIssue) - } - } - } - } - } - } - - // if number of issues don't match, exit early. - if len(parsedIssues) != len(report.Issues) { - return errors.New("mismatch between the number of reported issues and parsed issues") - } - - // compare the report's issues and parsed issues - match := compareReport(parsedIssues, report) - if !match { - return errors.New("mismatch between parsed issue and report issue") - } - - return nil -} - -// getLanguage is a helper for fetching a tree-sitter language based on the file's extension. -func getLanguage(filename string) (*sitter.Language, error) { - extension := filepath.Ext(filename) - - switch extension { - case ".go": - return golang.GetLanguage(), nil - default: - return nil, errors.New("language not supported") - } -} - -// compareReport is a helper which checks if the parsed issues are identical to the issues present in the report. -func compareReport(parsedIssues []ParsedIssue, report types.AnalysisReport) bool { - // sort report and parsedIssues by line number - sort.Slice(parsedIssues, func(i, j int) bool { - return parsedIssues[i].Line < parsedIssues[j].Line - }) - - sort.Slice(report.Issues, func(i, j int) bool { - return report.Issues[i].Location.Position.Begin.Line < report.Issues[j].Location.Position.Begin.Line - }) - - for i, issue := range report.Issues { - if (parsedIssues[i].Line != issue.Location.Position.Begin.Line) && (parsedIssues[i].IssueCode != issue.IssueCode) { - return false - } - } - - return true -} diff --git a/guides/writing-analyzers.md b/guides/writing-analyzers.md index 8aa4bff..bf455aa 100644 --- a/guides/writing-analyzers.md +++ b/guides/writing-analyzers.md @@ -5,7 +5,7 @@ In this example, we will be writing a custom analyzer for [staticcheck](https:// ## How to a write a custom analyzer? 
The flow of writing a custom analyzer using the SDK, is as follows: -- Create an analyzer (`CLIAnalyzer`) +- Create an analyzer (`CLIRunner`) - Run the analyzer (`Run()`) - Use the processor to fetch a DeepSource-compatible report (`Processor.Process()`) - Persist the report to the local filesystem using `SaveReport` @@ -26,7 +26,7 @@ import ( func main() { // create a CLI analyzer - a := analyzers.CLIAnalyzer{ + a := analyzers.CLIRunner{ Name: "staticcheck", // name of the analyzer Command: "staticcheck", // main command Args: []string{"-f", "text", "./..."}, // args From 6bd254084115be934de887775f8ea8dfa1db963e Mon Sep 17 00:00:00 2001 From: burntcarrot Date: Tue, 14 Jun 2022 12:00:06 +0530 Subject: [PATCH 11/24] fix(analyzers): use REPO_ROOT and TOOLBOX_PATH environment variables * use environment variable based exporting * use named groups for RegexProcessor --- analyzers/processors/regex.go | 64 ++++++++++++++++------------------- analyzers/sdk.go | 13 +++++-- analyzers/sdk_test.go | 14 +++++--- analyzers/utils/utils.go | 8 +++-- 4 files changed, 56 insertions(+), 43 deletions(-) diff --git a/analyzers/processors/regex.go b/analyzers/processors/regex.go index cbd5bc0..49ed085 100644 --- a/analyzers/processors/regex.go +++ b/analyzers/processors/regex.go @@ -28,49 +28,45 @@ func (r *RegexProcessor) Process(buf bytes.Buffer) (types.AnalysisReport, error) break } - // TODO: test processor - - // group descriptions: - // 0: complete string - // 1: path - // 2: line number - // 3: column number - // 4: message exp, err := regexp.Compile(r.Pattern) if err != nil { return types.AnalysisReport{}, err } // get groups - groups := exp.FindAllStringSubmatch(strings.TrimSuffix(line, "\n"), -1) - if len(groups) == 0 { - return types.AnalysisReport{}, errors.New("failed to parse message") - } + groupNames := exp.SubexpNames() - // convert line and column numbers to int - line, err := strconv.Atoi(groups[0][2]) - if err != nil { - return types.AnalysisReport{}, err - } + var issue types.Issue + groups := exp.FindAllStringSubmatch(strings.TrimSuffix(line, "\n"), -1) + for groupIdx, content := range groups[0] { + groupName := groupNames[groupIdx] - col, err := strconv.Atoi(groups[0][3]) - if err != nil { - return types.AnalysisReport{}, err + // populate issue using named groups + switch groupName { + case "filename": + issue.Location.Path = content + case "line": + line, err := strconv.Atoi(content) + if err != nil { + return types.AnalysisReport{}, err + } + issue.Location.Position.Begin.Line = line + case "column": + col, err := strconv.Atoi(content) + if err != nil { + return types.AnalysisReport{}, err + } + issue.Location.Position.Begin.Column = col + case "message": + issue.IssueText = content + case "issue_code": + issue.IssueCode = content + default: + continue + } } - - // populate issue - issue := types.Issue{ - IssueCode: "", - IssueText: groups[0][4], - Location: types.Location{ - Path: groups[0][1], - Position: types.Position{ - Begin: types.Coordinate{ - Line: line, - Column: col, - }, - }, - }, + if len(groups) == 0 { + return types.AnalysisReport{}, errors.New("failed to parse message") } issues = append(issues, issue) diff --git a/analyzers/sdk.go b/analyzers/sdk.go index 974df92..5001317 100644 --- a/analyzers/sdk.go +++ b/analyzers/sdk.go @@ -3,8 +3,9 @@ package analyzers import ( "bytes" "encoding/json" - "io/ioutil" + "os" "os/exec" + "path" "github.com/deepsourcelabs/deepsource-go/analyzers/types" ) @@ -85,13 +86,19 @@ func runCmd(command string, args []string, allowedExitCodes 
[]int) (bytes.Buffer } // SaveReport saves the analysis report to the local filesystem. -func (a *CLIRunner) SaveReport(report types.AnalysisReport, filename string) error { +func (*CLIRunner) SaveReport(report types.AnalysisReport) error { + // get toolbox path from environment variable + toolboxPath := os.Getenv("TOOLBOX_PATH") + + // set report location + filename := path.Join(toolboxPath, "analysis_report.json") + data, err := json.MarshalIndent(report, "", " ") if err != nil { return err } - if err = ioutil.WriteFile(filename, data, 0644); err != nil { + if err = os.WriteFile(filename, data, 0644); err != nil { return err } diff --git a/analyzers/sdk_test.go b/analyzers/sdk_test.go index 64143aa..7e89ace 100644 --- a/analyzers/sdk_test.go +++ b/analyzers/sdk_test.go @@ -21,7 +21,7 @@ import ( type StaticCheckProcessor struct{} // StaticCheck processor returns a DeepSource-compatible analysis report from staticcheck's results. -func (s *StaticCheckProcessor) Process(buf bytes.Buffer) (types.AnalysisReport, error) { +func (*StaticCheckProcessor) Process(buf bytes.Buffer) (types.AnalysisReport, error) { var issues []types.Issue // trim newline from buffer output @@ -109,6 +109,11 @@ func (s *StaticCheckProcessor) Process(buf bytes.Buffer) (types.AnalysisReport, func TestAnalyzer(t *testing.T) { t.Run("Run staticcheck as DeepSource Analyzer", func(t *testing.T) { + // set environment variables + tempDir := t.TempDir() + t.Setenv("TOOLBOX_PATH", tempDir) + t.Setenv("REPO_ROOT", tempDir) + a := CLIRunner{ Name: "staticcheck", Command: "staticcheck", @@ -127,13 +132,14 @@ func TestAnalyzer(t *testing.T) { } // save report - err = a.SaveReport(processedReport, "testdata/src/staticcheck/issues.json") + err = a.SaveReport(processedReport) if err != nil { t.Fatal(err) } // read the generated report - reportContent, err := os.ReadFile("testdata/src/staticcheck/issues.json") + generatedFile := path.Join(tempDir, "analysis_report.json") + reportContent, err := os.ReadFile(generatedFile) if err != nil { t.Fatal(err) } @@ -151,7 +157,7 @@ func TestAnalyzer(t *testing.T) { } // cleanup after test - err = os.Remove("testdata/src/staticcheck/issues.json") + err = os.Remove(generatedFile) if err != nil { t.Fatal(err) } diff --git a/analyzers/utils/utils.go b/analyzers/utils/utils.go index 7f1785c..0899562 100644 --- a/analyzers/utils/utils.go +++ b/analyzers/utils/utils.go @@ -29,15 +29,19 @@ type IssueTOML struct { } // GenerateTOML helps in generating TOML files for each issue from a TOML file. 
-func GenerateTOML(filename string, rootDir string) error { +func GenerateTOML() error { + // root directory for the repository + repoRoot := os.Getenv("REPO_ROOT") + // fetch parsed issues - // TODO: move parse issues + filename := path.Join(repoRoot, ".deepsource/analyzers/issues.toml") issues, err := ParseIssues(filename) if err != nil { return err } // generate TOML files + rootDir := path.Join(repoRoot, ".deepsource/analyzers/issues") err = BuildTOML(issues, rootDir) if err != nil { return err From a0ce3b13bb07a390db8529c98d3b1777962d84da Mon Sep 17 00:00:00 2001 From: burntcarrot Date: Tue, 14 Jun 2022 12:48:32 +0530 Subject: [PATCH 12/24] chore: add gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7a6353d --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.envrc From 7fc29405621ac3c161fd63ce386b757765d46484 Mon Sep 17 00:00:00 2001 From: burntcarrot Date: Tue, 14 Jun 2022 13:01:36 +0530 Subject: [PATCH 13/24] docs(*): update guide --- README.md | 2 +- guides/writing-analyzers.md | 187 +++++++++++++++--------------------- 2 files changed, 81 insertions(+), 108 deletions(-) diff --git a/README.md b/README.md index 45bcb95..cb281f2 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ The Go SDK makes it easier for developers to integrate an existing analyzer with Here are some extensive guides on working with the SDK: -- [Writing your own analyzer](guides/writing-analyzers.md) +- [Writing custom analyzers](guides/writing-analyzers.md) ## Community diff --git a/guides/writing-analyzers.md b/guides/writing-analyzers.md index bf455aa..9247c6c 100644 --- a/guides/writing-analyzers.md +++ b/guides/writing-analyzers.md @@ -10,146 +10,113 @@ The flow of writing a custom analyzer using the SDK, is as follows: - Use the processor to fetch a DeepSource-compatible report (`Processor.Process()`) - Persist the report to the local filesystem using `SaveReport` -## Setting up +## Getting Started -Let's add this to our `main.go`: +### Setting up our analyzer + +The analyzer should contain the following: +- `Name`: Name of the analyzer +- `Command`: The main command for the CLI tool (for example, `staticcheck`, etc.) +- `Args`: Arguments for `Command`. +- `Processor`: Processor used for parsing the output of the CLI analyzer + +The analyzer can be executed using `Run()`, which executes the CLI (`Command` along with its `Args`). The output of the CLI is stored to its respective buffers. 
(`stdout` and `stderr`; accessible through `a.Stdout()` and `a.Stderr()`) + +Here is the code for the analyzer: ```go package main import ( + "fmt" "log" "github.com/deepsourcelabs/deepsource-go/analyzers" - "github.com/deepsourcelabs/deepsource-go/analyzers/utils" + "github.com/deepsourcelabs/deepsource-go/analyzers/processors" ) func main() { - // create a CLI analyzer a := analyzers.CLIRunner{ - Name: "staticcheck", // name of the analyzer - Command: "staticcheck", // main command - Args: []string{"-f", "text", "./..."}, // args - Processor: &StaticCheckProcessor{}, // processor + Name: "staticcheck", + Command: "staticcheck", + Args: []string{"-f", "text", "./..."}, + Processor: &processor, <=== will be implemented later } - // run the analyzer err := a.Run() if err != nil { log.Fatalln(err) } +} +``` - // process the output from staticcheck using the stdout stream - report, err := a.Processor.Process(a.Stdout()) +### Using processors + +A processor is used for converting the result returned by the custom analyzer into a DeepSource-compatible report. The processor must implement `Process()`. + +Let's use the built-in `RegexProcessor`. The pattern used by `RegexProcessor` should have the following groups: + +- `filename` +- `line` +- `column` +- `message` +- `issue_code` + +`RegexProcessor` uses these named groups to populate issues. + +```go +package main + +import ( + "fmt" + "log" + + "github.com/deepsourcelabs/deepsource-go/analyzers" + "github.com/deepsourcelabs/deepsource-go/analyzers/processors" +) + +func main() { + processor := processors.RegexProcessor{ + Pattern: `(?P.+):(?P\d+):(?P\d+): (?P.+)\((?P\w+)\)`, + } + + a := analyzers.CLIRunner{ + Name: "staticcheck", + Command: "staticcheck", + Args: []string{"-f", "text", "./..."}, + Processor: &processor, + } + + err := a.Run() if err != nil { log.Fatalln(err) } - // save report to a JSON file - err = utils.SaveReport(report, "issues.json", "json") + report, err := a.Processor.Process(a.Stdout()) if err != nil { log.Fatalln(err) } } ``` -## Implementing our custom processor +### Saving the report -A processor is used for converting the result returned by the custom analyzer into a DeepSource-compatible report. The processor must implement `Process()`. +For persisting the report fetched from our processor, we can use `SaveReport`. -If the analyzer's output format is common in nature (unix-style, etc.), the SDK provides pre-built processors for usage. +> **Note**: +> +> `SaveReport` requires `TOOLBOX_PATH` to be set in the environment variables. -Since `staticcheck`'s output format is not common in nature, we need to implement the processor for our `staticcheck` analyzer. +The report is then saved to `$TOOLBOX_PATH/analysis_report.json`. ```go -type StaticCheckProcessor struct{} - -// StaticCheck processor returns a DeepSource-compatible analysis report from staticcheck's results. 
-func (s *StaticCheckProcessor) Process(buf bytes.Buffer) (types.AnalysisReport, error) { - var issues []types.Issue - - // trim newline from buffer output - lines := strings.Split(buf.String(), "\n") - - for _, line := range lines { - // trim spaces - line = strings.TrimSpace(line) - if line == "" { - break - } - - // compile regular expression for parsing unix format - - // group descriptions: - // 0: complete string - // 1: path - // 2: line number - // 3: column number - // 4: message - exp, err := regexp.Compile("(.+):(.):(.): (.+)") - if err != nil { - return types.AnalysisReport{}, err - } - - // get groups - groups := exp.FindAllStringSubmatch(strings.TrimSuffix(line, "\n"), -1) - if len(groups) == 0 { - return types.AnalysisReport{}, errors.New("failed to parse output string") - } - - // convert line and column numbers to int - line, err := strconv.Atoi(groups[0][2]) - if err != nil { - return types.AnalysisReport{}, err - } - - col, err := strconv.Atoi(groups[0][3]) - if err != nil { - return types.AnalysisReport{}, err - } - - // compile regular expression for parsing staticcheck message - - // group descriptions: - // 0: complete string - // 1: partial message string - // 2: issue code - // 3: parentheses - messageExp, err := regexp.Compile("(.+)[(](.+)(.+)") - if err != nil { - return types.AnalysisReport{}, err - } - messageGroups := messageExp.FindAllStringSubmatch(groups[0][4], -1) - if len(messageGroups) == 0 { - return types.AnalysisReport{}, errors.New("failed to parse message") - } - - // populate issue - issue := types.Issue{ - IssueCode: messageGroups[0][2], - IssueText: groups[0][4], - Location: types.Location{ - Path: groups[0][1], - Position: types.Position{ - Begin: types.Coordinate{ - Line: line, - Column: col, - }, - }, - }, - } - - issues = append(issues, issue) - } - // populate report - report := types.AnalysisReport{ - Issues: issues, - } + (previous code) - // return report - return report, nil -} + err = a.SaveReport(report) + if err != nil { + log.Fatalln(err) + } ``` ## Running our analyzer @@ -178,7 +145,7 @@ This is helpful for developers who wish to define custom issues for their analyz For example, we have `issues.toml` as the file containing details for all issues: ```toml -[[issue]] +[[issues]] Code = "SA4017" Text = "Sprint is a pure function but its return value is ignored" @@ -188,19 +155,25 @@ Description = """ """ ``` -`GenerateTOML` reads `issues.toml`, and generates TOML files for each issue, where the filename is the issue code: +> **Note**: +> +> `GenerateTOML` requires `REPO_ROOT` to be set in the environment variables. + +`GenerateTOML` reads `$REPO_ROOT/.deepsource/analyzers/issues.toml`, and generates TOML files for each issue, where the filename is the issue code. + +The TOML files are generated at `$REPO_ROOT/.deepsource/analyzers/issues/.toml`. 
```go // previous code // generate TOML files for each issue from a parent TOML file - err = a.GenerateTOML("issues.toml", "toml") + err = GenerateTOML() if err != nil { log.Fatalln(err) } ``` -On inspecting `toml/SA4017.toml`, we can see the following contents: +On inspecting `$REPO_ROOT/.deepsource/analyzers/issues/SA4017.toml`, we can see the following contents: ```toml Code = "SA4017" From adfb69daf52772b2995411a8151380f07f8e882c Mon Sep 17 00:00:00 2001 From: burntcarrot Date: Tue, 14 Jun 2022 14:52:56 +0530 Subject: [PATCH 14/24] docs(*): add guide for writing CSS analyzer * guide for writing CSS analyzer * add issueProcessor --- README.md | 1 + analyzers/processors/regex.go | 12 ++- guides/css-analyzer.md | 164 ++++++++++++++++++++++++++++++++++ guides/writing-analyzers.md | 69 +++++++++++--- 4 files changed, 233 insertions(+), 13 deletions(-) create mode 100644 guides/css-analyzer.md diff --git a/README.md b/README.md index cb281f2..b7652d9 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,7 @@ The Go SDK makes it easier for developers to integrate an existing analyzer with Here are some extensive guides on working with the SDK: - [Writing custom analyzers](guides/writing-analyzers.md) +- [Writing a CSS Analyzer](guides/css-analyzer.md) ## Community diff --git a/analyzers/processors/regex.go b/analyzers/processors/regex.go index 49ed085..c716d5e 100644 --- a/analyzers/processors/regex.go +++ b/analyzers/processors/regex.go @@ -10,9 +10,13 @@ import ( "github.com/deepsourcelabs/deepsource-go/analyzers/types" ) +// IssueCodeProcessor is used when an analyzer doesn't support issue codes. IssueCodeProcessor takes in the content of the "issue_code" named group and returns an appropriate issue code. If not implemented, it fallbacks to using the content as the issue code. +type IssueCodeProcessor func(string) string + // RegexProcessor utilizes regular expressions for processing. type RegexProcessor struct { - Pattern string + Pattern string + IssueCodeProcessor IssueCodeProcessor } func (r *RegexProcessor) Process(buf bytes.Buffer) (types.AnalysisReport, error) { @@ -60,7 +64,11 @@ func (r *RegexProcessor) Process(buf bytes.Buffer) (types.AnalysisReport, error) case "message": issue.IssueText = content case "issue_code": - issue.IssueCode = content + if r.IssueCodeProcessor == nil { + issue.IssueCode = content + } else { + issue.IssueCode = r.IssueCodeProcessor(content) + } default: continue } diff --git a/guides/css-analyzer.md b/guides/css-analyzer.md new file mode 100644 index 0000000..910f952 --- /dev/null +++ b/guides/css-analyzer.md @@ -0,0 +1,164 @@ +# Writing a CSS Analyzer + +In this example, we will be writing a custom analyzer for [csslint](https://github.com/CSSLint/csslint). + +## Pre-requisites + +This guide assumes that you are familiar with [writing custom analyzers for DeepSource](writing-analyzers.md). + +## Getting Started + +We will be using csslint's `--format=compact` for getting results. Since the compact format can be easily parsed through regular expressions, let's use the built-in `RegexProcessor`. + +`csslint` doesn't natively support issue codes. Here is an example: + +``` +/home/testdir/file1.css: line 1, col 1, Warning - Rule is empty. (empty-rules) +/home/testdir/file1.css: line 4, col 2, Warning - Expected () but found '"blue" size'. (known-properties) +/home/testdir/file1.css: line 5, col 6, Error - Expected RBRACE at line 5, col 6. 
(errors) +``` + +In order to fulfill our requirements, let's make use of the `IssueCodeProcessor` provided by `RegexProcessor`. + +`IssueCodeProcessor` is used when an analyzer doesn't support issue codes. It takes the content of the `issue_code` named group (from `Pattern`) and returns the issue code. + +> **Note**: +> +> If `IssueCodeProcessor` is not implemented, it fallbacks to using the content as the issue code. + +```go +func issueProcessor(content string) string { + issueMap := map[string]string{ + "empty-rules": "E001", + "errors": "E002", + "known-properties": "K001", + } + + if issueMap[content] == "" { + return "U001" + } + + return issueMap[content] +} +``` + +Here is the complete code: + +```go +package main + +import ( + "fmt" + "log" + + "github.com/deepsourcelabs/deepsource-go/analyzers" + "github.com/deepsourcelabs/deepsource-go/analyzers/processors" +) + +func main() { + rp := processors.RegexProcessor{ + Pattern: `(?P.+): line (?P\d+), col (?P\d+), (?P.+) \((?P.+)\)`, + IssueCodeProcessor: issueProcessor, + } + + a := analyzers.CLIRunner{ + Name: "csslint", + Command: "csslint", + Args: []string{"--format=compact", "."}, + Processor: &rp, + } + + err := a.Run() + if err != nil { + log.Fatalln(err) + } + + report, err := a.Processor.Process(a.Stdout()) + if err != nil { + log.Fatalln(err) + } + + err = a.SaveReport(report) + if err != nil { + log.Fatalln(err) + } +} + +func issueProcessor(content string) string { + issueMap := map[string]string{ + "empty-rules": "E001", + "errors": "E002", + "known-properties": "K001", + } + + if issueMap[content] == "" { + return "U001" + } + + return issueMap[content] +} +``` + +## Running our analyzer + +Let's run the analyzer. We must see the report saved as a JSON file (under `$TOOLBOX_PATH/analysis_report.json`): + +```json +{ + "issues": [ + { + "issue_code": "E001", + "issue_text": "Warning - Rule is empty.", + "location": { + "path": "/home/testdir/file1.css", + "position": { + "begin": { + "line": 1, + "column": 1 + }, + "end": { + "line": 0, + "column": 0 + } + } + } + }, + { + "issue_code": "K001", + "issue_text": "Warning - Expected (\u003ccolor\u003e) but found '\"blue\" size'.", + "location": { + "path": "/home/testdir/file1.css", + "position": { + "begin": { + "line": 4, + "column": 2 + }, + "end": { + "line": 0, + "column": 0 + } + } + } + }, + { + "issue_code": "E002", + "issue_text": "Error - Expected RBRACE at line 5, col 6.", + "location": { + "path": "/home/testdir/file1.css", + "position": { + "begin": { + "line": 5, + "column": 6 + }, + "end": { + "line": 0, + "column": 0 + } + } + } + } + ], + "errors": null, + "extra_data": null +} +``` diff --git a/guides/writing-analyzers.md b/guides/writing-analyzers.md index 9247c6c..906ea29 100644 --- a/guides/writing-analyzers.md +++ b/guides/writing-analyzers.md @@ -121,19 +121,66 @@ The report is then saved to `$TOOLBOX_PATH/analysis_report.json`. ## Running our analyzer -Wow! We just implemented our own custom analyzer! 
- -On running the analyzer, we must see the report saved as a JSON file: +On running the analyzer, we must see the report saved as a JSON file (under `$TOOLBOX_PATH/analysis_report.json`): ```json -[ - { - "code": "SA4017", - "text": "Sprint is a pure function but its return value is ignored", - "short_desc": "Sprint is a pure function but its return value is ignored", - "desc": "/home/aadhav/analyzer-go-sdk/playground/sa4017.md" - } -] +{ + "issues": [ + { + "issue_code": "U1000", + "issue_text": "func trigger is unused ", + "location": { + "path": "analyzers/testdata/src/staticcheck/staticcheck.go", + "position": { + "begin": { + "line": 5, + "column": 6 + }, + "end": { + "line": 0, + "column": 0 + } + } + } + }, + { + "issue_code": "SA4017", + "issue_text": "Sprint is a pure function but its return value is ignored ", + "location": { + "path": "analyzers/testdata/src/staticcheck/staticcheck.go", + "position": { + "begin": { + "line": 6, + "column": 2 + }, + "end": { + "line": 0, + "column": 0 + } + } + } + }, + { + "issue_code": "S1039", + "issue_text": "unnecessary use of fmt.Sprint ", + "location": { + "path": "analyzers/testdata/src/staticcheck/staticcheck.go", + "position": { + "begin": { + "line": 6, + "column": 2 + }, + "end": { + "line": 0, + "column": 0 + } + } + } + } + ], + "errors": null, + "extra_data": null +} ``` ## Generating TOML files for issues From a101b861121d3a6755b4713f0e78ffac40c2df41 Mon Sep 17 00:00:00 2001 From: burntcarrot Date: Tue, 14 Jun 2022 16:47:51 +0530 Subject: [PATCH 15/24] tests(analysistest): add integration test for CSS * add integration test for CSS using csslint * cleanup tests --- analyzers/analysistest/analysistest.go | 3 + analyzers/sdk_test.go | 201 +++++++++------------ analyzers/testdata/src/csslint/csslint.css | 6 + 3 files changed, 90 insertions(+), 120 deletions(-) create mode 100644 analyzers/testdata/src/csslint/csslint.css diff --git a/analyzers/analysistest/analysistest.go b/analyzers/analysistest/analysistest.go index 2f66f19..d8002c8 100644 --- a/analyzers/analysistest/analysistest.go +++ b/analyzers/analysistest/analysistest.go @@ -11,6 +11,7 @@ import ( "github.com/deepsourcelabs/deepsource-go/analyzers/types" sitter "github.com/smacker/go-tree-sitter" + "github.com/smacker/go-tree-sitter/css" "github.com/smacker/go-tree-sitter/golang" ) @@ -113,6 +114,8 @@ func getLanguage(filename string) (*sitter.Language, error) { switch extension { case ".go": return golang.GetLanguage(), nil + case ".css": + return css.GetLanguage(), nil default: return nil, errors.New("language not supported") } diff --git a/analyzers/sdk_test.go b/analyzers/sdk_test.go index 7e89ace..4848f90 100644 --- a/analyzers/sdk_test.go +++ b/analyzers/sdk_test.go @@ -1,112 +1,20 @@ package analyzers import ( - "bytes" "encoding/json" "errors" "os" "path" "reflect" - "regexp" - "strconv" - "strings" "testing" "github.com/BurntSushi/toml" "github.com/deepsourcelabs/deepsource-go/analyzers/analysistest" + "github.com/deepsourcelabs/deepsource-go/analyzers/processors" "github.com/deepsourcelabs/deepsource-go/analyzers/types" "github.com/deepsourcelabs/deepsource-go/analyzers/utils" ) -type StaticCheckProcessor struct{} - -// StaticCheck processor returns a DeepSource-compatible analysis report from staticcheck's results. 
-func (*StaticCheckProcessor) Process(buf bytes.Buffer) (types.AnalysisReport, error) { - var issues []types.Issue - - // trim newline from buffer output - lines := strings.Split(buf.String(), "\n") - - for _, line := range lines { - // trim spaces - line = strings.TrimSpace(line) - if line == "" { - break - } - - // compile regular expression for parsing unix format - - // group descriptions: - // 0: complete string - // 1: path - // 2: line number - // 3: column number - // 4: message - exp, err := regexp.Compile("(.+):(.):(.): (.+)") - if err != nil { - return types.AnalysisReport{}, err - } - - // get groups - groups := exp.FindAllStringSubmatch(strings.TrimSuffix(line, "\n"), -1) - if len(groups) == 0 { - return types.AnalysisReport{}, errors.New("failed to parse output string") - } - - // convert line and column numbers to int - line, err := strconv.Atoi(groups[0][2]) - if err != nil { - return types.AnalysisReport{}, err - } - - col, err := strconv.Atoi(groups[0][3]) - if err != nil { - return types.AnalysisReport{}, err - } - - // compile regular expression for parsing staticcheck message - - // group descriptions: - // 0: complete string - // 1: partial message string - // 2: issue code - // 3: parentheses - messageExp, err := regexp.Compile("(.+)[(](.+)(.+)") - if err != nil { - return types.AnalysisReport{}, err - } - messageGroups := messageExp.FindAllStringSubmatch(groups[0][4], -1) - if len(messageGroups) == 0 { - return types.AnalysisReport{}, errors.New("failed to parse message") - } - - // populate issue - issue := types.Issue{ - IssueCode: messageGroups[0][2], - IssueText: groups[0][4], - Location: types.Location{ - Path: groups[0][1], - Position: types.Position{ - Begin: types.Coordinate{ - Line: line, - Column: col, - }, - }, - }, - } - - issues = append(issues, issue) - } - - // populate report - report := types.AnalysisReport{ - Issues: issues, - } - - // return report - return report, nil -} - func TestAnalyzer(t *testing.T) { t.Run("Run staticcheck as DeepSource Analyzer", func(t *testing.T) { // set environment variables @@ -114,56 +22,109 @@ func TestAnalyzer(t *testing.T) { t.Setenv("TOOLBOX_PATH", tempDir) t.Setenv("REPO_ROOT", tempDir) + rp := processors.RegexProcessor{ + Pattern: `(?P.+):(?P\d+):(?P\d+): (?P.+)\((?P\w+)\)`, + } + a := CLIRunner{ Name: "staticcheck", Command: "staticcheck", Args: []string{"-f", "text", "./testdata/src/staticcheck/..."}, - Processor: &StaticCheckProcessor{}, + Processor: &rp, } - err := a.Run() + err := testAnalyzer(a, tempDir, "testdata/src/staticcheck/staticcheck.go") if err != nil { t.Fatal(err) } + }) - processedReport, err := a.Processor.Process(a.Stdout()) - if err != nil { - t.Fatal(err) - } + t.Run("Run csslint as DeepSource Analyzer", func(t *testing.T) { + // set environment variables + tempDir := t.TempDir() + t.Setenv("TOOLBOX_PATH", tempDir) + t.Setenv("REPO_ROOT", tempDir) - // save report - err = a.SaveReport(processedReport) - if err != nil { - t.Fatal(err) - } + issueProcessor := func(content string) string { + issueMap := map[string]string{ + "empty-rules": "E001", + "errors": "E002", + "known-properties": "K001", + } - // read the generated report - generatedFile := path.Join(tempDir, "analysis_report.json") - reportContent, err := os.ReadFile(generatedFile) - if err != nil { - t.Fatal(err) + if issueMap[content] == "" { + return "U001" + } + + return issueMap[content] } - var report types.AnalysisReport - err = json.Unmarshal(reportContent, &report) - if err != nil { - t.Fatal(err) + rp := 
processors.RegexProcessor{ + Pattern: `(?P.+): line (?P\d+), col (?P\d+), (?P.+) \((?P.+)\)`, + IssueCodeProcessor: issueProcessor, } - // do a verification check for the generated report - err = analysistest.Verify(report, "testdata/src/staticcheck/staticcheck.go") - if err != nil { - t.Fatal(err) + a := CLIRunner{ + Name: "csslint", + Command: "csslint", + Args: []string{"--format=compact", "."}, + Processor: &rp, } - // cleanup after test - err = os.Remove(generatedFile) + err := testAnalyzer(a, tempDir, "testdata/src/csslint/csslint.css") if err != nil { t.Fatal(err) } + }) +} - // test TOML generation +func testAnalyzer(a CLIRunner, tempDir string, triggerFilename string) error { + err := a.Run() + if err != nil { + return err + } + + processedReport, err := a.Processor.Process(a.Stdout()) + if err != nil { + return err + } + + // save report + err = a.SaveReport(processedReport) + if err != nil { + return err + } + + // read the generated report + generatedFile := path.Join(tempDir, "analysis_report.json") + reportContent, err := os.ReadFile(generatedFile) + if err != nil { + return err + } + + var report types.AnalysisReport + err = json.Unmarshal(reportContent, &report) + if err != nil { + return err + } + + // do a verification check for the generated report + err = analysistest.Verify(report, triggerFilename) + if err != nil { + return err + } + + // cleanup after test + err = os.Remove(generatedFile) + if err != nil { + return err + } + + return nil +} +func TestUtils(t *testing.T) { + t.Run("test TOML generation", func(t *testing.T) { // fetch parsed issues issues, err := utils.ParseIssues("testdata/issues.toml") if err != nil { diff --git a/analyzers/testdata/src/csslint/csslint.css b/analyzers/testdata/src/csslint/csslint.css new file mode 100644 index 0000000..be8731b --- /dev/null +++ b/analyzers/testdata/src/csslint/csslint.css @@ -0,0 +1,6 @@ +.button{} /* raise: E001 */ + +.h1{ + color: "blue" /* raise: E002 */ + size: "unknown" /* raise: K001 */ +} From bb2135c7c6059dec5f42a562405fedd2b4d4e3ca Mon Sep 17 00:00:00 2001 From: burntcarrot Date: Tue, 14 Jun 2022 16:52:53 +0530 Subject: [PATCH 16/24] chore: fix DeepSource issues --- analyzers/sdk_test.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/analyzers/sdk_test.go b/analyzers/sdk_test.go index 4848f90..9edc35a 100644 --- a/analyzers/sdk_test.go +++ b/analyzers/sdk_test.go @@ -26,14 +26,14 @@ func TestAnalyzer(t *testing.T) { Pattern: `(?P.+):(?P\d+):(?P\d+): (?P.+)\((?P\w+)\)`, } - a := CLIRunner{ + a := &CLIRunner{ Name: "staticcheck", Command: "staticcheck", Args: []string{"-f", "text", "./testdata/src/staticcheck/..."}, Processor: &rp, } - err := testAnalyzer(a, tempDir, "testdata/src/staticcheck/staticcheck.go") + err := testRunner(a, tempDir, "testdata/src/staticcheck/staticcheck.go") if err != nil { t.Fatal(err) } @@ -64,21 +64,21 @@ func TestAnalyzer(t *testing.T) { IssueCodeProcessor: issueProcessor, } - a := CLIRunner{ + a := &CLIRunner{ Name: "csslint", Command: "csslint", Args: []string{"--format=compact", "."}, Processor: &rp, } - err := testAnalyzer(a, tempDir, "testdata/src/csslint/csslint.css") + err := testRunner(a, tempDir, "testdata/src/csslint/csslint.css") if err != nil { t.Fatal(err) } }) } -func testAnalyzer(a CLIRunner, tempDir string, triggerFilename string) error { +func testRunner(a *CLIRunner, tempDir string, triggerFilename string) error { err := a.Run() if err != nil { return err From 180769b354b5180f6b7bd3b8b3bf9903e9783d1f Mon Sep 17 00:00:00 2001 From: 
burntcarrot Date: Wed, 15 Jun 2022 17:37:53 +0530 Subject: [PATCH 17/24] refactor(*): refactor tests and add guides * refactor tests * add guide for testing analyzers --- README.md | 2 +- analyzers/analysistest/analysistest.go | 150 +++++++++++++------ analyzers/{utils/utils.go => build/build.go} | 6 +- analyzers/processors/regex.go | 10 +- analyzers/sdk_test.go | 95 +++++------- guides/css-analyzer.md | 12 +- guides/testing-analyzers.md | 113 ++++++++++++++ 7 files changed, 273 insertions(+), 115 deletions(-) rename analyzers/{utils/utils.go => build/build.go} (96%) create mode 100644 guides/testing-analyzers.md diff --git a/README.md b/README.md index b7652d9..aa28eff 100644 --- a/README.md +++ b/README.md @@ -29,8 +29,8 @@ The Go SDK makes it easier for developers to integrate an existing analyzer with Here are some extensive guides on working with the SDK: - [Writing custom analyzers](guides/writing-analyzers.md) +- [Testing Analyzers](guides/testing-analyzers.md) - [Writing a CSS Analyzer](guides/css-analyzer.md) - ## Community Interested in DeepSource and want to chat with the community? Feel free to join our [Discord server](http://deepsource.io/discord). diff --git a/analyzers/analysistest/analysistest.go b/analyzers/analysistest/analysistest.go index d8002c8..d09a6ec 100644 --- a/analyzers/analysistest/analysistest.go +++ b/analyzers/analysistest/analysistest.go @@ -2,8 +2,11 @@ package analysistest import ( "context" + "encoding/json" "errors" + "io/fs" "os" + "path" "path/filepath" "regexp" "sort" @@ -21,71 +24,130 @@ type ParsedIssue struct { Line int } -// Verify compares the generated report and parsed issues using tree-sitter. -func Verify(report types.AnalysisReport, filename string) error { - parser := sitter.NewParser() - - // get language - lang, err := getLanguage(filename) +func Run(directory string) error { + // read the generated report from TOOLBOX_PATH + toolboxPath := os.Getenv("TOOLBOX_PATH") + generatedFile := path.Join(toolboxPath, "analysis_report.json") + reportContent, err := os.ReadFile(generatedFile) if err != nil { return err } - parser.SetLanguage(lang) - // read report - content, err := os.ReadFile(filename) + var report types.AnalysisReport + err = json.Unmarshal(reportContent, &report) if err != nil { return err } - // generate tree - ctx := context.Background() - tree, err := parser.ParseCtx(ctx, nil, content) + // do a verification check for the generated report + err = verifyReport(report, directory) if err != nil { return err } - // create a query for fetching comments - queryStr := "(comment) @comment" - query, err := sitter.NewQuery([]byte(queryStr), lang) + // cleanup after test + err = os.Remove(generatedFile) if err != nil { return err } - // execute query on root node - qc := sitter.NewQueryCursor() - n := tree.RootNode() - qc.Exec(query, n) - defer qc.Close() + return nil +} + +// getFilenames returns the filenames for a directory. +func getFilenames(directory string) ([]string, error) { + var files []string + filepath.Walk(directory, func(path string, info fs.FileInfo, err error) error { + if err != nil { + return err + } + + // if not a directory, append to files + if !info.IsDir() { + filename := filepath.Join(directory, info.Name()) + files = append(files, filename) + } + + return nil + }) + return files, nil +} + +// Verify compares the generated report and parsed issues using tree-sitter. 
+func verifyReport(report types.AnalysisReport, directory string) error { var parsedIssues []ParsedIssue - // iterate over matches - for { - m, ok := qc.NextMatch() - if !ok { - break + // get filenames + files, err := getFilenames(directory) + if err != nil { + return err + } + + parser := sitter.NewParser() + + // walk through each file and get issues + for _, filename := range files { + // get language + lang, err := getLanguage(filename) + if err != nil { + return err + } + parser.SetLanguage(lang) + + // read report + content, err := os.ReadFile(filename) + if err != nil { + return err + } + + // generate tree + ctx := context.Background() + tree, err := parser.ParseCtx(ctx, nil, content) + if err != nil { + return err } - for _, c := range m.Captures { - // get node content - node := c.Node - nodeContent := node.Content(content) - - // check if the comment contains raise annotation - if strings.Contains(nodeContent, "raise") { - // find match using expression - exp := regexp.MustCompile(`.+ raise: `) - submatches := exp.FindStringSubmatch(nodeContent) - - if len(submatches) != 0 { - substrings := exp.Split(nodeContent, -1) - if len(substrings) > 1 { - issueCodes := strings.Split(substrings[1], ",") - // add issue to parsedIssues - for _, issueCode := range issueCodes { - parsedIssue := ParsedIssue{IssueCode: strings.TrimSpace(issueCode), Line: int(node.StartPoint().Row) + 1} - parsedIssues = append(parsedIssues, parsedIssue) + // create a query for fetching comments + queryStr := "(comment) @comment" + query, err := sitter.NewQuery([]byte(queryStr), lang) + if err != nil { + return err + } + + // execute query on root node + qc := sitter.NewQueryCursor() + n := tree.RootNode() + qc.Exec(query, n) + defer qc.Close() + + // iterate over matches + for { + m, ok := qc.NextMatch() + if !ok { + break + } + + for _, c := range m.Captures { + // get node content + node := c.Node + nodeContent := node.Content(content) + + // check if the comment contains raise annotation + if strings.Contains(nodeContent, "raise") { + // find match using expression + exp := regexp.MustCompile(`.+ raise: `) + submatches := exp.FindStringSubmatch(nodeContent) + + if len(submatches) != 0 { + substrings := exp.Split(nodeContent, -1) + if len(substrings) > 1 { + issueCodes := strings.Split(substrings[1], ",") + // add issue to parsedIssues + for _, issueCode := range issueCodes { + parsedIssue := ParsedIssue{IssueCode: strings.TrimSpace(issueCode), Line: int(node.StartPoint().Row) + 1} + parsedIssues = append(parsedIssues, parsedIssue) + } } } } diff --git a/analyzers/utils/utils.go b/analyzers/build/build.go similarity index 96% rename from analyzers/utils/utils.go rename to analyzers/build/build.go index 0899562..934973c 100644 --- a/analyzers/utils/utils.go +++ b/analyzers/build/build.go @@ -1,4 +1,4 @@ -package utils +package build import ( "bytes" @@ -78,7 +78,7 @@ func ParseIssues(filename string) ([]IssueMeta, error) { } for _, issue := range issues { - // parse markdown content + // parse and sanitize markdown content desc, err := readMarkdown(issue.Description) if err != nil { return nil, err @@ -96,7 +96,7 @@ func ParseIssues(filename string) ([]IssueMeta, error) { return parsedIssues, nil } -// readMarkdown is a helper utility used for parsing markdown content. +// readMarkdown is a helper utility used for parsing and sanitizing markdown content. 
func readMarkdown(content string) (string, error) { // use the Github-flavored Markdown extension md := goldmark.New( diff --git a/analyzers/processors/regex.go b/analyzers/processors/regex.go index c716d5e..cdda20f 100644 --- a/analyzers/processors/regex.go +++ b/analyzers/processors/regex.go @@ -10,13 +10,13 @@ import ( "github.com/deepsourcelabs/deepsource-go/analyzers/types" ) -// IssueCodeProcessor is used when an analyzer doesn't support issue codes. IssueCodeProcessor takes in the content of the "issue_code" named group and returns an appropriate issue code. If not implemented, it fallbacks to using the content as the issue code. -type IssueCodeProcessor func(string) string +// IssueCodeGenerator is used when an analyzer doesn't support issue codes. IssueCodeGenerator reads the content of the "issue_code" named group and returns an appropriate issue code. If not implemented, it fallbacks to using the content as the issue code. +type IssueCodeGenerator func(string) string // RegexProcessor utilizes regular expressions for processing. type RegexProcessor struct { Pattern string - IssueCodeProcessor IssueCodeProcessor + IssueCodeGenerator IssueCodeGenerator } func (r *RegexProcessor) Process(buf bytes.Buffer) (types.AnalysisReport, error) { @@ -64,10 +64,10 @@ func (r *RegexProcessor) Process(buf bytes.Buffer) (types.AnalysisReport, error) case "message": issue.IssueText = content case "issue_code": - if r.IssueCodeProcessor == nil { + if r.IssueCodeGenerator == nil { issue.IssueCode = content } else { - issue.IssueCode = r.IssueCodeProcessor(content) + issue.IssueCode = r.IssueCodeGenerator(content) } default: continue diff --git a/analyzers/sdk_test.go b/analyzers/sdk_test.go index 9edc35a..a9ae87c 100644 --- a/analyzers/sdk_test.go +++ b/analyzers/sdk_test.go @@ -1,7 +1,6 @@ package analyzers import ( - "encoding/json" "errors" "os" "path" @@ -10,9 +9,8 @@ import ( "github.com/BurntSushi/toml" "github.com/deepsourcelabs/deepsource-go/analyzers/analysistest" + "github.com/deepsourcelabs/deepsource-go/analyzers/build" "github.com/deepsourcelabs/deepsource-go/analyzers/processors" - "github.com/deepsourcelabs/deepsource-go/analyzers/types" - "github.com/deepsourcelabs/deepsource-go/analyzers/utils" ) func TestAnalyzer(t *testing.T) { @@ -33,7 +31,22 @@ func TestAnalyzer(t *testing.T) { Processor: &rp, } - err := testRunner(a, tempDir, "testdata/src/staticcheck/staticcheck.go") + err := a.Run() + if err != nil { + t.Fatal(err) + } + + report, err := a.Processor.Process(a.Stdout()) + if err != nil { + t.Fatal(err) + } + + err = a.SaveReport(report) + if err != nil { + t.Fatal(err) + } + + err = analysistest.Run("./testdata/src/staticcheck") if err != nil { t.Fatal(err) } @@ -45,7 +58,7 @@ func TestAnalyzer(t *testing.T) { t.Setenv("TOOLBOX_PATH", tempDir) t.Setenv("REPO_ROOT", tempDir) - issueProcessor := func(content string) string { + issueCodeGenerator := func(content string) string { issueMap := map[string]string{ "empty-rules": "E001", "errors": "E002", @@ -61,7 +74,7 @@ func TestAnalyzer(t *testing.T) { rp := processors.RegexProcessor{ Pattern: `(?P.+): line (?P\d+), col (?P\d+), (?P.+) \((?P.+)\)`, - IssueCodeProcessor: issueProcessor, + IssueCodeGenerator: issueCodeGenerator, } a := &CLIRunner{ @@ -71,68 +84,38 @@ func TestAnalyzer(t *testing.T) { Processor: &rp, } - err := testRunner(a, tempDir, "testdata/src/csslint/csslint.css") + err := a.Run() if err != nil { t.Fatal(err) } - }) -} -func testRunner(a *CLIRunner, tempDir string, triggerFilename string) error { - err := 
a.Run() - if err != nil { - return err - } - - processedReport, err := a.Processor.Process(a.Stdout()) - if err != nil { - return err - } - - // save report - err = a.SaveReport(processedReport) - if err != nil { - return err - } - - // read the generated report - generatedFile := path.Join(tempDir, "analysis_report.json") - reportContent, err := os.ReadFile(generatedFile) - if err != nil { - return err - } - - var report types.AnalysisReport - err = json.Unmarshal(reportContent, &report) - if err != nil { - return err - } - - // do a verification check for the generated report - err = analysistest.Verify(report, triggerFilename) - if err != nil { - return err - } - - // cleanup after test - err = os.Remove(generatedFile) - if err != nil { - return err - } - - return nil + report, err := a.Processor.Process(a.Stdout()) + if err != nil { + t.Fatal(err) + } + + err = a.SaveReport(report) + if err != nil { + t.Fatal(err) + } + + err = analysistest.Run("./testdata/src/csslint") + if err != nil { + t.Fatal(err) + } + }) } func TestUtils(t *testing.T) { t.Run("test TOML generation", func(t *testing.T) { // fetch parsed issues - issues, err := utils.ParseIssues("testdata/issues.toml") + issues, err := build.ParseIssues("testdata/issues.toml") if err != nil { t.Fatal(err) } // generate TOML files - err = utils.BuildTOML(issues, "testdata/toml") + err = build.BuildTOML(issues, "testdata/toml") if err != nil { t.Fatal(err) } @@ -144,8 +127,8 @@ func TestUtils(t *testing.T) { } // parse issues from each TOML file - var parsedIssue utils.IssueMeta - var parsedIssues []utils.IssueMeta + var parsedIssue build.IssueMeta + var parsedIssues []build.IssueMeta for _, f := range files { filePath := path.Join("testdata/toml", f.Name()) diff --git a/guides/css-analyzer.md b/guides/css-analyzer.md index 910f952..d5d4872 100644 --- a/guides/css-analyzer.md +++ b/guides/css-analyzer.md @@ -18,16 +18,16 @@ We will be using csslint's `--format=compact` for getting results. Since the com /home/testdir/file1.css: line 5, col 6, Error - Expected RBRACE at line 5, col 6. (errors) ``` -In order to fulfill our requirements, let's make use of the `IssueCodeProcessor` provided by `RegexProcessor`. +In order to fulfill our requirements, let's make use of the `IssueCodeGenerator` provided by `RegexProcessor`. -`IssueCodeProcessor` is used when an analyzer doesn't support issue codes. It takes the content of the `issue_code` named group (from `Pattern`) and returns the issue code. +`IssueCodeGenerator` is used when an analyzer doesn't support issue codes. It takes the content of the `issue_code` named group (from `Pattern`) and returns the issue code. > **Note**: > -> If `IssueCodeProcessor` is not implemented, it fallbacks to using the content as the issue code. +> If `IssueCodeGenerator` is not implemented, it fallbacks to using the content as the issue code. 
```go -func issueProcessor(content string) string { +func issueCodeGenerator(content string) string { issueMap := map[string]string{ "empty-rules": "E001", "errors": "E002", @@ -58,7 +58,7 @@ import ( func main() { rp := processors.RegexProcessor{ Pattern: `(?P.+): line (?P\d+), col (?P\d+), (?P.+) \((?P.+)\)`, - IssueCodeProcessor: issueProcessor, + IssueCodeGenerator: issueCodeGenerator, } a := analyzers.CLIRunner{ @@ -84,7 +84,7 @@ func main() { } } -func issueProcessor(content string) string { +func issueCodeGenerator(content string) string { issueMap := map[string]string{ "empty-rules": "E001", "errors": "E002", diff --git a/guides/testing-analyzers.md b/guides/testing-analyzers.md new file mode 100644 index 0000000..c2d0a83 --- /dev/null +++ b/guides/testing-analyzers.md @@ -0,0 +1,113 @@ +# Testing custom analyzers + +In this guide, we will be testing our custom analyzer. + +## Pre-requisites + +This guide assumes that you are familiar with [writing custom analyzers for DeepSource](writing-analyzers.md). + +## Annotations for testing + +The test runner requires a file (or a set of files) to be annotated with an appropriate issue code in the form of a comment, with the format `raise: `. + +These files would then be used by the test runner for validating the reports generated by the analyzer. + +For example, `staticcheck` raises the following codes at these lines (for the code below): +- `U1000` at line 5 +- `SA4017` and `S1039` at line 6 + +```go +package pkg + +import "fmt" + +func trigger() { + fmt.Sprint("trigger") +} +``` + +The file should then be annotated with the following comments: + +```go +package pkg + +import "fmt" + +func trigger() { // raise: U1000 + fmt.Sprint("trigger") // raise: SA4017, S1039 +} +``` + +A single annotation can contain multiple issue codes. However, the annotation should be in the form of a comment, with the format `raise: `. + +## Writing tests for analyzers + +The DeepSource Go SDK provides a easy way to test your analyzers using the built-in test runner. The test runner reads the report generated by the analyzer and validates it with the annotated files. + +> **Note**: +> +> Validation is done by reading the annotated issue codes in the files. These issue codes are matched with the reported issues (`analysis_report.json`) - if the issue codes don't match during validation, the test fails. + +Here is the flow for testing custom analyzers: + +- Run the analyzer (`Run()`) +- Generate the report through the processor (`Processor.Process()`) +- Save the report (`SaveReport()`) +- Use the built-in test runner (`analysistest.Run()`) + +The test runner accepts the directory containing the annotated files as an argument. 
+ +Here is an example of how to use the built-in test runner for writing tests: + +```go +package analyzers + +import ( + "testing" + + "github.com/deepsourcelabs/deepsource-go/analyzers/analysistest" + "github.com/deepsourcelabs/deepsource-go/analyzers/processors" +) + +func TestAnalyzer(t *testing.T) { + t.Run("Run staticcheck as DeepSource Analyzer", func(t *testing.T) { + // set environment variables + tempDir := t.TempDir() + t.Setenv("TOOLBOX_PATH", tempDir) + t.Setenv("REPO_ROOT", tempDir) + + rp := processors.RegexProcessor{ + Pattern: `(?P<filename>.+):(?P<line>\d+):(?P<column>\d+): (?P<message>.+)\((?P<issue_code>\w+)\)`, + } + + a := &CLIRunner{ + Name: "staticcheck", + Command: "staticcheck", + Args: []string{"-f", "text", "./testdata/src/staticcheck/..."}, + Processor: &rp, + } + + err := a.Run() + if err != nil { + t.Fatal(err) + } + + report, err := a.Processor.Process(a.Stdout()) + if err != nil { + t.Fatal(err) + } + + err = a.SaveReport(report) + if err != nil { + t.Fatal(err) + } + + err = analysistest.Run("./testdata/src/staticcheck") + if err != nil { + t.Fatal(err) + } + }) +} +``` From e5a755452840c0f69fe4fdd7604363a5055af69c Mon Sep 17 00:00:00 2001 From: burntcarrot Date: Wed, 15 Jun 2022 17:42:56 +0530 Subject: [PATCH 18/24] docs(guides): fix TOML fields for issues.toml --- guides/writing-analyzers.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/guides/writing-analyzers.md b/guides/writing-analyzers.md index 906ea29..56a07de 100644 --- a/guides/writing-analyzers.md +++ b/guides/writing-analyzers.md @@ -194,9 +194,9 @@ For example, we have `issues.toml` as the file containing details for all issues ```toml [[issues]] -Code = "SA4017" -Text = "Sprint is a pure function but its return value is ignored" -ShortDescription = "Sprint is a pure function but its return value is ignored" +IssueCode = "SA4017" +Category = "bug-risk" +Title = "Sprint is a pure function but its return value is ignored" Description = """ ## Sample """ ``` @@ -223,8 +223,8 @@ The TOML files are generated at `$REPO_ROOT/.deepsource/analyzers/issues/ Date: Wed, 15 Jun 2022 17:52:44 +0530 Subject: [PATCH 19/24] ci: install csslint --- .github/workflows/tests.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 9f7628d..2287af2 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -14,7 +14,13 @@ jobs: go-version: ${{ matrix.go-version }} - name: Checkout code uses: actions/checkout@v3 - - name: Install analyzers + - name: Setup node + uses: actions/setup-node@v3 + with: + node-version: 14 + - name: Install staticcheck run: go install honnef.co/go/tools/cmd/staticcheck@latest + - name: Install csslint + run: npm install -g csslint - name: Run tests run: go test -v ./...
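As a point of reference for the tests above: the report that `SaveReport` writes to `$TOOLBOX_PATH/analysis_report.json` (and that `analysistest.Run` later checks against the `raise:` annotations) is simply the JSON form of `types.AnalysisReport`. The sketch below is illustrative only — the path, message, and line/column values are assumptions, not taken from a real csslint run:

```json
{
  "issues": [
    {
      "issue_code": "E001",
      "issue_text": "Warning - Rule is empty.",
      "location": {
        "path": "testdata/src/csslint/csslint.css",
        "position": {
          "begin": { "line": 1, "column": 1 },
          "end": { "line": 0, "column": 0 }
        }
      }
    }
  ],
  "errors": null,
  "extra_data": null
}
```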
From e9e47240ef02de92d394de4cfa53b7940932f87a Mon Sep 17 00:00:00 2001 From: burntcarrot Date: Wed, 15 Jun 2022 18:18:10 +0530 Subject: [PATCH 20/24] feat(analysistest): add all tree-sitter languages --- analyzers/analysistest/analysistest.go | 67 +++++++++++++++++++++++++- 1 file changed, 65 insertions(+), 2 deletions(-) diff --git a/analyzers/analysistest/analysistest.go b/analyzers/analysistest/analysistest.go index d09a6ec..3e26158 100644 --- a/analyzers/analysistest/analysistest.go +++ b/analyzers/analysistest/analysistest.go @@ -14,8 +14,29 @@ import ( "github.com/deepsourcelabs/deepsource-go/analyzers/types" sitter "github.com/smacker/go-tree-sitter" + "github.com/smacker/go-tree-sitter/bash" + "github.com/smacker/go-tree-sitter/c" + "github.com/smacker/go-tree-sitter/cpp" + "github.com/smacker/go-tree-sitter/csharp" "github.com/smacker/go-tree-sitter/css" + "github.com/smacker/go-tree-sitter/elm" "github.com/smacker/go-tree-sitter/golang" + "github.com/smacker/go-tree-sitter/hcl" + "github.com/smacker/go-tree-sitter/html" + "github.com/smacker/go-tree-sitter/java" + "github.com/smacker/go-tree-sitter/javascript" + "github.com/smacker/go-tree-sitter/lua" + "github.com/smacker/go-tree-sitter/ocaml" + "github.com/smacker/go-tree-sitter/php" + "github.com/smacker/go-tree-sitter/protobuf" + "github.com/smacker/go-tree-sitter/python" + "github.com/smacker/go-tree-sitter/ruby" + "github.com/smacker/go-tree-sitter/rust" + "github.com/smacker/go-tree-sitter/scala" + "github.com/smacker/go-tree-sitter/svelte" + "github.com/smacker/go-tree-sitter/toml" + "github.com/smacker/go-tree-sitter/typescript/typescript" + "github.com/smacker/go-tree-sitter/yaml" ) // ParsedIssue represents an issue parsed using tree-sitter. @@ -174,10 +195,52 @@ func getLanguage(filename string) (*sitter.Language, error) { extension := filepath.Ext(filename) switch extension { - case ".go": - return golang.GetLanguage(), nil + case ".sh": + return bash.GetLanguage(), nil + case ".c": + return c.GetLanguage(), nil + case ".cpp": + return cpp.GetLanguage(), nil + case ".cs": + return csharp.GetLanguage(), nil case ".css": return css.GetLanguage(), nil + case ".elm": + return elm.GetLanguage(), nil + case ".go": + return golang.GetLanguage(), nil + case ".hcl": + return hcl.GetLanguage(), nil + case ".html": + return html.GetLanguage(), nil + case ".java": + return java.GetLanguage(), nil + case ".js": + return javascript.GetLanguage(), nil + case ".lua": + return lua.GetLanguage(), nil + case ".ml": + return ocaml.GetLanguage(), nil + case ".php": + return php.GetLanguage(), nil + case ".pb", ".proto": + return protobuf.GetLanguage(), nil + case ".py": + return python.GetLanguage(), nil + case ".rb": + return ruby.GetLanguage(), nil + case ".rs": + return rust.GetLanguage(), nil + case ".scala": + return scala.GetLanguage(), nil + case ".svelte": + return svelte.GetLanguage(), nil + case ".toml": + return toml.GetLanguage(), nil + case ".ts": + return typescript.GetLanguage(), nil + case ".yaml": + return yaml.GetLanguage(), nil default: return nil, errors.New("language not supported") } From f7d38a1d8b36b415d1882b1beb3a969464f18165 Mon Sep 17 00:00:00 2001 From: Sourya Vatsyayan Date: Wed, 15 Jun 2022 20:12:25 +0530 Subject: [PATCH 21/24] Apply suggestions from code review --- analyzers/build/build.go | 8 ++++---- analyzers/testdata/issues.toml | 16 ++++++++-------- guides/writing-analyzers.md | 20 ++++++++++---------- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/analyzers/build/build.go 
b/analyzers/build/build.go index 934973c..fda328f 100644 --- a/analyzers/build/build.go +++ b/analyzers/build/build.go @@ -68,10 +68,10 @@ func ParseIssues(filename string) ([]IssueMeta, error) { for _, issueTOML := range issuesTOML.Issues { is := IssueMeta{ - IssueCode: issueTOML["IssueCode"].(string), - Category: issueTOML["Category"].(string), - Title: issueTOML["Title"].(string), - Description: issueTOML["Description"].(string), + IssueCode: issueTOML["issue_code"].(string), + Category: issueTOML["category"].(string), + Title: issueTOML["title"].(string), + Description: issueTOML["description"].(string), } issues = append(issues, is) diff --git a/analyzers/testdata/issues.toml b/analyzers/testdata/issues.toml index 2f409b1..df97cdd 100644 --- a/analyzers/testdata/issues.toml +++ b/analyzers/testdata/issues.toml @@ -1,17 +1,17 @@ [[issues]] -IssueCode = "SA4017" -Category = "bug risk" -Title = "Sprint is a pure function but its return value is ignored" -Description = """ +issue_code = "SA4017" +category = "bug risk" +title = "Sprint is a pure function but its return value is ignored" +description = """ ## hello """ [[issues]] -IssueCode = "S1039" -Category = "style" -Title = "unnecessary use of fmt.Sprint" -Description = """ +issue_code = "S1039" +category = "style" +title = "unnecessary use of fmt.Sprint" +description = """ ## Example """ diff --git a/guides/writing-analyzers.md b/guides/writing-analyzers.md index 56a07de..5e7e8a2 100644 --- a/guides/writing-analyzers.md +++ b/guides/writing-analyzers.md @@ -40,7 +40,7 @@ func main() { Name: "staticcheck", Command: "staticcheck", Args: []string{"-f", "text", "./..."}, - Processor: &processor, <=== will be implemented later + Processor: &processor, // <=== will be implemented later } err := a.Run() @@ -194,11 +194,11 @@ For example, we have `issues.toml` as the file containing details for all issues ```toml [[issues]] -IssueCode = "SA4017" -Category = "bug-risk" -Title = "Sprint is a pure function but its return value is ignored" -Description = """ -## Sample +issue_code = "SA4017" +category = "bug-risk" +title = "Sprint is a pure function but its return value is ignored" +description = """ +Pure functions do not change the passed value but return a new value that is meant to be used. This issue is raised when values returned by pure functions are discarded. 
""" ``` @@ -223,8 +223,8 @@ The TOML files are generated at `$REPO_ROOT/.deepsource/analyzers/issues/ Date: Fri, 17 Jun 2022 11:26:24 +0530 Subject: [PATCH 22/24] tests(*): add unit tests --- analyzers/analysistest/analysistest.go | 6 +- analyzers/analysistest/analysistest_test.go | 99 +++++++++ analyzers/build/build.go | 169 ++++++++++------ analyzers/build/build_test.go | 213 ++++++++++++++++++++ analyzers/processors/regex.go | 94 +++++---- analyzers/processors/regex_test.go | 61 ++++++ analyzers/sdk_test.go | 23 ++- analyzers/types/types.go | 39 ++-- go.mod | 1 + go.sum | 2 + 10 files changed, 572 insertions(+), 135 deletions(-) create mode 100644 analyzers/analysistest/analysistest_test.go create mode 100644 analyzers/build/build_test.go create mode 100644 analyzers/processors/regex_test.go diff --git a/analyzers/analysistest/analysistest.go b/analyzers/analysistest/analysistest.go index 3e26158..f829471 100644 --- a/analyzers/analysistest/analysistest.go +++ b/analyzers/analysistest/analysistest.go @@ -45,6 +45,8 @@ type ParsedIssue struct { Line int } +type ParsedIssues []ParsedIssue + func Run(directory string) error { // read the generated report from TOOLBOX_PATH toolboxPath := os.Getenv("TOOLBOX_PATH") @@ -97,7 +99,7 @@ func getFilenames(directory string) ([]string, error) { // Verify compares the generated report and parsed issues using tree-sitter. func verifyReport(report types.AnalysisReport, directory string) error { - var parsedIssues []ParsedIssue + var parsedIssues ParsedIssues // get filenames files, err := getFilenames(directory) @@ -247,7 +249,7 @@ func getLanguage(filename string) (*sitter.Language, error) { } // compareReport is a helper which checks if the parsed issues are identical to the issues present in the report. -func compareReport(parsedIssues []ParsedIssue, report types.AnalysisReport) bool { +func compareReport(parsedIssues ParsedIssues, report types.AnalysisReport) bool { // sort report and parsedIssues by line number sort.Slice(parsedIssues, func(i, j int) bool { return parsedIssues[i].Line < parsedIssues[j].Line diff --git a/analyzers/analysistest/analysistest_test.go b/analyzers/analysistest/analysistest_test.go new file mode 100644 index 0000000..2a497af --- /dev/null +++ b/analyzers/analysistest/analysistest_test.go @@ -0,0 +1,99 @@ +package analysistest + +import ( + "testing" + + "github.com/deepsourcelabs/deepsource-go/analyzers/types" + "github.com/go-test/deep" +) + +type reportFactory struct { + values []reportMeta +} + +// reportMeta contains the metadata for an issue present in the report. +type reportMeta struct { + IssueCode string + Line int +} + +// Generate generates a report based on reportMeta values in reportFactory. 
+func (r *reportFactory) Generate() types.AnalysisReport { + var report types.AnalysisReport + var issues []types.Issue + + for _, rm := range r.values { + issue := types.Issue{ + IssueCode: rm.IssueCode, + } + + issue.Location.Position.Begin.Line = rm.Line + + issues = append(issues, issue) + } + + report.Issues = issues + return report +} + +func TestCompareReport(t *testing.T) { + var factory reportFactory + factory.values = []reportMeta{ + { + IssueCode: "E001", + Line: 1, + }, + { + IssueCode: "E002", + Line: 4, + }, + } + + // mock factory with dissimilar issue codes + var factoryMismatch reportFactory + factoryMismatch.values = []reportMeta{ + { + IssueCode: "E010", + Line: 1, + }, + { + IssueCode: "E002", + Line: 4, + }, + } + + cases := []struct { + description string + issues ParsedIssues + report types.AnalysisReport + expected bool + }{ + {"must return true for identical reports", []ParsedIssue{ + { + IssueCode: "E001", + Line: 1, + }, + { + IssueCode: "E002", + Line: 4, + }, + }, factory.Generate(), true}, + {"must return false for dissimilar reports", []ParsedIssue{ + { + IssueCode: "E001", + Line: 2, + }, + { + IssueCode: "E002", + Line: 4, + }, + }, factoryMismatch.Generate(), false}, + } + + for _, tc := range cases { + actual := compareReport(tc.issues, tc.report) + if diff := deep.Equal(actual, tc.expected); diff != nil { + t.Errorf("description: %s, %s", tc.description, diff) + } + } +} diff --git a/analyzers/build/build.go b/analyzers/build/build.go index 934973c..3a9248e 100644 --- a/analyzers/build/build.go +++ b/analyzers/build/build.go @@ -4,7 +4,7 @@ import ( "bytes" "errors" "fmt" - "io/ioutil" + "io" "os" "path" "sort" @@ -17,12 +17,14 @@ import ( // IssueMeta represents the issue present in a TOML file. type IssueMeta struct { - IssueCode string `toml:"code"` + IssueCode string `toml:"issue_code"` Category string `toml:"category"` Title string `toml:"title"` Description string `toml:"description"` } +type IssuesMeta []IssueMeta + // IssueTOML is used for decoding issues from a TOML file. type IssueTOML struct { Issues []map[string]interface{} @@ -33,9 +35,14 @@ func GenerateTOML() error { // root directory for the repository repoRoot := os.Getenv("REPO_ROOT") - // fetch parsed issues filename := path.Join(repoRoot, ".deepsource/analyzers/issues.toml") - issues, err := ParseIssues(filename) + f, err := os.Open(filename) + if err != nil { + return err + } + + // fetch issues + issues, err := FetchIssues(f) if err != nil { return err } @@ -50,50 +57,117 @@ func GenerateTOML() error { return nil } -// ParseIssues reads a TOML file containing all issues, and returns all issues as []IssueMeta. -func ParseIssues(filename string) ([]IssueMeta, error) { - content, err := ioutil.ReadFile(filename) +// FetchIssues reads a TOML file containing all issues, and returns all issues as IssuesMeta. +func FetchIssues(r io.Reader) (IssuesMeta, error) { + issues, err := readTOML(r) if err != nil { return nil, err } - var issues []IssueMeta - var parsedIssues []IssueMeta + parsedIssues, err := parseIssues(issues) + if err != nil { + return nil, err + } + + // sort issues (based on issue code) before returning + sort.Slice(parsedIssues, func(i, j int) bool { + return parsedIssues[i].IssueCode < parsedIssues[j].IssueCode + }) + + return parsedIssues, nil +} +// BuildTOML uses issues to generate TOML files to a directory. 
+func BuildTOML(issues IssuesMeta, rootDir string) error { + if len(issues) == 0 { + return errors.New("no issues found") + } + + for _, issue := range issues { + // The unique identifier (filename) is based on the issue code. TOML files cannot be generated for issues having an invalid/empty code. + if issue.IssueCode == "" { + return errors.New("invalid issue code. cannot generate toml") + } + + // generate file path based on root directory and filename + filename := fmt.Sprintf("%s.toml", issue.IssueCode) + tomlPath := path.Join(rootDir, filename) + + f, err := os.Create(tomlPath) + if err != nil { + return err + } + + // write to file + err = writeTOML(f, issue) + if err != nil { + return err + } + } + + return nil +} + +// readTOML reads content from a reader and returns issues. +func readTOML(r io.Reader) (IssuesMeta, error) { + // read content from reader + content, err := io.ReadAll(r) + if err != nil { + return nil, err + } + + // unmarshal TOML var issuesTOML IssueTOML err = toml.Unmarshal(content, &issuesTOML) if err != nil { return nil, err } + // generate issues + var issues IssuesMeta for _, issueTOML := range issuesTOML.Issues { + issueCode := "" + category := "" + title := "" + description := "" + + // handle interface conversions + if issueTOML["IssueCode"] != nil { + issueCode = issueTOML["IssueCode"].(string) + } + + if issueTOML["Category"] != nil { + category = issueTOML["Category"].(string) + } + + if issueTOML["Title"] != nil { + title = issueTOML["Title"].(string) + } + + if issueTOML["Description"] != nil { + description = issueTOML["Description"].(string) + } + is := IssueMeta{ - IssueCode: issueTOML["IssueCode"].(string), - Category: issueTOML["Category"].(string), - Title: issueTOML["Title"].(string), - Description: issueTOML["Description"].(string), + IssueCode: issueCode, + Category: category, + Title: title, + Description: description, } issues = append(issues, is) } - for _, issue := range issues { - // parse and sanitize markdown content - desc, err := readMarkdown(issue.Description) - if err != nil { - return nil, err - } + return issues, nil +} - issue.Description = desc - parsedIssues = append(parsedIssues, issue) +// writeTOML writes issue data to the writer. +func writeTOML(w io.Writer, issue IssueMeta) error { + if err := toml.NewEncoder(w).Encode(issue); err != nil { + return err } - // sort issues (based on issue code) before returning - sort.Slice(parsedIssues, func(i, j int) bool { - return parsedIssues[i].IssueCode < parsedIssues[j].IssueCode - }) - - return parsedIssues, nil + return nil } // readMarkdown is a helper utility used for parsing and sanitizing markdown content. @@ -116,43 +190,20 @@ func readMarkdown(content string) (string, error) { return sanitizedBody, nil } -// BuildTOML uses issues to generate TOML files to a directory. -func BuildTOML(issues []IssueMeta, rootDir string) error { - if len(issues) == 0 { - return errors.New("no issues found") - } +// parseIssues returns issues after parsing and sanitizing markdown content. +func parseIssues(issues IssuesMeta) (IssuesMeta, error) { + var parsedIssues IssuesMeta for _, issue := range issues { - // The unique identifier (filename) is based on the issue code. TOML files cannot be generated for issues having an invalid/empty code. - if issue.IssueCode == "" { - return errors.New("invalid issue code. 
cannot generate toml") - } - - // if rootDir doesn't exist, create one - if _, err := os.Stat(rootDir); err != nil { - err := os.Mkdir(rootDir, 0700) - if err != nil { - return err - } - } - - // generate file path based on root directory and filename - filename := fmt.Sprintf("%s.toml", issue.IssueCode) - tomlPath := path.Join(rootDir, filename) - - f, err := os.Create(tomlPath) + // parse and sanitize markdown content + desc, err := readMarkdown(issue.Description) if err != nil { - return err - } - - if err := toml.NewEncoder(f).Encode(issue); err != nil { - return err + return nil, err } - if err := f.Close(); err != nil { - return err - } + issue.Description = desc + parsedIssues = append(parsedIssues, issue) } - return nil + return parsedIssues, nil } diff --git a/analyzers/build/build_test.go b/analyzers/build/build_test.go new file mode 100644 index 0000000..aa8bd2c --- /dev/null +++ b/analyzers/build/build_test.go @@ -0,0 +1,213 @@ +package build + +import ( + "bytes" + "strings" + "testing" + + "github.com/go-test/deep" +) + +func TestReadMarkdown(t *testing.T) { + cases := []struct { + content string + expected string + }{ + {"# Sample", "
<h1>Sample</h1>\n"}, + {"## Sample", "<h2>Sample</h2>\n"}, + {"`Sample`", "<p><code>Sample</code></p>\n"}, + {"```Sample```", "<p><code>Sample</code></p>\n"}, + {"[link](https://example.com)", `<p><a href="https://example.com" rel="nofollow">link</a></p>` + "\n"}, + {"![image](https://sample.org/image.png)", `<p><img src="https://sample.org/image.png" alt="image"/></p>
` + "\n"}, + } + + for _, tc := range cases { + actual, err := readMarkdown(tc.content) + if err != nil { + t.Error(err) + } + + if actual != tc.expected { + t.Errorf("expected: %s, got: %s\n", tc.expected, actual) + } + } +} + +func TestReadTOML(t *testing.T) { + tomlNormal := ` +[[issues]] + +IssueCode = "SA4017" +Category = "bug-risk" +Title = "Sprint is a pure function but its return value is ignored" +Description = """ +## Description +""" + +[[issues]] + +IssueCode = "S1039" +Category = "style" +Title = "unnecessary use of fmt.Sprint" +Description = """ +# Example +""" +` + + expectedTOMLNormal := IssuesMeta{ + { + IssueCode: "SA4017", + Category: "bug-risk", + Title: "Sprint is a pure function but its return value is ignored", + Description: "## Description\n", + }, + { + IssueCode: "S1039", + Category: "style", + Title: "unnecessary use of fmt.Sprint", + Description: "# Example\n", + }, + } + + tomlBlank := `` + + var expectedTOMLBlank IssuesMeta + + tomlMissingDescription := ` +[[issues]] + +IssueCode = "SA4017" +Category = "bug-risk" +Title = "Sprint is a pure function but its return value is ignored" +` + + expectedTOMLMissingDescription := IssuesMeta{ + { + IssueCode: "SA4017", + Category: "bug-risk", + Title: "Sprint is a pure function but its return value is ignored", + }, + } + + cases := []struct { + description string + tomlContent string + expected IssuesMeta + }{ + {"normal TOML content with issues", tomlNormal, expectedTOMLNormal}, + {"blank TOML", tomlBlank, expectedTOMLBlank}, + {"TOML content with missing descriptions", tomlMissingDescription, expectedTOMLMissingDescription}, + } + + for _, tc := range cases { + r := strings.NewReader(tc.tomlContent) + actual, err := readTOML(r) + if err != nil { + t.Error(err) + } + + if diff := deep.Equal(actual, tc.expected); diff != nil { + t.Errorf("description: %s, %s", tc.description, diff) + } + } +} + +func TestWriteTOML(t *testing.T) { + // test buffer for writing TOML content + var testBuffer bytes.Buffer + + expectedTOML := `issue_code = "SA4017" +category = "bug-risk" +title = "Sprint is a pure function but its return value is ignored" +description = "example"` + "\n" + + expectedTOMLMissingDescription := + `issue_code = "SA4017" +category = "bug-risk" +title = "Sprint is a pure function but its return value is ignored" +description = ""` + "\n" + + cases := []struct { + description string + writer bytes.Buffer + issue IssueMeta + expected string + }{ + {"must write to writer", testBuffer, IssueMeta{ + IssueCode: "SA4017", + Category: "bug-risk", + Title: "Sprint is a pure function but its return value is ignored", + Description: "example", + }, expectedTOML}, + {"must write to writer in case fields are missing", testBuffer, IssueMeta{ + IssueCode: "SA4017", + Category: "bug-risk", + Title: "Sprint is a pure function but its return value is ignored", + }, expectedTOMLMissingDescription}, + } + + for _, tc := range cases { + err := writeTOML(&tc.writer, tc.issue) + if err != nil { + t.Error(err) + } + + // read content and reset test buffer + content := tc.writer.String() + defer tc.writer.Reset() + + if diff := deep.Equal(content, tc.expected); diff != nil { + t.Errorf("description: %s, %s", tc.description, diff) + } + } +} + +func TestParseIssues(t *testing.T) { + cases := []struct { + description string + issues IssuesMeta + expected IssuesMeta + }{ + {"must parse markdown", []IssueMeta{ + { + IssueCode: "E001", + Category: "bug-risk", + Title: "Handle error", + Description: "## Example", + }, + }, []IssueMeta{ + { + 
IssueCode: "E001", + Category: "bug-risk", + Title: "Handle error", + Description: "
<h2>Example</h2>\n", + }, + }}, + {"must wrap text in paragraph", []IssueMeta{ + { + IssueCode: "E001", + Category: "bug-risk", + Title: "Handle error", + Description: "Example", + }, + }, []IssueMeta{ + { + IssueCode: "E001", + Category: "bug-risk", + Title: "Handle error", + Description: "<p>Example</p>
\n", + }, + }}, + } + + for _, tc := range cases { + actual, err := parseIssues(tc.issues) + if err != nil { + t.Error(err) + } + + if diff := deep.Equal(actual, tc.expected); diff != nil { + t.Errorf("description: %s, %s", tc.description, diff) + } + } +} diff --git a/analyzers/processors/regex.go b/analyzers/processors/regex.go index cdda20f..b5d3271 100644 --- a/analyzers/processors/regex.go +++ b/analyzers/processors/regex.go @@ -32,51 +32,11 @@ func (r *RegexProcessor) Process(buf bytes.Buffer) (types.AnalysisReport, error) break } - exp, err := regexp.Compile(r.Pattern) + issue, err := populateIssue(line, r.Pattern, r.IssueCodeGenerator) if err != nil { return types.AnalysisReport{}, err } - // get groups - groupNames := exp.SubexpNames() - - var issue types.Issue - groups := exp.FindAllStringSubmatch(strings.TrimSuffix(line, "\n"), -1) - for groupIdx, content := range groups[0] { - groupName := groupNames[groupIdx] - - // populate issue using named groups - switch groupName { - case "filename": - issue.Location.Path = content - case "line": - line, err := strconv.Atoi(content) - if err != nil { - return types.AnalysisReport{}, err - } - issue.Location.Position.Begin.Line = line - case "column": - col, err := strconv.Atoi(content) - if err != nil { - return types.AnalysisReport{}, err - } - issue.Location.Position.Begin.Column = col - case "message": - issue.IssueText = content - case "issue_code": - if r.IssueCodeGenerator == nil { - issue.IssueCode = content - } else { - issue.IssueCode = r.IssueCodeGenerator(content) - } - default: - continue - } - } - if len(groups) == 0 { - return types.AnalysisReport{}, errors.New("failed to parse message") - } - issues = append(issues, issue) } @@ -88,3 +48,55 @@ func (r *RegexProcessor) Process(buf bytes.Buffer) (types.AnalysisReport, error) // return report return report, nil } + +// populateIssue returns an issue with the help of a regular expression based pattern and an issue code generator. 
+func populateIssue(line string, pattern string, issueCodeGenerator IssueCodeGenerator) (types.Issue, error) { + // compile regular expression + exp, err := regexp.Compile(pattern) + if err != nil { + return types.Issue{}, err + } + + // get groups + groupNames := exp.SubexpNames() + + var issue types.Issue + groups := exp.FindAllStringSubmatch(strings.TrimSuffix(line, "\n"), -1) + if len(groups) == 0 { + return types.Issue{}, errors.New("failed to parse message") + } + + for groupIdx, content := range groups[0] { + groupName := groupNames[groupIdx] + + // populate issue using named groups + switch groupName { + case "filename": + issue.Location.Path = content + case "line": + line, err := strconv.Atoi(content) + if err != nil { + return types.Issue{}, err + } + issue.Location.Position.Begin.Line = line + case "column": + col, err := strconv.Atoi(content) + if err != nil { + return types.Issue{}, err + } + issue.Location.Position.Begin.Column = col + case "message": + issue.IssueText = content + case "issue_code": + if issueCodeGenerator == nil { + issue.IssueCode = content + } else { + issue.IssueCode = issueCodeGenerator(content) + } + default: + continue + } + } + + return issue, nil +} diff --git a/analyzers/processors/regex_test.go b/analyzers/processors/regex_test.go new file mode 100644 index 0000000..33147df --- /dev/null +++ b/analyzers/processors/regex_test.go @@ -0,0 +1,61 @@ +package processors + +import ( + "testing" + + "github.com/deepsourcelabs/deepsource-go/analyzers/types" + "github.com/go-test/deep" +) + +func mockIssueCodeGenerator(content string) string { + issueMap := map[string]string{ + "empty-rules": "E001", + "errors": "E002", + "known-properties": "K001", + } + if issueMap[content] == "" { + return "U001" + } + return issueMap[content] +} + +func TestRegex_PopulateIssue(t *testing.T) { + mockIssue := types.Issue{ + IssueCode: "E001", + IssueText: "Warning - Rule is empty.", + } + mockIssue.Location.Path = "/home/testdir/file1.css" + mockIssue.Location.Position.Begin.Line = 1 + mockIssue.Location.Position.Begin.Column = 1 + + // mock issue for staticcheck + mockStaticCheckIssue := types.Issue{ + IssueCode: "U1000", + IssueText: "func trigger is unused ", + } + mockStaticCheckIssue.Location.Path = "staticcheck/staticcheck.go" + mockStaticCheckIssue.Location.Position.Begin.Line = 5 + mockStaticCheckIssue.Location.Position.Begin.Column = 6 + + cases := []struct { + description string + line string + pattern string + issueCodeGenerator IssueCodeGenerator + expected types.Issue + }{ + {"issue code generator must work", "/home/testdir/file1.css: line 1, col 1, Warning - Rule is empty. 
(empty-rules)", `(?P.+): line (?P\d+), col (?P\d+), (?P.+) \((?P.+)\)`, mockIssueCodeGenerator, mockIssue}, + {"processor must work without issue code generator", "staticcheck/staticcheck.go:5:6: func trigger is unused (U1000)", `(?P.+):(?P\d+):(?P\d+): (?P.+)\((?P\w+)\)`, nil, mockStaticCheckIssue}, + } + + for _, tc := range cases { + actual, err := populateIssue(tc.line, tc.pattern, tc.issueCodeGenerator) + if err != nil { + t.Error(err) + } + + if diff := deep.Equal(actual, tc.expected); diff != nil { + t.Errorf("description: %s, %s", tc.description, diff) + } + } +} diff --git a/analyzers/sdk_test.go b/analyzers/sdk_test.go index a9ae87c..5a9f87b 100644 --- a/analyzers/sdk_test.go +++ b/analyzers/sdk_test.go @@ -1,16 +1,15 @@ package analyzers import ( - "errors" "os" "path" - "reflect" "testing" "github.com/BurntSushi/toml" "github.com/deepsourcelabs/deepsource-go/analyzers/analysistest" "github.com/deepsourcelabs/deepsource-go/analyzers/build" "github.com/deepsourcelabs/deepsource-go/analyzers/processors" + "github.com/go-test/deep" ) func TestAnalyzer(t *testing.T) { @@ -108,30 +107,36 @@ func TestAnalyzer(t *testing.T) { func TestUtils(t *testing.T) { t.Run("test TOML generation", func(t *testing.T) { + rootDir := t.TempDir() // fetch parsed issues - issues, err := build.ParseIssues("testdata/issues.toml") + f, err := os.Open("testdata/issues.toml") + if err != nil { + t.Fatal(err) + } + + issues, err := build.FetchIssues(f) if err != nil { t.Fatal(err) } // generate TOML files - err = build.BuildTOML(issues, "testdata/toml") + err = build.BuildTOML(issues, rootDir) if err != nil { t.Fatal(err) } // traverse directory - files, err := os.ReadDir("testdata/toml") + files, err := os.ReadDir(rootDir) if err != nil { t.Fatal(err) } // parse issues from each TOML file var parsedIssue build.IssueMeta - var parsedIssues []build.IssueMeta + var parsedIssues build.IssuesMeta for _, f := range files { - filePath := path.Join("testdata/toml", f.Name()) + filePath := path.Join(rootDir, f.Name()) _, err = toml.DecodeFile(filePath, &parsedIssue) if err != nil { t.Fatal(err) @@ -140,8 +145,8 @@ func TestUtils(t *testing.T) { } // check if the parsed issues and the issues present in the parent TOML are equal - if !reflect.DeepEqual(issues, parsedIssues) { - t.Fatal(errors.New("mismatch between issues in parent TOML file and parsed issues")) + if diff := deep.Equal(issues, parsedIssues); diff != nil { + t.Errorf("mismatch between parsed issues and report's issues: %s\n", diff) } // cleanup TOMLs diff --git a/analyzers/types/types.go b/analyzers/types/types.go index 3ea2f95..73bf96b 100644 --- a/analyzers/types/types.go +++ b/analyzers/types/types.go @@ -1,30 +1,21 @@ package types -type Coordinate struct { - Line int `json:"line"` - Column int `json:"column"` -} - -type Position struct { - Begin Coordinate `json:"begin"` - End Coordinate `json:"end"` -} - -type Location struct { - Path string `json:"path"` - Position Position `json:"position"` -} - type Issue struct { - IssueCode string `json:"issue_code"` - IssueText string `json:"issue_text"` - Location Location `json:"location"` -} - -// Location of an issue -type IssueLocation struct { - Path string `json:"path"` - Position Position `json:"position"` + IssueCode string `json:"issue_code"` + IssueText string `json:"issue_text"` + Location struct { + Path string `json:"path"` + Position struct { + Begin struct { + Line int `json:"line"` + Column int `json:"column"` + } `json:"begin"` + End struct { + Line int `json:"line"` + Column int 
`json:"column"` + } `json:"end"` + } `json:"position"` + } `json:"location"` } type AnalysisError struct { diff --git a/go.mod b/go.mod index d8fc1b5..bdfebaa 100644 --- a/go.mod +++ b/go.mod @@ -4,6 +4,7 @@ go 1.17 require ( github.com/BurntSushi/toml v1.1.0 + github.com/go-test/deep v1.0.8 github.com/microcosm-cc/bluemonday v1.0.18 github.com/smacker/go-tree-sitter v0.0.0-20220421092837-ec55f7cfeaf4 github.com/yuin/goldmark v1.4.12 diff --git a/go.sum b/go.sum index 5b8fcaf..6c75407 100644 --- a/go.sum +++ b/go.sum @@ -4,6 +4,8 @@ github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuP github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4= github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/go-test/deep v1.0.8 h1:TDsG77qcSprGbC6vTN8OuXp5g+J+b5Pcguhf7Zt61VM= +github.com/go-test/deep v1.0.8/go.mod h1:5C2ZWiW0ErCdrYzpqxLbTX7MG14M9iiw8DgHncVwcsE= github.com/gorilla/css v1.0.0 h1:BQqNyPTi50JCFMTw/b67hByjMVXZRwGha6wxVGkeihY= github.com/gorilla/css v1.0.0/go.mod h1:Dn721qIggHpt4+EFCcTLTU/vk5ySda2ReITrtgBl60c= github.com/microcosm-cc/bluemonday v1.0.18 h1:6HcxvXDAi3ARt3slx6nTesbvorIc3QeTzBNRvWktHBo= From 1112721b011b5350fed13bd8502443aeb87e2328 Mon Sep 17 00:00:00 2001 From: burntcarrot Date: Fri, 17 Jun 2022 18:08:23 +0530 Subject: [PATCH 23/24] refactor(*): allow issue parsing via structs --- analyzers/analysistest/analysistest.go | 34 +++++++++-- analyzers/build/build.go | 84 ++++++++++++++------------ analyzers/build/build_test.go | 25 ++++---- analyzers/sdk_test.go | 6 +- 4 files changed, 91 insertions(+), 58 deletions(-) diff --git a/analyzers/analysistest/analysistest.go b/analyzers/analysistest/analysistest.go index f829471..0c6fcdf 100644 --- a/analyzers/analysistest/analysistest.go +++ b/analyzers/analysistest/analysistest.go @@ -47,6 +47,8 @@ type ParsedIssue struct { type ParsedIssues []ParsedIssue +const queryStr = "(comment) @comment" + func Run(directory string) error { // read the generated report from TOOLBOX_PATH toolboxPath := os.Getenv("TOOLBOX_PATH") @@ -80,7 +82,7 @@ func Run(directory string) error { // getFilenames returns the filenames for a directory. 
func getFilenames(directory string) ([]string, error) { var files []string - filepath.Walk(directory, func(path string, info fs.FileInfo, err error) error { + err := filepath.Walk(directory, func(path string, info fs.FileInfo, err error) error { if err != nil { return err } @@ -93,6 +95,9 @@ func getFilenames(directory string) ([]string, error) { return nil }) + if err != nil { + return nil, err + } return files, nil } @@ -111,20 +116,20 @@ func verifyReport(report types.AnalysisReport, directory string) error { // walk through each file and get issues for _, filename := range files { - // get language + // set language for the parser lang, err := getLanguage(filename) if err != nil { return err } parser.SetLanguage(lang) - // read report + // read the report content, err := os.ReadFile(filename) if err != nil { return err } - // generate tree + // generate the tree using tree-sitter ctx := context.Background() tree, err := parser.ParseCtx(ctx, nil, content) if err != nil { @@ -132,7 +137,6 @@ func verifyReport(report types.AnalysisReport, directory string) error { } // create a query for fetching comments - queryStr := "(comment) @comment" query, err := sitter.NewQuery([]byte(queryStr), lang) if err != nil { return err @@ -140,17 +144,28 @@ func verifyReport(report types.AnalysisReport, directory string) error { // execute query on root node qc := sitter.NewQueryCursor() + defer qc.Close() n := tree.RootNode() qc.Exec(query, n) - defer qc.Close() // iterate over matches for { + // fetch a match m, ok := qc.NextMatch() if !ok { break } + // We iterate over the query captures for each match. This traversal consists of various steps: + // 1. get the node content + // 2. check if the node has a "raise" annotation + // 2.1 if true, the node's content is matched with a regular expression. the regular expression matches comments having a raise annotation. + // 2.2 the submatches are fetched using the regular expression. + // 2.3 if there exists some submatches, then: + // 2.3.1 split the node content into substrings + // 2.3.2 check if there exists at least 2 substrings. 
a valid annotation contains at least 2 substrings: "raise" and issue codes separated by a delimiter (,) + // 2.3.3 if true, the issue codes are separated on the basis of the delimiter (,) + // 2.3.4 the issue is then populated with the issue code and line numbers for _, c := range m.Captures { // get node content node := c.Node @@ -158,14 +173,21 @@ func verifyReport(report types.AnalysisReport, directory string) error { // check if the comment contains raise annotation if strings.Contains(nodeContent, "raise") { + // find match using expression exp := regexp.MustCompile(`.+ raise: `) submatches := exp.FindStringSubmatch(nodeContent) if len(submatches) != 0 { + // get substrings substrings := exp.Split(nodeContent, -1) + + // the annotation must have at least 2 substrings: "raise" and issue codes separated by a delimiter (,) if len(substrings) > 1 { + + // fetch issue codes by splitting on the basis of the delimiter issueCodes := strings.Split(substrings[1], ",") + // add issue to parsedIssues for _, issueCode := range issueCodes { parsedIssue := ParsedIssue{IssueCode: strings.TrimSpace(issueCode), Line: int(node.StartPoint().Row) + 1} diff --git a/analyzers/build/build.go b/analyzers/build/build.go index 24c80e9..a1e85b7 100644 --- a/analyzers/build/build.go +++ b/analyzers/build/build.go @@ -23,7 +23,9 @@ type IssueMeta struct { Description string `toml:"description"` } -type IssuesMeta []IssueMeta +type IssueMetas struct { + Issues []IssueMeta +} // IssueTOML is used for decoding issues from a TOML file. type IssueTOML struct { @@ -49,7 +51,7 @@ func GenerateTOML() error { // generate TOML files rootDir := path.Join(repoRoot, ".deepsource/analyzers/issues") - err = BuildTOML(issues, rootDir) + err = issues.BuildTOML(rootDir) if err != nil { return err } @@ -57,33 +59,37 @@ func GenerateTOML() error { return nil } -// FetchIssues reads a TOML file containing all issues, and returns all issues as IssuesMeta. -func FetchIssues(r io.Reader) (IssuesMeta, error) { - issues, err := readTOML(r) +// FetchIssues reads a TOML file containing all issues, and returns all issues as IssueMetas. +func FetchIssues(r io.Reader) (IssueMetas, error) { + // get issues from TOML file + var issueTOML IssueTOML + err := issueTOML.Read(r) if err != nil { - return nil, err + return IssueMetas{}, err } + issues := issueTOML.IssueMetas() + // parse issues parsedIssues, err := parseIssues(issues) if err != nil { - return nil, err + return IssueMetas{}, err } // sort issues (based on issue code) before returning - sort.Slice(parsedIssues, func(i, j int) bool { - return parsedIssues[i].IssueCode < parsedIssues[j].IssueCode + sort.Slice(parsedIssues.Issues, func(i, j int) bool { + return parsedIssues.Issues[i].IssueCode < parsedIssues.Issues[j].IssueCode }) return parsedIssues, nil } // BuildTOML uses issues to generate TOML files to a directory. -func BuildTOML(issues IssuesMeta, rootDir string) error { - if len(issues) == 0 { +func (i *IssueMetas) BuildTOML(rootDir string) error { + if len(i.Issues) == 0 { return errors.New("no issues found") } - for _, issue := range issues { + for _, issue := range i.Issues { // The unique identifier (filename) is based on the issue code. TOML files cannot be generated for issues having an invalid/empty code. if issue.IssueCode == "" { return errors.New("invalid issue code. 
cannot generate toml") @@ -99,7 +105,7 @@ func BuildTOML(issues IssuesMeta, rootDir string) error { } // write to file - err = writeTOML(f, issue) + err = issue.Write(f) if err != nil { return err } @@ -108,62 +114,64 @@ func BuildTOML(issues IssuesMeta, rootDir string) error { return nil } -// readTOML reads content from a reader and returns issues. -func readTOML(r io.Reader) (IssuesMeta, error) { +// Read reads content from a reader and unmarshals it to IssueTOML. +func (i *IssueTOML) Read(r io.Reader) error { // read content from reader content, err := io.ReadAll(r) if err != nil { - return nil, err + return err } // unmarshal TOML - var issuesTOML IssueTOML - err = toml.Unmarshal(content, &issuesTOML) + err = toml.Unmarshal(content, &i) if err != nil { - return nil, err + return err } - // generate issues - var issues IssuesMeta - for _, issueTOML := range issuesTOML.Issues { + return nil +} + +// IssueMetas returns issues from a IssueTOML struct. +func (i *IssueTOML) IssueMetas() IssueMetas { + var issueMetas IssueMetas + for _, issueTOML := range i.Issues { issueCode := "" category := "" title := "" description := "" - // handle interface conversions if issueTOML["issue_code"] != nil { - issueCode = issueTOML["issue_code"].(string) + issueCode = fmt.Sprintf("%v", issueTOML["issue_code"]) } if issueTOML["category"] != nil { - category = issueTOML["category"].(string) + category = fmt.Sprintf("%v", issueTOML["category"]) } if issueTOML["title"] != nil { - title = issueTOML["title"].(string) + title = fmt.Sprintf("%v", issueTOML["title"]) } if issueTOML["description"] != nil { - description = issueTOML["description"].(string) + description = fmt.Sprintf("%v", issueTOML["description"]) } - is := IssueMeta{ + issueMeta := IssueMeta{ IssueCode: issueCode, Category: category, Title: title, Description: description, } - issues = append(issues, is) + issueMetas.Issues = append(issueMetas.Issues, issueMeta) } - return issues, nil + return issueMetas } -// writeTOML writes issue data to the writer. -func writeTOML(w io.Writer, issue IssueMeta) error { - if err := toml.NewEncoder(w).Encode(issue); err != nil { +// Write writes the issue data to the writer. +func (i *IssueMeta) Write(w io.Writer) error { + if err := toml.NewEncoder(w).Encode(i); err != nil { return err } @@ -191,18 +199,18 @@ func readMarkdown(content string) (string, error) { } // parseIssues returns issues after parsing and sanitizing markdown content. 
-func parseIssues(issues IssuesMeta) (IssuesMeta, error) { - var parsedIssues IssuesMeta +func parseIssues(issues IssueMetas) (IssueMetas, error) { + var parsedIssues IssueMetas - for _, issue := range issues { + for _, issue := range issues.Issues { // parse and sanitize markdown content desc, err := readMarkdown(issue.Description) if err != nil { - return nil, err + return IssueMetas{}, err } issue.Description = desc - parsedIssues = append(parsedIssues, issue) + parsedIssues.Issues = append(parsedIssues.Issues, issue) } return parsedIssues, nil diff --git a/analyzers/build/build_test.go b/analyzers/build/build_test.go index 72c2094..bd6b73b 100644 --- a/analyzers/build/build_test.go +++ b/analyzers/build/build_test.go @@ -54,7 +54,7 @@ description = """ """ ` - expectedTOMLNormal := IssuesMeta{ + expectedTOMLNormal := []IssueMeta{ { IssueCode: "SA4017", Category: "bug-risk", @@ -71,7 +71,7 @@ description = """ tomlBlank := `` - var expectedTOMLBlank IssuesMeta + var expectedTOMLBlank []IssueMeta tomlMissingDescription := ` [[issues]] @@ -81,7 +81,7 @@ category = "bug-risk" title = "Sprint is a pure function but its return value is ignored" ` - expectedTOMLMissingDescription := IssuesMeta{ + expectedTOMLMissingDescription := []IssueMeta{ { IssueCode: "SA4017", Category: "bug-risk", @@ -92,7 +92,7 @@ title = "Sprint is a pure function but its return value is ignored" cases := []struct { description string tomlContent string - expected IssuesMeta + expected []IssueMeta }{ {"normal TOML content with issues", tomlNormal, expectedTOMLNormal}, {"blank TOML", tomlBlank, expectedTOMLBlank}, @@ -101,12 +101,13 @@ title = "Sprint is a pure function but its return value is ignored" for _, tc := range cases { r := strings.NewReader(tc.tomlContent) - actual, err := readTOML(r) + var issue IssueTOML + err := issue.Read(r) if err != nil { t.Error(err) } - if diff := deep.Equal(actual, tc.expected); diff != nil { + if diff := deep.Equal(issue.IssueMetas().Issues, tc.expected); diff != nil { t.Errorf("description: %s, %s", tc.description, diff) } } @@ -147,7 +148,7 @@ description = ""` + "\n" } for _, tc := range cases { - err := writeTOML(&tc.writer, tc.issue) + err := tc.issue.Write(&tc.writer) if err != nil { t.Error(err) } @@ -165,8 +166,8 @@ description = ""` + "\n" func TestParseIssues(t *testing.T) { cases := []struct { description string - issues IssuesMeta - expected IssuesMeta + issues []IssueMeta + expected []IssueMeta }{ {"must parse markdown", []IssueMeta{ { @@ -201,12 +202,14 @@ func TestParseIssues(t *testing.T) { } for _, tc := range cases { - actual, err := parseIssues(tc.issues) + var issueMetas IssueMetas + issueMetas.Issues = tc.issues + actual, err := parseIssues(issueMetas) if err != nil { t.Error(err) } - if diff := deep.Equal(actual, tc.expected); diff != nil { + if diff := deep.Equal(actual.Issues, tc.expected); diff != nil { t.Errorf("description: %s, %s", tc.description, diff) } } diff --git a/analyzers/sdk_test.go b/analyzers/sdk_test.go index 5a9f87b..01648e4 100644 --- a/analyzers/sdk_test.go +++ b/analyzers/sdk_test.go @@ -120,7 +120,7 @@ func TestUtils(t *testing.T) { } // generate TOML files - err = build.BuildTOML(issues, rootDir) + err = issues.BuildTOML(rootDir) if err != nil { t.Fatal(err) } @@ -133,7 +133,7 @@ func TestUtils(t *testing.T) { // parse issues from each TOML file var parsedIssue build.IssueMeta - var parsedIssues build.IssuesMeta + var parsedIssues build.IssueMetas for _, f := range files { filePath := path.Join(rootDir, f.Name()) @@ -141,7 +141,7 @@ 
func TestUtils(t *testing.T) {
 		if err != nil {
 			t.Fatal(err)
 		}
-		parsedIssues = append(parsedIssues, parsedIssue)
+		parsedIssues.Issues = append(parsedIssues.Issues, parsedIssue)
 	}
 
 	// check if the parsed issues and the issues present in the parent TOML are equal

From ad43322ca7a94b94f910391d1145a8b83bc9624c Mon Sep 17 00:00:00 2001
From: burntcarrot
Date: Mon, 20 Jun 2022 12:42:47 +0530
Subject: [PATCH 24/24] refactor(*): pass env vars through params

---
 analyzers/analysistest/analysistest.go |  8 +++-----
 analyzers/build/build.go               |  5 +----
 analyzers/sdk.go                       |  5 +----
 analyzers/sdk_test.go                  | 11 +++++++----
 guides/writing-analyzers.md            |  6 ++++--
 5 files changed, 16 insertions(+), 19 deletions(-)

diff --git a/analyzers/analysistest/analysistest.go b/analyzers/analysistest/analysistest.go
index 0c6fcdf..a0cecc3 100644
--- a/analyzers/analysistest/analysistest.go
+++ b/analyzers/analysistest/analysistest.go
@@ -49,9 +49,8 @@ type ParsedIssues []ParsedIssue
 
 const queryStr = "(comment) @comment"
 
-func Run(directory string) error {
+func Run(directory string, toolboxPath string, ctx context.Context) error {
 	// read the generated report from TOOLBOX_PATH
-	toolboxPath := os.Getenv("TOOLBOX_PATH")
 	generatedFile := path.Join(toolboxPath, "analysis_report.json")
 	reportContent, err := os.ReadFile(generatedFile)
 	if err != nil {
@@ -65,7 +64,7 @@ func Run(directory string) error {
 	}
 
 	// do a verification check for the generated report
-	err = verifyReport(report, directory)
+	err = verifyReport(report, directory, ctx)
 	if err != nil {
 		return err
 	}
@@ -103,7 +102,7 @@ func getFilenames(directory string) ([]string, error) {
 }
 
 // Verify compares the generated report and parsed issues using tree-sitter.
-func verifyReport(report types.AnalysisReport, directory string) error {
+func verifyReport(report types.AnalysisReport, directory string, ctx context.Context) error {
 	var parsedIssues ParsedIssues
 
 	// get filenames
@@ -130,7 +129,6 @@ func verifyReport(report types.AnalysisReport, directory string) error {
 	}
 
 	// generate the tree using tree-sitter
-	ctx := context.Background()
 	tree, err := parser.ParseCtx(ctx, nil, content)
 	if err != nil {
 		return err
diff --git a/analyzers/build/build.go b/analyzers/build/build.go
index a1e85b7..b97f14f 100644
--- a/analyzers/build/build.go
+++ b/analyzers/build/build.go
@@ -33,10 +33,7 @@ type IssueTOML struct {
 }
 
 // GenerateTOML helps in generating TOML files for each issue from a TOML file.
-func GenerateTOML() error {
-	// root directory for the repository
-	repoRoot := os.Getenv("REPO_ROOT")
-
+func GenerateTOML(repoRoot string) error {
 	filename := path.Join(repoRoot, ".deepsource/analyzers/issues.toml")
 	f, err := os.Open(filename)
 	if err != nil {
diff --git a/analyzers/sdk.go b/analyzers/sdk.go
index 5001317..4e3e0ba 100644
--- a/analyzers/sdk.go
+++ b/analyzers/sdk.go
@@ -86,10 +86,7 @@ func runCmd(command string, args []string, allowedExitCodes []int) (bytes.Buffer
 }
 
 // SaveReport saves the analysis report to the local filesystem.
-func (*CLIRunner) SaveReport(report types.AnalysisReport) error {
-	// get toolbox path from environment variable
-	toolboxPath := os.Getenv("TOOLBOX_PATH")
-
+func (*CLIRunner) SaveReport(report types.AnalysisReport, toolboxPath string) error {
 	// set report location
 	filename := path.Join(toolboxPath, "analysis_report.json")
 
diff --git a/analyzers/sdk_test.go b/analyzers/sdk_test.go
index 01648e4..0fde9c9 100644
--- a/analyzers/sdk_test.go
+++ b/analyzers/sdk_test.go
@@ -1,6 +1,7 @@
 package analyzers
 
 import (
+	"context"
 	"os"
 	"path"
 	"testing"
@@ -40,12 +41,13 @@ func TestAnalyzer(t *testing.T) {
 		t.Fatal(err)
 	}
 
-	err = a.SaveReport(report)
+	err = a.SaveReport(report, tempDir)
 	if err != nil {
 		t.Fatal(err)
 	}
 
-	err = analysistest.Run("./testdata/src/staticcheck")
+	ctx := context.Background()
+	err = analysistest.Run("./testdata/src/staticcheck", tempDir, ctx)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -93,12 +95,13 @@ func TestAnalyzer(t *testing.T) {
 		t.Fatal(err)
 	}
 
-	err = a.SaveReport(report)
+	err = a.SaveReport(report, tempDir)
 	if err != nil {
 		t.Fatal(err)
 	}
 
-	err = analysistest.Run("./testdata/src/csslint")
+	ctx := context.Background()
+	err = analysistest.Run("./testdata/src/csslint", tempDir, ctx)
 	if err != nil {
 		t.Fatal(err)
 	}
diff --git a/guides/writing-analyzers.md b/guides/writing-analyzers.md
index 5e7e8a2..f6909ac 100644
--- a/guides/writing-analyzers.md
+++ b/guides/writing-analyzers.md
@@ -113,7 +113,8 @@ The report is then saved to `$TOOLBOX_PATH/analysis_report.json`.
 (previous code)
 
-	err = a.SaveReport(report)
+	toolboxPath := os.Getenv("TOOLBOX_PATH")
+	err = a.SaveReport(report, toolboxPath)
 	if err != nil {
 		log.Fatalln(err)
 	}
 
@@ -214,7 +215,8 @@ The TOML files are generated at `$REPO_ROOT/.deepsource/analyzers/issues/
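
Taken together, the final patch moves every environment lookup to the edges of the program: `TOOLBOX_PATH` and `REPO_ROOT` are read once in the analyzer's entry point and handed to `SaveReport`, `GenerateTOML`, and `analysistest.Run` as plain parameters. The sketch below illustrates that shape in isolation; it is a minimal, self-contained example rather than the SDK's actual API, with `analysisReport` and `saveReport` standing in for `types.AnalysisReport` and the `SaveReport` method.

```go
package main

import (
	"encoding/json"
	"log"
	"os"
	"path"
)

// analysisReport is a stand-in for types.AnalysisReport, kept only to make
// the sketch self-contained.
type analysisReport struct {
	Issues []string `json:"issues"`
}

// saveReport mirrors the refactored SaveReport: the toolbox path arrives as a
// parameter instead of being read from TOOLBOX_PATH inside the function.
func saveReport(report analysisReport, toolboxPath string) error {
	data, err := json.Marshal(report)
	if err != nil {
		return err
	}
	filename := path.Join(toolboxPath, "analysis_report.json")
	return os.WriteFile(filename, data, 0o644)
}

func main() {
	// Environment variables are read once, at the entry point...
	toolboxPath := os.Getenv("TOOLBOX_PATH")

	report := analysisReport{Issues: []string{"SA4017"}}

	// ...and passed down explicitly to the functions that need them.
	if err := saveReport(report, toolboxPath); err != nil {
		log.Fatalln(err)
	}
}
```

Keeping `os.Getenv` out of the inner functions is what lets the updated tests substitute a temporary directory (`tempDir`) for `TOOLBOX_PATH` without touching the process environment.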
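
The earlier refactor in this series gives the TOML helpers the same property: `IssueTOML.Read` accepts any `io.Reader`, and `IssueMetas()` converts the raw TOML tables into typed `IssueMeta` values. A rough usage sketch follows, assuming the `build` package is importable at `github.com/deepsourcelabs/deepsource-go/analyzers/build` as the module layout in the diffs suggests.

```go
package main

import (
	"fmt"
	"log"
	"os"

	"github.com/deepsourcelabs/deepsource-go/analyzers/build"
)

func main() {
	f, err := os.Open("issues.toml")
	if err != nil {
		log.Fatalln(err)
	}
	defer f.Close()

	// Read unmarshals the TOML tables into the IssueTOML struct.
	var issueTOML build.IssueTOML
	if err := issueTOML.Read(f); err != nil {
		log.Fatalln(err)
	}

	// IssueMetas converts the raw tables into typed IssueMeta values.
	for _, meta := range issueTOML.IssueMetas().Issues {
		fmt.Printf("%s (%s): %s\n", meta.IssueCode, meta.Category, meta.Title)
	}
}
```

Because `Read` takes an `io.Reader` rather than a file path, the same flow works against an in-memory `strings.Reader`, which is how the updated `build_test.go` exercises it.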