a73x

scanner/scanner.go

Ref:   Size: 3.1 KiB

package scanner

import (
	"fmt"
	"os"
	"regexp"

	"gopkg.in/yaml.v3"
)

// Finding represents a detected sensitive pattern match.
// Scan emits at most one Finding per rule.
type Finding struct {
	Rule  string // name of the rule whose pattern matched
	Match string // snippet of the matched text, shortened by Scan
}

// rule is a single compiled detection rule as used by Scanner.
type rule struct {
	name        string         // rule identifier, reported as Finding.Rule
	pattern     *regexp.Regexp // compiled regex searched for in the body
	exemptHosts []string       // hosts for which the rule is skipped (exact string match)
}

// Scanner holds compiled rules for scanning request bodies.
// New builds one from a YAML rules file; Scan applies the rules in the order
// they were loaded.
type Scanner struct {
	rules []rule // compiled rules, in load order
}

// yamlRule is the on-disk form of one rule as it appears in the rules YAML
// file. New converts each yamlRule into a compiled rule.
type yamlRule struct {
	Name        string   `yaml:"name"`         // rule identifier
	Pattern     string   `yaml:"pattern"`      // regular expression source, compiled by New
	ExemptHosts []string `yaml:"exempt_hosts"` // optional hosts for which the rule is skipped
}

// yamlConfig is the top-level document of the rules YAML file.
type yamlConfig struct {
	Rules []yamlRule `yaml:"rules"` // every rule, listed under the top-level "rules" key
}

// New reads a YAML rules file, parses rules, and compiles regexes.
//
// path is the location of the rules file. The resulting Scanner keeps the
// rules in the order they appear in the file.
//
// It returns an error if the file cannot be read or parsed, if a rule has an
// empty pattern, or if any regex is invalid. No Scanner is returned on error.
func New(path string) (*Scanner, error) {
	data, err := os.ReadFile(path)
	if err != nil {
		return nil, fmt.Errorf("reading rules file: %w", err)
	}

	var cfg yamlConfig
	if err := yaml.Unmarshal(data, &cfg); err != nil {
		return nil, fmt.Errorf("parsing rules YAML: %w", err)
	}

	rules := make([]rule, 0, len(cfg.Rules))
	for i, yr := range cfg.Rules {
		// An empty pattern is a valid regex that matches every input, so a
		// rule with a missing or misspelled "pattern" key would report a
		// finding for every body. Fail loudly at load time instead.
		if yr.Pattern == "" {
			return nil, fmt.Errorf("rule %d (%q): empty pattern", i, yr.Name)
		}
		re, err := regexp.Compile(yr.Pattern)
		if err != nil {
			return nil, fmt.Errorf("compiling pattern for rule %q: %w", yr.Name, err)
		}
		rules = append(rules, rule{name: yr.Name, pattern: re, exemptHosts: yr.ExemptHosts})
	}

	return &Scanner{rules: rules}, nil
}

// RuleCount returns the number of loaded rules.
// Every rule held by the Scanner is counted, including rules that carry
// exempt hosts.
func (s *Scanner) RuleCount() int {
	return len(s.rules)
}

// Scan checks body against all rules and returns any findings.
//
// Rules are evaluated in load order and each rule contributes at most one
// Finding, for its leftmost match in body. Rules whose exempt_hosts contain
// host are skipped. The result is nil when nothing matches.
//
// Match snippets are limited to 40 bytes. A longer match is cut at the last
// character boundary at or before that limit, so a multi-byte UTF-8 character
// is never split; the snippet may therefore be slightly shorter than 40 bytes.
func (s *Scanner) Scan(body []byte, host string) []Finding {
	const maxSnippetBytes = 40

	var findings []Finding
	for i := range s.rules {
		r := &s.rules[i]
		if r.isExempt(host) {
			continue
		}
		match := r.pattern.Find(body)
		if match == nil {
			continue
		}
		snippet := string(match)
		if len(snippet) > maxSnippetBytes {
			// Ranging over a string yields the byte offset at which each
			// character starts, so the largest offset that does not exceed
			// the limit is a safe place to cut. Invalid UTF-8 bytes count as
			// one-byte characters, so pure ASCII is still cut at exactly the
			// limit.
			cut := 0
			for off := range snippet {
				if off > maxSnippetBytes {
					break
				}
				cut = off
			}
			snippet = snippet[:cut]
		}
		findings = append(findings, Finding{Rule: r.name, Match: snippet})
	}
	return findings
}

// isExempt reports whether host is listed among the rule's exempt hosts.
// Hosts are compared as exact, case-sensitive strings; a rule without exempt
// hosts is never exempt.
func (r *rule) isExempt(host string) bool {
	exempt := false
	for i := 0; i < len(r.exemptHosts) && !exempt; i++ {
		exempt = r.exemptHosts[i] == host
	}
	return exempt
}

// defaultRulesYAML is the rule set that WriteDefaultRules writes to disk.
// It follows the schema New parses: a top-level "rules" list whose entries
// have a name, a pattern and optional exempt_hosts.
// The patterns sit inside double-quoted YAML scalars, so each "\\" below is a
// YAML escape that reaches the regex compiler as a single backslash.
const defaultRulesYAML = `rules:
  - name: ssh-private-key
    pattern: "-----BEGIN (OPENSSH|RSA|DSA|EC|ED25519) PRIVATE KEY-----"
  - name: pgp-private-key
    pattern: "-----BEGIN PGP PRIVATE KEY BLOCK-----"
  - name: basic-auth
    pattern: "Authorization:\\s*Basic\\s+"
  - name: bearer-token
    pattern: "Authorization:\\s*Bearer\\s+"
    exempt_hosts:
      - api.anthropic.com
  - name: aws-access-key
    pattern: "AKIA[0-9A-Z]{16}"
  - name: github-token
    pattern: "gh[ps]_[A-Za-z0-9_]{36,}"
  - name: openai-key
    pattern: "sk-[A-Za-z0-9]{32,}"
  - name: password-field
    pattern: "(password=|\"password\":\\s*\")"
  - name: env-file
    pattern: "(?m)^[A-Z_]+=.+\\n[A-Z_]+=.+\\n[A-Z_]+=.+"
`

// WriteDefaultRules writes the default rules YAML to path.
//
// The file is created exclusively, so an existing file is never overwritten
// or truncated, even if it appears between the existence check and the write:
// if path already exists the function does nothing and returns nil.
//
// Any other failure to create, write, or close the file is returned. A file
// that was created but could not be written completely is removed again so a
// later call does not mistake it for an existing rules file.
func WriteDefaultRules(path string) error {
	f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0644)
	if err != nil {
		if os.IsExist(err) {
			// file already exists, do not overwrite
			return nil
		}
		return err
	}

	if _, err := f.WriteString(defaultRulesYAML); err != nil {
		// Best-effort cleanup; the write error is the one worth reporting.
		_ = f.Close()
		_ = os.Remove(path)
		return err
	}
	if err := f.Close(); err != nil {
		// Best-effort cleanup; the close error is the one worth reporting.
		_ = os.Remove(path)
		return err
	}
	return nil
}