scanner/scanner.go
Ref: Size: 3.1 KiB
package scanner
import (
"fmt"
"os"
"regexp"
"gopkg.in/yaml.v3"
)
// Finding represents a detected sensitive pattern match.
// Scan appends at most one Finding for each rule that matches.
type Finding struct {
// Rule is the name of the rule whose pattern matched.
Rule string
// Match is the beginning of the matched text; Scan truncates long
// matches before storing them here.
Match string
}
// rule is a single compiled detection rule held by a Scanner.
type rule struct {
// name identifies the rule; Scan copies it into Finding.Rule.
name string
// pattern is the compiled regular expression that Scan runs against the body.
pattern *regexp.Regexp
// exemptHosts lists the hosts for which this rule is skipped. isExempt
// compares entries to the host with plain string equality.
exemptHosts []string
}
// Scanner holds compiled rules for scanning request bodies.
// New builds a Scanner from a YAML rules file.
type Scanner struct {
// rules are the compiled rules, in the order they were loaded.
rules []rule
}
// yamlRule is the shape of one entry under "rules" in the rules YAML,
// before its pattern has been compiled.
type yamlRule struct {
// Name is decoded from the "name" key.
Name string `yaml:"name"`
// Pattern is the regular expression source, decoded from the "pattern" key.
Pattern string `yaml:"pattern"`
// ExemptHosts is decoded from the "exempt_hosts" key.
ExemptHosts []string `yaml:"exempt_hosts"`
}
// yamlConfig is the top-level shape of the rules YAML document.
type yamlConfig struct {
// Rules is decoded from the top-level "rules" key.
Rules []yamlRule `yaml:"rules"`
}
// New reads the YAML rules file at path, decodes it, and compiles every
// rule's pattern into a Scanner.
//
// It returns an error if the file cannot be read, if the YAML cannot be
// parsed, if a rule has an empty pattern, or if a pattern is not a valid
// regular expression. On error the returned *Scanner is nil.
func New(path string) (*Scanner, error) {
	data, err := os.ReadFile(path)
	if err != nil {
		return nil, fmt.Errorf("reading rules file: %w", err)
	}

	var cfg yamlConfig
	if err := yaml.Unmarshal(data, &cfg); err != nil {
		return nil, fmt.Errorf("parsing rules YAML: %w", err)
	}

	rules := make([]rule, 0, len(cfg.Rules))
	for i, yr := range cfg.Rules {
		// An empty pattern (e.g. a misspelled "pattern" key) compiles to a
		// regexp that matches every non-nil body, so the rule would fire on
		// all traffic. Treat it as a configuration error instead.
		if yr.Pattern == "" {
			return nil, fmt.Errorf("rule %q (index %d): pattern is empty", yr.Name, i)
		}
		re, err := regexp.Compile(yr.Pattern)
		if err != nil {
			return nil, fmt.Errorf("compiling pattern for rule %q: %w", yr.Name, err)
		}
		rules = append(rules, rule{name: yr.Name, pattern: re, exemptHosts: yr.ExemptHosts})
	}
	return &Scanner{rules: rules}, nil
}
// RuleCount reports how many rules this Scanner currently holds.
func (s *Scanner) RuleCount() int {
	n := len(s.rules)
	return n
}
// Scan checks body against all rules and returns any findings.
//
// Rules whose exempt hosts include host are skipped. For every other rule
// only the first match in body is reported, so the result holds at most one
// Finding per rule, in rule order. The result is nil when nothing matches.
//
// Match snippets are truncated to 40 characters. Truncation happens on a
// character boundary, so a multi-byte UTF-8 character is never cut in half.
func (s *Scanner) Scan(body []byte, host string) []Finding {
	const maxSnippetChars = 40

	var findings []Finding
	for _, r := range s.rules {
		if r.isExempt(host) {
			continue
		}
		match := r.pattern.Find(body)
		if match == nil {
			continue
		}
		findings = append(findings, Finding{
			Rule:  r.name,
			Match: truncateSnippet(string(match), maxSnippetChars),
		})
	}
	return findings
}

// truncateSnippet returns s shortened to at most limit characters (runes).
// Ranging over a string yields the byte offset at which each rune starts, so
// slicing at that offset always lands on a rune boundary. Bytes that are not
// valid UTF-8 each count as one character.
func truncateSnippet(s string, limit int) string {
	seen := 0
	for offset := range s {
		if seen == limit {
			return s[:offset]
		}
		seen++
	}
	return s
}
// isExempt reports whether host is exactly equal to one of the rule's
// exempt hosts. A rule with no exempt hosts is never exempt.
func (r *rule) isExempt(host string) bool {
	for i := 0; i < len(r.exemptHosts); i++ {
		if r.exemptHosts[i] != host {
			continue
		}
		return true
	}
	return false
}
// defaultRulesYAML is the rules document that WriteDefaultRules writes to
// disk. Its keys (name, pattern, exempt_hosts) are the ones yamlRule decodes.
//
// The patterns are YAML double-quoted strings inside a Go raw string, so
// every regexp backslash is written twice (e.g. \\s) and literal double
// quotes are written as \".
//
// NOTE(review): YAML nesting depends on the whitespace inside this literal;
// confirm that each rule's keys are indented under its "- name:" entry.
const defaultRulesYAML = `rules:
- name: ssh-private-key
pattern: "-----BEGIN (OPENSSH|RSA|DSA|EC|ED25519) PRIVATE KEY-----"
- name: pgp-private-key
pattern: "-----BEGIN PGP PRIVATE KEY BLOCK-----"
- name: basic-auth
pattern: "Authorization:\\s*Basic\\s+"
- name: bearer-token
pattern: "Authorization:\\s*Bearer\\s+"
exempt_hosts:
- api.anthropic.com
- name: aws-access-key
pattern: "AKIA[0-9A-Z]{16}"
- name: github-token
pattern: "gh[ps]_[A-Za-z0-9_]{36,}"
- name: openai-key
pattern: "sk-[A-Za-z0-9]{32,}"
- name: password-field
pattern: "(password=|\"password\":\\s*\")"
- name: env-file
pattern: "(?m)^[A-Z_]+=.+\\n[A-Z_]+=.+\\n[A-Z_]+=.+"
`
// WriteDefaultRules writes the default rules YAML to path.
//
// It does nothing and returns nil if something already exists at path. The
// file is created with O_EXCL so the existence check and the creation are a
// single atomic step; a file created concurrently by someone else is never
// truncated or overwritten. New files are created with mode 0644.
//
// Any other failure to create, write, or close the file is returned as-is.
func WriteDefaultRules(path string) error {
	f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0644)
	if err != nil {
		if os.IsExist(err) {
			// Already present: leave the existing content untouched.
			return nil
		}
		return err
	}

	_, err = f.WriteString(defaultRulesYAML)
	// Always close; surface the close error only if the write succeeded.
	if cerr := f.Close(); err == nil {
		err = cerr
	}
	if err != nil {
		// We created this file, so remove the partial write; otherwise the
		// next call would see it as "already exists" and keep a truncated
		// rules file. The Remove error is ignored because the write/close
		// error is the one worth reporting.
		_ = os.Remove(path)
		return err
	}
	return nil
}