a73x

c181bad1

feat: add scanner package with YAML rules, body scanning, and defaults

a73x   2026-03-29 16:23


diff --git a/go.mod b/go.mod
index 79a248e..5819882 100644
--- a/go.mod
+++ b/go.mod
@@ -1,3 +1,5 @@
module github.com/xanderle/nono

go 1.26.1

require gopkg.in/yaml.v3 v3.0.1
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..4bc0337
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,3 @@
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/scanner/scanner.go b/scanner/scanner.go
new file mode 100644
index 0000000..54670aa
--- /dev/null
+++ b/scanner/scanner.go
@@ -0,0 +1,113 @@
package scanner

import (
	"fmt"
	"os"
	"regexp"

	"gopkg.in/yaml.v3"
)

// Finding represents a detected sensitive pattern match.
// Scan emits at most one Finding per rule, describing that rule's first match
// in the scanned body.
type Finding struct {
	Rule  string // name of the rule whose pattern matched
	Match string // matched text, shortened by Scan to at most 40 bytes
}

// rule is a single scanning rule in its ready-to-use form: the name taken from
// the rules file paired with the compiled form of its pattern.
type rule struct {
	name    string         // copied into Finding.Rule when the pattern matches
	pattern *regexp.Regexp // compiled once by New and reused for every Scan
}

// Scanner holds compiled rules for scanning request bodies.
// Values are created by New; Scan and RuleCount only read the rule list.
type Scanner struct {
	rules []rule // in the order the rules appear in the YAML file
}

// yamlRule is the on-disk shape of one entry in the rules file's "rules" list.
type yamlRule struct {
	Name    string `yaml:"name"`    // label reported in findings
	Pattern string `yaml:"pattern"` // regular expression source, compiled by New
}

// yamlConfig is the top-level document of a rules file.
type yamlConfig struct {
	Rules []yamlRule `yaml:"rules"` // entries under the top-level "rules" key
}

// New loads scanning rules from the YAML file at path and compiles each rule's
// pattern with the regexp package.
//
// The file is expected to hold a top-level "rules" list whose entries carry a
// "name" and a "pattern" key. New returns an error if the file cannot be read
// or parsed, if a rule has an empty name or an empty pattern, or if a pattern
// is not a valid regular expression. On error the returned *Scanner is nil.
func New(path string) (*Scanner, error) {
	data, err := os.ReadFile(path)
	if err != nil {
		return nil, fmt.Errorf("reading rules file: %w", err)
	}

	var cfg yamlConfig
	if err := yaml.Unmarshal(data, &cfg); err != nil {
		return nil, fmt.Errorf("parsing rules YAML: %w", err)
	}

	rules := make([]rule, 0, len(cfg.Rules))
	for i, yr := range cfg.Rules {
		// Findings are identified only by rule name, so an unnamed rule
		// would produce findings nobody can trace back to the config.
		if yr.Name == "" {
			return nil, fmt.Errorf("rule at index %d: missing name", i)
		}
		// An empty pattern compiles successfully but matches the empty
		// string, so a missing or misspelled "pattern" key would silently
		// turn the rule into one that fires on practically every body.
		if yr.Pattern == "" {
			return nil, fmt.Errorf("rule %q: missing pattern", yr.Name)
		}
		re, err := regexp.Compile(yr.Pattern)
		if err != nil {
			return nil, fmt.Errorf("compiling pattern for rule %q: %w", yr.Name, err)
		}
		rules = append(rules, rule{name: yr.Name, pattern: re})
	}

	return &Scanner{rules: rules}, nil
}

// RuleCount reports how many compiled rules this Scanner holds.
func (s *Scanner) RuleCount() int {
	count := len(s.rules)
	return count
}

// maxSnippetBytes is the upper bound, in bytes, on Finding.Match.
const maxSnippetBytes = 40

// Scan checks body against every rule, in the order the rules were loaded,
// and returns one Finding for each rule whose pattern matches. Only the first
// match of a rule is reported.
//
// Match snippets are limited to 40 bytes. When the limit would fall in the
// middle of a multi-byte UTF-8 character the snippet is cut just before that
// character instead, so a snippet never ends in a partial character.
// Scan returns nil when nothing matches.
func (s *Scanner) Scan(body []byte) []Finding {
	var findings []Finding
	for _, r := range s.rules {
		match := r.pattern.Find(body)
		if match == nil {
			continue
		}
		findings = append(findings, Finding{
			Rule:  r.name,
			Match: truncateSnippet(string(match), maxSnippetBytes),
		})
	}
	return findings
}

// truncateSnippet returns s shortened to at most limit bytes without
// splitting a UTF-8 encoded character.
func truncateSnippet(s string, limit int) string {
	if len(s) <= limit {
		return s
	}
	// Ranging over a string yields the byte offset at which each character
	// starts (invalid bytes count as one-byte characters), so the largest
	// offset not exceeding limit is the last safe place to cut.
	cut := 0
	for i := range s {
		if i > limit {
			break
		}
		cut = i
	}
	return s[:cut]
}

// defaultRulesYAML is the rules document that WriteDefaultRules writes out.
//
// It is a Go raw string, so the doubled backslashes below reach the YAML
// parser unchanged; inside YAML double-quoted scalars they unescape to the
// single backslashes the regular expressions need (for example \\s in this
// text becomes \s in the compiled pattern).
const defaultRulesYAML = `rules:
  - name: ssh-private-key
    pattern: "-----BEGIN (OPENSSH|RSA|DSA|EC|ED25519) PRIVATE KEY-----"
  - name: pgp-private-key
    pattern: "-----BEGIN PGP PRIVATE KEY BLOCK-----"
  - name: basic-auth
    pattern: "Authorization:\\s*Basic\\s+"
  - name: bearer-token
    pattern: "Authorization:\\s*Bearer\\s+"
  - name: aws-access-key
    pattern: "AKIA[0-9A-Z]{16}"
  - name: github-token
    pattern: "gh[ps]_[A-Za-z0-9_]{36,}"
  - name: openai-key
    pattern: "sk-[A-Za-z0-9]{32,}"
  - name: password-field
    pattern: "(password=|\"password\":\\s*\")"
  - name: env-file
    pattern: "(?m)^[A-Z_]+=.+\\n[A-Z_]+=.+\\n[A-Z_]+=.+"
`

// WriteDefaultRules creates the file at path with the built-in default rules
// YAML as its content.
//
// If something already exists at path the call does nothing and returns nil,
// so an existing (possibly user-edited) rules file is never overwritten. The
// file is opened with O_CREATE|O_EXCL so that the existence check and the
// creation happen as one atomic step rather than as a separate stat followed
// by a write. Any other failure, such as a missing parent directory or a
// permission problem, is returned to the caller.
func WriteDefaultRules(path string) error {
	f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0644)
	if err != nil {
		if os.IsExist(err) {
			// Something is already there: leave it untouched.
			return nil
		}
		return fmt.Errorf("creating default rules file: %w", err)
	}
	if _, err := f.WriteString(defaultRulesYAML); err != nil {
		f.Close() // best effort; the write error is the one worth reporting
		return fmt.Errorf("writing default rules file: %w", err)
	}
	if err := f.Close(); err != nil {
		return fmt.Errorf("closing default rules file: %w", err)
	}
	return nil
}
diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go
new file mode 100644
index 0000000..6f824bf
--- /dev/null
+++ b/scanner/scanner_test.go
@@ -0,0 +1,133 @@
package scanner_test

import (
	"os"
	"path/filepath"
	"testing"

	"github.com/xanderle/nono/scanner"
)

// Task 3: Load Rules from YAML

// TestNewScanner_LoadsRulesFromYAML checks that New loads every rule listed in
// a well-formed rules file.
func TestNewScanner_LoadsRulesFromYAML(t *testing.T) {
	dir := t.TempDir()
	rulesPath := filepath.Join(dir, "rules.yaml")
	content := "rules:\n  - name: ssh-key\n    pattern: \"-----BEGIN RSA PRIVATE KEY-----\"\n  - name: aws-key\n    pattern: \"AKIA[0-9A-Z]{16}\"\n"
	// Fail here rather than letting a setup problem surface as a confusing
	// "reading rules file" error from New.
	if err := os.WriteFile(rulesPath, []byte(content), 0644); err != nil {
		t.Fatalf("writing rules file: %v", err)
	}

	s, err := scanner.New(rulesPath)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if s.RuleCount() != 2 {
		t.Errorf("expected 2 rules, got %d", s.RuleCount())
	}
}

// TestNewScanner_RejectsInvalidRegex checks that New reports an error when a
// rule's pattern is not a valid regular expression.
func TestNewScanner_RejectsInvalidRegex(t *testing.T) {
	dir := t.TempDir()
	rulesPath := filepath.Join(dir, "rules.yaml")
	content := "rules:\n  - name: bad-rule\n    pattern: \"[invalid\"\n"
	// If the file were silently not written, New would fail on the read and
	// this test would pass without ever exercising the regex check.
	if err := os.WriteFile(rulesPath, []byte(content), 0644); err != nil {
		t.Fatalf("writing rules file: %v", err)
	}

	_, err := scanner.New(rulesPath)
	if err == nil {
		t.Fatal("expected error for invalid regex")
	}
}

// Task 4: Scan for Sensitive Patterns

// writeRules writes content to a file named rules.yaml inside a fresh
// per-test temporary directory and returns that file's path. It fails the
// calling test immediately if the file cannot be written, so later assertions
// never run against a missing rules file.
func writeRules(t *testing.T, content string) string {
	t.Helper()
	path := filepath.Join(t.TempDir(), "rules.yaml")
	if err := os.WriteFile(path, []byte(content), 0644); err != nil {
		t.Fatalf("writing rules file %q: %v", path, err)
	}
	return path
}

// TestScan_DetectsSSHPrivateKey checks that a PEM private-key header inside a
// body is reported under the ssh-private-key rule.
func TestScan_DetectsSSHPrivateKey(t *testing.T) {
	path := writeRules(t, "rules:\n  - name: ssh-private-key\n    pattern: \"-----BEGIN (OPENSSH|RSA|DSA|EC|ED25519) PRIVATE KEY-----\"\n")
	// A load failure must stop the test here; otherwise s is nil and the
	// Scan call below panics instead of reporting what went wrong.
	s, err := scanner.New(path)
	if err != nil {
		t.Fatalf("loading rules: %v", err)
	}
	findings := s.Scan([]byte("some data\n-----BEGIN RSA PRIVATE KEY-----\nMIIE..."))
	if len(findings) != 1 {
		t.Fatalf("expected 1 finding, got %d", len(findings))
	}
	if findings[0].Rule != "ssh-private-key" {
		t.Errorf("expected rule 'ssh-private-key', got %q", findings[0].Rule)
	}
}

// TestScan_DetectsAWSKey checks that an AWS access key ID embedded in a JSON
// body is reported under the aws-access-key rule.
func TestScan_DetectsAWSKey(t *testing.T) {
	path := writeRules(t, "rules:\n  - name: aws-access-key\n    pattern: \"AKIA[0-9A-Z]{16}\"\n")
	// A load failure must stop the test here; otherwise s is nil and the
	// Scan call below panics instead of reporting what went wrong.
	s, err := scanner.New(path)
	if err != nil {
		t.Fatalf("loading rules: %v", err)
	}
	findings := s.Scan([]byte("{\"key\": \"AKIAIOSFODNN7EXAMPLE\"}"))
	if len(findings) != 1 {
		t.Fatalf("expected 1 finding, got %d", len(findings))
	}
	if findings[0].Rule != "aws-access-key" {
		t.Errorf("expected rule 'aws-access-key', got %q", findings[0].Rule)
	}
}

// TestScan_ReturnsMultipleFindings checks that a body matching two different
// rules yields one finding per rule.
func TestScan_ReturnsMultipleFindings(t *testing.T) {
	path := writeRules(t, "rules:\n  - name: ssh-private-key\n    pattern: \"-----BEGIN RSA PRIVATE KEY-----\"\n  - name: aws-access-key\n    pattern: \"AKIA[0-9A-Z]{16}\"\n")
	// A load failure must stop the test here; otherwise s is nil and the
	// Scan call below panics instead of reporting what went wrong.
	s, err := scanner.New(path)
	if err != nil {
		t.Fatalf("loading rules: %v", err)
	}
	body := []byte("-----BEGIN RSA PRIVATE KEY-----\nkey\nAKIAIOSFODNN7EXAMPLE")
	findings := s.Scan(body)
	if len(findings) != 2 {
		t.Fatalf("expected 2 findings, got %d", len(findings))
	}
}

// TestScan_ReturnsEmptyForCleanBody checks that a body matching no rule
// produces no findings.
func TestScan_ReturnsEmptyForCleanBody(t *testing.T) {
	path := writeRules(t, "rules:\n  - name: ssh-private-key\n    pattern: \"-----BEGIN RSA PRIVATE KEY-----\"\n")
	// A load failure must stop the test here; otherwise s is nil and the
	// Scan call below panics instead of reporting what went wrong.
	s, err := scanner.New(path)
	if err != nil {
		t.Fatalf("loading rules: %v", err)
	}
	findings := s.Scan([]byte("just some normal POST data"))
	if len(findings) != 0 {
		t.Errorf("expected 0 findings, got %d", len(findings))
	}
}

// TestScan_TruncatesMatchSnippet checks that a match longer than the snippet
// limit is cut down to 40 bytes.
//
// The body is a run of 100 ASCII letters matched in full by the rule, so the
// raw match is well over the limit and the truncation path is really taken.
func TestScan_TruncatesMatchSnippet(t *testing.T) {
	path := writeRules(t, "rules:\n  - name: long-run\n    pattern: \"A+\"\n")
	s, err := scanner.New(path)
	if err != nil {
		t.Fatalf("loading rules: %v", err)
	}

	body := make([]byte, 100)
	for i := range body {
		body[i] = 'A'
	}

	findings := s.Scan(body)
	if len(findings) != 1 {
		t.Fatalf("expected 1 finding, got %d", len(findings))
	}
	if got := len(findings[0].Match); got != 40 {
		t.Errorf("expected match snippet to be truncated to 40 bytes, got %d", got)
	}
}

// Task 5: Default Rules File

// TestWriteDefaultRules_CreatesFile checks that WriteDefaultRules produces a
// file that New can load and that holds at least nine rules.
func TestWriteDefaultRules_CreatesFile(t *testing.T) {
	rulesFile := filepath.Join(t.TempDir(), "rules.yaml")

	if err := scanner.WriteDefaultRules(rulesFile); err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	loaded, err := scanner.New(rulesFile)
	if err != nil {
		t.Fatalf("failed to load default rules: %v", err)
	}

	if loaded.RuleCount() < 9 {
		t.Errorf("expected at least 9 default rules, got %d", loaded.RuleCount())
	}
}

// TestWriteDefaultRules_DoesNotOverwrite checks that WriteDefaultRules leaves
// an already existing rules file untouched.
func TestWriteDefaultRules_DoesNotOverwrite(t *testing.T) {
	dir := t.TempDir()
	path := filepath.Join(dir, "rules.yaml")
	// The pre-existing file is the whole premise of this test, so a failure
	// to create it must abort rather than be ignored.
	if err := os.WriteFile(path, []byte("rules:\n  - name: custom\n    pattern: \"custom\"\n"), 0644); err != nil {
		t.Fatalf("writing custom rules file: %v", err)
	}
	err := scanner.WriteDefaultRules(path)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	s, err := scanner.New(path)
	if err != nil {
		t.Fatalf("loading rules: %v", err)
	}
	if s.RuleCount() != 1 {
		t.Errorf("expected 1 rule (not overwritten), got %d", s.RuleCount())
	}
}