133 changes: 133 additions & 0 deletions CLAUDE.md
@@ -0,0 +1,133 @@
# CLAUDE.md

This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.

## Project Overview

nmap-formatter is a CLI tool that converts NMAP XML scan output into multiple formats: HTML, CSV, JSON, Markdown, Excel, SQLite, Graphviz (dot), and D2 lang. It's built in Go and uses a workflow-based architecture with pluggable formatters.

## Development Commands

### Build and Test
```bash
# Build the project
go build -v ./

# Run all tests
go test -v ./...

# Run tests with coverage
go test ./... -race -coverprofile=coverage.txt -covermode=atomic

# Run linter (same as CI)
golangci-lint run --timeout 10m
```

### Running the Application
```bash
# Basic usage
./nmap-formatter [html|csv|md|json|dot|sqlite|excel|d2] [path-to-nmap.xml]

# From stdin
cat example.xml | ./nmap-formatter json

# With output file
./nmap-formatter html scan.xml -f output.html
```

### Testing Individual Packages
```bash
# Test only the formatter package
go test -v ./formatter/...

# Test only the cmd package
go test -v ./cmd/...

# Run a specific test
go test -v -run TestWorkflow_Execute ./formatter/
```

## Architecture

### Core Components

**Entry Point (main.go)**: Minimal entry point that delegates to `cmd.Execute()`.

**Command Layer (cmd/)**: Uses Cobra for CLI argument parsing and flag management. The `root.go` file defines all CLI flags and initializes the `MainWorkflow`.

**Formatter Package (formatter/)**: Contains the core business logic:

- **Workflow Pattern**: `MainWorkflow` (implements the `Workflow` interface) orchestrates the conversion pipeline, sketched below:
  1. Parse NMAP XML input into the `NMAPRun` struct
  2. Apply filter expressions using the expr-lang library
  3. Build `TemplateData` with scan results and output options
  4. Delegate to the format-specific `Formatter` implementation
  5. Write output to a file or STDOUT
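
  As a rough sketch of how these steps fit together (the `Execute` signature, the helper methods, and the `Config` field below are assumptions for illustration; only `Workflow`, `MainWorkflow`, `TemplateData`, `NMAPRun`, and the formatter factory come from this codebase):

  ```go
  // Illustrative shape only; not the actual implementation.
  type Workflow interface {
  	SetConfig(config *Config)
  	Execute() error
  }

  func (w *MainWorkflow) Execute() error {
  	run, err := w.parseInput() // 1. unmarshal NMAP XML into NMAPRun (assumed helper)
  	if err != nil {
  		return err
  	}
  	run, err = w.applyFilters(run) // 2. evaluate expr-lang filter expressions (assumed helper)
  	if err != nil {
  		return err
  	}
  	td := &TemplateData{NMAPRun: run, OutputOptions: w.Config.OutputOptions} // 3. build template data
  	f := New(w.Config)                       // 4. pick the format-specific Formatter
  	return f.Format(td, w.templateContent()) // 5. formatter writes to file or STDOUT via the configured writer
  }
  ```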

- **Data Models**: NMAP XML is mapped to Go structs:
  - `NMAPRun`: Root scan metadata (scanner version, args, timestamps)
  - `Host`: Individual scanned host (addresses, hostnames, status, OS, trace, ports)
  - `Port`: Port details (ID, protocol, state, service, scripts)
  - These structs use XML struct tags for unmarshaling
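
  A heavily trimmed illustration of the mapping (field names and the exact tag set below are illustrative, not the real definitions in this repository; the attributes match standard nmap XML output):

  ```go
  type NMAPRun struct {
  	Scanner string `xml:"scanner,attr"`
  	Args    string `xml:"args,attr"`
  	Host    []Host `xml:"host"`
  }

  type Host struct {
  	Ports []Port `xml:"ports>port"`
  }

  type Port struct {
  	PortID   int    `xml:"portid,attr"`
  	Protocol string `xml:"protocol,attr"`
  }

  // xml.Unmarshal(xmlBytes, &run) fills these fields directly from the NMAP report.
  ```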

- **Formatter Interface**: All output formats implement the `Formatter` interface:
  ```go
  type Formatter interface {
      Format(td *TemplateData, templateContent string) error
      defaultTemplateContent() string
  }
  ```
  The factory function `formatter.New(config)` returns the appropriate formatter for the requested output format.
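
  Conceptually the factory is a switch over the requested output format; a simplified sketch (the `OutputFormat` field name and the `config` field on each formatter are assumptions, not the real identifiers):

  ```go
  // Illustrative shape only.
  func New(config *Config) Formatter {
  	switch config.OutputFormat { // assumed field name
  	case "json":
  		return &JSONFormatter{config: config}
  	case "csv":
  		return &CSVFormatter{config: config}
  	// ... one case per supported format
  	default:
  		return nil
  	}
  }
  ```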

- **Format Implementations**:
  - `HTMLFormatter` / `MarkdownFormatter`: Use Go templates from `resources/templates/`
  - `JSONFormatter`: Uses `encoding/json`
  - `CSVFormatter`: Custom CSV generation
  - `ExcelFormatter`: Uses excelize library
  - `SqliteFormatter`: Writes to SQLite using multiple repository types (hosts, ports, OS, scans)
  - `DotFormatter`: Generates Graphviz syntax
  - `D2LangFormatter`: Generates D2 diagram language

- **Filtering**: The `expr.go` file integrates the expr-lang library to filter hosts based on expressions. The `--filter` flag and `--skip-down-hosts` both use this mechanism.
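
  Under the hood this amounts to compiling the expression once and evaluating it per host. A self-contained toy example of the expr-lang API (the real environment exposed to filter expressions is defined in `expr.go` and differs from the `env` map below):

  ```go
  package main

  import (
  	"fmt"

  	"github.com/expr-lang/expr"
  )

  func main() {
  	// Toy stand-in for a host: real filters operate on the parsed NMAP structs.
  	env := map[string]interface{}{
  		"Status": "up",
  		"Ports":  []int{22, 443},
  	}
  	program, err := expr.Compile(`Status == "up" && 443 in Ports`, expr.Env(env))
  	if err != nil {
  		panic(err)
  	}
  	keep, err := expr.Run(program, env)
  	if err != nil {
  		panic(err)
  	}
  	fmt.Println(keep.(bool)) // true: this "host" would be kept
  }
  ```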

### Configuration Flow

1. `cmd/root.go` initializes `formatter.Config` with all CLI flags
2. Config contains `OutputOptions` with format-specific settings (e.g., `HTMLOptions`, `MarkdownOptions`)
3. Config is passed to `MainWorkflow.SetConfig()`, then to formatter instances
4. Each formatter reads its relevant options from `config.OutputOptions`
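
The nesting described above looks roughly like this (only fields referenced elsewhere in this document or the diff are shown; the option type names are assumptions):

```go
type Config struct {
	OutputOptions OutputOptions
	Writer        io.Writer // file or STDOUT
	// ... input source, filter expression, etc.
}

type OutputOptions struct {
	JSONOptions         JSONOutputOptions
	HTMLOptions         HTMLOutputOptions
	MarkdownOptions     MarkdownOutputOptions
	SqliteOutputOptions SqliteOutputOptions
}

type JSONOutputOptions struct {
	PrettyPrint bool // --json-pretty
	SnakeCase   bool // --json-snake-case
}
```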

### Important Patterns

- **Input/Output Handling**: Supports both file paths and STDIN/STDOUT. The workflow sets up `config.InputFileConfig.Source` and `config.Writer` before execution.

- **Template Customization**: HTML and Markdown formatters support custom templates via `--html-use-template` and `--md-use-template` flags. Templates are loaded by `TemplateContent()` function.

- **SQLite Architecture**: Uses repository pattern with separate repositories for hosts, ports, OS, and scans. The `sqlite_db.go` manages schema creation.
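
  A minimal sketch of the idea (hypothetical interface; the real repository types and their method sets live alongside `sqlite_db.go` and are not shown here):

  ```go
  // Hypothetical shape, for orientation only.
  type hostRepository interface {
  	InsertHost(scanID string, host *Host) (hostID int64, err error)
  }
  ```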

- **Version Management**: Version is hardcoded in `cmd/root.go` as `const VERSION`. This should be updated for releases.

## Testing

All major components have corresponding `_test.go` files. Tests typically:
- Use sample XML fixtures for parsing tests
- Mock `io.Writer` for output validation
- Test filter expressions with various host/port conditions
- Validate format-specific output structure
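
A typical shape for the `io.Writer`-mocking style described above (the function under test here is a placeholder, not one of the real formatters):

```go
package formatter

import (
	"bytes"
	"io"
	"strings"
	"testing"
)

// writeReport stands in for a formatter's Format method in this sketch.
func writeReport(w io.Writer) error {
	_, err := io.WriteString(w, `{"scanner":"nmap"}`)
	return err
}

func TestWriteReportContainsScanner(t *testing.T) {
	var buf bytes.Buffer // bytes.Buffer satisfies io.Writer, so no temp file is needed
	if err := writeReport(&buf); err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if !strings.Contains(buf.String(), "nmap") {
		t.Errorf("output missing scanner name: %q", buf.String())
	}
}
```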

The CI runs tests on Go 1.24.x across Linux, macOS, and Windows.

## Module Information

- Module path: `github.com/vdjagilev/nmap-formatter/v3`
- Go version: 1.24
- Uses go modules for dependency management

## Key Dependencies

- `github.com/spf13/cobra`: CLI framework
- `github.com/expr-lang/expr`: Expression language for filtering
- `github.com/xuri/excelize/v2`: Excel file generation
- `github.com/mattn/go-sqlite3`: SQLite driver (CGO-based)
- `oss.terrastruct.com/d2`: D2 diagram language library
- `golang.org/x/net`: Used for HTML parsing in certain formatters
3 changes: 3 additions & 0 deletions cmd/root.go
@@ -115,6 +115,9 @@ func init() {
	// Pretty-print json
	rootCmd.Flags().BoolVar(&config.OutputOptions.JSONOptions.PrettyPrint, "json-pretty", true, "--json-pretty=false (pretty prints JSON output)")

	// Snake case json keys
	rootCmd.Flags().BoolVar(&config.OutputOptions.JSONOptions.SnakeCase, "json-snake-case", false, "--json-snake-case=true (converts JSON keys to snake_case)")

	// Configs related to SQLite
	rootCmd.Flags().StringVar(&config.OutputOptions.SqliteOutputOptions.DSN, "sqlite-dsn", "nmap.sqlite", "--sqlite-dsn nmap.sqlite")
	rootCmd.Flags().StringVar(&config.OutputOptions.SqliteOutputOptions.ScanIdentifier, "scan-id", "", "--scan-id abc123")
7 changes: 7 additions & 0 deletions formatter/formatter_json.go
@@ -12,6 +12,13 @@ type JSONFormatter struct {

// Format the data and output it to appropriate io.Writer
func (f *JSONFormatter) Format(td *TemplateData, templateContent string) (err error) {
	// Use snake_case encoder if requested
	if td.OutputOptions.JSONOptions.SnakeCase {
		encoder := newSnakeCaseEncoder(f.config.Writer, td.OutputOptions.JSONOptions.PrettyPrint)
		return encoder.Encode(td.NMAPRun)
	}

	// Default JSON encoding
	jsonData := new(bytes.Buffer)
	encoder := json.NewEncoder(jsonData)
	if td.OutputOptions.JSONOptions.PrettyPrint {
151 changes: 151 additions & 0 deletions formatter/json_snake_case.go
@@ -0,0 +1,151 @@
package formatter

import (
"bytes"
"encoding/json"
"io"
"unicode"
)

// snakeCaseEncoder wraps a standard JSON encoder and converts keys to snake_case
type snakeCaseEncoder struct {
	writer      io.Writer
	indent      string
	prettyPrint bool
}

// newSnakeCaseEncoder creates a new encoder that converts JSON keys to snake_case
func newSnakeCaseEncoder(w io.Writer, prettyPrint bool) *snakeCaseEncoder {
	indent := ""
	if prettyPrint {
		indent = " "
	}
	return &snakeCaseEncoder{
		writer:      w,
		indent:      indent,
		prettyPrint: prettyPrint,
	}
}

// Encode encodes the value to JSON with snake_case keys
func (e *snakeCaseEncoder) Encode(v interface{}) error {
	// First, encode normally to a buffer
	buf := new(bytes.Buffer)
	encoder := json.NewEncoder(buf)
	if e.prettyPrint {
		encoder.SetIndent("", e.indent)
	}
	if err := encoder.Encode(v); err != nil {
		return err
	}

	// Convert keys to snake_case
	converted := convertKeysToSnakeCase(buf.Bytes())

	// Write to the actual writer
	_, err := e.writer.Write(converted)
	return err
}

// toSnakeCase converts a CamelCase string to snake_case
// This is optimized for performance - it processes the string in a single pass
func toSnakeCase(s string) string {
if s == "" {
return s
}

// Pre-allocate buffer with estimated size (original length + 30% for underscores)
var buf bytes.Buffer
buf.Grow(len(s) + len(s)/3)

var prevLower bool
var prevUnderscore bool

for i, r := range s {
if unicode.IsUpper(r) {
// Add underscore before uppercase if:
// 1. Not the first character
// 2. Previous character was lowercase
// 3. Previous character wasn't already an underscore
if i > 0 && prevLower && !prevUnderscore {
buf.WriteByte('_')
}
buf.WriteRune(unicode.ToLower(r))
prevLower = false
prevUnderscore = false
} else if r == '_' {
buf.WriteRune(r)
prevLower = false
prevUnderscore = true
} else {
buf.WriteRune(r)
prevLower = true
prevUnderscore = false
}
}

return buf.String()
}

// convertKeysToSnakeCase converts all JSON keys in the byte slice to snake_case
// This processes the JSON in a streaming fashion for better performance
func convertKeysToSnakeCase(data []byte) []byte {
	result := make([]byte, 0, len(data))
	inString := false
	escaped := false
	keyStart := 0

	for i := 0; i < len(data); i++ {
		b := data[i]

		if escaped {
			result = append(result, b)
			escaped = false
			continue
		}

		switch b {
		case '\\':
			if inString {
				escaped = true
			}
			result = append(result, b)

		case '"':
			if !inString {
				// Starting a potential key
				inString = true
				result = append(result, b)
				keyStart = len(result)
			} else {
				// Ending a string
				inString = false

				// Check if this was a key (followed by colon after whitespace)
				isKey := false
				j := i + 1
				for j < len(data) && (data[j] == ' ' || data[j] == '\t' || data[j] == '\n' || data[j] == '\r') {
					j++
				}
				if j < len(data) && data[j] == ':' {
					isKey = true
				}

				if isKey {
					// Extract the key and convert it
					key := result[keyStart:]
					snakeKey := toSnakeCase(string(key))
					result = result[:keyStart]
					result = append(result, []byte(snakeKey)...)
				}

				result = append(result, b)
			}

		default:
			result = append(result, b)
		}
	}

	return result
}