102 redactor newline corruption clean (#1947)

* fix: prevent redactors from corrupting binary files (#102)

Redactors were adding newlines to files without them, corrupting binary
files during support bundle collection (51 bytes → 53 bytes).

Created LineReader to track original newline state and only restore
newlines when they were present in the original file.

- Added pkg/redact/line_reader.go
- Refactored single_line.go, multi_line.go, literal.go
- Added 48 tests, all passing
- Verified: binary files now preserved byte-for-byte

Fixes #102


* fix: handle empty lines correctly in MultiLineRedactor

- Check line1 == nil instead of len(line1) == 0 for empty file detection
- Fixes edge case where file containing only '\n' would be dropped
- Addresses bugbot finding about empty line handling


* fix: handle empty lines correctly in MultiLineRedactor

- Check line1 != nil instead of len(line1) > 0 in both locations
- Fixes edge case where empty trailing lines would be dropped
- Fix test isolation in literal_test.go (move ResetRedactionList to parent)
- Addresses bugbot findings about empty line handling

* fmt

* chore: update regression baselines from run 20107431959

* adding defense

* fix: propagate non-EOF errors in all early return paths

Ensure non-EOF errors (like buffer overflow) are properly propagated
to caller in both pre-loop early returns. Addresses bugbot finding.

* fix: use unique test names to prevent redaction list pollution

Use t.Name() instead of hardcoded 'test' to ensure each test
has unique redactor name, preventing parallel test interference

---------

Co-authored-by: hedge-sparrow <sparrow@spooky.academy>
This commit is contained in:
Benjamin Yang
2025-12-10 16:55:54 -06:00
committed by GitHub
parent b69a8a9b8c
commit a9d2180dd6
15 changed files with 1667 additions and 143 deletions

View File

@@ -44,8 +44,7 @@ pwd=somethinggoeshere;`,
want: map[string]string{ want: map[string]string{
"data/datacollectorname": ` 123 "data/datacollectorname": ` 123
another***HIDDEN***here another***HIDDEN***here
pwd=***HIDDEN***; pwd=***HIDDEN***;`,
`,
}, },
}, },
{ {
@@ -78,8 +77,7 @@ pwd=somethinggoeshere;`,
want: map[string]string{ want: map[string]string{
"data/datacollectorname": `abc 123 "data/datacollectorname": `abc 123
another***HIDDEN***here another***HIDDEN***here
pwd=***HIDDEN***; pwd=***HIDDEN***;`,
`,
}, },
}, },
{ {
@@ -112,8 +110,7 @@ pwd=somethinggoeshere;`,
want: map[string]string{ want: map[string]string{
"data/datacollectorname": `abc 123 "data/datacollectorname": `abc 123
another line here another line here
pwd=***HIDDEN***; pwd=***HIDDEN***;`,
`,
}, },
}, },
{ {
@@ -149,8 +146,7 @@ pwd=somethinggoeshere;`,
want: map[string]string{ want: map[string]string{
"data/datacollectorname": `abc 123 "data/datacollectorname": `abc 123
another***HIDDEN***here another***HIDDEN***here
pwd=***HIDDEN***; pwd=***HIDDEN***;`,
`,
}, },
}, },
{ {
@@ -186,8 +182,7 @@ pwd=somethinggoeshere;`,
want: map[string]string{ want: map[string]string{
"data/data/collectorname": `***HIDDEN*** ***HIDDEN*** "data/data/collectorname": `***HIDDEN*** ***HIDDEN***
***HIDDEN*** line here ***HIDDEN*** line here
pwd=***HIDDEN***; pwd=***HIDDEN***;`,
`,
}, },
}, },
{ {
@@ -213,8 +208,7 @@ another line here`,
}, },
want: map[string]string{ want: map[string]string{
"data/datacollectorname": `abc 123 "data/datacollectorname": `abc 123
another line here another line here`,
`,
}, },
}, },
{ {
@@ -249,8 +243,7 @@ abc`,
abc abc
123 123
xyz123 xyz123
abc abc`,
`,
}, },
}, },
{ {

104
pkg/redact/line_reader.go Normal file
View File

@@ -0,0 +1,104 @@
package redact
import (
"bufio"
"io"
"github.com/replicatedhq/troubleshoot/pkg/constants"
)
// LineReader reads lines from an io.Reader while tracking whether each line
// ended with a newline character. This is essential for preserving the exact
// structure of input files during redaction - binary files and text files
// without trailing newlines should not have newlines added to them.
//
// Unlike bufio.Scanner which strips newlines and requires the caller to add
// them back, LineReader explicitly tracks the presence of newlines so callers
// can conditionally restore them only when they were originally present.
//
// Construct instances with NewLineReader; the zero value has a nil reader.
type LineReader struct {
	// reader is the buffered source that ReadLine consumes.
	reader *bufio.Reader
}
// NewLineReader returns a LineReader that pulls its input from r.
// The source is wrapped in a bufio.Reader so that line scanning does not
// issue one read on the underlying reader per byte.
func NewLineReader(r io.Reader) *LineReader {
	lr := new(LineReader)
	lr.reader = bufio.NewReader(r)
	return lr
}
// ReadLine reads the next line from the reader and returns:
//   - line content (without the newline character if present)
//   - whether the line ended with a newline (\n)
//   - any error encountered
//
// Return values:
//   - (content, true, nil)     - line ended with \n, more content may follow;
//     content is a non-nil empty slice for a blank line
//   - (content, false, io.EOF) - last line without \n (file doesn't end with newline)
//   - (nil, false, io.EOF)     - reached EOF with no content (empty file or end of file)
//   - (content, false, error)  - encountered a non-EOF error; content holds the
//     bytes read so far
//   - (nil, false, bufio.ErrTooLong) - a single line exceeded
//     constants.SCANNER_MAX_SIZE bytes
//
// The size limit prevents memory exhaustion on files with extremely long
// lines or binary files without newlines that are larger than the limit.
//
// Example usage:
//
//	lr := NewLineReader(input)
//	for {
//		line, hadNewline, err := lr.ReadLine()
//		if err == io.EOF && len(line) == 0 {
//			break // End of file, no more content
//		}
//
//		// Process line...
//		fmt.Print(string(line))
//		if hadNewline {
//			fmt.Print("\n")
//		}
//
//		if err == io.EOF {
//			break // Last line processed
//		}
//		if err != nil {
//			return err
//		}
//	}
func (lr *LineReader) ReadLine() ([]byte, bool, error) {
	// Start from an empty, non-nil slice so that a blank line ("\n") is
	// returned as []byte{} and stays distinguishable from the nil "no content"
	// result.
	line := []byte{}

	for {
		// ReadSlice scans the buffered window for '\n' in one call instead of
		// one call per byte. The returned slice aliases the bufio buffer and
		// is only valid until the next read, so it is copied via append.
		chunk, err := lr.reader.ReadSlice('\n')

		if err == nil {
			// A nil error guarantees chunk ends with the delimiter; drop it.
			line = append(line, chunk[:len(chunk)-1]...)
			if len(line) > constants.SCANNER_MAX_SIZE {
				return nil, false, bufio.ErrTooLong
			}
			return line, true, nil
		}

		// No delimiter in this chunk: keep what we got and enforce the limit
		// before looking at the error, so an oversized line is always reported
		// as ErrTooLong.
		line = append(line, chunk...)
		if len(line) > constants.SCANNER_MAX_SIZE {
			return nil, false, bufio.ErrTooLong
		}

		switch err {
		case bufio.ErrBufferFull:
			// The line is longer than the bufio buffer; keep accumulating.
			continue
		case io.EOF:
			if len(line) > 0 {
				// Last line without newline - return the content we have.
				return line, false, io.EOF
			}
			// Nothing left to read - empty file or end of content.
			return nil, false, io.EOF
		default:
			// Non-EOF error encountered.
			return line, false, err
		}
	}
}

View File

@@ -0,0 +1,302 @@
package redact
import (
	"bufio"
	"bytes"
	"io"
	"strconv"
	"strings"
	"testing"

	"github.com/replicatedhq/troubleshoot/pkg/constants"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)
// TestNewLineReader checks that the constructor returns a usable instance
// whose internal buffered reader has been initialised.
func TestNewLineReader(t *testing.T) {
	lr := NewLineReader(strings.NewReader("test"))

	require.NotNil(t, lr)
	require.NotNil(t, lr.reader)
}
// TestLineReader_EmptyFile verifies that reading empty input yields
// (nil, false, io.EOF) on the very first call.
func TestLineReader_EmptyFile(t *testing.T) {
	got, sawNewline, err := NewLineReader(strings.NewReader("")).ReadLine()

	assert.Nil(t, got)
	assert.False(t, sawNewline)
	assert.Equal(t, io.EOF, err)
}
// TestLineReader_SingleLineWithNewline verifies that a newline-terminated line
// is returned as (content, true, nil) and that the following read reports EOF.
func TestLineReader_SingleLineWithNewline(t *testing.T) {
	lr := NewLineReader(strings.NewReader("hello world\n"))

	got, sawNewline, err := lr.ReadLine()
	assert.Equal(t, []byte("hello world"), got)
	assert.True(t, sawNewline)
	assert.NoError(t, err)

	// Nothing follows the terminator, so the next read is a bare EOF.
	got, sawNewline, err = lr.ReadLine()
	assert.Nil(t, got)
	assert.False(t, sawNewline)
	assert.Equal(t, io.EOF, err)
}
// TestLineReader_SingleLineWithoutNewline verifies that an unterminated line
// is returned together with io.EOF and hadNewline == false.
func TestLineReader_SingleLineWithoutNewline(t *testing.T) {
	got, sawNewline, err := NewLineReader(strings.NewReader("hello world")).ReadLine()

	assert.Equal(t, []byte("hello world"), got)
	assert.False(t, sawNewline)
	assert.Equal(t, io.EOF, err)
}
// TestLineReader_MultipleLinesWithNewlines verifies that every
// newline-terminated line is reported with hadNewline == true and that EOF
// follows the last one.
func TestLineReader_MultipleLinesWithNewlines(t *testing.T) {
	lr := NewLineReader(strings.NewReader("line1\nline2\nline3\n"))

	for _, want := range []string{"line1", "line2", "line3"} {
		got, sawNewline, err := lr.ReadLine()
		assert.Equal(t, []byte(want), got)
		assert.True(t, sawNewline)
		assert.NoError(t, err)
	}

	// All content consumed.
	got, sawNewline, err := lr.ReadLine()
	assert.Nil(t, got)
	assert.False(t, sawNewline)
	assert.Equal(t, io.EOF, err)
}
// TestLineReader_LastLineWithoutNewline verifies that only the final,
// unterminated line is reported with hadNewline == false and io.EOF.
func TestLineReader_LastLineWithoutNewline(t *testing.T) {
	lr := NewLineReader(strings.NewReader("line1\nline2\nline3"))

	// The first two lines are newline-terminated.
	for _, want := range []string{"line1", "line2"} {
		got, sawNewline, err := lr.ReadLine()
		assert.Equal(t, []byte(want), got)
		assert.True(t, sawNewline)
		assert.NoError(t, err)
	}

	// The third line has no trailing newline.
	got, sawNewline, err := lr.ReadLine()
	assert.Equal(t, []byte("line3"), got)
	assert.False(t, sawNewline)
	assert.Equal(t, io.EOF, err)
}
// TestLineReader_BinaryData verifies that newline-free binary input comes back
// in one piece with hadNewline == false and io.EOF.
func TestLineReader_BinaryData(t *testing.T) {
	payload := []byte{0x01, 0x02, 0x03, 0x04, 0x05, 0xFF, 0xFE}

	got, sawNewline, err := NewLineReader(bytes.NewReader(payload)).ReadLine()

	assert.Equal(t, payload, got)
	assert.False(t, sawNewline)
	assert.Equal(t, io.EOF, err)
}
// TestLineReader_LineExceedingMaxSize verifies that a single line longer than
// constants.SCANNER_MAX_SIZE is rejected with bufio.ErrTooLong.
func TestLineReader_LineExceedingMaxSize(t *testing.T) {
	// 100 bytes over the limit, with no newline anywhere.
	oversized := bytes.Repeat([]byte{'a'}, constants.SCANNER_MAX_SIZE+100)

	got, sawNewline, err := NewLineReader(bytes.NewReader(oversized)).ReadLine()

	assert.Nil(t, got)
	assert.False(t, sawNewline)
	assert.Error(t, err)
	assert.ErrorIs(t, err, bufio.ErrTooLong)
}
// TestLineReader_OnlyNewline verifies that input consisting of a single "\n"
// produces one empty (non-nil) line with hadNewline == true, then EOF.
func TestLineReader_OnlyNewline(t *testing.T) {
	lr := NewLineReader(strings.NewReader("\n"))

	got, sawNewline, err := lr.ReadLine()
	assert.Equal(t, []byte{}, got) // blank line, not "no content"
	assert.True(t, sawNewline)
	assert.NoError(t, err)

	// The next read finds nothing.
	got, sawNewline, err = lr.ReadLine()
	assert.Nil(t, got)
	assert.False(t, sawNewline)
	assert.Equal(t, io.EOF, err)
}
// TestLineReader_EmptyLines verifies that consecutive newlines are reported as
// individual empty lines, each with hadNewline == true.
func TestLineReader_EmptyLines(t *testing.T) {
	lr := NewLineReader(strings.NewReader("\n\n\n"))

	// Three blank lines, one per newline.
	for i := 0; i < 3; i++ {
		got, sawNewline, err := lr.ReadLine()
		assert.Equal(t, []byte{}, got)
		assert.True(t, sawNewline)
		assert.NoError(t, err)
	}

	// Then the input is exhausted.
	got, sawNewline, err := lr.ReadLine()
	assert.Nil(t, got)
	assert.False(t, sawNewline)
	assert.Equal(t, io.EOF, err)
}
// TestLineReader_MixedContent covers a regular line, a blank line and an
// unterminated final line in the same input.
func TestLineReader_MixedContent(t *testing.T) {
	lr := NewLineReader(strings.NewReader("line1\n\nline3"))

	wants := [][]byte{[]byte("line1"), {}, []byte("line3")}
	for i, want := range wants {
		got, sawNewline, err := lr.ReadLine()
		assert.Equal(t, want, got)

		if i < len(wants)-1 {
			// Every line but the last is newline-terminated.
			assert.True(t, sawNewline)
			assert.NoError(t, err)
			continue
		}

		// The last line ends at EOF without a newline.
		assert.False(t, sawNewline)
		assert.Equal(t, io.EOF, err)
	}
}
// TestLineReader_LargeValidFile verifies that a line just under
// constants.SCANNER_MAX_SIZE is accepted and returned in full.
func TestLineReader_LargeValidFile(t *testing.T) {
	// 100 bytes below the limit, followed by a terminating newline.
	payload := append(bytes.Repeat([]byte{'x'}, constants.SCANNER_MAX_SIZE-100), '\n')

	got, sawNewline, err := NewLineReader(bytes.NewReader(payload)).ReadLine()

	assert.Equal(t, constants.SCANNER_MAX_SIZE-100, len(got))
	assert.True(t, sawNewline)
	assert.NoError(t, err)
}
// TestLineReader_BinaryWithEmbeddedNewlines verifies that 0x0A bytes inside
// binary data split it into "lines" and that the unterminated tail is
// reported with io.EOF.
func TestLineReader_BinaryWithEmbeddedNewlines(t *testing.T) {
	payload := []byte{0x01, 0x02, '\n', 0x03, 0x04, '\n', 0x05}
	lr := NewLineReader(bytes.NewReader(payload))

	wants := [][]byte{{0x01, 0x02}, {0x03, 0x04}, {0x05}}
	for i, want := range wants {
		got, sawNewline, err := lr.ReadLine()
		assert.Equal(t, want, got)

		if i < len(wants)-1 {
			// Segments delimited by an embedded newline.
			assert.True(t, sawNewline)
			assert.NoError(t, err)
			continue
		}

		// Trailing segment without a newline.
		assert.False(t, sawNewline)
		assert.Equal(t, io.EOF, err)
	}
}
// TestLineReader_SingleByteReads exercises lines that are one character long,
// with the final one lacking a trailing newline.
func TestLineReader_SingleByteReads(t *testing.T) {
	lr := NewLineReader(strings.NewReader("a\nb\nc"))

	for _, want := range []string{"a", "b"} {
		got, sawNewline, err := lr.ReadLine()
		assert.Equal(t, []byte(want), got)
		assert.True(t, sawNewline)
		assert.NoError(t, err)
	}

	got, sawNewline, err := lr.ReadLine()
	assert.Equal(t, []byte("c"), got)
	assert.False(t, sawNewline)
	assert.Equal(t, io.EOF, err)
}
// BenchmarkLineReader measures the cost of reading a 1000-line text fixture
// to completion with LineReader.
func BenchmarkLineReader(b *testing.B) {
	// Build the fixture: 1000 newline-terminated lines, each carrying its
	// decimal line number. strconv.Itoa is required here; string(rune(i))
	// would emit the code point i (a stray '\n' for i == 10) rather than digits.
	var buf bytes.Buffer
	for i := 0; i < 1000; i++ {
		buf.WriteString("This is line number ")
		buf.WriteString(strconv.Itoa(i))
		buf.WriteString(" with some content\n")
	}
	data := buf.Bytes()

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		lr := NewLineReader(bytes.NewReader(data))
		for {
			_, _, err := lr.ReadLine()
			if err == io.EOF {
				break
			}
			if err != nil {
				// Any other error would otherwise keep this loop spinning.
				b.Fatalf("unexpected ReadLine error: %v", err)
			}
		}
	}
}

View File

@@ -3,6 +3,7 @@ package redact
import ( import (
"bufio" "bufio"
"bytes" "bytes"
"errors"
"fmt" "fmt"
"io" "io"
@@ -25,6 +26,12 @@ func literalString(match []byte, path, name string) Redactor {
} }
} }
// Redact processes the input reader line-by-line, replacing literal string matches.
// Unlike the previous implementation using bufio.Scanner, this now uses LineReader
// to preserve the exact newline structure of the input file. Lines that originally
// ended with \n will have \n added back, while lines without \n (like the last line
// of a file without a trailing newline, or binary files) will not have \n added.
// This ensures binary files and text files without trailing newlines are not corrupted.
func (r literalRedactor) Redact(input io.Reader, path string) io.Reader { func (r literalRedactor) Redact(input io.Reader, path string) io.Reader {
out, writer := io.Pipe() out, writer := io.Pipe()
@@ -34,7 +41,8 @@ func (r literalRedactor) Redact(input io.Reader, path string) io.Reader {
if err == nil || err == io.EOF { if err == nil || err == io.EOF {
writer.Close() writer.Close()
} else { } else {
if err == bufio.ErrTooLong { // Check if error is about line exceeding maximum size
if errors.Is(err, bufio.ErrTooLong) {
s := fmt.Sprintf("Error redacting %q. A line in the file exceeded %d MB max length", path, constants.SCANNER_MAX_SIZE/1024/1024) s := fmt.Sprintf("Error redacting %q. A line in the file exceeded %d MB max length", path, constants.SCANNER_MAX_SIZE/1024/1024)
klog.V(2).Info(s) klog.V(2).Info(s)
} else { } else {
@@ -44,17 +52,24 @@ func (r literalRedactor) Redact(input io.Reader, path string) io.Reader {
} }
}() }()
buf := make([]byte, constants.BUF_INIT_SIZE) // Use LineReader instead of bufio.Scanner to track newline presence
scanner := bufio.NewScanner(input) lineReader := NewLineReader(input)
scanner.Buffer(buf, constants.SCANNER_MAX_SIZE) tokenizer := GetGlobalTokenizer()
lineNum := 0 lineNum := 0
for scanner.Scan() {
lineNum++
line := scanner.Bytes()
for {
line, hadNewline, readErr := lineReader.ReadLine()
// Handle EOF with no content - we're done
if readErr == io.EOF && len(line) == 0 {
break
}
// We have content to process
lineNum++
// Perform literal string replacement
var clean []byte var clean []byte
tokenizer := GetGlobalTokenizer()
if tokenizer.IsEnabled() { if tokenizer.IsEnabled() {
// For literal redaction, we tokenize the matched value // For literal redaction, we tokenize the matched value
matchStr := string(r.match) matchStr := string(r.match)
@@ -66,12 +81,20 @@ func (r literalRedactor) Redact(input io.Reader, path string) io.Reader {
clean = bytes.ReplaceAll(line, r.match, maskTextBytes) clean = bytes.ReplaceAll(line, r.match, maskTextBytes)
} }
// Append newline since scanner strips it // Write the line (redacted or original)
err = writeBytes(writer, clean, NEW_LINE) err = writeBytes(writer, clean)
if err != nil { if err != nil {
return return
} }
// Only add newline if original line had one
if hadNewline {
err = writeBytes(writer, NEW_LINE)
if err != nil {
return
}
}
// Track redaction if content changed
if !bytes.Equal(clean, line) { if !bytes.Equal(clean, line) {
addRedaction(Redaction{ addRedaction(Redaction{
RedactorName: r.redactName, RedactorName: r.redactName,
@@ -81,9 +104,16 @@ func (r literalRedactor) Redact(input io.Reader, path string) io.Reader {
IsDefaultRedactor: r.isDefault, IsDefaultRedactor: r.isDefault,
}) })
} }
}
if scanErr := scanner.Err(); scanErr != nil { // Check if we hit EOF after processing this line
err = scanErr if readErr == io.EOF {
break
}
// Check for non-EOF errors
if readErr != nil {
err = readErr
return
}
} }
}() }()
return out return out

386
pkg/redact/literal_test.go Normal file
View File

@@ -0,0 +1,386 @@
package redact
import (
"bytes"
"io"
"strings"
"testing"
"github.com/stretchr/testify/require"
)
// TestLiteralRedactor_BasicRedaction runs the literal redactor over a table of
// small inputs and checks the exact output, including whether a trailing
// newline is present.
func TestLiteralRedactor_BasicRedaction(t *testing.T) {
	cases := []struct {
		name  string
		match string
		input string
		want  string
	}{
		{
			name:  "Simple literal match",
			match: "secret123",
			input: "password=secret123",
			want:  "password=***HIDDEN***", // input has no trailing newline
		},
		{
			name:  "Multiple occurrences",
			match: "secret",
			input: "secret is secret here secret",
			want:  "***HIDDEN*** is ***HIDDEN*** here ***HIDDEN***",
		},
		{
			name:  "No match",
			match: "xyz",
			input: "no match here",
			want:  "no match here",
		},
		{
			name:  "With trailing newline",
			match: "secret",
			input: "secret\n",
			want:  "***HIDDEN***\n",
		},
		{
			name:  "Multiline with newlines",
			match: "secret",
			input: "line1 secret\nline2 secret\n",
			want:  "line1 ***HIDDEN***\nline2 ***HIDDEN***\n",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			ResetRedactionList()
			defer ResetRedactionList()

			// The sub-test name doubles as the redactor name.
			r := literalString([]byte(tc.match), "testfile", tc.name)
			got, err := io.ReadAll(r.Redact(strings.NewReader(tc.input), ""))

			require.NoError(t, err)
			require.Equal(t, tc.want, string(got))
		})
	}
}
// TestLiteralRedactor_BinaryFile verifies that binary input with no newline
// and no match passes through byte-for-byte.
func TestLiteralRedactor_BinaryFile(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	payload := []byte{0x01, 0x02, 0x03, 0x04, 0x00, 0xFF, 0xFE, 0xAB, 0xCD}

	r := literalString([]byte("notfound"), "testfile", t.Name())
	got, err := io.ReadAll(r.Redact(bytes.NewReader(payload), "test.bin"))

	require.NoError(t, err)
	require.Equal(t, payload, got, "Binary file should be unchanged")
}
// TestLiteralRedactor_AllSingleByteValues feeds every byte value 0x00..0xFF
// through a non-matching redactor and expects identical output.
func TestLiteralRedactor_AllSingleByteValues(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	// payload[i] == byte(i) for all 256 possible values.
	payload := make([]byte, 256)
	for i := range payload {
		payload[i] = byte(i)
	}

	r := literalString([]byte("notfound"), "testfile", t.Name())
	got, err := io.ReadAll(r.Redact(bytes.NewReader(payload), "test.bin"))

	require.NoError(t, err)
	require.Equal(t, payload, got, "Binary file with all byte values should be unchanged")
	require.Len(t, got, 256, "Should preserve all 256 bytes")
}
// TestLiteralRedactor_AllTwoByteValues feeds all 65536 two-byte combinations
// (0x00 0x00 through 0xFF 0xFF, 128KB) through a non-matching redactor and
// expects identical output.
func TestLiteralRedactor_AllTwoByteValues(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	payload := make([]byte, 0, 256*256*2)
	for hi := 0; hi < 256; hi++ {
		for lo := 0; lo < 256; lo++ {
			payload = append(payload, byte(hi), byte(lo))
		}
	}

	r := literalString([]byte("notfound"), "testfile", t.Name())
	got, err := io.ReadAll(r.Redact(bytes.NewReader(payload), "test.bin"))

	require.NoError(t, err)
	require.Equal(t, payload, got, "Binary file with all two-byte combinations should be unchanged")
	require.Len(t, got, 256*256*2, "Should preserve all 131072 bytes")
}
// TestLiteralRedactor_BinaryFileWithMatch verifies that a literal byte
// sequence inside binary data is redacted and that no trailing newline is
// appended to the output.
func TestLiteralRedactor_BinaryFileWithMatch(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	// Binary content with a literal match (0xFF 0xFE sequence)
	binaryData := []byte{0x01, 0x02, 0xFF, 0xFE, 0x03, 0x04}
	match := []byte{0xFF, 0xFE}
	redactor := literalString(match, "testfile", t.Name())

	// The exact replacement text is not pinned here; only the absence of the
	// match and of an added newline is checked.
	out := redactor.Redact(bytes.NewReader(binaryData), "test.bin")
	result, err := io.ReadAll(out)
	require.NoError(t, err)

	require.NotEqual(t, binaryData, result, "Binary should be redacted")

	// Use bytes.Contains for the subsequence check: require.NotContains on a
	// []byte compares each single byte against the needle slice and therefore
	// can never fail.
	require.False(t, bytes.Contains(result, match), "Match should be replaced")

	// Guard the index below so an empty result fails cleanly instead of panicking.
	require.NotEmpty(t, result, "Redacted output should not be empty")

	// Most importantly: no trailing newline added to binary file
	require.NotEqual(t, byte('\n'), result[len(result)-1], "Should not add trailing newline")
}
// TestLiteralRedactor_TextWithTrailingNewline verifies that an existing
// trailing newline survives redaction when nothing matches.
func TestLiteralRedactor_TextWithTrailingNewline(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	r := literalString([]byte("xyz"), "testfile", t.Name())
	got, err := io.ReadAll(r.Redact(strings.NewReader("hello world\n"), "test.txt"))

	require.NoError(t, err)
	require.Equal(t, "hello world\n", string(got), "Trailing newline should be preserved")
}
// TestLiteralRedactor_TextWithoutTrailingNewline verifies that no newline is
// appended to input that did not end with one.
func TestLiteralRedactor_TextWithoutTrailingNewline(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	r := literalString([]byte("xyz"), "testfile", t.Name())
	got, err := io.ReadAll(r.Redact(strings.NewReader("hello world"), "test.txt"))

	require.NoError(t, err)
	require.Equal(t, "hello world", string(got), "No newline should be added")
}
// TestLiteralRedactor_EmptyFile verifies that empty input produces empty
// output.
func TestLiteralRedactor_EmptyFile(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	r := literalString([]byte("secret"), "testfile", t.Name())
	got, err := io.ReadAll(r.Redact(strings.NewReader(""), "test.txt"))

	require.NoError(t, err)
	require.Equal(t, "", string(got), "Empty file should remain empty")
}
// TestLiteralRedactor_LiteralMatch verifies that a single literal occurrence
// is replaced by the mask text.
func TestLiteralRedactor_LiteralMatch(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	r := literalString([]byte("secret123"), "testfile", t.Name())
	got, err := io.ReadAll(r.Redact(strings.NewReader("password=secret123"), "test.txt"))

	require.NoError(t, err)
	require.Equal(t, "password=***HIDDEN***", string(got))
}
// TestLiteralRedactor_MultipleOccurrences verifies that every occurrence on a
// single line is replaced.
func TestLiteralRedactor_MultipleOccurrences(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	const text = "secret here and secret there and secret everywhere"

	r := literalString([]byte("secret"), "testfile", t.Name())
	got, err := io.ReadAll(r.Redact(strings.NewReader(text), "test.txt"))

	require.NoError(t, err)
	require.Equal(t, "***HIDDEN*** here and ***HIDDEN*** there and ***HIDDEN*** everywhere", string(got))
}
// TestLiteralRedactor_MultipleOccurrencesMultiline verifies that matches are
// replaced on every line and that each line keeps its newline.
func TestLiteralRedactor_MultipleOccurrencesMultiline(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	const (
		text = "line1 secret\nline2 secret\nline3 secret\n"
		want = "line1 ***HIDDEN***\nline2 ***HIDDEN***\nline3 ***HIDDEN***\n"
	)

	r := literalString([]byte("secret"), "testfile", t.Name())
	got, err := io.ReadAll(r.Redact(strings.NewReader(text), "test.txt"))

	require.NoError(t, err)
	require.Equal(t, want, string(got))
}
// TestLiteralRedactor_Tokenization verifies that, with tokenization enabled,
// the matched value is replaced by something other than the original text or
// the ***HIDDEN*** mask.
func TestLiteralRedactor_Tokenization(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	// Tokenization is switched on for this test only.
	EnableTokenization()
	defer DisableTokenization()

	r := literalString([]byte("secret123"), "testfile", t.Name())
	raw, err := io.ReadAll(r.Redact(strings.NewReader("password=secret123"), "test.txt"))
	require.NoError(t, err)

	got := string(raw)
	require.NotContains(t, got, "secret123")
	require.NotContains(t, got, "***HIDDEN***")
	require.Contains(t, got, "password=")
}
// TestLiteralRedactor_RedactionCount verifies that one redaction is recorded
// per modified line, both per redactor and per file.
func TestLiteralRedactor_RedactionCount(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	// Names unique to this test so entries from other tests cannot be
	// mistaken for ours.
	const (
		fileKey     = "TestLiteralRedactor_RedactionCount_file"
		redactorKey = "TestLiteralRedactor_RedactionCount_redactor"
	)

	r := literalString([]byte("secret"), fileKey, redactorKey)
	_, err := io.ReadAll(r.Redact(strings.NewReader("secret here\nsecret there"), ""))
	require.NoError(t, err)

	// Two lines with one match each => two recorded redactions.
	recorded := GetRedactionList()
	require.Len(t, recorded.ByRedactor[redactorKey], 2, "Should record 2 redactions (one per line)")
	require.Len(t, recorded.ByFile[fileKey], 2, "Should record 2 redactions for file")
}
// TestLiteralRedactor_BackwardCompatibility verifies the output for
// newline-terminated text: matches are masked and every newline is kept.
func TestLiteralRedactor_BackwardCompatibility(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	const (
		text = "line1 secret\nline2 secret\nline3\n"
		want = "line1 ***HIDDEN***\nline2 ***HIDDEN***\nline3\n"
	)

	r := literalString([]byte("secret"), "testfile", t.Name())
	got, err := io.ReadAll(r.Redact(strings.NewReader(text), "test.txt"))

	require.NoError(t, err)
	require.Equal(t, want, string(got), "Behavior for text with newlines should be unchanged")
}
// TestLiteralRedactor_LastLineWithoutNewline verifies that a match on an
// unterminated final line is masked without a newline being appended.
func TestLiteralRedactor_LastLineWithoutNewline(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	const (
		text = "line1\nline2 secret"
		want = "line1\nline2 ***HIDDEN***"
	)

	r := literalString([]byte("secret"), "testfile", t.Name())
	got, err := io.ReadAll(r.Redact(strings.NewReader(text), "test.txt"))

	require.NoError(t, err)
	require.Equal(t, want, string(got), "Should not add newline to last line")
}
// TestLiteralRedactor_EmptyLines verifies that input made only of newlines is
// passed through unchanged.
func TestLiteralRedactor_EmptyLines(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	r := literalString([]byte("secret"), "testfile", t.Name())
	got, err := io.ReadAll(r.Redact(strings.NewReader("\n\n\n"), "test.txt"))

	require.NoError(t, err)
	require.Equal(t, "\n\n\n", string(got), "Empty lines should be preserved")
}
// TestLiteralRedactor_LargeFile runs the redactor over 1000 lines that each
// contain the literal and checks that none survive.
func TestLiteralRedactor_LargeFile(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	// 1000 identical lines, each with one occurrence of the literal.
	text := strings.Repeat("line secret here\n", 1000)

	r := literalString([]byte("secret"), "testfile", t.Name())
	raw, err := io.ReadAll(r.Redact(strings.NewReader(text), "test.txt"))
	require.NoError(t, err)

	got := string(raw)
	require.NotContains(t, got, "secret", "All secrets should be redacted")
	require.Contains(t, got, "***HIDDEN***")
}
// TestLiteralRedactor_PartialMatchNotReplaced pins how the literal redactor
// treats the match when it appears inside longer words. As the expected output
// shows, the literal is replaced wherever it occurs as a substring: the
// "secret" prefix of "secretive" and "secrets" is masked and the remaining
// suffix ("ive", "s") is left in place.
func TestLiteralRedactor_PartialMatchNotReplaced(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	r := literalString([]byte("secret"), "testfile", t.Name())
	got, err := io.ReadAll(r.Redact(strings.NewReader("secret secretive secrets"), "test.txt"))

	require.NoError(t, err)
	require.Equal(t, "***HIDDEN*** ***HIDDEN***ive ***HIDDEN***s", string(got))
}

View File

@@ -1,12 +1,18 @@
package redact package redact
import ( import (
"bufio"
"bytes" "bytes"
"io" "io"
"regexp" "regexp"
) )
// lineState represents a line and whether it ended with a newline character.
// This is used by MultiLineRedactor to track newline state for line pairs.
type lineState struct {
content []byte
hadNewline bool
}
type MultiLineRedactor struct { type MultiLineRedactor struct {
scan *regexp.Regexp scan *regexp.Regexp
re1 *regexp.Regexp re1 *regexp.Regexp
@@ -39,6 +45,16 @@ func NewMultiLineRedactor(re1 LineRedactor, re2 string, maskText, path, name str
return &MultiLineRedactor{scan: scanCompiled, re1: compiled1, re2: compiled2, maskText: maskText, filePath: path, redactName: name, isDefault: isDefault}, nil return &MultiLineRedactor{scan: scanCompiled, re1: compiled1, re2: compiled2, maskText: maskText, filePath: path, redactName: name, isDefault: isDefault}, nil
} }
// Redact processes the input reader in pairs of lines, applying redaction patterns.
// Unlike the previous implementation using bufio.Reader with readLine(), this now
// uses LineReader to preserve the exact newline structure of the input file.
//
// The MultiLineRedactor works by:
// 1. Reading pairs of lines (line1, line2)
// 2. If line1 matches the selector pattern (re1), redact line2 using re2
// 3. Write both lines with their original newline structure preserved
//
// This ensures binary files and text files without trailing newlines are not corrupted.
func (r *MultiLineRedactor) Redact(input io.Reader, path string) io.Reader { func (r *MultiLineRedactor) Redact(input io.Reader, path string) io.Reader {
out, writer := io.Pipe() out, writer := io.Pipe()
go func() { go func() {
@@ -48,51 +64,74 @@ func (r *MultiLineRedactor) Redact(input io.Reader, path string) io.Reader {
}() }()
tokenizer := GetGlobalTokenizer() tokenizer := GetGlobalTokenizer()
lineReader := NewLineReader(input)
reader := bufio.NewReader(input) // Try to read first two lines
line1, line2, err := getNextTwoLines(reader, nil) line1, nl1, line2, nl2, readErr := getNextTwoLines(lineReader, nil)
if err != nil {
// this will print 2 blank lines for empty input... // Handle case where we can't read 2 lines (empty file or single line)
// Append newlines since scanner strips them // Note: We check line1 == nil (not len(line1) == 0) because:
err = writeBytes(writer, line1, NEW_LINE, line2, NEW_LINE) // - nil means truly empty file with no content
if err != nil { // - []byte{} (len==0) means an empty line that had a newline (e.g., "\n")
return if readErr != nil && line1 == nil {
// Empty file - nothing to write
// Propagate non-EOF errors (EOF is expected for empty files)
if readErr != io.EOF {
err = readErr
} }
return return
} }
if readErr != nil {
// Only 1 line available (or empty line with newline) - write it and exit
// FIX: This is the bug fix - only add newline if original had one
// Also handles empty lines (line1 == []byte{} with nl1 == true)
err = writeLine(writer, line1, nl1)
if err != nil {
return
}
// Propagate non-EOF errors (EOF is expected for single-line files)
if readErr != io.EOF {
err = readErr
}
return
}
// Process line pairs
flushLastLine := false flushLastLine := false
lineNum := 1 lineNum := 1
for err == nil {
for readErr == nil {
lineNum++ // the first line that can be redacted is line 2 lineNum++ // the first line that can be redacted is line 2
// is scan is not nil, then check if line1 matches scan by lowercasing it // Pre-filter: if scan is not nil, check if line1 matches scan by lowercasing it
if r.scan != nil { if r.scan != nil {
lowerLine1 := bytes.ToLower(line1) lowerLine1 := bytes.ToLower(line1)
if !r.scan.Match(lowerLine1) { if !r.scan.Match(lowerLine1) {
// Append newline since scanner strips it // No match - write line1 and advance
err = writeBytes(writer, line1, NEW_LINE) err = writeLine(writer, line1, nl1)
if err != nil { if err != nil {
return return
} }
line1, line2, err = getNextTwoLines(reader, &line2) line1, nl1, line2, nl2, readErr = getNextTwoLines(lineReader, &lineState{line2, nl2})
flushLastLine = true flushLastLine = true
continue continue
} }
} }
// If line1 matches re1, then transform line2 using re2 // Check if line1 matches the selector pattern (re1)
if !r.re1.Match(line1) { if !r.re1.Match(line1) {
// Append newline since scanner strips it // No match - write line1 and advance
err = writeBytes(writer, line1, NEW_LINE) err = writeLine(writer, line1, nl1)
if err != nil { if err != nil {
return return
} }
line1, line2, err = getNextTwoLines(reader, &line2) line1, nl1, line2, nl2, readErr = getNextTwoLines(lineReader, &lineState{line2, nl2})
flushLastLine = true flushLastLine = true
continue continue
} }
// line1 matched selector - redact line2
flushLastLine = false flushLastLine = false
var clean []byte var clean []byte
if tokenizer.IsEnabled() { if tokenizer.IsEnabled() {
@@ -105,13 +144,17 @@ func (r *MultiLineRedactor) Redact(input io.Reader, path string) io.Reader {
clean = r.re2.ReplaceAll(line2, substStr) clean = r.re2.ReplaceAll(line2, substStr)
} }
// Append newlines since scanner strips them // Write line1 (selector line) and line2 (redacted line)
err = writeBytes(writer, line1, NEW_LINE, clean, NEW_LINE) err = writeLine(writer, line1, nl1)
if err != nil {
return
}
err = writeLine(writer, clean, nl2)
if err != nil { if err != nil {
return return
} }
// if clean is not equal to line2, a redaction was performed // Track redaction if content changed
if !bytes.Equal(clean, line2) { if !bytes.Equal(clean, line2) {
addRedaction(Redaction{ addRedaction(Redaction{
RedactorName: r.redactName, RedactorName: r.redactName,
@@ -122,42 +165,92 @@ func (r *MultiLineRedactor) Redact(input io.Reader, path string) io.Reader {
}) })
} }
line1, line2, err = getNextTwoLines(reader, nil) // Get next pair
line1, nl1, line2, nl2, readErr = getNextTwoLines(lineReader, nil)
} }
if flushLastLine { // After loop exits (readErr != nil), check if we have an unwritten line1
// Append newline since scanner strip it // This happens in two cases:
err = writeBytes(writer, line1, NEW_LINE) // 1. flushLastLine=true: line1 was advanced but not written (scan/re1 didn't match)
// 2. line1 != nil: we read line1 but couldn't get line2 (unpaired line at end)
// Note: We check line1 != nil (not len(line1) > 0) to handle empty lines ([]byte{})
if flushLastLine || line1 != nil {
err = writeLine(writer, line1, nl1)
if err != nil { if err != nil {
return return
} }
} }
// Propagate non-EOF read errors to the caller
// EOF is expected (end of file) and not an error condition
// Note: readErr is always non-nil here (loop exited), but we only propagate non-EOF errors
if readErr != io.EOF {
err = readErr
}
}() }()
return out return out
} }
func getNextTwoLines(reader *bufio.Reader, curLine2 *[]byte) (line1 []byte, line2 []byte, err error) { // getNextTwoLines reads the next pair of lines from the LineReader.
line2 = []byte{} // It returns the content and newline state for both lines.
//
// If curLine2 is provided, it's used as line1 (optimization for advancing through file).
// Otherwise, both lines are read fresh from the reader.
//
// Returns:
// - line1, hadNewline1: First line content and newline state
// - line2, hadNewline2: Second line content and newline state
// - err: Error only if we couldn't read line1, or if line2 read failed with non-EOF error
//
// Note: If line2 returns (content, false, io.EOF), we treat this as SUCCESS because
// we got the content. The EOF just means it didn't have a trailing newline.
func getNextTwoLines(lr *LineReader, curLine2 *lineState) (
line1 []byte, hadNewline1 bool,
line2 []byte, hadNewline2 bool,
err error,
) {
if curLine2 == nil { if curLine2 == nil {
line1, err = readLine(reader) // Read both lines fresh
line1, hadNewline1, err = lr.ReadLine()
if err != nil { if err != nil {
return return
} }
line2, err = readLine(reader) line2, hadNewline2, err = lr.ReadLine()
// If we got line2 content but hit EOF, that's OK - it just means no trailing newline
if err == io.EOF && len(line2) > 0 {
err = nil // Clear the error - we successfully read both lines
}
return return
} }
line1 = *curLine2 // Use cached line2 as new line1 (optimization)
line2, err = readLine(reader) line1 = curLine2.content
if err != nil { hadNewline1 = curLine2.hadNewline
return
}
// Read new line2
line2, hadNewline2, err = lr.ReadLine()
// If we got line2 content but hit EOF, that's OK - it just means no trailing newline
if err == io.EOF && len(line2) > 0 {
err = nil // Clear the error - we successfully read both lines
}
return return
} }
// writeLine emits line to w and, only when hadNewline is true, follows it
// with NEW_LINE so the output carries the same line ending the input had.
// The first write error is returned; the newline is not attempted after a
// failed content write.
func writeLine(w io.Writer, line []byte, hadNewline bool) error {
	err := writeBytes(w, line)
	if err == nil && hadNewline {
		err = writeBytes(w, NEW_LINE)
	}
	return err
}
// writeBytes writes all byte slices to the writer // writeBytes writes all byte slices to the writer
// in the order they are passed in the variadic argument // in the order they are passed in the variadic argument
func writeBytes(w io.Writer, bs ...[]byte) error { func writeBytes(w io.Writer, bs ...[]byte) error {

View File

@@ -28,8 +28,7 @@ func Test_NewMultiLineRedactor(t *testing.T) {
inputString: `"name": "secret_access_key" inputString: `"name": "secret_access_key"
"value": "dfeadsfsdfe"`, "value": "dfeadsfsdfe"`,
wantString: `"name": "secret_access_key" wantString: `"name": "secret_access_key"
"value": "***HIDDEN***" "value": "***HIDDEN***"`, // No trailing newline in input, so none in output
`,
}, },
{ {
name: "Redact multiline with AWS secret id", name: "Redact multiline with AWS secret id",
@@ -40,8 +39,7 @@ func Test_NewMultiLineRedactor(t *testing.T) {
inputString: `"name": "ACCESS_KEY_ID" inputString: `"name": "ACCESS_KEY_ID"
"value": "dfeadsfsdfe"`, "value": "dfeadsfsdfe"`,
wantString: `"name": "ACCESS_KEY_ID" wantString: `"name": "ACCESS_KEY_ID"
"value": "***HIDDEN***" "value": "***HIDDEN***"`, // No trailing newline in input, so none in output
`,
}, },
{ {
name: "Redact multiline with OSD", name: "Redact multiline with OSD",
@@ -52,8 +50,7 @@ func Test_NewMultiLineRedactor(t *testing.T) {
inputString: `"entity": "osd.1abcdef" inputString: `"entity": "osd.1abcdef"
"key": "Gjt8s0WkfPtxZUo7gI8a0awbQGHgzuprdaedfb=="`, "key": "Gjt8s0WkfPtxZUo7gI8a0awbQGHgzuprdaedfb=="`,
wantString: `"entity": "osd.1abcdef" wantString: `"entity": "osd.1abcdef"
"key": "***HIDDEN***" "key": "***HIDDEN***"`, // No trailing newline in input, so none in output
`,
}, },
{ {
name: "Redact multiline with AWS secret access key and scan regex", name: "Redact multiline with AWS secret access key and scan regex",
@@ -65,8 +62,7 @@ func Test_NewMultiLineRedactor(t *testing.T) {
inputString: `"name": "secret_access_key" inputString: `"name": "secret_access_key"
"value": "dfeadsfsdfe"`, "value": "dfeadsfsdfe"`,
wantString: `"name": "secret_access_key" wantString: `"name": "secret_access_key"
"value": "***HIDDEN***" "value": "***HIDDEN***"`, // No trailing newline in input, so none in output
`,
}, },
{ {
name: "Redact multiline with AWS secret id and scan regex", name: "Redact multiline with AWS secret id and scan regex",
@@ -78,8 +74,7 @@ func Test_NewMultiLineRedactor(t *testing.T) {
inputString: `"name": "ACCESS_KEY_ID" inputString: `"name": "ACCESS_KEY_ID"
"value": "dfeadsfsdfe"`, "value": "dfeadsfsdfe"`,
wantString: `"name": "ACCESS_KEY_ID" wantString: `"name": "ACCESS_KEY_ID"
"value": "***HIDDEN***" "value": "***HIDDEN***"`, // No trailing newline in input, so none in output
`,
}, },
{ {
name: "Redact multiline with OSD and scan regex", name: "Redact multiline with OSD and scan regex",
@@ -91,8 +86,7 @@ func Test_NewMultiLineRedactor(t *testing.T) {
inputString: `"entity": "osd.1abcdef" inputString: `"entity": "osd.1abcdef"
"key": "Gjt8s0WkfPtxZUo7gI8a0awbQGHgzuprdaedfb=="`, "key": "Gjt8s0WkfPtxZUo7gI8a0awbQGHgzuprdaedfb=="`,
wantString: `"entity": "osd.1abcdef" wantString: `"entity": "osd.1abcdef"
"key": "***HIDDEN***" "key": "***HIDDEN***"`, // No trailing newline in input, so none in output
`,
}, },
{ {
name: "Multiple newlines with no match", name: "Multiple newlines with no match",
@@ -102,7 +96,7 @@ func Test_NewMultiLineRedactor(t *testing.T) {
}, },
redactor: `(?i)("value": *")(?P<mask>.*[^\"]*)(")`, redactor: `(?i)("value": *")(?P<mask>.*[^\"]*)(")`,
inputString: "no match\n\n no match \n\n", inputString: "no match\n\n no match \n\n",
wantString: "no match\n\n no match \n\n", wantString: "no match\n\n no match \n\n", // Input has trailing newline, should be preserved
}, },
} }
for _, tt := range tests { for _, tt := range tests {
@@ -158,3 +152,281 @@ func Test_writeBytes(t *testing.T) {
}) })
} }
} }
// TestMultiLineRedactor_BinaryFile (test 3.16) checks that binary content
// containing no newline bytes comes out of the multi-line redactor
// byte-for-byte identical, i.e. no newline is appended.
func TestMultiLineRedactor_BinaryFile(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	// No 0x0A byte anywhere, so the input is a single unterminated line.
	binaryData := []byte{0x01, 0x02, 0x03, 0x04, 0x00, 0xFF, 0xFE, 0xAB, 0xCD}

	// t.Name() gives this test its own redactor name in the shared redaction
	// list instead of the generic "test" key.
	redactor, err := NewMultiLineRedactor(
		LineRedactor{regex: `"name":`},
		`"value":`,
		MASK_TEXT, "testfile", t.Name(), false,
	)
	require.NoError(t, err)

	out := redactor.Redact(bytes.NewReader(binaryData), "test.bin")
	result, err := io.ReadAll(out)
	require.NoError(t, err)
	require.Equal(t, binaryData, result, "Binary file should be unchanged (no extra newlines)")
}
// TestMultiLineRedactor_AllSingleByteValues feeds one occurrence of every
// byte value 0x00..0xFF through the multi-line redactor and expects the
// 256-byte payload to come back unchanged.
func TestMultiLineRedactor_AllSingleByteValues(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	// Position i holds byte value i.
	binaryData := make([]byte, 256)
	for idx := range binaryData {
		binaryData[idx] = byte(idx)
	}

	redactor, err := NewMultiLineRedactor(
		LineRedactor{regex: `"name":`},
		`"value":`,
		MASK_TEXT, "testfile", t.Name(), false,
	)
	require.NoError(t, err)

	redacted := redactor.Redact(bytes.NewReader(binaryData), "test.bin")
	result, err := io.ReadAll(redacted)
	require.NoError(t, err)

	require.Equal(t, binaryData, result, "Binary file with all byte values should be unchanged")
	require.Len(t, result, 256, "Should preserve all 256 bytes")
}
// TestMultiLineRedactor_AllTwoByteValues runs every ordered two-byte
// combination (0x00,0x00 .. 0xFF,0xFF) through the multi-line redactor and
// expects the 131,072-byte payload to come back unchanged.
func TestMultiLineRedactor_AllTwoByteValues(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	// 65,536 pairs of two bytes each, laid out as (hi, lo) with lo varying fastest.
	binaryData := make([]byte, 0, 256*256*2)
	for hi := 0; hi < 256; hi++ {
		for lo := 0; lo < 256; lo++ {
			binaryData = append(binaryData, byte(hi), byte(lo))
		}
	}

	redactor, err := NewMultiLineRedactor(
		LineRedactor{regex: `"name":`},
		`"value":`,
		MASK_TEXT, "testfile", t.Name(), false,
	)
	require.NoError(t, err)

	redacted := redactor.Redact(bytes.NewReader(binaryData), "test.bin")
	result, err := io.ReadAll(redacted)
	require.NoError(t, err)

	require.Equal(t, binaryData, result, "Binary file with all two-byte combinations should be unchanged")
	require.Len(t, result, 256*256*2, "Should preserve all 131,072 bytes (64k combinations)")
}
// TestMultiLineRedactor_SingleLineWithNewline (test 3.17) checks that a lone
// line terminated by "\n" is emitted unchanged, trailing newline included.
func TestMultiLineRedactor_SingleLineWithNewline(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	input := "single line\n"

	// t.Name() gives this test its own redactor name in the shared redaction
	// list instead of the generic "test" key.
	redactor, err := NewMultiLineRedactor(
		LineRedactor{regex: `"name":`},
		`"value":`,
		MASK_TEXT, "testfile", t.Name(), false,
	)
	require.NoError(t, err)

	out := redactor.Redact(bytes.NewReader([]byte(input)), "test.txt")
	result, err := io.ReadAll(out)
	require.NoError(t, err)
	require.Equal(t, "single line\n", string(result))
}
// TestMultiLineRedactor_SingleLineWithoutNewline (test 3.18) checks that a
// lone line with no terminating "\n" is emitted unchanged and that no
// newline is appended to it.
func TestMultiLineRedactor_SingleLineWithoutNewline(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	input := "single line"

	// t.Name() gives this test its own redactor name in the shared redaction
	// list instead of the generic "test" key.
	redactor, err := NewMultiLineRedactor(
		LineRedactor{regex: `"name":`},
		`"value":`,
		MASK_TEXT, "testfile", t.Name(), false,
	)
	require.NoError(t, err)

	out := redactor.Redact(bytes.NewReader([]byte(input)), "test.txt")
	result, err := io.ReadAll(out)
	require.NoError(t, err)
	require.Equal(t, "single line", string(result), "No newline should be added")
}
// TestMultiLineRedactor_EmptyFile (test 3.19) checks that zero-length input
// produces zero-length output.
func TestMultiLineRedactor_EmptyFile(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	input := ""

	// t.Name() gives this test its own redactor name in the shared redaction
	// list instead of the generic "test" key.
	redactor, err := NewMultiLineRedactor(
		LineRedactor{regex: `"name":`},
		`"value":`,
		MASK_TEXT, "testfile", t.Name(), false,
	)
	require.NoError(t, err)

	out := redactor.Redact(bytes.NewReader([]byte(input)), "test.txt")
	result, err := io.ReadAll(out)
	require.NoError(t, err)
	require.Equal(t, "", string(result), "Empty file should remain empty")
}
// TestMultiLineRedactor_TwoLinesMatch (test 3.20) checks that when the first
// line matches the selector, the masked group on the second line is
// replaced, and that the missing trailing newline of the input stays missing.
func TestMultiLineRedactor_TwoLinesMatch(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	input := `"name": "PASSWORD"
"value": "secret123"`

	// t.Name() gives this test its own redactor name in the shared redaction
	// list instead of the generic "test" key.
	redactor, err := NewMultiLineRedactor(
		LineRedactor{regex: `(?i)"name": *"PASSWORD"`},
		`(?i)("value": *")(?P<mask>[^"]*)(")`,
		MASK_TEXT, "testfile", t.Name(), false,
	)
	require.NoError(t, err)

	out := redactor.Redact(bytes.NewReader([]byte(input)), "test.txt")
	result, err := io.ReadAll(out)
	require.NoError(t, err)

	expected := `"name": "PASSWORD"
"value": "***HIDDEN***"`
	require.Equal(t, expected, string(result))
}
// TestMultiLineRedactor_TwoLinesNoMatch (test 3.21) checks that when the
// first line does not match the selector, both lines are emitted unchanged.
func TestMultiLineRedactor_TwoLinesNoMatch(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	input := `"name": "USERNAME"
"value": "admin"`

	// t.Name() gives this test its own redactor name in the shared redaction
	// list instead of the generic "test" key.
	redactor, err := NewMultiLineRedactor(
		LineRedactor{regex: `(?i)"name": *"PASSWORD"`},
		`(?i)("value": *")(?P<mask>[^"]*)(")`,
		MASK_TEXT, "testfile", t.Name(), false,
	)
	require.NoError(t, err)

	out := redactor.Redact(bytes.NewReader([]byte(input)), "test.txt")
	result, err := io.ReadAll(out)
	require.NoError(t, err)

	expected := `"name": "USERNAME"
"value": "admin"`
	require.Equal(t, expected, string(result))
}
// TestMultiLineRedactor_MultiplePairs (test 3.22) runs three name/value
// pairs through the redactor: the PASSWORD and TOKEN pairs match the
// selector and get their value masked, the USERNAME pair does not and is
// left alone. The input's trailing newline must be preserved.
func TestMultiLineRedactor_MultiplePairs(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	input := `"name": "PASSWORD"
"value": "secret1"
"name": "TOKEN"
"value": "secret2"
"name": "USERNAME"
"value": "admin"
`

	// t.Name() gives this test its own redactor name in the shared redaction
	// list instead of the generic "test" key.
	redactor, err := NewMultiLineRedactor(
		LineRedactor{regex: `(?i)"name": *"(PASSWORD|TOKEN)"`},
		`(?i)("value": *")(?P<mask>[^"]*)(")`,
		MASK_TEXT, "testfile", t.Name(), false,
	)
	require.NoError(t, err)

	out := redactor.Redact(bytes.NewReader([]byte(input)), "test.txt")
	result, err := io.ReadAll(out)
	require.NoError(t, err)

	expected := `"name": "PASSWORD"
"value": "***HIDDEN***"
"name": "TOKEN"
"value": "***HIDDEN***"
"name": "USERNAME"
"value": "admin"
`
	require.Equal(t, expected, string(result))
}
// TestMultiLineRedactor_ThreeLines (test 3.23) covers an odd line count: a
// matching selector/value pair followed by a final unpaired line that has no
// trailing newline. The pair is redacted and the last line is emitted as-is.
func TestMultiLineRedactor_ThreeLines(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	input := `"name": "PASSWORD"
"value": "secret"
unpaired line`

	// t.Name() gives this test its own redactor name in the shared redaction
	// list instead of the generic "test" key.
	redactor, err := NewMultiLineRedactor(
		LineRedactor{regex: `(?i)"name": *"PASSWORD"`},
		`(?i)("value": *")(?P<mask>[^"]*)(")`,
		MASK_TEXT, "testfile", t.Name(), false,
	)
	require.NoError(t, err)

	out := redactor.Redact(bytes.NewReader([]byte(input)), "test.txt")
	result, err := io.ReadAll(out)
	require.NoError(t, err)

	expected := `"name": "PASSWORD"
"value": "***HIDDEN***"
unpaired line`
	require.Equal(t, expected, string(result))
}
// TestMultiLineRedactor_LargeFile (test 3.24) pushes 1000 matching
// selector/value pairs through the redactor and compares the complete
// output, so a dropped line or a lost or added newline fails the test as
// well as an unredacted secret.
func TestMultiLineRedactor_LargeFile(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	const pairs = 1000
	input := strings.Repeat(`"name": "PASSWORD"`+"\n"+`"value": "secret"`+"\n", pairs)
	// Every pair matches the selector, so every value line is expected to be masked.
	expected := strings.Repeat(`"name": "PASSWORD"`+"\n"+`"value": "***HIDDEN***"`+"\n", pairs)

	// t.Name() gives this test its own redactor name in the shared redaction
	// list instead of the generic "test" key.
	redactor, err := NewMultiLineRedactor(
		LineRedactor{regex: `(?i)"name": *"PASSWORD"`},
		`(?i)("value": *")(?P<mask>[^"]*)(")`,
		MASK_TEXT, "testfile", t.Name(), false,
	)
	require.NoError(t, err)

	out := redactor.Redact(strings.NewReader(input), "test.txt")
	result, err := io.ReadAll(out)
	require.NoError(t, err)

	// Exact comparison covers both "all secrets redacted" and "line structure preserved".
	require.Equal(t, expected, string(result))
}

View File

@@ -104,6 +104,10 @@ func GetRedactionList() RedactionList {
} }
func ResetRedactionList() { func ResetRedactionList() {
// Wait for all pending redaction goroutines to complete before resetting
// This prevents race conditions where goroutines write to the map after reset
pendingRedactions.Wait()
redactionListMut.Lock() redactionListMut.Lock()
defer redactionListMut.Unlock() defer redactionListMut.Unlock()
allRedactions = RedactionList{ allRedactions = RedactionList{

View File

@@ -1724,6 +1724,7 @@ func Test_Redactors(t *testing.T) {
t.Run("test default redactors", func(t *testing.T) { t.Run("test default redactors", func(t *testing.T) {
req := require.New(t) req := require.New(t)
ResetRedactionList() // Ensure clean state before test
redactors, err := getRedactors("testpath") redactors, err := getRedactors("testpath")
req.NoError(err) req.NoError(err)

View File

@@ -3,6 +3,7 @@ package redact
import ( import (
"bufio" "bufio"
"bytes" "bytes"
"errors"
"fmt" "fmt"
"io" "io"
"regexp" "regexp"
@@ -39,6 +40,13 @@ func NewSingleLineRedactor(re LineRedactor, maskText, path, name string, isDefau
return &SingleLineRedactor{scan: scanCompiled, re: compiled, maskText: maskText, filePath: path, redactName: name, isDefault: isDefault}, nil return &SingleLineRedactor{scan: scanCompiled, re: compiled, maskText: maskText, filePath: path, redactName: name, isDefault: isDefault}, nil
} }
// Redact processes the input reader line-by-line, applying redaction patterns.
// Unlike the previous implementation using bufio.Scanner, this now uses LineReader
// to preserve the exact newline structure of the input file. Lines that originally
// ended with \n will have \n added back, while lines without \n (like the last line
// of a file without a trailing newline, or binary files) will not have \n added.
// This ensures binary files and text files without trailing newlines are not corrupted.
func (r *SingleLineRedactor) Redact(input io.Reader, path string) io.Reader { func (r *SingleLineRedactor) Redact(input io.Reader, path string) io.Reader {
out, writer := io.Pipe() out, writer := io.Pipe()
@@ -48,7 +56,8 @@ func (r *SingleLineRedactor) Redact(input io.Reader, path string) io.Reader {
if err == nil || err == io.EOF { if err == nil || err == io.EOF {
writer.Close() writer.Close()
} else { } else {
if err == bufio.ErrTooLong { // Check if error is about line exceeding maximum size
if errors.Is(err, bufio.ErrTooLong) {
s := fmt.Sprintf("Error redacting %q. A line in the file exceeded %d MB max length", path, constants.SCANNER_MAX_SIZE/1024/1024) s := fmt.Sprintf("Error redacting %q. A line in the file exceeded %d MB max length", path, constants.SCANNER_MAX_SIZE/1024/1024)
klog.V(2).Info(s) klog.V(2).Info(s)
} else { } else {
@@ -58,68 +67,89 @@ func (r *SingleLineRedactor) Redact(input io.Reader, path string) io.Reader {
} }
}() }()
buf := make([]byte, constants.BUF_INIT_SIZE) // Use LineReader instead of bufio.Scanner to track newline presence
scanner := bufio.NewScanner(input) lineReader := NewLineReader(input)
scanner.Buffer(buf, constants.SCANNER_MAX_SIZE)
tokenizer := GetGlobalTokenizer() tokenizer := GetGlobalTokenizer()
lineNum := 0 lineNum := 0
for scanner.Scan() {
lineNum++
line := scanner.Bytes()
// is scan is not nil, then check if line matches scan by lowercasing it for {
line, hadNewline, readErr := lineReader.ReadLine()
// Handle EOF with no content - we're done
if readErr == io.EOF && len(line) == 0 {
break
}
// We have content to process
lineNum++
// Determine if we should redact this line
shouldRedact := true
// Pre-filter: if scan is not nil, check if line matches scan by lowercasing it
if r.scan != nil { if r.scan != nil {
lowerLine := bytes.ToLower(line) lowerLine := bytes.ToLower(line)
if !r.scan.Match(lowerLine) { if !r.scan.Match(lowerLine) {
// Append newline since scanner strips it shouldRedact = false
err = writeBytes(writer, line, NEW_LINE)
if err != nil {
return
}
continue
} }
} }
// if scan matches, but re does not, do not redact // Check if line matches the main redaction pattern
if !r.re.Match(line) { if shouldRedact && !r.re.Match(line) {
// Append newline since scanner strips it shouldRedact = false
err = writeBytes(writer, line, NEW_LINE)
if err != nil {
return
}
continue
} }
var clean []byte // Process the line (redact or pass through)
if tokenizer.IsEnabled() { var outputLine []byte
// Use tokenized replacement - context comes from the redactor name which often indicates the secret type if shouldRedact {
context := r.redactName // Line matches - perform redaction
clean = getTokenizedReplacementPatternWithPath(r.re, line, context, r.filePath) if tokenizer.IsEnabled() {
// Use tokenized replacement - context comes from the redactor name
context := r.redactName
outputLine = getTokenizedReplacementPatternWithPath(r.re, line, context, r.filePath)
} else {
// Use original masking behavior
substStr := []byte(getReplacementPattern(r.re, r.maskText))
outputLine = r.re.ReplaceAll(line, substStr)
}
// Track redaction if content changed
if !bytes.Equal(outputLine, line) {
addRedaction(Redaction{
RedactorName: r.redactName,
CharactersRemoved: len(line) - len(outputLine),
Line: lineNum,
File: r.filePath,
IsDefaultRedactor: r.isDefault,
})
}
} else { } else {
// Use original masking behavior // No match - use original line
substStr := []byte(getReplacementPattern(r.re, r.maskText)) outputLine = line
clean = r.re.ReplaceAll(line, substStr)
} }
// Append newline since scanner strips it
err = writeBytes(writer, clean, NEW_LINE) // Write the line
err = writeBytes(writer, outputLine)
if err != nil { if err != nil {
return return
} }
// Only add newline if original line had one
// if clean is not equal to line, a redaction was performed if hadNewline {
if !bytes.Equal(clean, line) { err = writeBytes(writer, NEW_LINE)
addRedaction(Redaction{ if err != nil {
RedactorName: r.redactName, return
CharactersRemoved: len(line) - len(clean), }
Line: lineNum, }
File: r.filePath,
IsDefaultRedactor: r.isDefault, // Check if we hit EOF after processing this line
}) if readErr == io.EOF {
break
}
// Check for non-EOF errors
if readErr != nil {
err = readErr
return
} }
}
if scanErr := scanner.Err(); scanErr != nil {
err = scanErr
} }
}() }()
return out return out

View File

@@ -21,7 +21,7 @@ func TestNewSingleLineRedactor(t *testing.T) {
name: "copied from default redactors", name: "copied from default redactors",
re: `(?i)(Pwd *= *)(?P<mask>[^\;]+)(;)`, re: `(?i)(Pwd *= *)(?P<mask>[^\;]+)(;)`,
inputString: `pwd = abcdef;`, inputString: `pwd = abcdef;`,
wantString: "pwd = ***HIDDEN***;\n", wantString: "pwd = ***HIDDEN***;", // No trailing newline in input, so none in output
wantRedactions: RedactionList{ wantRedactions: RedactionList{
ByRedactor: map[string][]Redaction{ ByRedactor: map[string][]Redaction{
"copied from default redactors": []Redaction{ "copied from default redactors": []Redaction{
@@ -49,7 +49,7 @@ func TestNewSingleLineRedactor(t *testing.T) {
name: "no leading matching group", // this is not the ideal behavior - why are we dropping ungrouped match components? name: "no leading matching group", // this is not the ideal behavior - why are we dropping ungrouped match components?
re: `(?i)Pwd *= *(?P<mask>[^\;]+)(;)`, re: `(?i)Pwd *= *(?P<mask>[^\;]+)(;)`,
inputString: `pwd = abcdef;`, inputString: `pwd = abcdef;`,
wantString: "***HIDDEN***;\n", wantString: "***HIDDEN***;", // No trailing newline in input, so none in output
wantRedactions: RedactionList{ wantRedactions: RedactionList{
ByRedactor: map[string][]Redaction{ ByRedactor: map[string][]Redaction{
"no leading matching group": []Redaction{ "no leading matching group": []Redaction{
@@ -77,7 +77,7 @@ func TestNewSingleLineRedactor(t *testing.T) {
name: "multiple matching literals", name: "multiple matching literals",
re: `(?i)(Pwd *= *)(?P<mask>[^\;]+)(;)`, re: `(?i)(Pwd *= *)(?P<mask>[^\;]+)(;)`,
inputString: `pwd = abcdef;abcdef`, inputString: `pwd = abcdef;abcdef`,
wantString: "pwd = ***HIDDEN***;abcdef\n", wantString: "pwd = ***HIDDEN***;abcdef", // No trailing newline in input, so none in output
wantRedactions: RedactionList{ wantRedactions: RedactionList{
ByRedactor: map[string][]Redaction{ ByRedactor: map[string][]Redaction{
"multiple matching literals": []Redaction{ "multiple matching literals": []Redaction{
@@ -105,8 +105,7 @@ func TestNewSingleLineRedactor(t *testing.T) {
name: "Redact values for environment variables that look like AWS Secret Access Keys", name: "Redact values for environment variables that look like AWS Secret Access Keys",
re: `(?i)("name":"[^\"]*SECRET_?ACCESS_?KEY","value":")(?P<mask>[^\"]*)(")`, re: `(?i)("name":"[^\"]*SECRET_?ACCESS_?KEY","value":")(?P<mask>[^\"]*)(")`,
inputString: `{"name":"SECRET_ACCESS_KEY","value":"123"}`, inputString: `{"name":"SECRET_ACCESS_KEY","value":"123"}`,
wantString: `{"name":"SECRET_ACCESS_KEY","value":"***HIDDEN***"} wantString: `{"name":"SECRET_ACCESS_KEY","value":"***HIDDEN***"}`, // No trailing newline in input, so none in output
`,
wantRedactions: RedactionList{ wantRedactions: RedactionList{
ByRedactor: map[string][]Redaction{ ByRedactor: map[string][]Redaction{
"Redact values for environment variables that look like AWS Secret Access Keys": []Redaction{ "Redact values for environment variables that look like AWS Secret Access Keys": []Redaction{
@@ -134,7 +133,7 @@ func TestNewSingleLineRedactor(t *testing.T) {
name: "Redact connection strings with username and password", name: "Redact connection strings with username and password",
re: `(?i)(https?|ftp)(:\/\/)(?P<mask>[^:\"\/]+){1}(:)(?P<mask>[^@\"\/]+){1}(?P<host>@[^:\/\s\"]+){1}(?P<port>:[\d]+)?`, re: `(?i)(https?|ftp)(:\/\/)(?P<mask>[^:\"\/]+){1}(:)(?P<mask>[^@\"\/]+){1}(?P<host>@[^:\/\s\"]+){1}(?P<port>:[\d]+)?`,
inputString: `http://user:password@host:8888`, inputString: `http://user:password@host:8888`,
wantString: "http://***HIDDEN***:***HIDDEN***@host:8888\n", wantString: "http://***HIDDEN***:***HIDDEN***@host:8888", // No trailing newline in input, so none in output
wantRedactions: RedactionList{ wantRedactions: RedactionList{
ByRedactor: map[string][]Redaction{ ByRedactor: map[string][]Redaction{
"Redact connection strings with username and password": []Redaction{ "Redact connection strings with username and password": []Redaction{
@@ -163,8 +162,7 @@ func TestNewSingleLineRedactor(t *testing.T) {
re: `(?i)("name":"[^\"]*SECRET_?ACCESS_?KEY","value":")(?P<mask>[^\"]*)(")`, re: `(?i)("name":"[^\"]*SECRET_?ACCESS_?KEY","value":")(?P<mask>[^\"]*)(")`,
scan: `secret_?access_?key`, scan: `secret_?access_?key`,
inputString: `{"name":"SECRET_ACCESS_KEY","value":"123"}`, inputString: `{"name":"SECRET_ACCESS_KEY","value":"123"}`,
wantString: `{"name":"SECRET_ACCESS_KEY","value":"***HIDDEN***"} wantString: `{"name":"SECRET_ACCESS_KEY","value":"***HIDDEN***"}`, // No trailing newline in input, so none in output
`,
wantRedactions: RedactionList{ wantRedactions: RedactionList{
ByRedactor: map[string][]Redaction{ ByRedactor: map[string][]Redaction{
"Redact values for environment variables that look like AWS Secret Access Keys With Scan": { "Redact values for environment variables that look like AWS Secret Access Keys With Scan": {
@@ -193,8 +191,7 @@ func TestNewSingleLineRedactor(t *testing.T) {
re: `(?i)("name":"[^\"]*ACCESS_?KEY_?ID","value":")(?P<mask>[^\"]*)(")`, re: `(?i)("name":"[^\"]*ACCESS_?KEY_?ID","value":")(?P<mask>[^\"]*)(")`,
scan: `access_?key_?id`, scan: `access_?key_?id`,
inputString: `{"name":"ACCESS_KEY_ID","value":"123"}`, inputString: `{"name":"ACCESS_KEY_ID","value":"123"}`,
wantString: `{"name":"ACCESS_KEY_ID","value":"***HIDDEN***"} wantString: `{"name":"ACCESS_KEY_ID","value":"***HIDDEN***"}`, // No trailing newline in input, so none in output
`,
wantRedactions: RedactionList{ wantRedactions: RedactionList{
ByRedactor: map[string][]Redaction{ ByRedactor: map[string][]Redaction{
"Redact values for environment variables that look like Access Keys ID With Scan": { "Redact values for environment variables that look like Access Keys ID With Scan": {
@@ -223,8 +220,7 @@ func TestNewSingleLineRedactor(t *testing.T) {
re: `(?i)("name":"[^\"]*OWNER_?ACCOUNT","value":")(?P<mask>[^\"]*)(")`, re: `(?i)("name":"[^\"]*OWNER_?ACCOUNT","value":")(?P<mask>[^\"]*)(")`,
scan: `owner_?account`, scan: `owner_?account`,
inputString: `{"name":"OWNER_ACCOUNT","value":"123"}`, inputString: `{"name":"OWNER_ACCOUNT","value":"123"}`,
wantString: `{"name":"OWNER_ACCOUNT","value":"***HIDDEN***"} wantString: `{"name":"OWNER_ACCOUNT","value":"***HIDDEN***"}`, // No trailing newline in input, so none in output
`,
wantRedactions: RedactionList{ wantRedactions: RedactionList{
ByRedactor: map[string][]Redaction{ ByRedactor: map[string][]Redaction{
"Redact values for environment variables that look like Owner Account With Scan": { "Redact values for environment variables that look like Owner Account With Scan": {
@@ -253,8 +249,7 @@ func TestNewSingleLineRedactor(t *testing.T) {
re: `(?i)(Data Source *= *)(?P<mask>[^\;]+)(;)`, re: `(?i)(Data Source *= *)(?P<mask>[^\;]+)(;)`,
scan: `data source`, scan: `data source`,
inputString: `Data Source = abcdef;`, inputString: `Data Source = abcdef;`,
wantString: `Data Source = ***HIDDEN***; wantString: `Data Source = ***HIDDEN***;`, // No trailing newline in input, so none in output
`,
wantRedactions: RedactionList{ wantRedactions: RedactionList{
ByRedactor: map[string][]Redaction{ ByRedactor: map[string][]Redaction{
"Redact 'Data Source' values With Scan": { "Redact 'Data Source' values With Scan": {
@@ -283,8 +278,7 @@ func TestNewSingleLineRedactor(t *testing.T) {
re: `(?i)(https?|ftp)(:\/\/)(?P<mask>[^:\"\/]+){1}(:)(?P<mask>[^@\"\/]+){1}(?P<host>@[^:\/\s\"]+){1}(?P<port>:[\d]+)?`, re: `(?i)(https?|ftp)(:\/\/)(?P<mask>[^:\"\/]+){1}(:)(?P<mask>[^@\"\/]+){1}(?P<host>@[^:\/\s\"]+){1}(?P<port>:[\d]+)?`,
scan: `https?|ftp`, scan: `https?|ftp`,
inputString: `http://user:password@host:8888;`, inputString: `http://user:password@host:8888;`,
wantString: `http://***HIDDEN***:***HIDDEN***@host:8888; wantString: `http://***HIDDEN***:***HIDDEN***@host:8888;`, // No trailing newline in input, so none in output
`,
wantRedactions: RedactionList{ wantRedactions: RedactionList{
ByRedactor: map[string][]Redaction{ ByRedactor: map[string][]Redaction{
"Redact connection strings With Scan": { "Redact connection strings With Scan": {
@@ -340,3 +334,318 @@ func TestNewSingleLineRedactor(t *testing.T) {
}) })
} }
} }
// TestSingleLineRedactor_BinaryFile (test 2.15) checks that binary content
// containing no newline bytes comes out of the single-line redactor
// byte-for-byte identical.
func TestSingleLineRedactor_BinaryFile(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	// No 0x0A byte anywhere, so the input is a single unterminated line.
	binaryData := []byte{0x01, 0x02, 0x03, 0x04, 0x00, 0xFF, 0xFE, 0xAB, 0xCD}

	// t.Name() gives this test its own redactor name in the shared redaction
	// list instead of the generic "test" key.
	redactor, err := NewSingleLineRedactor(
		LineRedactor{regex: "password"}, // Pattern that won't match binary
		MASK_TEXT, "testfile", t.Name(), false,
	)
	require.NoError(t, err)

	out := redactor.Redact(bytes.NewReader(binaryData), "test.bin")
	result, err := io.ReadAll(out)
	require.NoError(t, err)
	require.Equal(t, binaryData, result, "Binary file should be unchanged")
}
// TestSingleLineRedactor_AllSingleByteValues feeds the redactor a 256-byte
// payload holding each byte value from 0x00 through 0xFF exactly once and
// expects the output to equal the input.
func TestSingleLineRedactor_AllSingleByteValues(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	// One byte per possible value, in ascending order.
	payload := make([]byte, 256)
	for idx := range payload {
		payload[idx] = byte(idx)
	}

	r, err := NewSingleLineRedactor(LineRedactor{regex: `secret`}, MASK_TEXT, "testfile", t.Name(), false)
	require.NoError(t, err)

	got, err := io.ReadAll(r.Redact(bytes.NewReader(payload), "test.bin"))
	require.NoError(t, err)
	require.Equal(t, payload, got, "Binary file with all byte values should be unchanged")
	require.Len(t, got, 256, "Should preserve all 256 bytes")
}
// TestSingleLineRedactor_AllTwoByteValues builds a payload containing every
// ordered pair of byte values (0x00,0x00 through 0xFF,0xFF) and expects the
// redactor to return it unchanged.
func TestSingleLineRedactor_AllTwoByteValues(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	// 65,536 pairs at two bytes each: 131,072 bytes (128KB) in total.
	const total = 256 * 256 * 2
	payload := make([]byte, 0, total)
	for first := 0; first < 256; first++ {
		for second := 0; second < 256; second++ {
			payload = append(payload, byte(first), byte(second))
		}
	}

	r, err := NewSingleLineRedactor(LineRedactor{regex: `secret`}, MASK_TEXT, "testfile", t.Name(), false)
	require.NoError(t, err)

	got, err := io.ReadAll(r.Redact(bytes.NewReader(payload), "test.bin"))
	require.NoError(t, err)
	require.Equal(t, payload, got, "Binary file with all two-byte combinations should be unchanged")
	require.Len(t, got, 256*256*2, "Should preserve all 131,072 bytes (64k combinations)")
}
// TestSingleLineRedactor_TextFileWithNewline (test 2.16) checks that a text
// input ending in "\n" still ends in "\n" after passing through a redactor
// whose pattern matches nothing.
func TestSingleLineRedactor_TextFileWithNewline(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	input := "hello world\n"

	// Name the redactor after the running test, as other tests in this file
	// do, instead of sharing the hard-coded name "test" between tests.
	redactor, err := NewSingleLineRedactor(
		LineRedactor{regex: "xyz"}, // No match
		MASK_TEXT, "testfile", t.Name(), false,
	)
	require.NoError(t, err)

	out := redactor.Redact(bytes.NewReader([]byte(input)), "test.txt")
	result, err := io.ReadAll(out)
	require.NoError(t, err)
	require.Equal(t, "hello world\n", string(result), "Trailing newline should be preserved")
}
// TestSingleLineRedactor_TextFileWithoutNewline (test 2.17) checks that a
// text input with no trailing newline does not gain one when passed through
// a redactor whose pattern matches nothing.
func TestSingleLineRedactor_TextFileWithoutNewline(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	input := "hello world"

	// Name the redactor after the running test, as other tests in this file
	// do, instead of sharing the hard-coded name "test" between tests.
	redactor, err := NewSingleLineRedactor(
		LineRedactor{regex: "xyz"}, // No match
		MASK_TEXT, "testfile", t.Name(), false,
	)
	require.NoError(t, err)

	out := redactor.Redact(bytes.NewReader([]byte(input)), "test.txt")
	result, err := io.ReadAll(out)
	require.NoError(t, err)
	require.Equal(t, "hello world", string(result), "No newline should be added")
}
// TestSingleLineRedactor_EmptyFile (test 2.18) checks that a zero-length
// input produces a zero-length output.
func TestSingleLineRedactor_EmptyFile(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	input := ""

	// Name the redactor after the running test, as other tests in this file
	// do, instead of sharing the hard-coded name "test" between tests.
	redactor, err := NewSingleLineRedactor(
		LineRedactor{regex: "password"},
		MASK_TEXT, "testfile", t.Name(), false,
	)
	require.NoError(t, err)

	out := redactor.Redact(bytes.NewReader([]byte(input)), "test.txt")
	result, err := io.ReadAll(out)
	require.NoError(t, err)
	require.Equal(t, "", string(result), "Empty file should remain empty")
}
// TestSingleLineRedactor_SingleLineWithSecret (test 2.19) checks that a
// single line without a trailing newline has its masked group replaced and
// that no newline is appended to the result.
func TestSingleLineRedactor_SingleLineWithSecret(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	input := "password=secret123"

	// Name the redactor after the running test, as other tests in this file
	// do, instead of sharing the hard-coded name "test" between tests.
	redactor, err := NewSingleLineRedactor(
		LineRedactor{regex: `(?i)(password=)(?P<mask>.*)`},
		MASK_TEXT, "testfile", t.Name(), false,
	)
	require.NoError(t, err)

	out := redactor.Redact(bytes.NewReader([]byte(input)), "test.txt")
	result, err := io.ReadAll(out)
	require.NoError(t, err)
	require.Equal(t, "password=***HIDDEN***", string(result))
}
// TestSingleLineRedactor_MultipleLinesWithSecrets (test 2.20) checks that
// every line matching the pattern is redacted and that each line's newline,
// including the trailing one, is kept.
func TestSingleLineRedactor_MultipleLinesWithSecrets(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	input := "password=secret1\npassword=secret2\npassword=secret3\n"

	// Name the redactor after the running test, as other tests in this file
	// do, instead of sharing the hard-coded name "test" between tests.
	redactor, err := NewSingleLineRedactor(
		LineRedactor{regex: `(?i)(password=)(?P<mask>.*)`},
		MASK_TEXT, "testfile", t.Name(), false,
	)
	require.NoError(t, err)

	out := redactor.Redact(bytes.NewReader([]byte(input)), "test.txt")
	result, err := io.ReadAll(out)
	require.NoError(t, err)

	expected := "password=***HIDDEN***\npassword=***HIDDEN***\npassword=***HIDDEN***\n"
	require.Equal(t, expected, string(result))
}
// TestSingleLineRedactor_ScanPatternFilters (test 2.21) runs a redactor that
// has both a scan pattern and a redaction regex over two lines, and expects
// only the line containing "password" to be changed.
func TestSingleLineRedactor_ScanPatternFilters(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	input := "password=secret\nusername=admin\n"

	// Name the redactor after the running test, as other tests in this file
	// do, instead of sharing the hard-coded name "test" between tests.
	redactor, err := NewSingleLineRedactor(
		LineRedactor{
			regex: `(?i)(password=)(?P<mask>.*)`,
			scan:  `password`, // Only process lines containing "password"
		},
		MASK_TEXT, "testfile", t.Name(), false,
	)
	require.NoError(t, err)

	out := redactor.Redact(bytes.NewReader([]byte(input)), "test.txt")
	result, err := io.ReadAll(out)
	require.NoError(t, err)

	expected := "password=***HIDDEN***\nusername=admin\n"
	require.Equal(t, expected, string(result))
}
// TestSingleLineRedactor_OnlyNewline (test 2.22) checks that an input made
// of exactly one "\n" produces exactly one "\n" on output.
func TestSingleLineRedactor_OnlyNewline(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	input := "\n"

	// Name the redactor after the running test, as other tests in this file
	// do, instead of sharing the hard-coded name "test" between tests.
	redactor, err := NewSingleLineRedactor(
		LineRedactor{regex: "password"},
		MASK_TEXT, "testfile", t.Name(), false,
	)
	require.NoError(t, err)

	out := redactor.Redact(bytes.NewReader([]byte(input)), "test.txt")
	result, err := io.ReadAll(out)
	require.NoError(t, err)
	require.Equal(t, "\n", string(result))
}
// TestSingleLineRedactor_MixedContent (test 2.23) checks that binary bytes
// with a newline in the middle, and none at the end, pass through the
// redactor unchanged.
func TestSingleLineRedactor_MixedContent(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	// Binary data with embedded newline.
	input := []byte{0x01, 0x02, '\n', 0x03, 0x04}

	// Name the redactor after the running test, as other tests in this file
	// do, instead of sharing the hard-coded name "test" between tests.
	redactor, err := NewSingleLineRedactor(
		LineRedactor{regex: "password"},
		MASK_TEXT, "testfile", t.Name(), false,
	)
	require.NoError(t, err)

	out := redactor.Redact(bytes.NewReader(input), "test.bin")
	result, err := io.ReadAll(out)
	require.NoError(t, err)
	require.Equal(t, input, result, "Mixed content should be preserved")
}
// TestSingleLineRedactor_WindowsLineEndings passes CRLF-terminated text with
// nothing to redact through the redactor and expects every "\r\n" to come
// back exactly as it went in.
func TestSingleLineRedactor_WindowsLineEndings(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	// Three CRLF-terminated lines, none of which contain the pattern.
	const crlfText = "line1\r\nline2\r\nline3\r\n"

	r, err := NewSingleLineRedactor(LineRedactor{regex: `secret`}, MASK_TEXT, "testfile", t.Name(), false)
	require.NoError(t, err)

	got, err := io.ReadAll(r.Redact(bytes.NewReader([]byte(crlfText)), "test.txt"))
	require.NoError(t, err)

	// The output must match the input exactly; no CRLF may become a bare LF.
	require.Equal(t, crlfText, string(got),
		"Windows CRLF line endings should be preserved, not converted to LF")
}
// TestSingleLineRedactor_WindowsLineEndingsWithRedaction redacts a secret on
// a CRLF-terminated line and expects the "\r\n" endings to be kept on both
// the redacted line and the untouched line.
func TestSingleLineRedactor_WindowsLineEndingsWithRedaction(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	// Lines are split on \n by LineReader, so each line's content still ends
	// with \r when the regex sees it.
	const crlfText = "password=secret123\r\nusername=admin\r\n"

	// The mask group excludes \r and \n, so the carriage return stays
	// outside the redacted span.
	pattern := LineRedactor{regex: `(password=)(?P<mask>[^\r\n]+)`}
	r, err := NewSingleLineRedactor(pattern, MASK_TEXT, "testfile", t.Name(), false)
	require.NoError(t, err)

	got, err := io.ReadAll(r.Redact(bytes.NewReader([]byte(crlfText)), "test.txt"))
	require.NoError(t, err)

	// The \r survives because the regex never captured it.
	const want = "password=***HIDDEN***\r\nusername=admin\r\n"
	require.Equal(t, want, string(got),
		"Windows CRLF should be preserved - \\r not captured by regex, \\n added by LineReader")
}
// TestSingleLineRedactor_LargeBinaryFile (test 2.24) pushes 1MB of patterned
// binary data through the redactor and expects identical bytes back.
func TestSingleLineRedactor_LargeBinaryFile(t *testing.T) {
	ResetRedactionList()
	defer ResetRedactionList()

	// Create 1MB of binary data cycling through byte values 0..255.
	largeData := make([]byte, 1024*1024)
	for i := range largeData {
		largeData[i] = byte(i % 256)
	}

	// Name the redactor after the running test, as other tests in this file
	// do, instead of sharing the hard-coded name "test" between tests.
	redactor, err := NewSingleLineRedactor(
		LineRedactor{regex: "password"},
		MASK_TEXT, "testfile", t.Name(), false,
	)
	require.NoError(t, err)

	out := redactor.Redact(bytes.NewReader(largeData), "large.bin")
	result, err := io.ReadAll(out)
	require.NoError(t, err)
	require.Equal(t, largeData, result, "Large binary file should be unchanged")
}

View File

@@ -1,7 +1,7 @@
{ {
"updated_at": "2025-11-27T05:43:03Z", "updated_at": "2025-12-10T17:29:30Z",
"git_sha": "280a582f4f5d1dcb242dc9e4441ba1797b0209c7", "git_sha": "7d73318d1ea65a72cfe2ffa7ea52940cf4b98f18",
"workflow_run_id": "19726392590", "workflow_run_id": "20107431959",
"k8s_version": "v1.31.2-k3s1", "k8s_version": "v1.31.2-k3s1",
"updated_by": "Ethan Mosbaugh <ethan@replicated.com>" "updated_by": "hedge-sparrow <sparrow@spooky.academy>"
} }