mirror of
https://github.com/replicatedhq/troubleshoot.git
synced 2026-02-14 10:19:54 +00:00
* fix: prevent redactors from corrupting binary files (#102) Redactors were adding newlines to files without them, corrupting binary files during support bundle collection (51 bytes → 53 bytes). Created LineReader to track original newline state and only restore newlines when they were present in the original file. - Added pkg/redact/line_reader.go - Refactored single_line.go, multi_line.go, literal.go - Added 48 tests, all passing - Verified: binary files now preserved byte-for-byte Fixes #102 * fix: handle empty lines correctly in MultiLineRedactor - Check line1 == nil instead of len(line1) == 0 for empty file detection - Fixes edge case where file containing only '\n' would be dropped - Addresses bugbot finding about empty line handling * fix: handle empty lines correctly in MultiLineRedactor - Check line1 != nil instead of len(line1) > 0 in both locations - Fixes edge case where empty trailing lines would be dropped - Fix test isolation in literal_test.go (move ResetRedactionList to parent) - Addresses bugbot findings about empty line handling * fmt * chore: update regression baselines from run 20107431959 * adding defense * fix: propagate non-EOF errors in all early return paths Ensure non-EOF errors (like buffer overflow) are properly propagated to caller in both pre-loop early returns. Addresses bugbot finding. * fix: use unique test names to prevent redaction list pollution Use t.Name() instead of hardcoded 'test' to ensure each test has unique redactor name, preventing parallel test interference --------- Co-authored-by: hedge-sparrow <sparrow@spooky.academy>
265 lines
7.7 KiB
Go
265 lines
7.7 KiB
Go
package redact
|
|
|
|
import (
|
|
"bytes"
|
|
"io"
|
|
"regexp"
|
|
)
|
|
|
|
// lineState represents a line and whether it ended with a newline character.
|
|
// This is used by MultiLineRedactor to track newline state for line pairs.
|
|
type lineState struct {
|
|
content []byte
|
|
hadNewline bool
|
|
}
|
|
|
|
type MultiLineRedactor struct {
|
|
scan *regexp.Regexp
|
|
re1 *regexp.Regexp
|
|
re2 *regexp.Regexp
|
|
maskText string
|
|
filePath string
|
|
redactName string
|
|
isDefault bool
|
|
}
|
|
|
|
func NewMultiLineRedactor(re1 LineRedactor, re2 string, maskText, path, name string, isDefault bool) (*MultiLineRedactor, error) {
|
|
var scanCompiled *regexp.Regexp
|
|
compiled1, err := compileRegex(re1.regex)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if re1.scan != "" {
|
|
scanCompiled, err = compileRegex(re1.scan)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
compiled2, err := compileRegex(re2)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return &MultiLineRedactor{scan: scanCompiled, re1: compiled1, re2: compiled2, maskText: maskText, filePath: path, redactName: name, isDefault: isDefault}, nil
|
|
}
|
|
|
|
// Redact processes the input reader in pairs of lines, applying redaction patterns.
|
|
// Unlike the previous implementation using bufio.Reader with readLine(), this now
|
|
// uses LineReader to preserve the exact newline structure of the input file.
|
|
//
|
|
// The MultiLineRedactor works by:
|
|
// 1. Reading pairs of lines (line1, line2)
|
|
// 2. If line1 matches the selector pattern (re1), redact line2 using re2
|
|
// 3. Write both lines with their original newline structure preserved
|
|
//
|
|
// This ensures binary files and text files without trailing newlines are not corrupted.
|
|
func (r *MultiLineRedactor) Redact(input io.Reader, path string) io.Reader {
|
|
out, writer := io.Pipe()
|
|
go func() {
|
|
var err error
|
|
defer func() {
|
|
writer.CloseWithError(err)
|
|
}()
|
|
|
|
tokenizer := GetGlobalTokenizer()
|
|
lineReader := NewLineReader(input)
|
|
|
|
// Try to read first two lines
|
|
line1, nl1, line2, nl2, readErr := getNextTwoLines(lineReader, nil)
|
|
|
|
// Handle case where we can't read 2 lines (empty file or single line)
|
|
// Note: We check line1 == nil (not len(line1) == 0) because:
|
|
// - nil means truly empty file with no content
|
|
// - []byte{} (len==0) means an empty line that had a newline (e.g., "\n")
|
|
if readErr != nil && line1 == nil {
|
|
// Empty file - nothing to write
|
|
// Propagate non-EOF errors (EOF is expected for empty files)
|
|
if readErr != io.EOF {
|
|
err = readErr
|
|
}
|
|
return
|
|
}
|
|
|
|
if readErr != nil {
|
|
// Only 1 line available (or empty line with newline) - write it and exit
|
|
// FIX: This is the bug fix - only add newline if original had one
|
|
// Also handles empty lines (line1 == []byte{} with nl1 == true)
|
|
err = writeLine(writer, line1, nl1)
|
|
if err != nil {
|
|
return
|
|
}
|
|
// Propagate non-EOF errors (EOF is expected for single-line files)
|
|
if readErr != io.EOF {
|
|
err = readErr
|
|
}
|
|
return
|
|
}
|
|
|
|
// Process line pairs
|
|
flushLastLine := false
|
|
lineNum := 1
|
|
|
|
for readErr == nil {
|
|
lineNum++ // the first line that can be redacted is line 2
|
|
|
|
// Pre-filter: if scan is not nil, check if line1 matches scan by lowercasing it
|
|
if r.scan != nil {
|
|
lowerLine1 := bytes.ToLower(line1)
|
|
if !r.scan.Match(lowerLine1) {
|
|
// No match - write line1 and advance
|
|
err = writeLine(writer, line1, nl1)
|
|
if err != nil {
|
|
return
|
|
}
|
|
line1, nl1, line2, nl2, readErr = getNextTwoLines(lineReader, &lineState{line2, nl2})
|
|
flushLastLine = true
|
|
continue
|
|
}
|
|
}
|
|
|
|
// Check if line1 matches the selector pattern (re1)
|
|
if !r.re1.Match(line1) {
|
|
// No match - write line1 and advance
|
|
err = writeLine(writer, line1, nl1)
|
|
if err != nil {
|
|
return
|
|
}
|
|
line1, nl1, line2, nl2, readErr = getNextTwoLines(lineReader, &lineState{line2, nl2})
|
|
flushLastLine = true
|
|
continue
|
|
}
|
|
|
|
// line1 matched selector - redact line2
|
|
flushLastLine = false
|
|
var clean []byte
|
|
if tokenizer.IsEnabled() {
|
|
// Use tokenized replacement for line2 based on line1 context
|
|
context := r.redactName
|
|
clean = getTokenizedReplacementPatternWithPath(r.re2, line2, context, r.filePath)
|
|
} else {
|
|
// Use original masking behavior
|
|
substStr := []byte(getReplacementPattern(r.re2, r.maskText))
|
|
clean = r.re2.ReplaceAll(line2, substStr)
|
|
}
|
|
|
|
// Write line1 (selector line) and line2 (redacted line)
|
|
err = writeLine(writer, line1, nl1)
|
|
if err != nil {
|
|
return
|
|
}
|
|
err = writeLine(writer, clean, nl2)
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
// Track redaction if content changed
|
|
if !bytes.Equal(clean, line2) {
|
|
addRedaction(Redaction{
|
|
RedactorName: r.redactName,
|
|
CharactersRemoved: len(line2) - len(clean),
|
|
Line: lineNum,
|
|
File: r.filePath,
|
|
IsDefaultRedactor: r.isDefault,
|
|
})
|
|
}
|
|
|
|
// Get next pair
|
|
line1, nl1, line2, nl2, readErr = getNextTwoLines(lineReader, nil)
|
|
}
|
|
|
|
// After loop exits (readErr != nil), check if we have an unwritten line1
|
|
// This happens in two cases:
|
|
// 1. flushLastLine=true: line1 was advanced but not written (scan/re1 didn't match)
|
|
// 2. line1 != nil: we read line1 but couldn't get line2 (unpaired line at end)
|
|
// Note: We check line1 != nil (not len(line1) > 0) to handle empty lines ([]byte{})
|
|
if flushLastLine || line1 != nil {
|
|
err = writeLine(writer, line1, nl1)
|
|
if err != nil {
|
|
return
|
|
}
|
|
}
|
|
|
|
// Propagate non-EOF read errors to the caller
|
|
// EOF is expected (end of file) and not an error condition
|
|
// Note: readErr is always non-nil here (loop exited), but we only propagate non-EOF errors
|
|
if readErr != io.EOF {
|
|
err = readErr
|
|
}
|
|
}()
|
|
return out
|
|
}
|
|
|
|
// getNextTwoLines reads the next pair of lines from the LineReader.
|
|
// It returns the content and newline state for both lines.
|
|
//
|
|
// If curLine2 is provided, it's used as line1 (optimization for advancing through file).
|
|
// Otherwise, both lines are read fresh from the reader.
|
|
//
|
|
// Returns:
|
|
// - line1, hadNewline1: First line content and newline state
|
|
// - line2, hadNewline2: Second line content and newline state
|
|
// - err: Error only if we couldn't read line1, or if line2 read failed with non-EOF error
|
|
//
|
|
// Note: If line2 returns (content, false, io.EOF), we treat this as SUCCESS because
|
|
// we got the content. The EOF just means it didn't have a trailing newline.
|
|
func getNextTwoLines(lr *LineReader, curLine2 *lineState) (
|
|
line1 []byte, hadNewline1 bool,
|
|
line2 []byte, hadNewline2 bool,
|
|
err error,
|
|
) {
|
|
if curLine2 == nil {
|
|
// Read both lines fresh
|
|
line1, hadNewline1, err = lr.ReadLine()
|
|
if err != nil {
|
|
return
|
|
}
|
|
|
|
line2, hadNewline2, err = lr.ReadLine()
|
|
// If we got line2 content but hit EOF, that's OK - it just means no trailing newline
|
|
if err == io.EOF && len(line2) > 0 {
|
|
err = nil // Clear the error - we successfully read both lines
|
|
}
|
|
return
|
|
}
|
|
|
|
// Use cached line2 as new line1 (optimization)
|
|
line1 = curLine2.content
|
|
hadNewline1 = curLine2.hadNewline
|
|
|
|
// Read new line2
|
|
line2, hadNewline2, err = lr.ReadLine()
|
|
// If we got line2 content but hit EOF, that's OK - it just means no trailing newline
|
|
if err == io.EOF && len(line2) > 0 {
|
|
err = nil // Clear the error - we successfully read both lines
|
|
}
|
|
return
|
|
}
|
|
|
|
// writeLine writes a line to the writer, optionally adding a newline if hadNewline is true.
|
|
// This helper reduces code duplication in the Redact function.
|
|
func writeLine(w io.Writer, line []byte, hadNewline bool) error {
|
|
if err := writeBytes(w, line); err != nil {
|
|
return err
|
|
}
|
|
if hadNewline {
|
|
if err := writeBytes(w, NEW_LINE); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// writeBytes writes all byte slices to the writer
|
|
// in the order they are passed in the variadic argument
|
|
func writeBytes(w io.Writer, bs ...[]byte) error {
|
|
for _, b := range bs {
|
|
_, err := w.Write(b)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|