diff --git a/gemini/processing.go b/gemini/processing.go
index ca26268..15227cb 100644
--- a/gemini/processing.go
+++ b/gemini/processing.go
@@ -10,25 +10,33 @@ import (
 	"golang.org/x/text/transform"
 )
 
-func EnsureValidUTF8(input []byte) (string, error) {
-	// Remove NULL byte 0x00
-	inputNoNull := bytes.ReplaceAll(input, []byte{0}, nil)
+// BytesToValidUTF8 strips NUL bytes from input and returns the remainder as
+// a string. Input that is already valid UTF-8 is returned as is; otherwise
+// each fallback decoder (ISO 8859-1, then Windows-1252) is tried in order and
+// the first one that succeeds wins. An error wrapping the last decoder
+// failure is returned when none of them succeeds.
+func BytesToValidUTF8(input []byte) (string, error) {
+	// Remove NULL byte 0x00 (ReplaceAll accepts slices)
+	inputNoNull := bytes.ReplaceAll(input, []byte{byte(0)}, []byte{})
 	isValidUTF8 := utf8.Valid(inputNoNull)
-	if !isValidUTF8 {
-		encodings := []transform.Transformer{
-			charmap.ISO8859_1.NewDecoder(),   // First try ISO8859-1
-			charmap.Windows1252.NewDecoder(), // Then try Windows-1252, etc
-			// TODO: Try more encodings?
-		}
-		// First successful conversion wins
-		for _, encoding := range encodings {
-			reader := transform.NewReader(bytes.NewReader(inputNoNull), encoding)
-			result, err := io.ReadAll(reader)
-			if err != nil {
-				return "", fmt.Errorf("UTF-8 error: %w", err)
-			}
+	if isValidUTF8 {
+		return string(inputNoNull), nil
+	}
+	encodings := []transform.Transformer{
+		charmap.ISO8859_1.NewDecoder(),   // First try ISO8859-1
+		charmap.Windows1252.NewDecoder(), // Then try Windows-1252, etc
+		// TODO: Try more encodings?
+	}
+	// First successful conversion wins. Keep the most recent failure so it
+	// can be reported after the loop, where the per-iteration err is gone.
+	var lastErr error
+	for _, encoding := range encodings {
+		reader := transform.NewReader(bytes.NewReader(inputNoNull), encoding)
+		result, err := io.ReadAll(reader)
+		if err == nil {
 			return string(result), nil
 		}
+		lastErr = err
 	}
-	return string(inputNoNull), nil
+	return "", fmt.Errorf("UTF-8 error: %w", lastErr)
 }
diff --git a/gemini/processing_test.go b/gemini/processing_test.go
index d47209f..349a5a0 100644
--- a/gemini/processing_test.go
+++ b/gemini/processing_test.go
@@ -6,7 +6,7 @@ import "testing"
 func TestEnsureValidUTF8(t *testing.T) {
 	// Create a string with a null byte
 	strWithNull := "Hello" + string('\x00') + "world"
-	result, _ := EnsureValidUTF8([]byte(strWithNull))
+	result, _ := BytesToValidUTF8([]byte(strWithNull))
 	if result != "Helloworld" {
 		t.Errorf("Expected string without NULL byte, got %s", result)
 	}