apache · garydgregory · Jun 25, 2026 · Jun 25, 2026 · Jun 25, 2026
diff --git a/src/main/java/org/apache/commons/csv/Lexer.java b/src/main/java/org/apache/commons/csv/Lexer.java
@@ -277,15 +277,22 @@ Token nextToken(final Token token) throws IOException {
         }
         // Important: make sure a new char gets consumed in each iteration
         while (token.type == Token.Type.INVALID) {
+            // isDelimiter consumes the trailing characters of a multi-character delimiter as a side effect, so it must
+            // only be evaluated once per character. Remember a match found while skipping whitespace below.
+            boolean delimiter = false;
             // ignore whitespaces at beginning of a token
             if (ignoreSurroundingSpaces) {
-                while (Character.isWhitespace((char) c) && !isDelimiter(c) && !eol) {
+                while (Character.isWhitespace((char) c) && !eol) {
+                    if (isDelimiter(c)) {
+                        delimiter = true;
+                        break;
+                    }
                     c = reader.read();
                     eol = readEndOfLine(c);
                 }
             }
             // ok, start of token reached: encapsulated, or token
-            if (isDelimiter(c)) {
+            if (delimiter || isDelimiter(c)) {
                 // empty token return TOKEN("")
                 token.type = Token.Type.TOKEN;
             } else if (eol) {

diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@@ -1758,6 +1758,26 @@ void testPartialMultiCharacterDelimiterAtEOFAfterMismatch() throws IOException {
         }
     }
 
+    /**
+     * Tests that an empty field next to a multi-character delimiter starting with whitespace (here {@code " |"}) is
+     * kept when {@code ignoreSurroundingSpaces} is enabled. Regression check: matching the delimiter while skipping
+     * leading whitespace consumes its trailing characters, so testing it a second time would lose the empty field.
+     */
+    @Test
+    void testEmptyFieldBeforeWhitespacePrefixedMultiCharacterDelimiter() throws IOException {
+        final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter(" |").setIgnoreSurroundingSpaces(true).get();
+        try (CSVParser parser = CSVParser.parse(" |a", format)) { // delimiter at the very start of the input
+            final List<CSVRecord> records = parser.getRecords();
+            assertEquals(1, records.size());
+            assertValuesEquals(new String[] { "", "a" }, records.get(0)); // empty first field, then "a"
+        }
+        try (CSVParser parser = CSVParser.parse("a | |b", format)) { // two adjacent delimiters between "a" and "b"
+            final List<CSVRecord> records = parser.getRecords();
+            assertEquals(1, records.size());
+            assertValuesEquals(new String[] { "a", "", "b" }, records.get(0)); // empty middle field is preserved
+        }
+    }
+
     @Test
     void testProvidedHeader() throws Exception {
         final Reader in = new StringReader("a,b,c\n1,2,3\nx,y,z");

diff --git a/src/test/java/org/apache/commons/csv/LexerTest.java b/src/test/java/org/apache/commons/csv/LexerTest.java
@@ -447,6 +447,25 @@ void testPartialMultiCharacterDelimiterAtEOFAfterMismatch() throws IOException {
         }
     }
 
+    /**
+     * Tests that the lexer emits an empty token for a multi-character delimiter starting with whitespace (here
+     * {@code " |"}) when {@code ignoreSurroundingSpaces} is enabled. {@link Lexer#isDelimiter(int)} consumes the
+     * rest of a matched delimiter as a side effect, so it must run once per character or the empty token is dropped.
+     */
+    @Test
+    void testEmptyTokenBeforeWhitespacePrefixedMultiCharacterDelimiter() throws IOException {
+        final CSVFormat format = CSVFormat.DEFAULT.builder().setDelimiter(" |").setIgnoreSurroundingSpaces(true).get();
+        try (Lexer lexer = createLexer(" |a", format)) { // delimiter at the very start of the input
+            assertNextToken(TOKEN, "", lexer); // empty token in front of the delimiter
+            assertNextToken(EOF, "a", lexer);
+        }
+        try (Lexer lexer = createLexer("a | |b", format)) { // two adjacent delimiters between "a" and "b"
+            assertNextToken(TOKEN, "a", lexer);
+            assertNextToken(TOKEN, "", lexer); // empty token between the two delimiters
+            assertNextToken(EOF, "b", lexer);
+        }
+    }
+
     @Test
     void testReadEscapeBackspace() throws IOException {
         try (Lexer lexer = createLexer("b", CSVFormat.DEFAULT.withEscape('\b'))) {