Commit b78130d

Codecov setup (#294)

* Codecov setup
* Update coverage.yml
* Update codecov.yml
* Potential fix for code scanning alert no. 269: Workflow does not contain permissions
  Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
* Fix --coverage race condition
* Update test_read_csv.cpp
* Update coverage.yml
* Update coverage.yml
* Add check for trailing EOF
* Improve unit test coverage
* Update coverage.yml
* Update coverage.yml
* Add branch coverage
* Update coverage.yml
* Update test_csv_field.cpp
* Fixed various idiosyncrasies
* CSVReader::begin() calls read_row() instead of being a 3rd path to starting the worker
* CSVFormat now has chunking property
* Fix DataFrame weirdness when key type is integral
* Update tests
* More tests added
* Update test_stream_sources.cpp

---------

Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
1 parent b870b08 commit b78130d

29 files changed

Lines changed: 736 additions & 452 deletions

.claude/rules/testing-conventions.md

Lines changed: 26 additions & 0 deletions
@@ -49,6 +49,32 @@ Or exclude from default runs:
 TEST_CASE("Description", "[.][bug]") { ... } // Skip by default
 ```
 
+## Rule: Edge Case and Regression Tests Go at the End of the File
+
+Test files are organized with mainline functionality tests first. Edge cases, regression
+tests for specific issues, and boundary condition tests must be placed **at the end of
+the file**, after all general feature tests.
+
+**WRONG - regression test inserted at the top or middle:**
+```cpp
+// At line 14, before any feature tests:
+TEST_CASE("Regression #149 - trailing newline", ...) { ... }
+
+TEST_CASE("Test Parse Flags", ...) { ... } // General test displaced
+```
+
+✅ **RIGHT - regression test at the bottom:**
+```cpp
+TEST_CASE("Test Parse Flags", ...) { ... } // General tests first
+TEST_CASE("Read CSV from string", ...) { ... } // ...
+
+// --- Edge cases and regression tests ---
+TEST_CASE("Regression #149 - trailing newline", ...) { ... }
+```
+
+This keeps the file readable: a maintainer skimming the top sees the feature coverage;
+scrolling to the bottom reveals all known edge cases in one place.
+
 ## Test Pattern for Known Bugs
 
 ```cpp

.github/workflows/coverage.yml

Lines changed: 71 additions & 0 deletions
@@ -0,0 +1,71 @@
+name: Code Coverage
+
+on:
+  push:
+    branches: [ "master" ]
+  pull_request:
+    branches: [ "master" ]
+
+permissions:
+  contents: read
+  pull-requests: write
+
+jobs:
+  coverage:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository and submodules
+        uses: actions/checkout@v4
+        with:
+          submodules: recursive
+
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y lcov
+
+      - name: Configure CMake with coverage instrumentation
+        run: >
+          cmake -B build
+          -DCMAKE_BUILD_TYPE=Debug
+          -DENABLE_CODE_COVERAGE=ON
+          -DCMAKE_CXX_COMPILER=g++
+          -DCMAKE_C_COMPILER=gcc
+          -DCSV_CXX_STANDARD=17
+          -S ${{ github.workspace }}
+
+      - name: Build
+        run: cmake --build build --config Debug
+
+      - name: Run tests
+        working-directory: build
+        run: ctest --build-config Debug --output-on-failure
+
+      - name: Capture coverage data
+        run: |
+          lcov --capture \
+            --directory build \
+            --output-file coverage.info \
+            --ignore-errors mismatch,source,empty,unused,negative \
+            --rc lcov_branch_coverage=1
+          # Strip system headers, external deps, Catch2, and test files
+          lcov --remove coverage.info \
+            '/usr/*' \
+            '*/external/*' \
+            '*/catch2/*' \
+            '*/Catch2/*' \
+            '*/tests/*' \
+            --output-file coverage.info \
+            --ignore-errors empty,unused \
+            --rc lcov_branch_coverage=1
+          lcov --list coverage.info --ignore-errors empty,unused --rc lcov_branch_coverage=1
+
+      - name: Upload to Codecov
+        uses: codecov/codecov-action@v4
+        with:
+          files: coverage.info
+          fail_ci_if_error: true
+          token: ${{ secrets.CODECOV_TOKEN }}
+          override_commit: ${{ github.event.pull_request.head.sha || github.sha }}
+          override_pr: ${{ github.event.pull_request.number }}

.github/workflows/sanitizers.yml

Lines changed: 37 additions & 0 deletions
@@ -109,3 +109,40 @@ jobs:
         with:
           name: valgrind-results
           path: build/Testing/
+
+  # Reproduces the exact build profile reported in issue #293:
+  # -O3 combined with --coverage exposes memory ordering issues invisible to plain -O3
+  o3-coverage:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository and submodules
+        uses: actions/checkout@v4
+        with:
+          submodules: recursive
+
+      - name: Install dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y build-essential cmake lcov
+
+      - name: Configure CMake
+        run: |
+          mkdir -p build
+          cd build
+          cmake .. \
+            -DCMAKE_BUILD_TYPE=Release \
+            -DCSV_CXX_STANDARD=17 \
+            -DCMAKE_CXX_COMPILER=g++ \
+            -DCMAKE_C_COMPILER=gcc \
+            -DCMAKE_CXX_FLAGS="-O3 --coverage" \
+            -DCMAKE_C_FLAGS="-O3 --coverage" \
+            -DCMAKE_EXE_LINKER_FLAGS="--coverage"
+
+      - name: Build
+        run: cmake --build build
+
+      - name: Test with -O3 --coverage
+        working-directory: build
+        run: ctest --output-on-failure
+        timeout-minutes: 20

CMakeLists.txt

Lines changed: 3 additions & 1 deletion
@@ -34,7 +34,9 @@ else()
 
   if(ENABLE_CODE_COVERAGE)
     message("Code coverage instrumentation enabled")
-    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} --coverage -Og")
+    # -fprofile-update=atomic prevents negative counter corruption when
+    # background reader threads write to the same .gcda file concurrently.
+    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} --coverage -Og -fprofile-update=atomic")
   endif()
 endif(MSVC)

README.md

Lines changed: 7 additions & 5 deletions
@@ -52,11 +52,12 @@ On my computer (12th Gen Intel(R) Core(TM) i5-12400 @ 2.50 GHz/Western Digital B
 
 By default, the parser reads CSV data in 10MB chunks. This balance was determined through empirical testing to optimize throughput while minimizing memory overhead and thread synchronization costs.
 
-If you encounter rows larger than the chunk size, use `set_chunk_size()` to adjust:
+If you encounter rows larger than the chunk size, pass a custom `CSVFormat` with `chunk_size()`:
 
 ```cpp
-CSVReader reader("massive_rows.csv");
-reader.set_chunk_size(100 * 1024 * 1024); // 100MB chunks
+CSVFormat fmt;
+fmt.chunk_size(100 * 1024 * 1024); // 100MB chunks
+CSVReader reader("massive_rows.csv", fmt);
 for (auto& row : reader) {
     // Process row
 }
@@ -452,8 +453,9 @@ DataFrame<int> df2(reader, "employee_id");
 // O(1) lookups by key
 auto salary = df[12345]["salary"].get<double>();
 
-// Access by position also works
-auto first_row = df[0];
+// Positional access: operator[](size_t) is disabled when KeyType is an integer
+// type to prevent ambiguity with operator[](const KeyType&). Use iloc() instead.
+auto first_row = df.iloc(0);
 auto name = first_row["name"].get<std::string>();
 
 // Check if a key exists

codecov.yml

Lines changed: 12 additions & 4 deletions
@@ -1,8 +1,16 @@
 ignore:
   - "include/external"
+  - "programs"
+  - "single_include"
+  - "single_include_test"
   - "tests"
 coverage:
-  status:
-    project:
-      default:
-        target: 95%
+  status:
+    project:
+      default:
+        target: auto
+        informational: true
+  patch:
+    default:
+      target: auto
+      informational: true

include/internal/csv_format.cpp

Lines changed: 12 additions & 0 deletions
@@ -47,6 +47,18 @@ namespace csv {
         return *this;
     }
 
+    CSV_INLINE CSVFormat& CSVFormat::chunk_size(size_t size) {
+        if (size < internals::ITERATION_CHUNK_SIZE) {
+            throw std::invalid_argument(
+                "Chunk size must be at least " +
+                std::to_string(internals::ITERATION_CHUNK_SIZE) +
+                " bytes (10MB). Provided: " + std::to_string(size)
+            );
+        }
+        this->_chunk_size = size;
+        return *this;
+    }
+
     CSV_INLINE void CSVFormat::assert_no_char_overlap()
     {
         auto delims = std::set<char>(

include/internal/csv_format.hpp

Lines changed: 15 additions & 0 deletions
@@ -104,6 +104,17 @@ namespace csv {
             return *this;
         }
 
+        /** Sets the chunk size used when reading the CSV
+         *
+         * @param[in] size Chunk size in bytes (minimum: 10MB = ITERATION_CHUNK_SIZE)
+         * @throws std::invalid_argument if size < ITERATION_CHUNK_SIZE
+         *
+         * Use this when constructing a CSVReader from a filename and individual rows
+         * may exceed the default 10MB chunk size. The value is passed to CSVReader at
+         * construction time, before any data is read.
+         */
+        CSVFormat& chunk_size(size_t size);
+
 #ifndef DOXYGEN_SHOULD_SKIP_THIS
         char get_delim() const {
             // This error should never be received by end users.
@@ -120,6 +131,7 @@ namespace csv {
         std::vector<char> get_possible_delims() const { return this->possible_delimiters; }
         std::vector<char> get_trim_chars() const { return this->trim_chars; }
         CONSTEXPR VariableColumnPolicy get_variable_column_policy() const { return this->variable_column_policy; }
+        CONSTEXPR size_t get_chunk_size() const { return this->_chunk_size; }
 #endif
 
         /** CSVFormat for guessing the delimiter */
@@ -163,5 +175,8 @@ namespace csv {
 
         /**< Allow variable length columns? */
         VariableColumnPolicy variable_column_policy = VariableColumnPolicy::IGNORE_ROW;
+
+        /**< Chunk size for reading; passed to CSVReader at construction time */
+        size_t _chunk_size = internals::ITERATION_CHUNK_SIZE;
     };
 }

include/internal/csv_reader.cpp

Lines changed: 9 additions & 4 deletions
@@ -167,7 +167,8 @@ namespace csv {
     CSV_INLINE CSVReader::CSVReader(csv::string_view filename, CSVFormat format) : _format(format) {
         auto head = internals::get_csv_head(filename);
         using Parser = internals::MmapParser;
-
+        // Apply chunk size from format before any reading occurs
+        this->_chunk_size = format.get_chunk_size();
         /** Guess delimiter and header row */
         if (format.guess_delim()) {
             auto guess_result = internals::_guess_format(head, format.possible_delimiters);
@@ -326,14 +327,18 @@ namespace csv {
             // End of file and no more records
             return false;
 
-        // Detect infinite loop: A previous read was requested but records are still empty
-        // This typically means a single row is larger than the chunk size
+        // Detect infinite loop: a previous read was requested but records are still empty.
+        // This fires when a single row spans more than 2 × _chunk_size bytes:
+        //   - chunk N fills without finding '\n' → _read_requested set to true
+        //   - chunk N+1 also fills without '\n' → guard fires here
+        // Default _chunk_size is ITERATION_CHUNK_SIZE (10 MB), so the threshold is
+        // rows > 20 MB. Use CSVFormat::chunk_size() to raise the limit.
         if (this->_read_requested && this->records->empty()) {
             throw std::runtime_error(
                 "End of file not reached and no more records parsed. "
                 "This likely indicates a CSV row larger than the chunk size of " +
                 std::to_string(this->_chunk_size) + " bytes. "
-                "Use set_chunk_size() to increase the chunk size."
+                "Use CSVFormat::chunk_size() to increase the chunk size."
            );
         }

include/internal/csv_reader.hpp

Lines changed: 3 additions & 25 deletions
@@ -184,6 +184,9 @@ namespace csv {
         auto head = internals::get_csv_head(source);
         using Parser = internals::StreamParser<TStream>;
 
+        // Apply chunk size from format before any reading occurs
+        this->_chunk_size = format.get_chunk_size();
+
         if (format.guess_delim()) {
             auto guess_result = internals::_guess_format(head, format.possible_delimiters);
             format.delimiter(guess_result.delim);
@@ -260,31 +263,6 @@ namespace csv {
         /** Sets this reader's column names and associated data */
         void set_col_names(const std::vector<std::string>&);
 
-        /** @brief Set the size of chunks to read from the CSV in bytes
-         *
-         * @param[in] size Chunk size in bytes (minimum: 10MB, default: 10MB)
-         * @throws std::invalid_argument if size < 10MB (ITERATION_CHUNK_SIZE)
-         *
-         * Use this to handle CSV files where a single row exceeds the default 10MB chunk size.
-         * Larger chunks use more memory but allow parsing of larger individual rows.
-         *
-         * Example:
-         * @snippet tests/test_edge_cases_large_rows.cpp Set Chunk Size Example
-         *
-         * @note Chunk size must be at least ITERATION_CHUNK_SIZE (10MB) to avoid
-         *       architectural constraints and ensure reliable parsing behavior.
-         */
-        void set_chunk_size(size_t size) {
-            if (size < internals::ITERATION_CHUNK_SIZE) {
-                throw std::invalid_argument(
-                    "Chunk size must be at least " +
-                    std::to_string(internals::ITERATION_CHUNK_SIZE) +
-                    " bytes (10MB). Provided: " + std::to_string(size)
-                );
-            }
-            this->_chunk_size = size;
-        }
-
         /** @name CSV Settings **/
         ///@{
         CSVFormat _format;
