Skip to content

Commit 23f8999

Browse files
committed
Use std::vector of unique_ptrs to RawCSVFields
Concurrent R/W to std::map is UB
1 parent c82889d commit 23f8999

2 files changed

Lines changed: 45 additions & 38 deletions

File tree

include/internal/raw_csv_data.cpp

Lines changed: 14 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -4,26 +4,32 @@
44

55
#include "raw_csv_data.hpp"
66

7+
#include <cassert>
8+
79
namespace csv {
810
namespace internals {
911
CSV_INLINE RawCSVField& CSVFieldList::operator[](size_t n) const {
1012
const size_t page_no = n / _single_buffer_capacity;
1113
const size_t buffer_idx = n % _single_buffer_capacity;
12-
return this->buffers.at(page_no)[buffer_idx];
14+
15+
assert(page_no < _block_capacity);
16+
RawCSVField* block = this->_blocks[page_no].load(std::memory_order_acquire);
17+
assert(block != nullptr);
18+
return block[buffer_idx];
1319
}
1420

1521
CSV_INLINE void CSVFieldList::allocate() {
16-
// If this isn't the first allocation, move to next block
17-
if (!buffers.empty()) {
22+
if (_back != nullptr) {
1823
_current_block++;
1924
}
20-
21-
// std::map provides stable references: insertions never invalidate existing elements.
22-
// Safe for concurrent reads during write without mutex.
23-
buffers[_current_block] = std::unique_ptr<RawCSVField[]>(new RawCSVField[_single_buffer_capacity]);
2425

26+
std::unique_ptr<RawCSVField[]> block(new RawCSVField[_single_buffer_capacity]);
27+
RawCSVField* block_ptr = block.get();
28+
this->_owned_blocks.push_back(std::move(block));
29+
30+
this->_blocks[_current_block].store(block_ptr, std::memory_order_release);
2531
_current_buffer_size = 0;
26-
_back = buffers[_current_block].get();
32+
_back = block_ptr;
2733
}
2834
}
2935
}

include/internal/raw_csv_data.hpp

Lines changed: 31 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -8,11 +8,12 @@
88
*/
99

1010
#pragma once
11-
#include <map>
11+
#include <atomic>
1212
#include <memory>
1313
#include <mutex>
1414
#include <unordered_map>
1515
#include <string>
16+
#include <vector>
1617

1718
#include "common.hpp"
1819
#include "col_names.hpp"
@@ -63,6 +64,13 @@ namespace csv {
6364
/** Construct a CSVFieldList which allocates blocks of a certain size */
6465
CSVFieldList(size_t single_buffer_capacity = (size_t)(internals::PAGE_SIZE / sizeof(RawCSVField))) :
6566
_single_buffer_capacity(single_buffer_capacity) {
67+
const size_t max_fields = internals::ITERATION_CHUNK_SIZE + 1;
68+
_block_capacity = (max_fields + _single_buffer_capacity - 1) / _single_buffer_capacity;
69+
_blocks = std::unique_ptr<std::atomic<RawCSVField*>[]>(new std::atomic<RawCSVField*>[_block_capacity]);
70+
for (size_t i = 0; i < _block_capacity; i++) {
71+
_blocks[i].store(nullptr, std::memory_order_relaxed);
72+
}
73+
6674
this->allocate();
6775
}
6876

@@ -71,28 +79,27 @@ namespace csv {
7179

7280
// CSVFieldArrays may be moved
7381
CSVFieldList(CSVFieldList&& other) :
74-
_single_buffer_capacity(other._single_buffer_capacity) {
82+
_single_buffer_capacity(other._single_buffer_capacity),
83+
_block_capacity(other._block_capacity) {
7584

76-
this->buffers = std::move(other.buffers);
85+
this->_blocks = std::move(other._blocks);
86+
this->_owned_blocks = std::move(other._owned_blocks);
7787
_current_buffer_size = other._current_buffer_size;
7888
_current_block = other._current_block;
79-
80-
// Recalculate _back pointer to point into OUR buffers, not the moved-from ones
81-
if (!this->buffers.empty()) {
82-
auto it = this->buffers.find(_current_block);
83-
if (it != this->buffers.end() && it->second) {
84-
_back = it->second.get() + _current_buffer_size;
85-
} else {
86-
_back = nullptr;
87-
}
89+
90+
// Recalculate _back pointer to point into OUR blocks, not the moved-from ones
91+
if (this->_blocks) {
92+
RawCSVField* block = this->_blocks[_current_block].load(std::memory_order_acquire);
93+
_back = block ? (block + _current_buffer_size) : nullptr;
8894
} else {
8995
_back = nullptr;
9096
}
91-
97+
9298
// Invalidate moved-from state to prevent use-after-move bugs
9399
other._back = nullptr;
94100
other._current_buffer_size = 0;
95101
other._current_block = 0;
102+
other._block_capacity = 0;
96103
}
97104

98105
template <class... Args>
@@ -114,29 +121,23 @@ namespace csv {
114121
private:
115122
const size_t _single_buffer_capacity;
116123

117-
/** Map of block indices to RawCSVField arrays.
118-
*
119-
* std::map is ideal for thread-safe concurrent access: insertions never invalidate
120-
* references/pointers to existing elements (guaranteed by C++ standard §26.2.6).
121-
* This eliminates the need for mutex locks during concurrent read-during-write.
122-
*
123-
* Unlike std::deque, whose internal map can reallocate (invalidating ALL operator[]
124-
* calls even for old elements), std::map provides stable element access.
125-
*
126-
* Performance: O(log n) access where n = blocks per 10MB chunk. Pathological case
127-
* (1-char rows): 10MB / 2 bytes = 5M fields / 170 per block ≈ 29K blocks.
128-
* log₂(29K) ≈ 15 comparisons << mutex contention cost.
129-
*
130-
* See: Issue #217, PR #237, v2.3.0 (June 2024); Sanitizer fixes Feb 2026
131-
*/
132-
std::map<size_t, std::unique_ptr<RawCSVField[]>> buffers = {};
124+
/** Fixed-size table of block pointers for lock-free read access. */
125+
std::unique_ptr<std::atomic<RawCSVField*>[]> _blocks = nullptr;
126+
127+
/** Owned blocks (writer thread only), used for lifetime management. */
128+
std::vector<std::unique_ptr<RawCSVField[]>> _owned_blocks = {};
129+
// _owned_blocks may reallocate, but RawCSVField[] allocations stay put;
130+
// _blocks holds raw pointers to those allocations, so readers remain valid.
133131

134132
/** Number of items in the current buffer */
135133
size_t _current_buffer_size = 0;
136134

137-
/** Current block number */
135+
/** Current block index */
138136
size_t _current_block = 0;
139137

138+
/** Number of block slots available in _blocks */
139+
size_t _block_capacity = 0;
140+
140141
/** Pointer to the current empty field */
141142
RawCSVField* _back = nullptr;
142143

0 commit comments

Comments (0)