Skip to content

Commit a412c83

Browse files
committed
Replace std::deque with std::map in internal data structure
std::deque actually does have invalidation issues (of iterators) when expanding just like std::vector
1 parent 384e753 commit a412c83

3 files changed

Lines changed: 30 additions & 20 deletions

File tree

include/internal/csv_row.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@ namespace csv {
5555
throw std::runtime_error("Index out of bounds.");
5656

5757
const size_t field_index = this->fields_start + index;
58-
// Copy field to avoid holding reference during potential deque reallocation
5958
auto field = this->data->fields[field_index];
6059
auto field_str = csv::string_view(this->data->data).substr(this->data_start + field.start);
6160

@@ -98,8 +97,6 @@ namespace csv {
9897
throw std::runtime_error("Index out of bounds.");
9998

10099
const size_t field_index = this->fields_start + index;
101-
102-
// Copy field to avoid holding reference during potential deque reallocation
103100
auto field = _data->fields[field_index];
104101
auto field_str = csv::string_view(_data->data).substr(this->data_start + field.start);
105102

include/internal/raw_csv_data.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,21 @@ namespace csv {
99
CSV_INLINE RawCSVField& CSVFieldList::operator[](size_t n) const {
1010
const size_t page_no = n / _single_buffer_capacity;
1111
const size_t buffer_idx = (page_no < 1) ? n : n % _single_buffer_capacity;
12-
return this->buffers[page_no][buffer_idx];
12+
return this->buffers.at(page_no)[buffer_idx];
1313
}
1414

1515
CSV_INLINE void CSVFieldList::allocate() {
16-
buffers.push_back(std::unique_ptr<RawCSVField[]>(new RawCSVField[_single_buffer_capacity]));
16+
// If this isn't the first allocation, move to next block
17+
if (!buffers.empty()) {
18+
_current_block++;
19+
}
20+
21+
// std::map provides stable references: insertions never invalidate existing elements.
22+
// Safe for concurrent reads during write without mutex.
23+
buffers[_current_block] = std::unique_ptr<RawCSVField[]>(new RawCSVField[_single_buffer_capacity]);
1724

1825
_current_buffer_size = 0;
19-
_back = buffers.back().get();
26+
_back = buffers[_current_block].get();
2027
}
2128
}
2229
}

include/internal/raw_csv_data.hpp

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*/
99

1010
#pragma once
11-
#include <deque>
11+
#include <map>
1212
#include <memory>
1313
#include <mutex>
1414
#include <unordered_map>
@@ -75,17 +75,19 @@ namespace csv {
7575

7676
this->buffers = std::move(other.buffers);
7777
_current_buffer_size = other._current_buffer_size;
78+
_current_block = other._current_block;
7879

7980
// Recalculate _back pointer to point into OUR buffers, not the moved-from ones
8081
if (!this->buffers.empty()) {
81-
_back = this->buffers.back().get() + _current_buffer_size;
82+
_back = this->buffers[_current_block].get() + _current_buffer_size;
8283
} else {
8384
_back = nullptr;
8485
}
8586

8687
// Invalidate moved-from state to prevent use-after-move bugs
8788
other._back = nullptr;
8889
other._current_buffer_size = 0;
90+
other._current_block = 0;
8991
}
9092

9193
template <class... Args>
@@ -99,33 +101,37 @@ namespace csv {
99101
}
100102

101103
size_t size() const noexcept {
102-
return this->_current_buffer_size + ((this->buffers.size() - 1) * this->_single_buffer_capacity);
104+
return this->_current_buffer_size + (_current_block * this->_single_buffer_capacity);
103105
}
104106

105107
RawCSVField& operator[](size_t n) const;
106108

107109
private:
108110
const size_t _single_buffer_capacity;
109111

110-
/** Deque of pointers to RawCSVField arrays.
112+
/** Map of block indices to RawCSVField arrays.
111113
*
112-
* std::deque is critical for thread safety: unlike std::vector, it never reallocates
113-
* existing elements when expanding. This prevents a race condition where:
114-
* 1. Reading thread accesses a RawCSVField via pointer
115-
* 2. Parsing thread pushes to at-capacity std::vector
116-
* 3. std::vector reallocates → reading thread accesses deallocated memory
114+
* std::map is ideal for thread-safe concurrent access: insertions never invalidate
115+
* references/pointers to existing elements (guaranteed by C++ standard §26.2.6).
116+
* This eliminates the need for mutex locks during concurrent read-during-write.
117117
*
118-
* Using std::deque also improves performance by avoiding reallocation costs.
119-
* Memory locality is preserved because we store pointers to RawCSVField[], not
120-
* the objects themselves.
118+
* Unlike std::deque, whose internal map can reallocate (invalidating ALL operator[]
119+
* calls even for old elements), std::map provides stable element access.
121120
*
122-
* See: Issue #217, PR #237, v2.3.0 (June 2024)
121+
* Performance: O(log n) access where n = blocks per 10MB chunk. Pathological case
122+
* (1-char rows): 10MB / 2 bytes = 5M fields / 170 per block ≈ 29K blocks.
123+
* log₂(29K) ≈ 15 comparisons << mutex contention cost.
124+
*
125+
* See: Issue #217, PR #237, v2.3.0 (June 2024); Sanitizer fixes Feb 2026
123126
*/
124-
std::deque<std::unique_ptr<RawCSVField[]>> buffers = {};
127+
std::map<size_t, std::unique_ptr<RawCSVField[]>> buffers = {};
125128

126129
/** Number of items in the current buffer */
127130
size_t _current_buffer_size = 0;
128131

132+
/** Current block number */
133+
size_t _current_block = 0;
134+
129135
/** Pointer to the current empty field */
130136
RawCSVField* _back = nullptr;
131137

0 commit comments

Comments
 (0)