Skip to content

Commit 6604cac

Browse files
committed
Made CSVField::try_parse_hex a template
Allows for using long long for better overflow safety without breaking int usages
1 parent 3f02253 commit 6604cac

5 files changed

Lines changed: 146 additions & 68 deletions

File tree

README.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -231,11 +231,17 @@ for (auto& row: reader) {
231231
row["timestamp"].get<int>();
232232
233233
// You can also attempt to parse hex values
234-
int value;
234+
long long value;
235235
if (row["hexValue"].try_parse_hex(value)) {
236236
std::cout << "Hex value is " << value << std::endl;
237237
}
238238
239+
// Or specify a different integer type
240+
int smallValue;
241+
if (row["smallHex"].try_parse_hex<int>(smallValue)) {
242+
std::cout << "Small hex value is " << smallValue << std::endl;
243+
}
244+
239245
// Non-imperial decimal numbers can be handled this way
240246
long double decimalValue;
241247
if (row["decimalNumber"].try_parse_decimal(decimalValue, ',')) {

include/internal/csv_row.cpp

Lines changed: 39 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -89,70 +89,46 @@ namespace csv {
8989
return field_str.substr(0, field.length);
9090
}
9191

92-
CSV_INLINE bool CSVField::try_parse_hex(int& parsedValue) {
93-
size_t start = 0, end = 0;
94-
95-
// Trim out whitespace chars
96-
for (; start < this->sv.size() && this->sv[start] == ' '; start++);
97-
for (end = start; end < this->sv.size() && this->sv[end] != ' '; end++);
98-
99-
int value_ = 0;
100-
101-
size_t digits = (end - start);
102-
size_t base16_exponent = digits - 1;
103-
104-
if (digits == 0) return false;
105-
106-
for (const auto& ch : this->sv.substr(start, digits)) {
107-
int digit = 0;
108-
109-
switch (ch) {
110-
case '0':
111-
case '1':
112-
case '2':
113-
case '3':
114-
case '4':
115-
case '5':
116-
case '6':
117-
case '7':
118-
case '8':
119-
case '9':
120-
digit = static_cast<int>(ch - '0');
121-
break;
122-
case 'a':
123-
case 'A':
124-
digit = 10;
125-
break;
126-
case 'b':
127-
case 'B':
128-
digit = 11;
129-
break;
130-
case 'c':
131-
case 'C':
132-
digit = 12;
133-
break;
134-
case 'd':
135-
case 'D':
136-
digit = 13;
137-
break;
138-
case 'e':
139-
case 'E':
140-
digit = 14;
141-
break;
142-
case 'f':
143-
case 'F':
144-
digit = 15;
145-
break;
146-
default:
147-
return false;
92+
CSV_INLINE csv::string_view CSVRow::get_field_safe(size_t index, internals::RawCSVDataPtr _data) const
93+
{
94+
using internals::ParseFlags;
95+
96+
if (index >= this->size())
97+
throw std::runtime_error("Index out of bounds.");
98+
99+
const size_t field_index = this->fields_start + index;
100+
auto& field = _data->fields[field_index];
101+
auto field_str = csv::string_view(_data->data).substr(this->data_start + field.start);
102+
103+
if (field.has_double_quote) {
104+
auto& value = _data->double_quote_fields[field_index];
105+
// Double-check locking: minimize lock contention by checking before acquiring lock
106+
if (value.empty()) {
107+
std::lock_guard<std::mutex> lock(_data->double_quote_init_lock);
108+
109+
// Check again after acquiring lock in case another thread initialized it
110+
if (value.empty()) {
111+
bool prev_ch_quote = false;
112+
for (size_t i = 0; i < field.length; i++) {
113+
if (_data->parse_flags[field_str[i] + CHAR_OFFSET] == ParseFlags::QUOTE) {
114+
if (prev_ch_quote) {
115+
prev_ch_quote = false;
116+
continue;
117+
}
118+
else {
119+
prev_ch_quote = true;
120+
}
121+
}
122+
123+
value += field_str[i];
124+
}
125+
}
148126
}
149127

150-
value_ += digit * (int)pow(16, (double)base16_exponent);
151-
base16_exponent--;
128+
return csv::string_view(value);
152129
}
153130

154-
parsedValue = value_;
155-
return true;
131+
return field_str.substr(0, field.length);
156132
}
157133

158134
CSV_INLINE bool CSVField::try_parse_decimal(long double& dVal, const char decimalSymbol) {
@@ -206,7 +182,7 @@ namespace csv {
206182
: daddy(_reader), data(_reader->data), i(_i) {
207183
if (_i < (int)this->daddy->size())
208184
this->field = std::make_shared<CSVField>(
209-
this->daddy->operator[](_i));
185+
CSVField(this->daddy->get_field_safe(_i, this->data)));
210186
else
211187
this->field = nullptr;
212188
}
@@ -224,7 +200,7 @@ namespace csv {
224200
this->i++;
225201
if (this->i < (int)this->daddy->size())
226202
this->field = std::make_shared<CSVField>(
227-
this->daddy->operator[](i));
203+
CSVField(this->daddy->get_field_safe(i, this->data)));
228204
else // Reached the end of row
229205
this->field = nullptr;
230206
return *this;
@@ -241,7 +217,7 @@ namespace csv {
241217
// Pre-decrement operator
242218
this->i--;
243219
this->field = std::make_shared<CSVField>(
244-
this->daddy->operator[](this->i));
220+
CSVField(this->daddy->get_field_safe(this->i, this->data)));
245221
return *this;
246222
}
247223

include/internal/csv_row.hpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
#include "common.hpp"
1616
#include "data_type.hpp"
17+
#include "parse_hex.hpp"
1718
#include "raw_csv_data.hpp"
1819

1920
namespace csv {
@@ -108,8 +109,15 @@ namespace csv {
108109
return static_cast<T>(this->value);
109110
}
110111

111-
/** Parse a hexadecimal value, returning false if the value is not hex. */
112-
bool try_parse_hex(int& parsedValue);
112+
/** Parse a hexadecimal value, returning false if the value is not hex.
113+
* @tparam T An integral type (int, long, long long, etc.)
114+
*/
115+
template<typename T = long long>
116+
bool try_parse_hex(T& parsedValue) {
117+
static_assert(std::is_integral<T>::value,
118+
"try_parse_hex only works with integral types (int, long, long long, etc.)");
119+
return internals::try_parse_hex(this->sv, parsedValue);
120+
}
113121

114122
/** Attempts to parse a decimal (or integer) value using the given symbol,
115123
* returning `true` if the value is numeric.
@@ -290,6 +298,11 @@ namespace csv {
290298
/** Retrieve a string view corresponding to the specified index */
291299
csv::string_view get_field(size_t index) const;
292300

301+
/** Iterator-safe field access using explicit data pointer
302+
* (prevents accessing freed data when CSVRow is reassigned)
303+
*/
304+
csv::string_view get_field_safe(size_t index, internals::RawCSVDataPtr _data) const;
305+
293306
internals::RawCSVDataPtr data;
294307

295308
/** Where in RawCSVData.data we start */

include/internal/parse_hex.hpp

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
/** @file
2+
* @brief Implements functions related to hexadecimal parsing
3+
*/
4+
5+
#pragma once
6+
#include <type_traits>
7+
#include <cmath>
#include <limits>
8+
9+
#include "common.hpp"
10+
11+
namespace csv {
12+
namespace internals {
13+
template<typename T>
14+
bool try_parse_hex(csv::string_view sv, T& parsedValue) {
15+
static_assert(std::is_integral<T>::value,
16+
"try_parse_hex only works with integral types (int, long, long long, etc.)");
17+
18+
size_t start = 0, end = 0;
19+
20+
// Trim out whitespace chars
21+
for (; start < sv.size() && sv[start] == ' '; start++);
22+
for (end = start; end < sv.size() && sv[end] != ' '; end++);
23+
24+
T value_ = 0;
25+
26+
size_t digits = (end - start);
27+
size_t base16_exponent = digits - 1;
28+
29+
if (digits == 0) return false;
30+
31+
for (const auto& ch : sv.substr(start, digits)) {
32+
int digit = 0;
33+
34+
switch (ch) {
35+
case '0':
36+
case '1':
37+
case '2':
38+
case '3':
39+
case '4':
40+
case '5':
41+
case '6':
42+
case '7':
43+
case '8':
44+
case '9':
45+
digit = static_cast<int>(ch - '0');
46+
break;
47+
case 'a':
48+
case 'A':
49+
digit = 10;
50+
break;
51+
case 'b':
52+
case 'B':
53+
digit = 11;
54+
break;
55+
case 'c':
56+
case 'C':
57+
digit = 12;
58+
break;
59+
case 'd':
60+
case 'D':
61+
digit = 13;
62+
break;
63+
case 'e':
64+
case 'E':
65+
digit = 14;
66+
break;
67+
case 'f':
68+
case 'F':
69+
digit = 15;
70+
break;
71+
default:
72+
return false;
73+
}
74+
75+
value_ += digit * (T)pow(16, (double)base16_exponent);
76+
base16_exponent--;
77+
}
78+
79+
parsedValue = value_;
80+
return true;
81+
}
82+
}
83+
}

tests/test_csv_field.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,10 +107,10 @@ TEST_CASE("CSVField get<>() - Floating Point Value", "[test_csv_field_get_float]
107107
}
108108

109109
TEST_CASE("CSVField try_parse_hex()", "[test_csv_field_parse_hex]") {
110-
int value = 0;
110+
long long value = 0;
111111

112112
SECTION("Valid Hex Values") {
113-
std::unordered_map<std::string, int> test_cases = {
113+
std::unordered_map<std::string, long long> test_cases = {
114114
{" A ", 10},
115115
{"0A", 10},
116116
{"0B", 11},

0 commit comments

Comments
 (0)