Skip to content

Commit f0fc529

Browse files
committed
More clean-up
1 parent eecd87f commit f0fc529

4 files changed

Lines changed: 46 additions & 41 deletions

File tree

include/internal/basic_csv_parser.cpp

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -248,11 +248,8 @@ namespace csv {
248248
#ifdef _MSC_VER
249249
#pragma region Specializations
250250
#endif
251+
#if !defined(__EMSCRIPTEN__)
251252
CSV_INLINE void MmapParser::next(size_t bytes = ITERATION_CHUNK_SIZE) {
252-
#if defined(__EMSCRIPTEN__)
253-
(void)bytes;
254-
throw std::runtime_error("MmapParser is not supported on Emscripten; use stream-based parsing.");
255-
#else
256253
// CRITICAL SECTION: Chunk Transition Logic
257254
// This function reads 10MB chunks and must correctly handle fields that span
258255
// chunk boundaries. The 'remainder' calculation below ensures partial fields
@@ -306,8 +303,8 @@ namespace csv {
306303
}
307304

308305
this->mmap_pos -= (length - remainder);
309-
#endif
310306
}
307+
#endif
311308
#ifdef _MSC_VER
312309
#pragma endregion
313310
#endif

include/internal/basic_csv_parser.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,7 @@ namespace csv {
273273
size_t stream_pos = 0;
274274
};
275275

276+
#if !defined(__EMSCRIPTEN__)
276277
/** Parser for memory-mapped files
277278
*
278279
* @par Implementation
@@ -300,5 +301,6 @@ namespace csv {
300301
std::string _filename;
301302
size_t mmap_pos = 0;
302303
};
304+
#endif
303305
}
304306
}

include/internal/csv_reader.cpp

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -165,12 +165,16 @@ namespace csv {
165165
*
166166
*/
167167
CSV_INLINE CSVReader::CSVReader(csv::string_view filename, CSVFormat format) : _format(format) {
168-
auto head = internals::get_csv_head(filename);
169168
#if defined(__EMSCRIPTEN__)
170-
using Parser = internals::StreamParser<std::ifstream>;
169+
this->owned_file_stream = std::unique_ptr<std::ifstream>(new std::ifstream(std::string(filename), std::ios::binary));
170+
if (!this->owned_file_stream->is_open()) {
171+
throw std::runtime_error("Cannot open file " + std::string(filename));
172+
}
173+
174+
this->init_from_stream(*this->owned_file_stream, format);
171175
#else
176+
auto head = internals::get_csv_head(filename);
172177
using Parser = internals::MmapParser;
173-
#endif
174178
// Apply chunk size from format before any reading occurs
175179
this->_chunk_size = format.get_chunk_size();
176180
/** Guess delimiter and header row */
@@ -190,16 +194,9 @@ namespace csv {
190194
if (!format.col_names.empty())
191195
this->set_col_names(format.col_names);
192196

193-
#if defined(__EMSCRIPTEN__)
194-
this->owned_file_stream = std::unique_ptr<std::ifstream>(new std::ifstream(std::string(filename), std::ios::binary));
195-
if (!this->owned_file_stream->is_open()) {
196-
throw std::runtime_error("Cannot open file " + std::string(filename));
197-
}
198-
this->parser = std::unique_ptr<Parser>(new Parser(*this->owned_file_stream, format, this->col_names));
199-
#else
200197
this->parser = std::unique_ptr<Parser>(new Parser(filename, format, this->col_names)); // For C++11
201-
#endif
202198
this->initial_read();
199+
#endif
203200
}
204201

205202
/** Return the format of the original raw CSV */

include/internal/csv_reader.hpp

Lines changed: 34 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -183,30 +183,7 @@ namespace csv {
183183
template<typename TStream,
184184
csv::enable_if_t<std::is_base_of<std::istream, TStream>::value, int> = 0>
185185
CSVReader(TStream &source, CSVFormat format = CSVFormat::guess_csv()) : _format(format) {
186-
auto head = internals::get_csv_head(source);
187-
using Parser = internals::StreamParser<TStream>;
188-
189-
// Apply chunk size from format before any reading occurs
190-
this->_chunk_size = format.get_chunk_size();
191-
192-
if (format.guess_delim()) {
193-
auto guess_result = internals::_guess_format(head, format.possible_delimiters);
194-
format.delimiter(guess_result.delim);
195-
// Only override header if user hasn't explicitly called no_header()
196-
// Note: column_names() also sets header=-1, but it populates col_names,
197-
// so we can distinguish: no_header() means header=-1 && col_names.empty()
198-
if (format.header != -1 || !format.col_names.empty()) {
199-
format.header = guess_result.header_row;
200-
}
201-
this->_format = format;
202-
}
203-
204-
if (!format.col_names.empty())
205-
this->set_col_names(format.col_names);
206-
207-
this->parser = std::unique_ptr<Parser>(
208-
new Parser(source, format, col_names)); // For C++11
209-
this->initial_read();
186+
this->init_from_stream(source, format);
210187
}
211188
///@}
212189

@@ -283,8 +260,10 @@ namespace csv {
283260
/** Queue of parsed CSV rows */
284261
std::unique_ptr<RowCollection> records{new RowCollection(100)};
285262

286-
/** Owned file stream used for stream-based filename parsing on targets without mmap. */
263+
#if defined(__EMSCRIPTEN__)
264+
/** Owned file stream that backs the filename constructor when it falls back to stream-based parsing. */
287265
std::unique_ptr<std::ifstream> owned_file_stream = nullptr;
266+
#endif
288267

289268
size_t n_cols = 0; /**< The number of columns in this CSV */
290269
size_t _n_rows = 0; /**< How many rows (minus header) have been read so far */
@@ -337,6 +316,36 @@ namespace csv {
337316
}
338317
}
339318

319+
template<typename TStream,
320+
csv::enable_if_t<std::is_base_of<std::istream, TStream>::value, int> = 0>
321+
void init_from_stream(TStream& source, CSVFormat format) {
322+
auto head = internals::get_csv_head(source);
323+
using Parser = internals::StreamParser<TStream>;
324+
325+
// Apply chunk size from format before any reading occurs
326+
this->_chunk_size = format.get_chunk_size();
327+
328+
if (format.guess_delim()) {
329+
auto guess_result = internals::_guess_format(head, format.possible_delimiters);
330+
format.delimiter(guess_result.delim);
331+
// Only override header if user hasn't explicitly called no_header()
332+
// Note: column_names() also sets header=-1, but it populates col_names,
333+
// so we can distinguish: no_header() means header=-1 && col_names.empty()
334+
if (format.header != -1 || !format.col_names.empty()) {
335+
format.header = guess_result.header_row;
336+
}
337+
this->_format = format;
338+
}
339+
340+
if (!format.col_names.empty()) {
341+
this->set_col_names(format.col_names);
342+
}
343+
344+
this->parser = std::unique_ptr<Parser>(
345+
new Parser(source, format, col_names)); // For C++11
346+
this->initial_read();
347+
}
348+
340349
/** Read initial chunk to get metadata */
341350
void initial_read() {
342351
#if CSV_ENABLE_THREADS

0 commit comments

Comments
 (0)