Skip to content

Commit bb3f5a3

Browse files
Single Threaded + Emscripten Support (#301)
* Rough draft of Emscripten and single threaded support
* Use different classes for CSVRow deques
* Disable threads automatically for Emscripten
* Update basic_csv_parser.hpp Fix Emscripten failure
* Update CMakeLists.txt
* Update csv_row.hpp Reduce duplication
* More clean-up
* Auto-disable threads if not detected
* Fixed version detection macro
* Final polish
* Run emscripten tests
* Very minor BasicCSVParser constructor fix
* Documentation updates
* Update README.md
* Fix CI failures
* Disable file-reading tests for Emscripten suite
* Minor issues
* Fix test failures
* Added more file guards
* Added more guards
* Update test_edge_cases_large_rows.cpp
* Disable Emscripten tests for now
1 parent 7577473 commit bb3f5a3

32 files changed

Lines changed: 571 additions & 92 deletions

.github/workflows/cmake-multi-platform.yml

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,5 +95,76 @@ jobs:
9595
# See https://cmake.org/cmake/help/latest/manual/ctest.1.html for more detail
9696
run: ctest --build-config ${{ matrix.build_type }}
9797

98+
single-threaded:
99+
name: Single-threaded build (CSV_ENABLE_THREADS=OFF)
100+
runs-on: ubuntu-latest
101+
102+
steps:
103+
- name: Checkout repository and submodules
104+
uses: actions/checkout@v5
105+
with:
106+
submodules: recursive
107+
108+
- name: Configure CMake (single-threaded)
109+
run: >
110+
cmake -B ${{ github.workspace }}/build-single-thread
111+
-DCSV_ENABLE_THREADS=OFF
112+
-DCSV_CXX_STANDARD=20
113+
-DCMAKE_BUILD_TYPE=Release
114+
-S ${{ github.workspace }}
115+
116+
- name: Build (single-threaded)
117+
run: cmake --build ${{ github.workspace }}/build-single-thread --config Release
118+
119+
- name: Test (single-threaded)
120+
working-directory: ${{ github.workspace }}/build-single-thread
121+
run: ctest --build-config Release --output-on-failure
122+
123+
single-threaded-emscripten:
124+
name: Single-threaded Emscripten build (CSV_ENABLE_THREADS=OFF)
125+
runs-on: ubuntu-latest
126+
env:
127+
# TODO: Set to '1' to re-enable wasm runtime tests once a lightweight
128+
# Emscripten smoke subset is in place and stable in CI.
129+
CSV_EMSCRIPTEN_RUN_TESTS: '0'
130+
131+
steps:
132+
- name: Checkout repository and submodules
133+
uses: actions/checkout@v5
134+
with:
135+
submodules: recursive
136+
137+
- name: Set up Python
138+
uses: actions/setup-python@v6
139+
with:
140+
python-version: '3.x'
141+
142+
- name: Set up Emscripten
143+
uses: mymindstorm/setup-emsdk@667eb33f24e84e7f362c16d8d7fff0629a73e15e # v14
144+
with:
145+
version: latest
146+
147+
- name: Configure CMake (single-threaded emscripten)
148+
run: >
149+
emcmake cmake -B ${{ github.workspace }}/build-single-thread-emscripten
150+
-DCSV_ENABLE_THREADS=OFF
151+
-DCSV_CXX_STANDARD=20
152+
-DCSV_BUILD_SINGLE_INCLUDE_TEST=ON
153+
-DCMAKE_CROSSCOMPILING_EMULATOR=node
154+
-DCMAKE_BUILD_TYPE=Release
155+
-S ${{ github.workspace }}
156+
157+
- name: Build (single-threaded emscripten)
158+
run: cmake --build ${{ github.workspace }}/build-single-thread-emscripten --config Release
159+
160+
- name: Test (single-threaded emscripten)
161+
if: env.CSV_EMSCRIPTEN_RUN_TESTS == '1'
162+
working-directory: ${{ github.workspace }}/build-single-thread-emscripten
163+
run: ctest --build-config Release --output-on-failure
164+
165+
- name: Skip tests (single-threaded emscripten)
166+
if: env.CSV_EMSCRIPTEN_RUN_TESTS != '1'
167+
run: echo "Skipping Emscripten ctest (build-only coverage). Set CSV_EMSCRIPTEN_RUN_TESTS=1 to re-enable."
168+
98169

99170

AGENTS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,5 +73,6 @@ ThreadSafeDeque<CSVRow>
7373
3. **Don't use uniform values:** Each column needs distinct values to detect corruption.
7474
4. **Don't ignore async:** Worker thread means exceptions must use `exception_ptr`.
7575
5. **Don't change one constructor:** Likely affects both mmap and stream paths.
76+
6. **Don't delete or simplify comments** unless they are trivially obvious (e.g. `// increment i`) or factually incorrect. Comments in this codebase frequently encode concurrency invariants, non-obvious design decisions, and hard-won bug context that cannot be recovered from the code alone.
7677
7778
See `tests/AGENTS.md` for test strategy, checklist, and conventions.

CLAUDE.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
- Use distinct column values to detect field corruption
3333
- Exceptions from worker thread need `exception_ptr`
3434
- Changes to one constructor likely affect both paths
35+
- **Do not delete or simplify comments** unless trivially obvious or factually wrong — comments encode concurrency invariants and bug history
3536

3637
## Tests
3738
See `tests/AGENTS.md` for full test strategy, checklist, and conventions.

CMakeLists.txt

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,35 @@ endif(CSV_CXX_STANDARD)
1010
option(BUILD_PYTHON "Build Python Binding" OFF)
1111
option(CSV_BUILD_SINGLE_INCLUDE_TEST "Build single-header smoke test (requires Python)" OFF)
1212
option(ENABLE_CODE_COVERAGE "Enable code coverage instrumentation" OFF)
13+
option(CSV_ENABLE_THREADS "Enable multi-threaded CSV parsing" ON)
1314

14-
message("Building CSV library using C++${CMAKE_CXX_STANDARD}")
15+
if(EMSCRIPTEN AND CSV_ENABLE_THREADS)
16+
message(STATUS "Emscripten target detected: forcing CSV_ENABLE_THREADS=OFF")
17+
set(CSV_ENABLE_THREADS OFF CACHE BOOL "Enable multi-threaded CSV parsing" FORCE)
18+
endif()
19+
20+
if(EMSCRIPTEN)
21+
message(STATUS "Emscripten target detected: enabling C++ exceptions (-fexceptions)")
22+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexceptions")
23+
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fexceptions")
24+
endif()
1525

16-
# Defines CSV_HAS_CXX17 in compatibility.hpp
17-
if (CMAKE_VERSION VERSION_LESS "3.12.0")
18-
add_definitions(-DCMAKE_CXX_STANDARD=${CMAKE_CXX_STANDARD})
26+
if(CSV_ENABLE_THREADS)
27+
message(STATUS "CSV_ENABLE_THREADS enabled: multi-threaded mode")
1928
else()
20-
add_compile_definitions(CMAKE_CXX_STANDARD=${CMAKE_CXX_STANDARD})
29+
message(STATUS "CSV_ENABLE_THREADS disabled: single-threaded mode")
2130
endif()
2231

23-
set(THREADS_PREFER_PTHREAD_FLAG TRUE)
24-
find_package(Threads QUIET REQUIRED)
32+
message("Building CSV library using C++${CMAKE_CXX_STANDARD}")
33+
34+
if(CSV_ENABLE_THREADS)
35+
set(THREADS_PREFER_PTHREAD_FLAG TRUE)
36+
find_package(Threads QUIET)
37+
if(NOT Threads_FOUND)
38+
message(STATUS "Threads package not found: forcing CSV_ENABLE_THREADS=OFF")
39+
set(CSV_ENABLE_THREADS OFF CACHE BOOL "Enable multi-threaded CSV parsing" FORCE)
40+
endif()
41+
endif()
2542

2643
if(MSVC)
2744
# Make Visual Studio report accurate C++ version

README.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
- [Sponsors](#sponsors)
1616
- [Integration](#integration)
1717
- [C++ Version](#c-version)
18+
- [Threading Modes](#threading-modes)
19+
- [Emscripten / WebAssembly](#emscripten--webassembly)
1820
- [Single Header](#single-header)
1921
- [CMake Instructions](#cmake-instructions)
2022
- [Avoid cloning with FetchContent](#avoid-cloning-with-fetchcontent)
@@ -124,6 +126,30 @@ While C++17 is recommended, C++11 is the minimum version required. This library
124126
125127
This library requires C++ exceptions to be enabled (for example, do not compile with `-fno-exceptions`).
126128
129+
### Threading Modes
130+
By default, `csv-parser` uses a background thread to parse file-based input. If CMake cannot find a thread library, threading is disabled automatically.
131+
132+
You can also disable it explicitly:
133+
134+
**CMake**
135+
```cmake
136+
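# Set the option as a cache entry: with CMake policy CMP0077 not set to NEW
# (policy level below 3.13), option() discards a plain set() variable on the
# first configure and the value would silently revert to ON.
set(CSV_ENABLE_THREADS OFF CACHE BOOL "Enable multi-threaded CSV parsing" FORCE)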
137+
add_subdirectory(csv-parser)
138+
```
139+
140+
**Non-CMake (define the macro before any csv-parser header)**
141+
```cpp
142+
#define CSV_ENABLE_THREADS 0
143+
#include "csv.hpp"
144+
```
145+
146+
Single-threaded mode is useful for embedded targets, environments where `std::thread` is unavailable, and WebAssembly builds without pthreads. The public API is unchanged; parsing simply runs synchronously on the caller's thread.
147+
148+
### Emscripten / WebAssembly
149+
On Emscripten, `CSV_ENABLE_THREADS` is forced off and memory-mapped parsing is replaced by the stream-based parser. The filename constructor (`CSVReader("file.csv")`) still works—it opens an `std::ifstream` internally instead of using mmap.
150+
151+
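Emscripten builds must keep C++ exceptions enabled. When csv-parser is built through its own CMake project, `-fexceptions` is appended to that project's compile and link flags automatically once an Emscripten target is detected. Code built outside of it (your own targets, or any non-CMake build) must also compile and link with exception support (for example, `-fexceptions`) and must not disable exception catching.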
152+
127153
### Single Header
128154
**[📥 Download csv.hpp](https://vincentlaucsb.github.io/csv-parser/csv.hpp)** — Available on GitHub Pages
129155
@@ -141,6 +167,10 @@ and add the following to your CMakeLists.txt:
141167
```
142168
# Optional: Defaults to C++ 17
143169
# set(CSV_CXX_STANDARD 11)
170+
171+
# Optional: disable background parsing threads
172+
# set(CSV_ENABLE_THREADS OFF CACHE BOOL "Enable multi-threaded CSV parsing" FORCE)
173+
144174
add_subdirectory(csv-parser)
145175
146176
# ...

include/csv.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
CSV for C++, version 2.5.2
2+
CSV for C++, version 3.0.0
33
https://github.com/vincentlaucsb/csv-parser
44
55
MIT License

include/internal/CMakeLists.txt

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,18 @@ target_sources(csv
2323
data_type.hpp
2424
raw_csv_data.hpp
2525
raw_csv_data.cpp
26+
row_deque.hpp
27+
single_thread_deque.hpp
2628
thread_safe_deque.hpp
2729
)
2830

2931
set_target_properties(csv PROPERTIES LINKER_LANGUAGE CXX)
30-
target_link_libraries(csv PRIVATE Threads::Threads)
32+
33+
if(CSV_ENABLE_THREADS)
34+
target_compile_definitions(csv PUBLIC CSV_ENABLE_THREADS=1)
35+
target_link_libraries(csv PRIVATE Threads::Threads)
36+
else()
37+
target_compile_definitions(csv PUBLIC CSV_ENABLE_THREADS=0)
38+
endif()
39+
3140
target_include_directories(csv INTERFACE ../)

include/internal/basic_csv_parser.cpp

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,19 @@ namespace csv {
66
namespace internals {
77
CSV_INLINE size_t get_file_size(csv::string_view filename) {
88
std::ifstream infile(std::string(filename), std::ios::binary);
9+
if (!infile.is_open()) {
10+
throw std::runtime_error("Cannot open file " + std::string(filename));
11+
}
12+
913
const auto start = infile.tellg();
1014
infile.seekg(0, std::ios::end);
1115
const auto end = infile.tellg();
1216

13-
return end - start;
17+
if (start < 0 || end < 0) {
18+
throw std::runtime_error("Cannot determine file size for " + std::string(filename));
19+
}
20+
21+
return static_cast<size_t>(end - start);
1422
}
1523

1624
CSV_INLINE std::string get_csv_head(csv::string_view filename) {
@@ -20,6 +28,19 @@ namespace csv {
2028
CSV_INLINE std::string get_csv_head(csv::string_view filename, size_t file_size) {
2129
const size_t bytes = 500000;
2230

31+
#if defined(__EMSCRIPTEN__)
32+
std::ifstream infile(std::string(filename), std::ios::binary);
33+
if (!infile.is_open()) {
34+
throw std::runtime_error("Cannot open file " + std::string(filename));
35+
}
36+
37+
const size_t length = std::min((size_t)file_size, bytes);
38+
std::string head(length, '\0');
39+
infile.read(&head[0], (std::streamsize)length);
40+
head.resize((size_t)infile.gcount());
41+
return head;
42+
#else
43+
2344
std::error_code error;
2445
size_t length = std::min((size_t)file_size, bytes);
2546
auto mmap = mio::make_mmap_source(std::string(filename), 0, length, error);
@@ -29,6 +50,7 @@ namespace csv {
2950
}
3051

3152
return std::string(mmap.begin(), mmap.end());
53+
#endif
3254
}
3355

3456
#ifdef _MSC_VER
@@ -234,6 +256,7 @@ namespace csv {
234256
#ifdef _MSC_VER
235257
#pragma region Specializations
236258
#endif
259+
#if !defined(__EMSCRIPTEN__)
237260
CSV_INLINE void MmapParser::next(size_t bytes = ITERATION_CHUNK_SIZE) {
238261
// CRITICAL SECTION: Chunk Transition Logic
239262
// This function reads 10MB chunks and must correctly handle fields that span
@@ -289,6 +312,7 @@ namespace csv {
289312

290313
this->mmap_pos -= (length - remainder);
291314
}
315+
#endif
292316
#ifdef _MSC_VER
293317
#pragma endregion
294318
#endif

include/internal/basic_csv_parser.hpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,16 @@
99
#include <memory>
1010
#include <unordered_map>
1111
#include <unordered_set>
12-
#include <thread>
1312
#include <vector>
1413

14+
#if !defined(__EMSCRIPTEN__)
1515
#include "../external/mio.hpp"
16+
#endif
1617
#include "col_names.hpp"
1718
#include "common.hpp"
1819
#include "csv_format.hpp"
1920
#include "csv_row.hpp"
20-
#include "thread_safe_deque.hpp"
21+
#include "row_deque.hpp"
2122

2223
namespace csv {
2324
namespace internals {
@@ -240,15 +241,15 @@ namespace csv {
240241
const auto end = _source.tellg();
241242
_source.seekg(0, std::ios::beg);
242243

243-
source_size = end - start;
244+
source_size = static_cast<size_t>(end - start);
244245
}
245246

246247
// Read data into buffer
247248
size_t length = std::min(source_size - stream_pos, bytes);
248249
std::unique_ptr<char[]> buff(new char[length]);
249250
_source.seekg(stream_pos, std::ios::beg);
250251
_source.read(buff.get(), length);
251-
stream_pos = _source.tellg();
252+
stream_pos = static_cast<size_t>(_source.tellg());
252253
((std::string*)(this->data_ptr->_data.get()))->assign(buff.get(), length);
253254

254255
// Create string_view
@@ -272,6 +273,7 @@ namespace csv {
272273
size_t stream_pos = 0;
273274
};
274275

276+
#if !defined(__EMSCRIPTEN__)
275277
/** Parser for memory-mapped files
276278
*
277279
* @par Implementation
@@ -299,5 +301,6 @@ namespace csv {
299301
std::string _filename;
300302
size_t mmap_pos = 0;
301303
};
304+
#endif
302305
}
303306
}

include/internal/common.hpp

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,22 @@
2828
#pragma once
2929
#include <type_traits>
3030

31+
#if defined(__EMSCRIPTEN__)
32+
#undef CSV_ENABLE_THREADS
33+
#define CSV_ENABLE_THREADS 0
34+
#elif !defined(CSV_ENABLE_THREADS)
35+
#define CSV_ENABLE_THREADS 1
36+
#endif
37+
3138
// Minimal portability macros (Hedley subset) with CSV_ prefix.
3239
#if defined(__clang__) || defined(__GNUC__)
3340
#define CSV_CONST __attribute__((__const__))
3441
#define CSV_PURE __attribute__((__pure__))
35-
#define CSV_PRIVATE __attribute__((__visibility__("hidden")))
42+
#if defined(_WIN32)
43+
#define CSV_PRIVATE
44+
#else
45+
#define CSV_PRIVATE __attribute__((__visibility__("hidden")))
46+
#endif
3647
#define CSV_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__)))
3748
#elif defined(_MSC_VER)
3849
#define CSV_CONST
@@ -73,15 +84,15 @@
7384
# define CSV_CPLUSPLUS __cplusplus
7485
#endif
7586

76-
#if (defined(CMAKE_CXX_STANDARD) && CMAKE_CXX_STANDARD == 20) || CSV_CPLUSPLUS >= 202002L
87+
#if CSV_CPLUSPLUS >= 202002L
7788
#define CSV_HAS_CXX20
7889
#endif
7990

80-
#if (defined(CMAKE_CXX_STANDARD) && CMAKE_CXX_STANDARD == 17) || CSV_CPLUSPLUS >= 201703L
91+
#if CSV_CPLUSPLUS >= 201703L
8192
#define CSV_HAS_CXX17
8293
#endif
8394

84-
#if (defined(CMAKE_CXX_STANDARD) && CMAKE_CXX_STANDARD >= 14) || CSV_CPLUSPLUS >= 201402L
95+
#if CSV_CPLUSPLUS >= 201402L
8596
#define CSV_HAS_CXX14
8697
#endif
8798

0 commit comments

Comments
 (0)