Skip to content

Commit a813e5d

Browse files
committed
Clean up CSV writing duplicated logic
1 parent a77e2bb commit a813e5d

11 files changed

Lines changed: 156 additions & 143 deletions

File tree

docs/source/Doxy.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ column extraction, editing, and grouping.
8787
* `set_throw_on_missing_key()`: Control exception behavior for missing keys
8888

8989
### ETL Utilities
90-
* csv::csv_data_types(): Infer SQL-friendly column data types
90+
* csv::csv_data_types(): Infer SQL-friendly column data types from an existing CSVReader or from any arguments accepted by a CSVReader constructor
9191

9292
### CSV Writing
9393
The [CSV Writing Guide](@ref csv_writing_guide) contains a

include/csv.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@ SOFTWARE.
3333
#include "internal/csv_reader.hpp"
3434
#include "internal/csv_utility.hpp"
3535
#include "internal/csv_writer.hpp"
36-
#include "internal/csv_writer_extensions.hpp"
3736

3837
/** INSERT_CSV_SOURCES **/
3938

include/internal/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ target_sources(csv
1818
csv_utility.cpp
1919
csv_utility.hpp
2020
csv_writer.hpp
21-
csv_writer_extensions.hpp
2221
data_type.hpp
2322
raw_csv_data.hpp
2423
raw_csv_data.hpp

include/internal/common.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#pragma once
66
#include <algorithm>
77
#include <array>
8+
#include <cassert>
89
#include <cmath>
910
#include <cstdlib>
1011
#include <deque>
@@ -134,6 +135,12 @@ namespace csv {
134135
// Allows static assertions without specifying a message
135136
#define STATIC_ASSERT(x) static_assert(x, "Assertion failed")
136137

138+
#ifdef NDEBUG
139+
#define CSV_DEBUG_ASSERT(x) ((void)sizeof(x), (void)0)
140+
#else
141+
#define CSV_DEBUG_ASSERT(x) assert(x)
142+
#endif
143+
137144
#ifdef CSV_HAS_CXX17
138145
/** @typedef string_view
139146
* The string_view class used by this library.

include/internal/csv_utility.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,8 @@
22
#include "data_frame.hpp"
33

44
namespace csv {
5-
CSV_INLINE std::unordered_map<std::string, DataType> csv_data_types(const std::string& filename) {
5+
CSV_INLINE std::unordered_map<std::string, DataType> csv_data_types(CSVReader& reader) {
66
std::unordered_map<std::string, DataType> csv_dtypes;
7-
CSVReader reader(filename);
87
const auto col_names = reader.get_col_names();
98
std::vector<std::unordered_map<DataType, size_t>> type_counts(col_names.size());
109
constexpr size_t TYPE_CHUNK_SIZE = 5000;

include/internal/csv_utility.hpp

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <string>
1212
#include <type_traits>
1313
#include <unordered_map>
14+
#include <utility>
1415

1516
namespace csv {
1617
/** Returned by get_file_info() */
@@ -87,7 +88,37 @@ namespace csv {
8788

8889
/** @name Utility Functions */
8990
///@{
90-
std::unordered_map<std::string, DataType> csv_data_types(const std::string&);
91+
/** Infer SQL-friendly column data types from an existing CSVReader.
92+
*
93+
* This consumes rows from `reader` using the chunked ETL path and returns
94+
* one inferred `DataType` per column name.
95+
*/
96+
std::unordered_map<std::string, DataType> csv_data_types(CSVReader& reader);
97+
98+
/** Infer SQL-friendly column data types from any CSVReader constructor input.
99+
*
100+
* This convenience overload forwards its arguments directly to
101+
* `CSVReader`, so it supports filenames, `std::istream` sources, owned
102+
* streams, and custom `CSVFormat` combinations without additional wrapper
103+
* code.
104+
*
105+
* @par Example
106+
* @code
107+
* std::istringstream input("name,age\nAlice,30\nBob,41\n");
108+
* CSVFormat format;
109+
* format.delimiter(',').header_row(0);
110+
*
111+
* auto dtypes = csv::csv_data_types(input, format);
112+
* @endcode
113+
*/
114+
template<
115+
typename... ReaderArgs,
116+
csv::enable_if_t<std::is_constructible<CSVReader, ReaderArgs...>::value, int> = 0
117+
>
118+
inline std::unordered_map<std::string, DataType> csv_data_types(ReaderArgs&&... reader_args) {
119+
CSVReader reader(std::forward<ReaderArgs>(reader_args)...);
120+
return csv_data_types(reader);
121+
}
91122

92123
/** Apply a per-column batch function over a CSVReader using a reusable executor.
93124
*

include/internal/csv_writer.hpp

Lines changed: 23 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,16 @@ namespace csv {
375375
write_range_impl(container);
376376
return *this;
377377
}
378+
379+
/** Write a row-like object that exposes to_sv_range(). */
380+
template<typename RowLike>
381+
DelimWriter& operator<<(const RowLike& row)
382+
requires internals::has_to_sv_range<RowLike>
383+
&& !internals::csv_string_field_range<RowLike> {
384+
append_row_like(row);
385+
finish_write_call();
386+
return *this;
387+
}
378388
#else
379389
/** Write a range of string-like fields as one delimited row.
380390
*
@@ -432,12 +442,13 @@ namespace csv {
432442
}
433443

434444
private:
435-
/** Helper to write a range of values, handling first element undelimited,
436-
* rest prefixed with delimiter. Inlines aggressively across both C++20 and
437-
* C++11 operator<< entry points.
445+
/** Append delimited fields from a range without terminating the record.
446+
*
447+
* Shared by single-row writes and bulk row appends so escaping and
448+
* delimiter handling stay on one code path.
438449
*/
439450
template<typename Range>
440-
inline void write_range_impl(const Range& record) {
451+
inline void append_range_fields(Range&& record) {
441452
auto it = std::begin(record);
442453
auto end = std::end(record);
443454

@@ -450,6 +461,14 @@ namespace csv {
450461
batch_buffer_.push_back(Delim);
451462
write_field(*it);
452463
}
464+
}
465+
466+
/** Helper to write a complete range-backed row and apply the normal
467+
* end-of-record and flush policy for operator<< entry points.
468+
*/
469+
template<typename Range>
470+
inline void write_range_impl(const Range& record) {
471+
append_range_fields(record);
453472

454473
end_record();
455474
finish_write_call();
@@ -467,23 +486,6 @@ namespace csv {
467486

468487
end_record();
469488
}
470-
471-
template<std::ranges::input_range Range>
472-
requires std::convertible_to<std::ranges::range_reference_t<Range>, csv::string_view>
473-
void append_range_fields(Range&& record) {
474-
auto it = std::begin(record);
475-
auto end = std::end(record);
476-
477-
if (it != end) {
478-
write_field(*it);
479-
++it;
480-
}
481-
482-
for (; it != end; ++it) {
483-
batch_buffer_.push_back(Delim);
484-
write_field(*it);
485-
}
486-
}
487489
#endif
488490

489491
template<

include/internal/csv_writer_extensions.hpp

Lines changed: 0 additions & 59 deletions
This file was deleted.

0 commit comments

Comments
 (0)