RFC 4180対応版 CSVレコードの分解
Posted feedbacks - C++
上のコードをC++に移植しました。カバレッジ稼ぎ。
#include <iostream>
#include <vector>
#include <string>
#include <iterator>
/**
 * Splits the first record of a CSV text into its fields (RFC 4180 style).
 *
 * Rules implemented here:
 *  - fields are separated by ',';
 *  - a '"' opens a quoted section, inside which ',' '\r' and '\n' are
 *    ordinary characters and "" stands for one literal '"';
 *  - an unquoted '\r' or '\n' ends the record; everything after it is ignored;
 *  - a record that ends with ',' has a final empty field ("a," -> {"a", ""});
 *  - an empty quoted field ("") yields an empty string;
 *  - an unterminated quoted section simply runs to the end of the input.
 *
 * @param csv  CSV text; only its first record is examined.
 * @return The fields of the first record, unquoted and unescaped.
 *         An empty input yields an empty vector.
 */
std::vector<std::string> split_csv(const std::string& csv)
{
    std::vector<std::string> fields;
    std::string::const_iterator it = csv.begin();
    const std::string::const_iterator end = csv.end();

    // True when the most recently finished field was terminated by a comma;
    // if the input ends right there, one more (empty) field is still owed.
    bool after_comma = false;

    while (it != end)
    {
        std::string field;
        bool quoted = false;
        after_comma = false;

        while (it != end)
        {
            const char c = *it;
            ++it;

            if (quoted)
            {
                if (c != '"')
                {
                    field.append(1, c);
                }
                else if (it != end && *it == '"')
                {
                    // "" inside a quoted section is an escaped quote.
                    field.append(1, '"');
                    ++it;
                }
                else
                {
                    // A lone quote closes the quoted section.
                    quoted = false;
                }
            }
            else if (c == ',')
            {
                after_comma = true;
                break;
            }
            else if (c == '\r' || c == '\n')
            {
                it = end; // ignore second record
                break;
            }
            else if (c == '"')
            {
                // Outside quotes a quote always opens a quoted section, so
                // that "" at this point is an empty quoted field rather
                // than an escaped quote character.
                quoted = true;
            }
            else
            {
                field.append(1, c);
            }
        }
        fields.push_back(field);
    }

    if (after_comma)
    {
        fields.push_back(std::string());
    }
    return fields;
}
// Demo driver: splits one sample record and prints each field as
// "<1-based index> => <value>", one per line.
int main()
{
    // The sample exercises quoted fields, a line break inside quotes and
    // doubled-quote escapes.
    const std::string record("\"aaa\",\"b\nbb\",\"ccc\",zzz,\"y\"\"Y\"\"y\",xxx");
    const std::vector<std::string> fields = split_csv(record);

    std::vector<std::string>::size_type number = 0;
    for (std::vector<std::string>::const_iterator field = fields.begin();
         field != fields.end(); ++field)
    {
        ++number;
        std::cout << number << " => " << *field << std::endl;
    }
}
|
変態的と名高い(?) Boost.Spirit で解析。
#include <vector>
#include <string>
#include <exception>
#include <stdexcept>
#include <boost/spirit.hpp>
#include <boost/spirit/actor/push_back_actor.hpp>
#include <boost/spirit/actor/clear_actor.hpp>
// One CSV record: its field values, in order.
typedef std::vector<std::string> csv_elem_t;

/**
 * Parses CSV text into records with a Boost.Spirit (classic) grammar.
 *
 * Grammar, as written below:
 *  - a quoted field is '"' content '"', where content is any run of
 *    characters other than '"' plus the escape sequence "" (so commas and
 *    line breaks may appear inside quotes);
 *  - a naked field is any run, possibly empty, of characters other than
 *    '"', ',' and end-of-line;
 *  - a record is a ','-separated list of fields;
 *  - the input is an eol-separated list of records and must be matched
 *    in full.
 *
 * @param lines  CSV text. It is handed to the parser as a C string, so
 *               anything after an embedded NUL is not seen by the parser.
 * @return One csv_elem_t per record, with "" escapes collapsed to a
 *         single '"'. A line break that terminates the last record does
 *         not produce an additional record.
 * @throws std::runtime_error("failed to parse") when the grammar does not
 *         match the whole input.
 */
std::vector<csv_elem_t>
parse_csv(
    std::string lines
)
{
    using namespace boost::spirit;
    std::vector<csv_elem_t> csv;
    csv_elem_t e; // fields of the record currently being parsed

    rule<> element_r = *((anychar_p - ch_p('"')) | str_p("\"\""));
    rule<> quoted_r = ch_p('"') >> element_r[push_back_a(e)] >> ch_p('"');
    rule<> naked_r = (*(anychar_p - ch_p('"') - ch_p(',') - eol_p))[push_back_a(e)];
    rule<> record_r = list_p((quoted_r|naked_r), ch_p(','));
    // After each record: store the collected fields, then reset the buffer.
    rule<> csv_r = list_p(record_r[push_back_a(csv,e)][clear_a(e)], eol_p) >> end_p;

    parse_info<> result = parse(lines.c_str(), csv_r);
    if ( !result.full ) {
        throw std::runtime_error("failed to parse");
    }

    // RFC 4180 makes the line break after the last record optional. When it
    // is present, the grammar sees an empty segment after that final eol and
    // accepts it as a record holding a single empty naked field. Drop that
    // phantom record so "a,b\n" and "a,b" give the same result.
    if ( !lines.empty() && csv.size() > 1 ) {
        const char last = lines[lines.size() - 1];
        const csv_elem_t& tail = csv.back();
        if ( (last == '\n' || last == '\r') && tail.size() == 1 && tail[0].empty() ) {
            csv.pop_back();
        }
    }

    // The grammar stores quoted content verbatim, so collapse every ""
    // escape into a single '"'. Naked fields cannot contain '"' and are
    // therefore left unchanged by this pass.
    typedef std::vector<csv_elem_t>::iterator csv_list_iter;
    typedef csv_elem_t::iterator csv_iter;
    for ( csv_list_iter clit = csv.begin(); clit != csv.end(); ++clit ) {
        for ( csv_iter cit = clit->begin(); cit != clit->end(); ++cit ) {
            std::string::size_type idx = 0;
            while ( (idx = cit->find("\"\"", idx)) != std::string::npos ) {
                cit->replace(idx, 2, "\"");
                ++idx; // step past the quote just written so it is not re-paired
            }
        }
    }
    return csv;
}
int main()
{
try {
std::vector<csv_elem_t> csv =
parse_csv("\"aaa\",\"b\nbb\",\"ccc\",zzz,\"y\"\"Y\"\"y\",xxx");
std::cout << "total records: " << csv.size() << "\n";
typedef std::vector<csv_elem_t>::const_iterator csv_list_iter;
typedef csv_elem_t::const_iterator csv_iter;
int l = 1;
for ( csv_list_iter clit = csv.begin(); clit != csv.end(); ++clit,++l ) {
std::cout << "#" << l << "\n";
int i = 1;
for ( csv_iter cit = clit->begin(); cit != clit->end(); ++cit,++i ) {
std::cout << i << " => " << *cit << "\n";
}
}
}
catch ( std::exception& e ) {
std::cerr << e.what() << "\n";
}
return 0;
}
|



raynstard
#3389()
Rating1/1=1.00
[ reply ]