Comment detail

RFC 4180対応版 CSVレコードの分解 (Nested Flatten)
汚いなぁ。。。
fiber 便利。
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
// Wraps this iterator so that every element is delivered together with its
// neighbours: yields (prev, curr, next) for each element in order.
// prev is null for the first element and next is null for the last one.
// Yields nothing when the underlying iterator is empty.
Iterator::with_peek: method fiber {
    prev, curr: null, null;
    // Tracks whether at least one element has been read. An explicit flag is
    // used instead of testing `curr` itself so that falsy elements
    // (null / false) are still yielded as the current item.
    started: false;
    this {|next|
        if (started) {
            yield prev, curr, next;
        }
        prev, curr = curr, next;
        started = true;
    }
    // The last element has no successor; emit it with next = null.
    if (started) {
        yield prev, curr, null;
    }
}

// Character-level CSV parser following the quoting rules of RFC 4180.
// `parse` yields one fiber per record; each of those fibers yields the
// record's fields as strings.
CSVParser: class {
    // Shared character iterator producing (prev, char, next) triples.
    - _iter;
    // Set to true once _iter has been fully consumed.
    - _finish;

    // string: the complete CSV text to parse.
    initialize: method(string) {
        _iter = string.split("").each.with_peek;
        _finish = false;
    }

    // Joins the accumulated characters into one string, empties the buffer
    // and returns the joined string.
    flush: method(field) {
        r: field.join("");
        field.clear();
        return r;
    }

    // Yields the fields of the next record, consuming characters from the
    // shared iterator up to and including the record's terminating newline.
    // Sets _finish when the iterator runs out of input.
    line_parser: method fiber {
        in_quote: false;
        // True right after the first quote of an escaped "" pair was
        // emitted; the following character (the second quote) is dropped.
        skip_next: false;
        field: [];
        _iter {|prev,it,next|
            if (skip_next) {
                // second quote of an escaped pair, already emitted
                skip_next = false;
            } else if (in_quote) {
                if (it != "\"") {
                    // Inside quotes everything, including "," and "\n",
                    // is literal field content.
                    field.push_back(it);
                } else if (next == "\"") {
                    // "" inside a quoted field stands for one literal quote.
                    field.push_back(it);
                    skip_next = true;
                } else {
                    // A lone quote closes the quoted section.
                    in_quote = false;
                }
            } else if (field.empty() && it == "\"") {
                // A quote at the very start of a field opens a quoted section.
                in_quote = true;
            } else if (it == "\r" && next == "\n") {
                // CRLF record terminator: drop the CR, the LF ends the record
            } else if (it == "\n") {
                yield flush(field);
                break;
            } else if (it == ",") {
                yield flush(field);
            } else {
                field.push_back(it);
            }
        } nobreak {
            // The loop ended without hitting a newline: input is exhausted.
            _finish = true;
        }
        // Emit a final unterminated field, if any characters were collected.
        if (!field.empty()) {
            yield flush(field);
        }
    }

    // Yields one line_parser fiber per record until the input is exhausted.
    parse: method fiber {
        while (!_finish) {
            yield line_parser();
        }
    }
}


// Demo: parse a small multi-line CSV sample and print every cell together
// with its record index and column index.
sample_rows: [
    %!"aaa","b\nbb","ccc",zzz,"y""Y""y",xxx!,
    %!a,b,c,d!,
    %!a,b,c,!,
    %!a!,
    %!!,
];
sample_text: sample_rows.join("\n");
parser: CSVParser(sample_text);

format: %f[%(line)d:%(col)d: %(cell)s];
records: parser.parse;
records.with_index {|row_no, row|
    row.with_index {|col_no, value|
        text: format(line: row_no, col: col_no, cell: value);
        text.p;
    }
}

Index

Feed

Other

Link

Pathtraq

loading...