RFC 4180対応版 CSVレコードの分解
Posted feedbacks - C#
using System;
using System.Collections.Generic;
using System.Text;
/// <summary>
/// Demo program: splits a single CSV record (RFC 4180 style quoting) into its
/// fields and prints them.
/// </summary>
class Program
{
    /// <summary>
    /// Splits a hard-coded sample record and prints every field as
    /// "index =&gt; value", with a 1-based index.
    /// </summary>
    static void Main()
    {
        string s = "\"aaa\",\"b\nbb\",\"ccc\",zzz,\"y\"\"Y\"\"y\",xxx";
        string[] ss = splitCSV(s);
        for (int i = 0; i < ss.Length; ++i)
        {
            Console.WriteLine("{0} => {1}", i + 1, ss[i]);
        }
    }

    /// <summary>
    /// Splits one CSV record into fields. Fields are separated by commas; a field
    /// that starts with a double quote runs to the matching closing quote and may
    /// contain commas, line breaks and doubled quotes (<c>""</c> stands for one
    /// literal quote).
    /// </summary>
    /// <param name="s">The record text, without its record terminator.</param>
    /// <returns>
    /// The fields in order; <c>null</c> when <paramref name="s"/> is <c>null</c>;
    /// an empty array when <paramref name="s"/> is empty.
    /// </returns>
    static string[] splitCSV(string s)
    {
        if (s == null) return null;

        List<string> fields = new List<string>();
        // Accumulates the pieces of a quoted field that contains escaped quotes.
        StringBuilder pending = new StringBuilder();
        int i = 0;
        while (i < s.Length)
        {
            // While inside a quoted field, the second quote of an escaped pair
            // ("") is deliberately treated as a fresh opening quote here.
            bool quoted = s[i] == '"';
            if (quoted) ++i;

            int j = s.IndexOf(quoted ? '"' : ',', i);
            if (j < 0) j = s.Length; // no terminator: the field runs to the end

            string chunk = s.Substring(i, j - i);
            if (quoted && j < s.Length - 1 && s[j + 1] == '"')
            {
                // Escaped quote: keep one literal quote and continue the same field.
                pending.Append(chunk).Append('"');
                i = j + 1;
            }
            else
            {
                fields.Add(pending.ToString() + chunk);
                pending.Length = 0;
                // Skip the comma, or the closing quote plus the character after it.
                i = j + (quoted ? 2 : 1);

                // A comma as the very last character means one more (empty) field
                // follows; without this the trailing empty field would be dropped.
                if (i == s.Length && s[i - 1] == ',')
                {
                    fields.Add(string.Empty);
                }
            }
        }
        return fields.ToArray();
    }
}
|
C#でStateパターン風に実装。
.NET Framework 2.0 以上だと #9257 の Microsoft.VisualBasic.FileIO を使えばいいわけなので、1.1 の言語機能だけで実現するべくジェネリックとか LINQ とかは使わないようにしてみました。
using System;
using System.Collections;
using System.Text;
using System.IO;
/// <summary>
/// Per-record state machine that is fed one character at a time and decides
/// whether that character is field content, part of a quote, or part of a field
/// delimiter / record terminator configured on the owning
/// <see cref="TextFieldParser"/>.
/// </summary>
class ParserState
{
    // Snapshots of the parser's configuration. The parser's properties return a
    // fresh clone on every access, so they are read once here instead of several
    // times per input character.
    private readonly char[] _quoters;
    private readonly string[] _fldDlms;
    private readonly string[] _recTrms;

    // True when the next character starts a new field.
    private bool _break = true;
    // The quote character that opened the current field, or 0 when not quoted.
    private int _openQ = 0;
    // True right after a quote was seen inside a quoted field; the next character
    // decides whether it was an escaped quote or the closing quote.
    private bool _isClosing = false;
    // Characters matched so far against a field delimiter / record terminator.
    private readonly StringBuilder _curFldDlm;
    private readonly StringBuilder _curRecTrm;

    /// <summary>True when the last character completed a field (delimiter or terminator matched), and before any input.</summary>
    public bool IsBreaking { get { return _break; } }

    /// <summary>True while the current field was opened by a quote character.</summary>
    public bool IsQuoted { get { return (_openQ != 0); } }

    /// <summary>True when a quote was just seen inside a quoted field and is not yet resolved.</summary>
    public bool IsClosing { get { return _isClosing; } }

    /// <summary>True when the characters matched so far equal one of the field delimiters.</summary>
    public bool IsFieldDelimited { get { return Grep(_curFldDlm.ToString(), _fldDlms); } }

    /// <summary>True when the characters matched so far equal one of the record terminators.</summary>
    public bool IsRecordTerminated { get { return Grep(_curRecTrm.ToString(), _recTrms); } }

    /// <summary>Returns true when <paramref name="s"/> equals any element of <paramref name="c"/>.</summary>
    private static bool Grep(string s, string[] c)
    {
        foreach (string t in c)
        {
            if (s == t) return true;
        }
        return false;
    }

    /// <summary>Returns true when <paramref name="input"/> is one of the configured quote characters.</summary>
    private bool CheckIfQuote(int input)
    {
        foreach (int c in _quoters)
        {
            if (input == c) return true;
        }
        return false;
    }

    /// <summary>
    /// Returns true when the text matched so far plus <paramref name="input"/> is a
    /// prefix of any candidate. The comparison is ordinal: the culture-sensitive
    /// default treats ignorable characters (for example '\0') as matching, which
    /// would silently swallow such characters as delimiter parts.
    /// </summary>
    private static bool IsPrefixOfAny(StringBuilder matched, int input, string[] candidates)
    {
        string probe = matched.ToString() + ((char)input).ToString();
        foreach (string s in candidates)
        {
            if (s.StartsWith(probe, StringComparison.Ordinal)) return true;
        }
        return false;
    }

    /// <summary>Returns true when <paramref name="input"/> extends a partial field-delimiter match.</summary>
    private bool CheckIfFieldEnding(int input)
    {
        return IsPrefixOfAny(_curFldDlm, input, _fldDlms);
    }

    /// <summary>Returns true when <paramref name="input"/> extends a partial record-terminator match.</summary>
    private bool CheckIfRecordEnding(int input)
    {
        return IsPrefixOfAny(_curRecTrm, input, _recTrms);
    }

    // True while a delimiter or terminator is (partially or fully) matched.
    private bool IsTransit { get { return _curFldDlm.Length > 0 || _curRecTrm.Length > 0; } }

    /// <summary>Creates the state for parsing one record with the given parser's settings.</summary>
    /// <param name="parser">Parser supplying quote characters, field delimiters and record terminators.</param>
    /// <exception cref="ArgumentNullException"><paramref name="parser"/> is null.</exception>
    public ParserState(TextFieldParser parser)
    {
        if (parser == null) throw new ArgumentNullException("parser");
        _quoters = parser.Quoters;
        _fldDlms = parser.FieldDelimiters;
        _recTrms = parser.RecordTerminaters;
        // Created up front so the Is* properties are safe before the first GetChar.
        _curFldDlm = new StringBuilder(parser.MaxFieldDelimiterLength);
        _curRecTrm = new StringBuilder(parser.MaxRecordTerminaterLength);
    }

    /// <summary>
    /// Feeds the next input character into the state machine.
    /// </summary>
    /// <param name="input">The character just read, as an int.</param>
    /// <param name="backtrack">
    /// Receives the text of a partial delimiter/terminator match that was abandoned
    /// by this character (it turned out to be field content), or an empty string.
    /// </param>
    /// <returns>
    /// <paramref name="input"/> when it is field content, or -1 when it was
    /// consumed as syntax (a quote, or part of a delimiter/terminator).
    /// </returns>
    public int GetChar(int input, out string backtrack)
    {
        backtrack = String.Empty;
        if (_break)
        {
            // Start of a new field: forget the previous delimiter match.
            _curFldDlm.Length = 0;
            _curRecTrm.Length = 0;
            if (CheckIfQuote(input))
            {
                _break = false;
                _openQ = input;
                _isClosing = false;
                return -1;
            }
        }
        if (IsQuoted)
        {
            if (input == _openQ)
            {
                // First quote: maybe closing (swallow it). A second quote right
                // after it means an escaped quote, which is emitted as content.
                _isClosing = !_isClosing;
                return _isClosing ? -1 : input;
            }
            if (!IsClosing) return input;
            // after the quote has been closed
            _openQ = 0;
            _isClosing = false;
        }
        if (CheckIfFieldEnding(input))
        {
            _curFldDlm.Append((char)input);
        }
        else if (_curFldDlm.Length > 0)
        {
            // The partial delimiter match failed; hand its text back as content.
            backtrack = _curFldDlm.ToString();
            _curFldDlm.Length = 0;
        }
        if (CheckIfRecordEnding(input))
        {
            _curRecTrm.Append((char)input);
        }
        else if (_curRecTrm.Length > 0)
        {
            // The partial terminator match failed; hand its text back as content.
            backtrack = _curRecTrm.ToString();
            _curRecTrm.Length = 0;
        }
        _break = this.IsFieldDelimited || this.IsRecordTerminated;
        return this.IsTransit ? -1 : input;
    }
}
/// <summary>
/// Reads delimited text (CSV by default) from a stream, one record at a time.
/// Quote characters, field delimiters and record terminators are configurable;
/// the defaults are '"', "," and "\r\n".
/// </summary>
class TextFieldParser
{
    private readonly StreamReader _reader;
    private readonly char[] _q;
    private readonly string[] _fldDlm;
    private readonly string[] _rcdTrm;
    // Length of the longest field delimiter / record terminator.
    private readonly int _fldDlmLen = 0;
    private readonly int _rcdTrmLen = 0;
    private readonly bool _trmSpc = false;

    /// <summary>Creates a parser with every setting given explicitly.</summary>
    /// <param name="stream">Stream to read the text from.</param>
    /// <param name="quoters">Characters that may open (and then close) a quoted field.</param>
    /// <param name="fieldDelimiters">Strings that separate fields.</param>
    /// <param name="recordTerminaters">Strings that end a record.</param>
    /// <param name="trimSpaces">When true, every field is trimmed with <see cref="String.Trim()"/>.</param>
    public TextFieldParser(
        Stream stream,
        char[] quoters,
        string[] fieldDelimiters,
        string[] recordTerminaters,
        bool trimSpaces)
    {
        _reader = new StreamReader(stream);
        _q = quoters;
        _fldDlm = fieldDelimiters;
        foreach (string s in _fldDlm)
        {
            if (s.Length > _fldDlmLen)
                _fldDlmLen = s.Length;
        }
        _rcdTrm = recordTerminaters;
        foreach (string s in _rcdTrm)
        {
            if (s.Length > _rcdTrmLen)
                _rcdTrmLen = s.Length;
        }
        _trmSpc = trimSpaces;
    }

    /// <summary>Creates a parser that does not trim fields.</summary>
    public TextFieldParser(Stream stream, char[] quoters,
        string[] fieldDelimiters, string[] recordTerminaters) :
        this(stream, quoters, fieldDelimiters, recordTerminaters, false) { }

    /// <summary>Creates a parser whose records end with "\r\n".</summary>
    public TextFieldParser(Stream stream, char[] quoters, string[] fieldDelimiters) :
        this(stream, quoters, fieldDelimiters, new string[] { "\r\n" }) { }

    /// <summary>Creates a parser whose fields are separated by ",".</summary>
    public TextFieldParser(Stream stream, char[] quoters) :
        this(stream, quoters, new string[] { "," }) { }

    /// <summary>Creates a parser that uses '"' as the quote character.</summary>
    public TextFieldParser(Stream stream) :
        this(stream, new char[] { '"' }) { }

    /// <summary>A copy of the configured quote characters.</summary>
    public char[] Quoters { get { return (char[])_q.Clone(); } }

    /// <summary>A copy of the configured field delimiters.</summary>
    public string[] FieldDelimiters { get { return (string[])_fldDlm.Clone(); } }

    internal int MaxFieldDelimiterLength { get { return _fldDlmLen; } }

    /// <summary>A copy of the configured record terminators.</summary>
    public string[] RecordTerminaters { get { return (string[])_rcdTrm.Clone(); } }

    internal int MaxRecordTerminaterLength { get { return _rcdTrmLen; } }

    /// <summary>True when the underlying reader has reached the end of the stream.</summary>
    public bool EndOfData { get { return _reader.EndOfStream; } }

    // Largest field count seen so far; used as the initial capacity for the next record.
    int _fldCnt = 0;

    /// <summary>
    /// Reads the next record and returns its fields.
    /// </summary>
    /// <returns>
    /// The fields of the record, or null when no character is available. A record
    /// that is ended by the end of the stream instead of a record terminator still
    /// yields its last field.
    /// </returns>
    public string[] ReadFields()
    {
        if (_reader.Peek() == -1) return null;
        ParserState stat = new ParserState(this);
        ArrayList fields = new ArrayList(_fldCnt);
        StringBuilder field = new StringBuilder();
        bool readAny = false;
        bool terminated = false;
        int nextChr;
        while (-1 != (nextChr = _reader.Read()))
        {
            readAny = true;
            string buf;
            int ret = stat.GetChar(nextChr, out buf);
            // buf carries an abandoned partial delimiter match that belongs to the field.
            if (ret != -1) field.Append(buf).Append((char)ret);
            if (stat.IsBreaking)
            {
                fields.Add(FinishField(field));
                field = new StringBuilder();
            }
            if (stat.IsRecordTerminated)
            {
                terminated = true;
                break;
            }
        }
        // The stream ended without a record terminator: the field being built
        // (possibly empty, e.g. after a trailing delimiter) is still part of the
        // record and must not be dropped.
        // NOTE(review): characters held by ParserState as a partial delimiter or
        // terminator match at end of stream are not recoverable from here.
        if (readAny && !terminated)
        {
            fields.Add(FinishField(field));
        }
        if (_fldCnt < fields.Count) _fldCnt = fields.Count;
        return (string[])fields.ToArray(typeof(string));
    }

    /// <summary>Converts the accumulated field text to a string, trimming it when configured.</summary>
    private string FinishField(StringBuilder field)
    {
        string f = field.ToString();
        if (_trmSpc) f = f.Trim();
        return f;
    }

    /// <summary>
    /// Reads every remaining record.
    /// </summary>
    /// <returns>One string array of fields per record, in input order.</returns>
    public string[][] Parse()
    {
        ArrayList records = new ArrayList();
        while (!this.EndOfData)
        {
            records.Add(this.ReadFields());
        }
        // The parameterless ToArray() returns object[], which cannot be cast to
        // string[][]; request the element type explicitly.
        return (string[][])records.ToArray(typeof(string[]));
    }
}
/// <summary>
/// Console driver: parses delimited text from standard input with the default
/// <see cref="TextFieldParser"/> settings and prints the fields of each record.
/// </summary>
class Program
{
    /// <summary>
    /// Reads records until the parser reports end of data, writing every field as
    /// "index =&gt; value"; the index restarts at 1 for each record.
    /// </summary>
    static void Main(string[] args)
    {
        TextFieldParser parser = new TextFieldParser(Console.OpenStandardInput());
        while (!parser.EndOfData)
        {
            string[] columns = parser.ReadFields();
            for (int index = 0; index < columns.Length; index++)
            {
                Console.WriteLine("{0} => {1}", index + 1, columns[index]);
            }
        }
    }
}
|


raynstard
#3389()
Rating1/1=1.00
[ reply ]