RFC 4180対応版 CSVレコードの分解
Posted feedbacks - Scala
寝る前にもいっちょ。今度は自分でパース。
import scala.collection.mutable.ListBuffer
/** Parses CSV text into records of fields using RFC 4180 quoting rules.
  *
  * Records are separated by "\n" or "\r\n" and fields by ",". A separator that
  * appears inside a double-quoted section is part of the field. For a field
  * enclosed in double quotes the enclosing quotes are removed and every doubled
  * quote ("") inside becomes a single quote.
  *
  * Trailing empty fields are kept ("a,b," has three fields), and a single line
  * break at the very end of the input does not create an extra empty record.
  * An unterminated quoted section is kept as one final field/record instead of
  * being dropped.
  *
  * @param s the CSV text
  * @return one array per record, each holding that record's decoded fields;
  *         empty input yields a single record with one empty field
  */
def parseCSV(s:String):Array[Array[String]] = {
  // Splits `text` on the literal `sep`, then glues neighbouring pieces back
  // together (with `sep`) while a quoted section is still open. A piece with an
  // odd number of quotes opens a section when none is open, and closes it otherwise.
  def splitOutsideQuotes(text: String, sep: String): Vector[String] = {
    // limit -1: keep trailing empty strings, which a plain split(sep) discards.
    val pieces = text.split(java.util.regex.Pattern.quote(sep), -1)
    val start = (Vector.empty[String], Vector.empty[String])
    val (complete, open) = pieces.foldLeft(start) { case ((finished, pending), piece) =>
      val oddQuotes = piece.count(_ == '"') % 2 == 1
      if (pending.isEmpty) {
        if (oddQuotes) (finished, Vector(piece)) else (finished :+ piece, pending)
      } else {
        val grown = pending :+ piece
        if (oddQuotes) (finished :+ grown.mkString(sep), Vector.empty[String])
        else (finished, grown)
      }
    }
    // Unterminated quote: keep what was collected rather than losing it.
    if (open.isEmpty) complete else complete :+ open.mkString(sep)
  }

  // Decodes one raw field. The enclosing quotes must be removed BEFORE doubled
  // quotes are collapsed; otherwise `""""` (a single literal quote) decodes to "".
  def unquote(col: String): String =
    if (col.length >= 2 && col.startsWith("\"") && col.endsWith("\""))
      col.substring(1, col.length - 1).replace("\"\"", "\"")
    else
      // Not properly enclosed: stay lenient, collapse doubled quotes and drop a
      // stray leading/trailing quote.
      col.replace("\"\"", "\"").stripPrefix("\"").stripSuffix("\"")

  val lines = splitOutsideQuotes(s, "\n")
  // An empty last piece only arises from a line break terminating the final record.
  val records = if (lines.length > 1 && lines.last.isEmpty) lines.init else lines
  records.map { line =>
    // Drop the "\r" of a CRLF terminator; a "\r\n" inside quotes was already
    // merged back into its field and is left untouched.
    splitOutsideQuotes(line.stripSuffix("\r"), ",").map(unquote).toArray
  }.toArray
}
// Sample input: the second field contains a line break inside quotes and the
// fifth field contains doubled (escaped) quotes.
val data = """"aaa","b
bb","ccc",zzz,"y""Y""y",xxx"""
// Print every field of every parsed record, prefixed with its 1-based position.
for (line <- parseCSV(data)) {
  line.zipWithIndex.foreach { case (col, idx) =>
    println(s"${idx + 1} => $col")
  }
}
|
Scala 2.6.0-RC1でscala.util.parsing.combinatorパッケージが標準ライブラリになりました。 ということでScalaでパーサコンビネータ。ほとんど資料がないので手探りですが。
import scala.util.parsing.combinator.{Parsers, ImplicitConversions, ~, mkTilde}
import scala.util.parsing.input.CharArrayReader
import Character.isISOControl
/** CSV parsing with parser combinators: a small result model (Field, Record,
  * File) plus a character-level grammar class.
  */
object CSVParser {
// Common supertype of the result case classes below.
trait Base
// One CSV field; toString returns the wrapped text unchanged.
case class Field(s:String) extends Base {
override def toString = s
}
// One record: wraps the list of its fields.
case class Record(fields: List[Field]) extends Base
// A whole document: wraps the list of its records.
case class File(records :List[Record]) extends Base
// Concatenates the string forms of the given values with no separator.
def mkString(cs :List[Any]) = cs.mkString("")
// The grammar itself; input elements are single characters.
class CSVParser extends Parsers {
type Elem = Char
// True for a character that is not a comma, newline, double quote,
// or ISO control character (used for unquoted field content).
def notMeta(c:Elem) = c!=',' && c!='\n' && c!='"' && !isISOControl(c)
// A file is built from `record` and '\n', wrapped in File.
// NOTE(review): `p.*(sep)` is presumably "p repeated, separated by sep" in the
// 2.6-era combinator API this was written for; confirm against that version.
lazy val file = record.*('\n') ^^ File
// A record is built from the three field alternatives (tried in the order
// written) and ',', wrapped in Record. Same caveat about `.*(sep)` as above.
lazy val record = (field|quotedField|nullableField).*(',') ^^ Record
// Unquoted, non-empty field: one or more `chars`, joined into the Field text.
lazy val field = chars.+ ^^ {cs => Field(mkString(cs))}
// Zero or more `chars`; the matched characters are ignored and the result is
// always an empty Field.
lazy val nullableField = chars.* ^^ {cs => Field("")}
// Quoted field: a double quote, any run of non-quote characters or doubled
// quotes, then a double quote.
// NOTE(review): what `cs` holds here (in particular whether the enclosing
// quotes are included) depends on the old API's `~` and implicit conversions;
// confirm before relying on the resulting Field text.
lazy val quotedField = '"' ~ (charsInQuote|quoteInQuote).* ~ '"' ^^ {cs => Field(mkString(cs))}
// Any single character other than a double quote.
lazy val charsInQuote = elem("chars in field", _!='"')
// Two consecutive `quote` matches, mapped to a single '"' character.
lazy val quoteInQuote = repN(2, quote) ^^ {cs => '"'}
// A single double-quote character.
// NOTE(review): the `^^ success` mapping is specific to the old API; confirm
// what value it produces.
lazy val quote = '"' ^^ success
// Any single character accepted by notMeta.
lazy val chars = elem("chars", notMeta)
}
}
// Sample CSV text; trim removes the line breaks surrounding the literal.
val data = """
"aaa","b
bb","ccc",zzz,"y""Y""y",xxx
""".trim
// Run the `file` parser over the characters of `data` and, on the mapped
// result, print each record's fields prefixed with a 1-based position.
(new CSVParser.CSVParser).file(new CharArrayReader(data.toCharArray)).map { parsed =>
  parsed.records.map { rec =>
    rec.fields.zipWithIndex.foreach { case (fld, idx) =>
      println(s"${idx + 1} => $fld")
    }
  }
}
|



raynstard
#3389()
Rating1/1=1.00
[ reply ]