一部のHTMLタグを通すフィルタ
Posted feedbacks - Common Lisp
ぴったりなライブラリはどこかにあると思うのですが、 見付けられませんでした…。 とりあえず、みつけられたurl-rewiteと allegroのparse-htmlを使って作成してみました。 長くてぐちゃぐちゃですいません…。 嗚呼…。 ライブラリ: http://opensource.franz.com/xmlutils/xmlutils-dist/phtml.htm http://weitz.de/url-rewrite/
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 | #+allegro (progn (require :phtml) (use-package :net.html.parser))
(defun html-filter (string &optional in-prop-p)
(let ((form (if in-prop-p
(parse-html string)
(sanitize-html (parse-html string)))))
(apply #'concatenate 'string (build-html form in-prop-p))))
(defun sanitize-html (form)
(mapcar (lambda (item)
(if (atom item)
item
(let ((keys `(,(car item) ,(and (consp (car item)) (caar item)))))
(cond ((member :a keys)
`(,(car item) ,@(sanitize-html (cdr item))))
((member :strong keys)
`(:strong ,@(sanitize-html (cdr item))))
((member :br keys) :br)
('T item)))))
form))
(defun build-html (form &optional in-prop-p)
(if (atom form)
form
(cond ((and (atom (car form)) (eq :br (car form)))
`(,(br in-prop-p) ,@(build-html (cdr form) in-prop-p)))
((keywordp (car form)) (build-other form in-prop-p))
((consp (car form))
`(,(let* ((top (car form))
(keys `(,(car top) ,(and (consp (car top)) (caar top)))))
(if in-prop-p
(cond ((member-if #'keywordp keys) (build-other top in-prop-p))
('T top))
(cond (member :a keys) (build-a top))
((member :strong keys) (build-strong top))
((member-if #'keywordp keys) (build-other top in-prop-p))
('T top))))
,@(build-html (cdr form) in-prop-p)))
('T `(,(build-html (car form) in-prop-p)
,@(build-html (cdr form) in-prop-p))))))
(defun br (&optional in-prop-p)
(if in-prop-p "<br />" "<br />"))
(defun build-strong (form)
(format nil "<strong>~{~A~}</strong>" (build-html (cdr form))))
(defun build-a (form)
(let ((tag (car form))
(body (build-html (cdr form))))
(if (and (consp (car form))
(member (second tag) '(:href :name)))
(format nil "<a ~(~A~)='~A'>~{~A~}</a> "
(second tag)
(put-dot-slash-if-need (url-rewrite:url-encode (third tag))) body)
(format nil "<a>~{~A~}</a> " (build-html (cdr form))))))
(defun put-dot-slash-if-need (str)
(let ((s (cl-ppcre:create-scanner "^[Hh][Tt][Tt][Pp][Ss]*://")))
(if (cl-ppcre:scan s str)
str
(concatenate 'string "./" str))))
(defun build-other (form &optional in-prop-p)
(let ((tag (car form))
(body (build-html (cdr form))))
(if (consp tag)
(format nil "~(<~A~{ ~A~}~)>~{~A~}~0@*</~(~A~)> "
(car tag) (prop-maker (cdr tag)) body)
(if (and in-prop-p (eq :br tag))
"<br>"
(format nil "~(<~A>~)~{~A~}~0@*</~(~A~)>"
tag (build-html (cdr form)))))))
(defun prop-maker (lst)
(do ((l lst (cddr l))
result)
((endp l) (nreverse result))
(push (format nil "~A=\"~A\""
(car l) (html-filter (cadr l) t)) result)))
|
html-parseを使用しました。確認はclispで行ないました。
HTMLの属性値の変換は、< > & " のHTML escapeだけを行なってます。
(html-filter
"<a href=\"www.google.com\">link</a> <blink>and</blink>\n<strong onClick='alert(\"NG\")'>click<br>me!</strong>")
=> "<a href=\"www.google.com\">link</a><blink>and</blink>\n<strong>click<br/>me!</strong>"
HTMLの属性値の変換は、< > & " のHTML escapeだけを行なってます。
(html-filter
"<a href=\"www.google.com\">link</a> <blink>and</blink>\n<strong onClick='alert(\"NG\")'>click<br>me!</strong>")
=> "<a href=\"www.google.com\">link</a><blink>and</blink>\n<strong>click<br/>me!</strong>"
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 | (asdf:oos 'asdf:load-op :cl-html-parse)
(defconstant *enable-tags* '(("a" "href" "name")
("br")
("strong")))
(defun html-filter (html)
(html-element-filter (html-parse:parse-html html)))
(defun html-element-filter (elements)
(cond
((stringp elements)
(convert-not-tag elements))
((symbolp elements)
(convert-tag elements nil))
(t
(let* ((element-type (cond ((and (listp (car elements))
(symbolp (caar elements))
(not (null (cadar elements)))
(symbolp (cadar elements)))
:tag-attr)
((symbolp (car elements)) :tag)
(t :not-tag)))
(tag (case element-type
(:tag-attr (caar elements))
(:tag (car elements))))
(attr-lst (if (equal element-type :tag-attr)
(cdar elements)))
(elems (case element-type
(:tag-attr (cdr elements))
(:tag (cdr elements))
(:not-tag elements))))
(case element-type
(:tag-attr (convert-tag-attr tag attr-lst elems))
(:tag (convert-tag tag elems))
(:not-tag (string-join (mapcar #'html-element-filter elems))))))))
(defun convert-tag (tag elems)
(let* ((enable-tag (assoc (string-downcase tag) *enable-tags* :test #'equal))
(< (if enable-tag "<" "<"))
(> (if enable-tag ">" ">")))
(cond ((null elems)
(format nil "~a~(~a~)/~a" < tag >))
(t
(string-join
(format nil "~a~(~a~)~a" < tag >)
(html-element-filter elems)
(format nil "~a/~(~a~)~a" < tag >))))))
(defun convert-tag-attr (tag attr-lst elems)
(let* ((enable-tag (assoc (string-downcase tag) *enable-tags* :test #'equal))
(< (if enable-tag "<" "<"))
(> (if enable-tag ">" ">"))
(attr (loop for e in attr-lst by #'cddr
when (or (null enable-tag)
(member (string-downcase e) (cdr enable-tag)
:test #'equal))
collect (format nil "~(~a~)=\"~a\""
e
(escape-html (getf attr-lst e))))))
(cond
(elems
(string-join
(if attr
(format nil "~a~(~a~) ~{~a~^, ~}~a" < tag attr >)
(format nil "~a~(~a~)~a" < tag >))
(html-element-filter elems)
(format nil "~a/~(~a~)~a" < tag >)))
(t
(if attr
(format nil "~a~(~a~) ~{~a~^, ~}/~a" < tag attr >)
(format nil "~a~(~a~)/~a" < tag >))))))
(defun convert-not-tag (element)
(format nil "~a" (escape-html element)))
(defun string-join (&rest lst)
(let ((l (if (listp (car lst))
(car lst)
lst)))
(apply #'concatenate (cons 'string l))))
(defun escape-html (str)
(let ((in (make-string-input-stream str))
(out (make-string-output-stream)))
(loop for c = (read-char in nil nil)
while c
do (case c
(#\< (write-string "<" out))
(#\> (write-string ">" out))
(#\& (write-string "&" out))
(#\" (write-string """ out))
(t (write-char c out)))
finally (return (get-output-stream-string out)))))
|


にしお
#3410()
Rating0/0=0.00
このお題はperezvonさんの提案を元にしています。ありがとうございました。 ただ、いきなりだと難しいかと思ったので、肝の部分以外を先に出題しました。このお題は続編で徐々に難しくなっていきます。
追記:属性に<や>が含まれてしまうケースに漏れのある解答が多いようなのでテストケースを追加します。 これは「この出力なら十分」という意味です。この出力の通りでなければいけないという意味ではありません。 <script foo="<script>alert('bar')</script>">alert('foo')</script> <script foo="<script>alert('bar')</script>">alert('foo')</script> <script foo="<a href='link'>link</a>">alert('foo')</script> <script foo="<a href='link'>link</a>">alert('foo')</script> <a href='www.g>oogle.com'>link</a> <a href="./www.g%3Eoogle.com">link</a>[ reply ]