challenge 一部のHTMLタグを通すフィルタ

ユーザが入力した文字列から、一部のタグだけを許可して他をエスケープするコードを書いてください。要件は次のようになります。
  • 通すタグはAとBRとSTRONGのみ。大文字小文字は区別しない。
  • それ以外のタグとして意味を持ちうる文字列は<を&lt;に変換することで無効化する(削除するのではない。>は変換してもしなくてもよい)
  • Aタグのhrefとname以外の属性は削除する。BRやSTRONGの属性はすべて削除する。

このお題はperezvonさんの提案を元にしています。ありがとうございました。 ただ、いきなりだと難しいかと思ったので、肝の部分以外を先に出題しました。このお題は続編で徐々に難しくなっていきます。

追記:属性に<や>が含まれてしまうケースに漏れのある解答が多いようなのでテストケースを追加します。
これは「この出力なら十分」という意味です。この出力の通りでなければいけないという意味ではありません。

<script foo="<script>alert('bar')</script>">alert('foo')</script>
&lt;script foo="&lt;script&gt;alert('bar')&lt;/script&gt;"&gt;alert('foo')&lt;/script&gt;


<script foo="<a href='link'>link</a>">alert('foo')</script>
&lt;script foo="&lt;a href='link'&gt;link&lt;/a&gt;"&gt;alert('foo')&lt;/script&gt;

<a href='www.g>oogle.com'>link</a>

<a href="./www.g%3Eoogle.com">link</a>

Posted feedbacks - Common Lisp

ぴったりなライブラリはどこかにあると思うのですが、
見付けられませんでした…。
とりあえず、みつけられたurl-rewiteと
allegroのparse-htmlを使って作成してみました。
長くてぐちゃぐちゃですいません…。
嗚呼…。

ライブラリ:
http://opensource.franz.com/xmlutils/xmlutils-dist/phtml.htm
http://weitz.de/url-rewrite/
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#+allegro (progn (require :phtml) (use-package :net.html.parser))

(defun html-filter (string &optional in-prop-p)
  (let ((form (if in-prop-p 
		  (parse-html string)
		  (sanitize-html (parse-html string)))))
    (apply #'concatenate 'string (build-html form in-prop-p))))

(defun sanitize-html (form)
  (mapcar (lambda (item)
	    (if (atom item)
		item
		(let ((keys `(,(car item) ,(and (consp (car item)) (caar item)))))
		  (cond ((member :a keys)
			 `(,(car item) ,@(sanitize-html (cdr item))))
			((member :strong keys)
			 `(:strong ,@(sanitize-html (cdr item))))
			((member :br keys) :br)
			('T item)))))
	  form))

(defun build-html (form &optional in-prop-p)
  (if (atom form) 
      form
      (cond ((and (atom (car form)) (eq :br (car form)))
	     `(,(br in-prop-p) ,@(build-html (cdr form) in-prop-p)))
	    ((keywordp (car form)) (build-other form in-prop-p))
	    ((consp (car form))
	     `(,(let* ((top (car form))
		       (keys `(,(car top) ,(and (consp (car top)) (caar top)))))
		      (if in-prop-p
			  (cond ((member-if #'keywordp keys) (build-other top in-prop-p))
				('T top))
			  (cond (member :a keys) (build-a top))
				((member :strong keys) (build-strong top))
				((member-if #'keywordp keys) (build-other top in-prop-p))
				('T top))))
		,@(build-html (cdr form) in-prop-p)))
	    ('T `(,(build-html (car form) in-prop-p)
		   ,@(build-html (cdr form) in-prop-p))))))

(defun br (&optional in-prop-p)
  (if in-prop-p "&lt;br /&gt;" "<br />"))

(defun build-strong (form)
  (format nil "<strong>~{~A~}</strong>" (build-html (cdr form))))

(defun build-a (form)
  (let ((tag (car form))
	(body (build-html (cdr form))))
    (if (and (consp (car form)) 
	     (member (second tag) '(:href :name)))
	(format nil "<a ~(~A~)='~A'>~{~A~}</a> " 
		(second tag) 
		(put-dot-slash-if-need (url-rewrite:url-encode (third tag))) body)
	(format nil "<a>~{~A~}</a> " (build-html (cdr form))))))

(defun put-dot-slash-if-need (str)
  (let ((s (cl-ppcre:create-scanner "^[Hh][Tt][Tt][Pp][Ss]*://")))
    (if (cl-ppcre:scan s str)
	str
	(concatenate 'string "./" str))))

(defun build-other (form &optional in-prop-p)
  (let ((tag (car form))
	(body (build-html (cdr form))))
    (if (consp tag)
	(format nil "~(&lt;~A~{ ~A~}~)&gt;~{~A~}~0@*&lt;/~(~A~)&gt; " 
		(car tag) (prop-maker (cdr tag)) body)
	(if (and in-prop-p (eq :br tag))
	    "&lt;br&gt;"
	    (format nil "~(&lt;~A&gt;~)~{~A~}~0@*&lt;/~(~A~)&gt;"
		    tag (build-html (cdr form)))))))

(defun prop-maker (lst)
  (do ((l lst (cddr l))
       result)
      ((endp l) (nreverse result))
  (push (format nil "~A=\"~A\"" 
		(car l) (html-filter (cadr l) t)) result)))

html-parseを使用しました。確認はclispで行ないました。
HTMLの属性値の変換は、< > & " のHTML escapeだけを行なってます。

(html-filter 
"<a href=\"www.google.com\">link</a> <blink>and</blink>\n<strong onClick='alert(\"NG\")'>click<br>me!</strong>")
  => "<a href=\"www.google.com\">link</a>&lt;blink&gt;and&lt;/blink&gt;\n<strong>click<br/>me!</strong>" 
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
(asdf:oos 'asdf:load-op :cl-html-parse)

(defconstant *enable-tags* '(("a" "href" "name")
                             ("br")
                             ("strong")))

(defun html-filter (html)
  (html-element-filter (html-parse:parse-html html)))

(defun html-element-filter (elements)
  (cond 
    ((stringp elements)
     (convert-not-tag elements))
    ((symbolp elements)
     (convert-tag elements nil))
    (t

      (let* ((element-type (cond ((and (listp (car elements))
                                       (symbolp (caar elements))
                                       (not (null (cadar elements)))
                                       (symbolp (cadar elements)))
                                  :tag-attr)
                                 ((symbolp (car elements)) :tag)
                                 (t :not-tag)))
             (tag (case element-type
                    (:tag-attr (caar elements))
                    (:tag      (car elements))))
             (attr-lst (if (equal element-type :tag-attr)
                         (cdar elements)))
             (elems (case element-type
                      (:tag-attr (cdr elements))
                      (:tag      (cdr elements))
                      (:not-tag  elements))))
        (case element-type
          (:tag-attr (convert-tag-attr tag attr-lst elems))
          (:tag      (convert-tag tag elems))
          (:not-tag  (string-join (mapcar #'html-element-filter elems))))))))

(defun convert-tag (tag elems)
  (let* ((enable-tag (assoc (string-downcase tag) *enable-tags* :test #'equal))
         (< (if enable-tag "<" "&lt;"))
         (> (if enable-tag ">" "&gt;")))
    (cond ((null elems)
           (format nil "~a~(~a~)/~a" < tag >))
          (t
            (string-join
              (format nil "~a~(~a~)~a" < tag >)
              (html-element-filter elems)
              (format nil "~a/~(~a~)~a" < tag >))))))

(defun convert-tag-attr (tag attr-lst elems)
  (let* ((enable-tag (assoc (string-downcase tag) *enable-tags* :test #'equal))
         (< (if enable-tag "<" "&lt;"))
         (> (if enable-tag ">" "&gt;"))
         (attr (loop for e in attr-lst by #'cddr
                     when (or (null enable-tag)
                              (member (string-downcase e) (cdr enable-tag)
                                      :test #'equal))
                     collect (format nil "~(~a~)=\"~a\""
                                     e
                                     (escape-html (getf attr-lst e))))))
    (cond 
      (elems
        (string-join
          (if attr
            (format nil "~a~(~a~) ~{~a~^, ~}~a" < tag attr >)
            (format nil "~a~(~a~)~a" < tag >))
          (html-element-filter elems)
          (format nil "~a/~(~a~)~a" < tag >)))
      (t
        (if attr
          (format nil "~a~(~a~) ~{~a~^, ~}/~a" < tag attr >)
          (format nil "~a~(~a~)/~a" < tag >))))))

(defun convert-not-tag (element)
  (format nil "~a" (escape-html element)))

(defun string-join (&rest lst)
  (let ((l (if (listp (car lst))
             (car lst)
             lst)))
    (apply #'concatenate (cons 'string l))))

(defun escape-html (str)
  (let ((in (make-string-input-stream str))
        (out (make-string-output-stream)))
    (loop for c = (read-char in nil nil)
          while c
          do (case c
               (#\< (write-string "&lt;" out))
               (#\> (write-string "&gt;" out))
               (#\& (write-string "&amp;" out))
               (#\" (write-string "&quot;" out))
               (t (write-char c out)))
          finally (return (get-output-stream-string out)))))

Index

Feed

Other

Link

Pathtraq

loading...