Comment detail

文字列リストをTRIE Optimizeされた正規表現に (Nested Flatten)
連想配列を使って普通に木を作る方法で。

標準ライブラリ(std.regexp)では後方参照なしのグルーピングがサポートされないので、普通の括弧にしました。

program(ist(ic)?|m(a(r|ti(c(ally)?|st))?|er))?
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import std.stdio, std.string, std.utf;

class Trie {
    private struct Node {
        Node*[dchar] nodes;
        bool matchTail;
        
        const string toString() {
            if(!nodes.length) return "";
            string[] patterns;
            foreach(c, node; this.nodes) {
                patterns ~= quotemeta(toUTF8([c])) ~ node.toString();
            }
            string pattern = patterns.join("|");
            if(nodes.length == 1) {
                return matchTail ? "(" ~ pattern ~ ")?" : pattern;
            } else {
                return "(" ~ pattern ~ ")" ~ (matchTail ? "?" : "");
            }
        }
    }
    
    private Node tree;
    
    this() { }
    
    this(const(string)[] words) {
        foreach(word; words) {
            this.addWord(word);
        }
    }
    
    void addWord(string word) {
        auto node = &this.tree;
        foreach(dchar c; word) {
            if(auto p = c in node.nodes) {
                node = *p;
            } else {
                node = node.nodes[c] = new Node;
            }
        }
        node.matchTail = true;
    }
    
    const string toString() {
        return tree.toString();
    }
}

private string quotemeta(string str) {
    string result;
    foreach(c; str) {
        switch(c) {
            case '.': case '*': case '+': case '?':
            case '^': case '$': case '{': case '}':
            case '[': case ']': case '(': case ')':
            case '|':
                result ~= ['\\', c];
                break;
            default:
                result ~= c;
        }
    }
    return result;
}

void main(){
    auto words = `program
programist
programistic
programma
programmar
programmatic
programmatically
programmatist
programmer`.splitlines();
    writeln(new Trie(words));
}

Index

Feed

Other

Link

Pathtraq

loading...