import sys
def get_chr_tree(strs, before=""):
index = len(before)
s = set([str[index] for str in strs if len(str) > index and str[:index]==before])
l = [get_chr_tree(strs, before+c) for c in s]
return [before[-1]] + l if before else l[0]
def merge_chr(chr_tree):
try:
if len(chr_tree) == 2:
return merge_chr([chr_tree[0]+chr_tree[1][0]] + chr_tree[1][1:])
else: return [chr_tree[0]] + [merge_chr(tree) for tree in chr_tree[1:]]
except IndexError:
return chr_tree
def to_regexp(chr_tree):
def make_regexp(chr_tree):
if len(chr_tree) == 1: return chr_tree[0]
else:
hatena = "?" if [True for l in chr_tree[1:] if "\n" in l] else ""
try:
inner = "".join(["|" + make_regexp(tree) for tree in chr_tree[1:]])
except IndexError:
print "IndexError",chr_tree
inner = ""
return chr_tree[0] + "(?:" + inner + ")" + hatena
return "(?-xism:" + make_regexp(chr_tree).replace("|\n", "").replace("\n", "").replace(":|", ":") + ")"
words = []
for line in sys.stdin: words.append(line)
chr_tree = get_chr_tree(words)
merged_chr_tree = merge_chr(chr_tree)
print to_regexp(merged_chr_tree) # => (?-xism:program(?:m(?:a(?:ti(?:c(?:ally)?|st)|r)?|er)|ist(?:ic)?)?)
greentea #4207() [ Other ] Rating0/0=0.00
1文字ずつのツリーを作って、子を1つしかもたない親に子をくっつけて、参考ページにあるようなTrieツリーにしました。
import sys def get_chr_tree(strs, before=""): index = len(before) s = set([str[index] for str in strs if len(str) > index and str[:index]==before]) l = [get_chr_tree(strs, before+c) for c in s] return [before[-1]] + l if before else l[0] def merge_chr(chr_tree): try: if len(chr_tree) == 2: return merge_chr([chr_tree[0]+chr_tree[1][0]] + chr_tree[1][1:]) else: return [chr_tree[0]] + [merge_chr(tree) for tree in chr_tree[1:]] except IndexError: return chr_tree def to_regexp(chr_tree): def make_regexp(chr_tree): if len(chr_tree) == 1: return chr_tree[0] else: hatena = "?" if [True for l in chr_tree[1:] if "\n" in l] else "" try: inner = "".join(["|" + make_regexp(tree) for tree in chr_tree[1:]]) except IndexError: print "IndexError",chr_tree inner = "" return chr_tree[0] + "(?:" + inner + ")" + hatena return "(?-xism:" + make_regexp(chr_tree).replace("|\n", "").replace("\n", "").replace(":|", ":") + ")" words = [] for line in sys.stdin: words.append(line) chr_tree = get_chr_tree(words) merged_chr_tree = merge_chr(chr_tree) print to_regexp(merged_chr_tree) # => (?-xism:program(?:m(?:a(?:ti(?:c(?:ally)?|st)|r)?|er)|ist(?:ic)?)?)Rating0/0=0.00-0+
1 reply [ reply ]