Skip to content

Instantly share code, notes, and snippets.

@whosaysni
Last active January 19, 2018 01:16
Show Gist options
  • Select an option

  • Save whosaysni/93d4a2771e45255be0d6cb8a8f913ee0 to your computer and use it in GitHub Desktop.

Select an option

Save whosaysni/93d4a2771e45255be0d6cb8a8f913ee0 to your computer and use it in GitHub Desktop.
マスクしたSQLのハッシュ値を計算する
# coding: utf-8
from hashlib import md5
from sqlparse import parse
from sqlparse.exceptions import SQLParseError
from sqlparse.filters import StripCommentsFilter, StripWhitespaceFilter
from sqlparse.tokens import Other, Literal
from logging import getLogger
logger = getLogger('SQLGramHasher')
class SQLGramHasher(object):
"""SQL文のマスク済みハッシュ値計算器
このクラスは、SQLクエリからリテラルの値をマスクして整形し、
整形済みのクエリのハッシュ値を計算する。
そのため、構文が同じでパラメタが異なるクエリは同じハッシュ値を
返す。
"""
DEFAULT_TERM_TTYPES = [Literal, Other]
DEFAULT_FILTERS = [StripWhitespaceFilter(), StripCommentsFilter()]
def __init__(self, term_ttypes=None, filters=None, hash_algorithm=md5):
"""コンストラクタ
:param list[Token] term_ttypes: マスク対象のトークン
:param list filters: sqlparse の提供するクエリフィルタの列
:param callable hash_algorithm: ハッシュアルゴリズム
"""
if term_ttypes is None:
term_ttypes = self.DEFAULT_TERM_TTYPES
if filters is None:
filters = self.DEFAULT_FILTERS
self.term_ttypes = term_ttypes
self.filters = filters
self.hash_algorithm = hash_algorithm
def hash(self, query):
"""ハッシュ値を計算する
:param str query: 計算対象のクエリ
:return: ハッシュ値 (hexstring)
:rtype: str
"""
try:
hasher = self.hash_algorithm()
parsed = parse(query)
for stmt in parsed:
for filter_ in self.filters:
stmt = filter_.process(stmt)
for token in stmt.flatten():
ttype, value = token.ttype, token.value
if any(ttype in tt for tt in self.term_ttypes):
token.value = '' # 'MASKED'
hasher.update(str(stmt).encode('utf-8'))
except SQLParseError as exc:
logger.debug('SQLParseError, fallback to raw sql hash: %s', exc)
hasher = self.hash_algorithm()
hasher.update(query.encode('utf8'))
return hasher.hexdigest()
if __name__ == '__main__':
Q = (u"select foo from (select * from hoge) as bar "
u"inner join baz on baz.qux=bar.qux where bar.qux='おっふ' "
u"and id like '%foo' \n\n\t \tlimit 10")
R = Q.replace('asdf', 'pqrs')
S = u"update foo set bar='asdf' and qux=12 where foo.qux='おっふ'"
h = SQLGramHasher()
print(h.hash(Q))
print(h.hash(R))
print(h.hash(S))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment