Last active
January 19, 2018 01:16
-
-
Save whosaysni/93d4a2771e45255be0d6cb8a8f913ee0 to your computer and use it in GitHub Desktop.
マスクしたSQLのハッシュ値を計算する
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # coding: utf-8 | |
| from hashlib import md5 | |
| from sqlparse import parse | |
| from sqlparse.exceptions import SQLParseError | |
| from sqlparse.filters import StripCommentsFilter, StripWhitespaceFilter | |
| from sqlparse.tokens import Other, Literal | |
| from logging import getLogger | |
| logger = getLogger('SQLGramHasher') | |
| class SQLGramHasher(object): | |
| """SQL文のマスク済みハッシュ値計算器 | |
| このクラスは、SQLクエリからリテラルの値をマスクして整形し、 | |
| 整形済みのクエリのハッシュ値を計算する。 | |
| そのため、構文が同じでパラメタが異なるクエリは同じハッシュ値を | |
| 返す。 | |
| """ | |
| DEFAULT_TERM_TTYPES = [Literal, Other] | |
| DEFAULT_FILTERS = [StripWhitespaceFilter(), StripCommentsFilter()] | |
| def __init__(self, term_ttypes=None, filters=None, hash_algorithm=md5): | |
| """コンストラクタ | |
| :param list[Token] term_ttypes: マスク対象のトークン | |
| :param list filters: sqlparse の提供するクエリフィルタの列 | |
| :param callable hash_algorithm: ハッシュアルゴリズム | |
| """ | |
| if term_ttypes is None: | |
| term_ttypes = self.DEFAULT_TERM_TTYPES | |
| if filters is None: | |
| filters = self.DEFAULT_FILTERS | |
| self.term_ttypes = term_ttypes | |
| self.filters = filters | |
| self.hash_algorithm = hash_algorithm | |
| def hash(self, query): | |
| """ハッシュ値を計算する | |
| :param str query: 計算対象のクエリ | |
| :return: ハッシュ値 (hexstring) | |
| :rtype: str | |
| """ | |
| try: | |
| hasher = self.hash_algorithm() | |
| parsed = parse(query) | |
| for stmt in parsed: | |
| for filter_ in self.filters: | |
| stmt = filter_.process(stmt) | |
| for token in stmt.flatten(): | |
| ttype, value = token.ttype, token.value | |
| if any(ttype in tt for tt in self.term_ttypes): | |
| token.value = '' # 'MASKED' | |
| hasher.update(str(stmt).encode('utf-8')) | |
| except SQLParseError as exc: | |
| logger.debug('SQLParseError, fallback to raw sql hash: %s', exc) | |
| hasher = self.hash_algorithm() | |
| hasher.update(query.encode('utf8')) | |
| return hasher.hexdigest() | |
| if __name__ == '__main__': | |
| Q = (u"select foo from (select * from hoge) as bar " | |
| u"inner join baz on baz.qux=bar.qux where bar.qux='おっふ' " | |
| u"and id like '%foo' \n\n\t \tlimit 10") | |
| R = Q.replace('asdf', 'pqrs') | |
| S = u"update foo set bar='asdf' and qux=12 where foo.qux='おっふ'" | |
| h = SQLGramHasher() | |
| print(h.hash(Q)) | |
| print(h.hash(R)) | |
| print(h.hash(S)) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment