Created
April 24, 2017 03:46
-
-
Save justinwhite/e86fd35ae6a212a3b2b8f4944c6dabea to your computer and use it in GitHub Desktop.
Calculate a pairwise difference matrix between files. Used to see which student submissions are line-for-line copies of their peers' work.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Write a pairwise similarity matrix for the files named on the command line.

Usage: python <script> FILE [FILE ...]

The result is written to ``diff_report.csv`` in the current directory. The
header row and the first column hold the file names; each cell holds the
``difflib.SequenceMatcher`` ratio of the row file against the column file,
rounded to two decimals (1.0 means identical text). Cells where row and
column are the same path hold '-'.

Targets Python 3: the CSV is opened in text mode with ``newline=''`` and rows
are built as real lists.
"""
import csv
import difflib
import sys

output = 'diff_report.csv'
files = sys.argv[1:]


def _read(path):
    """Return the text of *path*, closing the file handle afterwards."""
    # Undecodable bytes are replaced rather than raising, so a single oddly
    # encoded submission does not abort the whole report.
    with open(path, encoding='utf-8', errors='replace') as handle:
        return handle.read()


def _similarity(left, right):
    """Return the SequenceMatcher ratio of two strings, rounded to 2 places."""
    return round(difflib.SequenceMatcher(None, left, right).ratio(), 2)


def go(f, j):
    """Compare the files at paths *f* and *j*.

    Returns '-' when both arguments are the same path (the matrix diagonal),
    otherwise the similarity ratio of their contents as a float in [0, 1].
    """
    if f == j:
        return '-'
    return _similarity(_read(f), _read(j))


def write_report(paths, destination=output):
    """Write the similarity matrix for *paths* to the CSV file *destination*.

    Each file is read once up front instead of once per matrix cell.
    """
    paths = list(paths)
    contents = {path: _read(path) for path in paths}

    # newline='' lets the csv module control line endings itself.
    with open(destination, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['-'] + paths)
        for row_path in paths:
            # The ratio is computed per ordered pair; SequenceMatcher does not
            # guarantee ratio(a, b) == ratio(b, a), so no symmetry shortcut.
            cells = [
                '-' if row_path == col_path
                else _similarity(contents[row_path], contents[col_path])
                for col_path in paths
            ]
            writer.writerow([row_path] + cells)


if __name__ == '__main__':
    write_report(files)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment