⚠️ Large suppression files
How to handle massive suppression files appropriately.

The script below compares a large input file against a suppression list of MD5 checksums. It loads the checksums from the second file into a set, then streams the first file one line at a time, writing every line whose MD5 digest appears in the set to matching_lines.txt. Because only the fixed-size hashes are held in memory, the input file itself can be arbitrarily large.
import hashlib
import sys

def main():
    # Get the filenames from the command line arguments.
    if len(sys.argv) != 3:
        sys.exit(f"usage: {sys.argv[0]} INPUT_FILE MD5SUM_FILE")
    file1 = sys.argv[1]
    file2 = sys.argv[2]
    output_file = "matching_lines.txt"

    # Read the md5sums from file2 and store them in a set for O(1) membership tests.
    md5sums = set()
    with open(file2, "r") as f:
        for line in f:
            md5sums.add(line.strip())

    # Stream file1 line by line and compare each line's md5sum to the set.
    # Note: the trailing newline is part of the hashed bytes, so the digests
    # in file2 must have been computed over the full line, newline included.
    with open(file1, "r") as f, open(output_file, "w") as out:
        for line in f:
            md5 = hashlib.md5(line.encode()).hexdigest()
            if md5 in md5sums:
                out.write(line)

if __name__ == "__main__":
    main()
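For a quick end-to-end test, a checksum file in the expected format (one hex digest per line) can be produced by hashing every line of the suppression list the same way the script does, newline included. This is a sketch, not part of the original page; make_md5_list and the file arguments are illustrative names:

import hashlib
import sys

def make_md5_list(lines_path: str, out_path: str) -> None:
    # Hash each line, trailing newline included, so the digests match
    # what the matching script computes for identical lines.
    with open(lines_path, "r") as f, open(out_path, "w") as out:
        for line in f:
            out.write(hashlib.md5(line.encode()).hexdigest() + "\n")

if __name__ == "__main__":
    make_md5_list(sys.argv[1], sys.argv[2])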
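If the suppression list itself runs to tens of millions of entries, memory can be roughly halved by storing the raw 16-byte digests instead of 32-character hex strings. A minimal variant under that assumption (load_digests and filter_lines are hypothetical helpers, not from the original script):

import hashlib
import sys

def load_digests(md5_path: str) -> set:
    # Store raw 16-byte digests rather than 32-character hex strings;
    # the shorter bytes objects halve the per-entry payload.
    digests = set()
    with open(md5_path, "r") as f:
        for line in f:
            digests.add(bytes.fromhex(line.strip()))
    return digests

def filter_lines(input_path: str, digests: set, out_path: str) -> None:
    # Compare binary digests directly, skipping hex encoding per line.
    with open(input_path, "r") as f, open(out_path, "w") as out:
        for line in f:
            if hashlib.md5(line.encode()).digest() in digests:
                out.write(line)

if __name__ == "__main__":
    filter_lines(sys.argv[1], load_digests(sys.argv[2]), "matching_lines.txt")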