Commit 19e6bb25 authored by Joel E. Denny
Browse files

[lit] Clean up internal diff's encoding handling

As suggested by rnk at D67643#1673043, instead of reading files
multiple times until an appropriate encoding is found, read them once
as binary, and then try to decode what was read.

For Python >= 3.5, don't fail when attempting to decode the
`diff_bytes` output in order to print it: bytes that cannot be decoded
are now printed as backslash escapes (`errors="backslashreplace"`).

Finally, add some tests for encoding handling.

Reviewed By: rnk

Differential Revision: https://reviews.llvm.org/D68664

llvm-svn: 374389
parent df35ec82
Loading
Loading
Loading
Loading
+20 −31
Original line number Diff line number Diff line
import difflib
import functools
import getopt
import locale
import os
import sys

@@ -24,37 +25,26 @@ def getDirTree(path, basedir=""):
        return path, sorted(child_trees)

def compareTwoFiles(flags, filepaths):
    compare_bytes = False
    encoding = None
    filelines = []
    for file in filepaths:
        with open(file, 'rb') as file_bin:
            filelines.append(file_bin.readlines())

    try:
            with open(file, 'r') as f:
                filelines.append(f.readlines())
        return compareTwoTextFiles(flags, filepaths, filelines,
                                   locale.getpreferredencoding(False))
    except UnicodeDecodeError:
        try:
                with io.open(file, 'r', encoding="utf-8") as f:
                    filelines.append(f.readlines())
                encoding = "utf-8"
            return compareTwoTextFiles(flags, filepaths, filelines, "utf-8")
        except:
                compare_bytes = True

    if compare_bytes:
        return compareTwoBinaryFiles(flags, filepaths)
    else:
        return compareTwoTextFiles(flags, filepaths, encoding)

def compareTwoBinaryFiles(flags, filepaths):
    filelines = []
    for file in filepaths:
        with open(file, 'rb') as f:
            filelines.append(f.readlines())
            return compareTwoBinaryFiles(flags, filepaths, filelines)

def compareTwoBinaryFiles(flags, filepaths, filelines):
    exitCode = 0
    if hasattr(difflib, 'diff_bytes'):
        # python 3.5 or newer
        diffs = difflib.diff_bytes(difflib.unified_diff, filelines[0], filelines[1], filepaths[0].encode(), filepaths[1].encode())
        diffs = [diff.decode() for diff in diffs]
        diffs = [diff.decode(errors="backslashreplace") for diff in diffs]
    else:
        # python 2.7
        if flags.unified_diff:
@@ -68,15 +58,14 @@ def compareTwoBinaryFiles(flags, filepaths):
        exitCode = 1
    return exitCode

def compareTwoTextFiles(flags, filepaths, encoding):
def compareTwoTextFiles(flags, filepaths, filelines_bin, encoding):
    filelines = []
    for file in filepaths:
        if encoding is None:
            with open(file, 'r') as f:
                filelines.append(f.readlines())
        else:
            with io.open(file, 'r', encoding=encoding) as f:
                filelines.append(f.readlines())
    for lines_bin in filelines_bin:
        lines = []
        for line_bin in lines_bin:
            line = line_bin.decode(encoding=encoding)
            lines.append(line)
        filelines.append(lines)

    exitCode = 0
    def compose2(f, g):
+9 −0
Original line number Diff line number Diff line
# Check that diff falls back to binary mode if it cannot decode a file.
#
# Several commands below end in `&& false || true`. Evaluated left to right,
# that expression succeeds whatever diff returns, so a non-zero exit status
# from diff does not stop the script and every comparison gets to run.
# NOTE(review): the diff output itself is presumably verified by whichever
# test drives this input file -- confirm against that test.

# Same file on both sides, with no exit-status guard: a failure here would
# stop the script at this command.
# RUN: diff -u diff-in.bin diff-in.bin
# Different files on each side; exit status is ignored (see note above).
# RUN: diff -u diff-in.utf16 diff-in.bin && false || true
# RUN: diff -u diff-in.utf8 diff-in.bin && false || true
# Same pair as the previous command with the operands swapped.
# RUN: diff -u diff-in.bin diff-in.utf8 && false || true

# Fail so lit will print output.
# RUN: false
+26 B

File added.

No diff preview for this file type.

+24 B

File added.

No diff preview for this file type.

+3 −0
Original line number Diff line number Diff line
foo
bar
baz
Loading