| #!/usr/bin/env python |
| ## |
| ## Copyright (c) 2021, Alliance for Open Media. All rights reserved |
| ## |
| ## This source code is subject to the terms of the BSD 3-Clause Clear License and the |
| ## Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear License was |
| ## not distributed with this source code in the LICENSE file, you can obtain it |
| ## at aomedia.org/license/software-license/bsd-3-c-c/. If the Alliance for Open Media Patent |
| ## License 1.0 was not distributed with this source code in the PATENTS file, you |
| ## can obtain it at aomedia.org/license/patent-license/. |
| ## |
| """Classes for representing diff pieces.""" |
| |
| __author__ = "jkoleszar@google.com" |
| |
| import re |
| |
| |
class DiffLines(object):
    """A container for one half of a diff.

    Attributes:
        filename: Name of the file this half of the diff refers to.
        offset: Line number assigned to the first line stored here.
        length: Number of lines this container is expected to hold.
        lines: Lines appended so far, each with its leading diff marker
            character removed.
        delta_line_nums: Line numbers (offset + position) of every appended
            line whose marker was not a space.
    """

    def __init__(self, filename, offset, length):
        """Creates an empty container for `length` lines starting at `offset`."""
        self.filename = filename
        self.offset = offset
        self.length = length
        self.lines = []
        self.delta_line_nums = []

    def Append(self, line):
        """Stores one raw diff line, recording it as a delta if it is marked.

        Args:
            line: A non-empty diff line whose first character is the diff
                marker (" " for unchanged context, anything else for a
                changed line).

        Raises:
            AssertionError: If the container already holds `length` lines.
        """
        index = len(self.lines)
        # Check capacity before touching any state so that a rejected line
        # leaves the container unchanged. Raised explicitly rather than via
        # `assert` so the check is not removed by `python -O`.
        if index >= self.length:
            raise AssertionError(
                "Too many lines appended: expected at most %r" % self.length)
        if line[0] != " ":
            self.delta_line_nums.append(self.offset + index)
        self.lines.append(line[1:])

    def Complete(self):
        """Returns True once exactly `length` lines have been appended."""
        return len(self.lines) == self.length

    def __contains__(self, item):
        """Returns True if line number `item` falls inside this container."""
        return self.offset <= item <= self.offset + self.length - 1
| |
| |
class DiffHunk(object):
    """A container for one diff hunk, consisting of two DiffLines.

    Attributes:
        header: The header object supplied by the creator, stored as given.
        left: DiffLines built from (file_a, start_a, len_a).
        right: DiffLines built from (file_b, start_b, len_b).
        lines: Every line accepted by Append, unmodified and in order.
    """

    def __init__(self, header, file_a, file_b, start_a, len_a, start_b, len_b):
        """Creates an empty hunk with one DiffLines container per side."""
        self.header = header
        self.left = DiffLines(file_a, start_a, len_a)
        self.right = DiffLines(file_b, start_b, len_b)
        self.lines = []

    def Append(self, line):
        """Adds a line to the DiffHunk and its DiffLines children.

        Args:
            line: A non-empty diff line. A leading "-" goes to the left side,
                "+" to the right side, " " to both sides, and a leading
                backslash is kept in `lines` only.

        Raises:
            AssertionError: If the line starts with any other character.
        """
        marker = line[0]
        if marker == "-":
            self.left.Append(line)
        elif marker == "+":
            self.right.Append(line)
        elif marker == " ":
            self.left.Append(line)
            self.right.Append(line)
        elif marker == "\\":
            # Ignore newline messages from git diff.
            pass
        else:
            # Raised explicitly instead of `assert False` so that malformed
            # input is still rejected when Python runs with -O.
            raise AssertionError("Unrecognized character at start of diff line "
                                 "%r" % marker)
        self.lines.append(line)

    def Complete(self):
        """Returns True when both the left and right sides are complete."""
        return self.left.Complete() and self.right.Complete()

    def __repr__(self):
        return "DiffHunk(%s, %s, len %d)" % (
            self.left.filename, self.right.filename,
            max(self.left.length, self.right.length))
| |
| |
def ParseDiffHunks(stream):
    """Walk a file-like object, yielding DiffHunks as they're parsed.

    Lines are read with `stream.readline()` until it returns an empty value.
    Outside a hunk, "---"/"+++" lines set the current file names and an "@@"
    range line opens a new hunk; any other line is skipped. Inside a hunk,
    every line is appended to it until both sides are complete.

    Args:
        stream: An object with a `readline()` method returning text lines.

    Yields:
        A DiffHunk for each fully parsed hunk, in input order.

    Raises:
        AssertionError: If the input ends in the middle of a hunk.
    """
    file_regex = re.compile(r"(\+\+\+|---) (\S+)")
    range_regex = re.compile(r"@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))?")
    hunk = None
    while True:
        line = stream.readline()
        if not line:
            break

        if hunk is None:
            # Parse file names
            diff_file = file_regex.match(line)
            if diff_file:
                if line.startswith("---"):
                    a_line = line
                    a = diff_file.group(2)
                    continue
                if line.startswith("+++"):
                    b_line = line
                    b = diff_file.group(2)
                    continue

            # Parse offset/lengths
            diffrange = range_regex.match(line)
            if diffrange:
                # In unified format each side is "start,count". When the
                # count is omitted the side is a single line, so the lone
                # number is the start and the count is 1.
                start_a = int(diffrange.group(1))
                count_a = diffrange.group(3)
                len_a = 1 if count_a is None else int(count_a)

                start_b = int(diffrange.group(4))
                count_b = diffrange.group(6)
                len_b = 1 if count_b is None else int(count_b)

                header = [a_line, b_line, line]
                hunk = DiffHunk(header, a, b, start_a, len_a, start_b, len_b)
        else:
            # Add the current line to the hunk
            hunk.Append(line)

            # See if the whole hunk has been parsed. If so, yield it and
            # prepare for the next hunk.
            if hunk.Complete():
                yield hunk
                hunk = None

    # Partial hunks are a parse error. Raised explicitly so the check is not
    # removed when Python runs with -O.
    if hunk is not None:
        raise AssertionError("Input ended in the middle of a diff hunk")