John Koleszar | a7be7c8 | 2012-07-13 13:01:40 -0700 | [diff] [blame] | 1 | #!/usr/bin/env python |
John Koleszar | a7be7c8 | 2012-07-13 13:01:40 -0700 | [diff] [blame] | 2 | ## |
Krishna Rapaka | 7319db5 | 2021-09-28 20:35:29 -0700 | [diff] [blame] | 3 | ## Copyright (c) 2021, Alliance for Open Media. All rights reserved |
Yaowu Xu | 9c01aa1 | 2016-09-01 14:32:49 -0700 | [diff] [blame] | 4 | ## |
Krishna Rapaka | 7319db5 | 2021-09-28 20:35:29 -0700 | [diff] [blame] | 5 | ## This source code is subject to the terms of the BSD 3-Clause Clear License and the |
| 6 | ## Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear License was |
| 7 | ## not distributed with this source code in the LICENSE file, you can obtain it |
| 8 | ## at aomedia.org/license/software-license/bsd-3-c-c/. If the Alliance for Open Media Patent |
| 9 | ## License 1.0 was not distributed with this source code in the PATENTS file, you |
| 10 | ## can obtain it at aomedia.org/license/patent-license/. |
John Koleszar | a7be7c8 | 2012-07-13 13:01:40 -0700 | [diff] [blame] | 11 | ## |
| 12 | """Classes for representing diff pieces.""" |
| 13 | |
| 14 | __author__ = "jkoleszar@google.com" |
| 15 | |
| 16 | import re |
| 17 | |
| 18 | |
class DiffLines(object):
    """Lines belonging to one side of a diff hunk.

    Attributes:
      filename: name of the file this side refers to.
      offset: line number of the first line on this side.
      length: number of lines this side is expected to hold.
      lines: the collected lines, with the leading marker character removed.
      delta_line_nums: line numbers (offset-based) of every collected line
        whose marker was not a space.
    """

    def __init__(self, filename, offset, length):
        self.filename = filename
        self.offset = offset
        self.length = length
        self.lines = []
        self.delta_line_nums = []

    def Append(self, line):
        """Store one raw diff line, recording its number if it is a change.

        The first character of `line` is the marker; it is stripped before
        the text is stored. Asserts that the declared length is not exceeded
        (the check runs after the line has been stored).
        """
        position = len(self.lines)
        marker, text = line[0], line[1:]
        if marker != " ":
            self.delta_line_nums.append(self.offset + position)
        self.lines.append(text)
        assert len(self.lines) <= self.length

    def Complete(self):
        """Return True once exactly `length` lines have been collected."""
        return self.length == len(self.lines)

    def __contains__(self, item):
        """Return True if line number `item` lies within this side's range."""
        last_line = self.offset + self.length - 1
        return self.offset <= item <= last_line
| 41 | |
| 42 | |
class DiffHunk(object):
    """One diff hunk: a left (old) and a right (new) DiffLines, plus raw lines.

    Attributes:
      header: the header object passed in by the creator of the hunk.
      left: DiffLines for the "a" side of the hunk.
      right: DiffLines for the "b" side of the hunk.
      lines: every raw line appended to the hunk, in order.
    """

    def __init__(self, header, file_a, file_b, start_a, len_a, start_b, len_b):
        self.header = header
        self.left = DiffLines(file_a, start_a, len_a)
        self.right = DiffLines(file_b, start_b, len_b)
        self.lines = []

    def Append(self, line):
        """Route a raw diff line to the side(s) it belongs to and record it.

        "-" lines go to the left side, "+" lines to the right side and
        context lines (leading space) to both, left first. Lines starting
        with a backslash (git's newline messages) are recorded only in
        `lines`. Any other leading character trips an assertion.
        """
        marker = line[0]
        if marker not in ("-", "+", " ", "\\"):
            assert False, ("Unrecognized character at start of diff line "
                           "%r" % line[0])
        if marker in ("-", " "):
            self.left.Append(line)
        if marker in ("+", " "):
            self.right.Append(line)
        self.lines.append(line)

    def Complete(self):
        """Return True when both sides have received all of their lines."""
        if not self.left.Complete():
            return False
        return self.right.Complete()

    def __repr__(self):
        longest = max(self.left.length, self.right.length)
        names_and_len = (self.left.filename, self.right.filename, longest)
        return "DiffHunk(%s, %s, len %d)" % names_and_len
| 76 | |
| 77 | |
def ParseDiffHunks(stream):
    """Walk a file-like object, yielding DiffHunks as they're parsed.

    Args:
      stream: object with a readline() method that returns successive lines
        of a unified diff and an empty value at end of input.

    Yields:
      A DiffHunk for each hunk, as soon as both of its sides have received
      the number of lines announced in its "@@" range header.

    Raises:
      AssertionError: if the input ends in the middle of a hunk (assertions
        raised while appending lines to a hunk also propagate).
    """

    file_regex = re.compile(r"(\+\+\+|---) (\S+)")
    range_regex = re.compile(r"@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))?")
    hunk = None
    while True:
        line = stream.readline()
        if not line:
            break

        if hunk is None:
            # Between hunks: look for file names or the next range header.
            # Anything else (e.g. "diff --git", "index ...") is skipped.
            diff_file = file_regex.match(line)
            if diff_file:
                if line.startswith("---"):
                    a_line = line
                    a = diff_file.group(2)
                    continue
                if line.startswith("+++"):
                    b_line = line
                    b = diff_file.group(2)
                    continue

            # Parse offset/lengths
            diffrange = range_regex.match(line)
            if diffrange:
                # In the unified format the first number of each range is
                # always the starting line; the ",count" part is optional
                # and means a count of 1 when it is left out.
                start_a = int(diffrange.group(1))
                len_a = int(diffrange.group(3)) if diffrange.group(2) else 1

                start_b = int(diffrange.group(4))
                len_b = int(diffrange.group(6)) if diffrange.group(5) else 1

                header = [a_line, b_line, line]
                hunk = DiffHunk(header, a, b, start_a, len_a, start_b, len_b)
        else:
            # Add the current line to the hunk
            hunk.Append(line)

            # See if the whole hunk has been parsed. If so, yield it and
            # prepare for the next hunk.
            if hunk.Complete():
                yield hunk
                hunk = None

    # Partial hunks are a parse error
    assert hunk is None