Googletest export
Fix Theta(N^2) memory usage of EXPECT_EQ(string) when the strings don't match. The underlying CalculateOptimalEdits() implementation used a simple dynamic-programming approach that always used N^2 memory and time. This meant that tests for equality of large strings were ticking time bombs: They'd work fine as long as the test passed, but as soon as the strings differed the test would OOM, which is very hard to debug. I switched it out for a Dijkstra search, which is still worst-case O(N^2), but in the usual case of mostly-matching strings, it is much closer to linear. PiperOrigin-RevId: 210405025
This commit is contained in:
parent
1bb76182ca
commit
167c5e8188
|
@ -159,15 +159,15 @@ GTEST_DISABLE_MSC_WARNINGS_POP_() // 4275
|
||||||
|
|
||||||
#endif // GTEST_HAS_EXCEPTIONS
|
#endif // GTEST_HAS_EXCEPTIONS
|
||||||
|
|
||||||
namespace edit_distance {
|
|
||||||
// Returns the optimal edits to go from 'left' to 'right'.
|
// Returns the optimal edits to go from 'left' to 'right'.
|
||||||
// All edits cost the same, with replace having lower priority than
|
// All edits cost the same, with replace having lower priority than
|
||||||
// add/remove.
|
// add/remove. Returns an approximation of the maximum memory used in
|
||||||
// Simple implementation of the Wagner-Fischer algorithm.
|
// 'memory_usage' if non-null.
|
||||||
// See http://en.wikipedia.org/wiki/Wagner-Fischer_algorithm
|
// Uses a Dijkstra search, with a couple of simple bells and whistles added on.
|
||||||
enum EditType { kMatch, kAdd, kRemove, kReplace };
|
enum EditType { kEditMatch, kEditAdd, kEditRemove, kEditReplace };
|
||||||
GTEST_API_ std::vector<EditType> CalculateOptimalEdits(
|
GTEST_API_ std::vector<EditType> CalculateOptimalEdits(
|
||||||
const std::vector<size_t>& left, const std::vector<size_t>& right);
|
const std::vector<size_t>& left, const std::vector<size_t>& right,
|
||||||
|
size_t* memory_usage = NULL);
|
||||||
|
|
||||||
// Same as above, but the input is represented as strings.
|
// Same as above, but the input is represented as strings.
|
||||||
GTEST_API_ std::vector<EditType> CalculateOptimalEdits(
|
GTEST_API_ std::vector<EditType> CalculateOptimalEdits(
|
||||||
|
@ -179,8 +179,6 @@ GTEST_API_ std::string CreateUnifiedDiff(const std::vector<std::string>& left,
|
||||||
const std::vector<std::string>& right,
|
const std::vector<std::string>& right,
|
||||||
size_t context = 2);
|
size_t context = 2);
|
||||||
|
|
||||||
} // namespace edit_distance
|
|
||||||
|
|
||||||
// Calculate the diff between 'left' and 'right' and return it in unified diff
|
// Calculate the diff between 'left' and 'right' and return it in unified diff
|
||||||
// format.
|
// format.
|
||||||
// If not null, stores in 'total_line_count' the total number of lines found
|
// If not null, stores in 'total_line_count' the total number of lines found
|
||||||
|
|
|
@ -38,10 +38,11 @@
|
||||||
#include "gtest/gtest.h"
|
#include "gtest/gtest.h"
|
||||||
|
|
||||||
// The following lines pull in the real gtest *.cc files.
|
// The following lines pull in the real gtest *.cc files.
|
||||||
#include "src/gtest.cc"
|
|
||||||
#include "src/gtest-death-test.cc"
|
#include "src/gtest-death-test.cc"
|
||||||
|
#include "src/gtest-edit-distance.cc"
|
||||||
#include "src/gtest-filepath.cc"
|
#include "src/gtest-filepath.cc"
|
||||||
#include "src/gtest-port.cc"
|
#include "src/gtest-port.cc"
|
||||||
#include "src/gtest-printers.cc"
|
#include "src/gtest-printers.cc"
|
||||||
#include "src/gtest-test-part.cc"
|
#include "src/gtest-test-part.cc"
|
||||||
#include "src/gtest-typed-test.cc"
|
#include "src/gtest-typed-test.cc"
|
||||||
|
#include "src/gtest.cc"
|
||||||
|
|
507
googletest/src/gtest-edit-distance.cc
Normal file
507
googletest/src/gtest-edit-distance.cc
Normal file
|
@ -0,0 +1,507 @@
|
||||||
|
// Copyright 2018, Google Inc.
|
||||||
|
// All rights reserved.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without
|
||||||
|
// modification, are permitted provided that the following conditions are
|
||||||
|
// met:
|
||||||
|
//
|
||||||
|
// * Redistributions of source code must retain the above copyright
|
||||||
|
// notice, this list of conditions and the following disclaimer.
|
||||||
|
// * Redistributions in binary form must reproduce the above
|
||||||
|
// copyright notice, this list of conditions and the following disclaimer
|
||||||
|
// in the documentation and/or other materials provided with the
|
||||||
|
// distribution.
|
||||||
|
// * Neither the name of Google Inc. nor the names of its
|
||||||
|
// contributors may be used to endorse or promote products derived from
|
||||||
|
// this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
//
|
||||||
|
// Internal helper functions for finding optimal edit transformations
|
||||||
|
// between strings.
|
||||||
|
|
||||||
|
#include "gtest/gtest.h"
|
||||||
|
|
||||||
|
#include <functional>
|
||||||
|
#include <list>
|
||||||
|
#include <ostream> // NOLINT
|
||||||
|
#include <queue>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
namespace testing {
|
||||||
|
namespace internal {
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
// The following implement data structures and code for a Dijkstra-search
|
||||||
|
// based implementation of optimal edit distance.
|
||||||
|
|
||||||
|
// Posible states a node can be in. Either a node is unsettled (it hasn't been
|
||||||
|
// drawn from the priority queue yet), or it is settled and a back-link to its
|
||||||
|
// parent node is fixed.
|
||||||
|
enum EditSearchState {
|
||||||
|
kUnsettled,
|
||||||
|
kMatchParent,
|
||||||
|
kAddParent,
|
||||||
|
kRemoveParent,
|
||||||
|
kReplaceParent
|
||||||
|
};
|
||||||
|
|
||||||
|
// Custom container for search states. This is smaller and faster than a hash
|
||||||
|
// map, because the used states are dense along diagonals.
|
||||||
|
// Specifically, each state requires only 1 byte, whereas a hash_map would
|
||||||
|
// require storing the key, which would come to at least 8 bytes. std::map has
|
||||||
|
// an extra 32 bytes per node (3 pointers + 1 byte, padded), so even though
|
||||||
|
// there are circumstances where this class can have kBlockSize overhead per
|
||||||
|
// state, on average it does better than 40 bytes of overhead per state.
|
||||||
|
// In addition, in unopt builds (the usual way tests are run) the fewer
|
||||||
|
// allocations + better locality has this method running 10-50x faster than
|
||||||
|
// std::map for inputs that are large enough to measure.
|
||||||
|
class EditSearchMap {
|
||||||
|
public:
|
||||||
|
EditSearchMap(size_t left_size, size_t right_size)
|
||||||
|
: left_size_(left_size), right_size_(right_size) {
|
||||||
|
GTEST_CHECK_(left_size_ == left_size && right_size_ == right_size)
|
||||||
|
<< "Overflow in size: Arguments too large";
|
||||||
|
}
|
||||||
|
|
||||||
|
// Gets a mutable reference to a state - this is actually of type
|
||||||
|
// EditSearchState - inserting if it does not exist.
|
||||||
|
unsigned char& insert(UInt32 left, UInt32 right) {
|
||||||
|
std::vector<UInt32>* vec;
|
||||||
|
size_t index1;
|
||||||
|
size_t index2;
|
||||||
|
if (left > right) {
|
||||||
|
vec = &left_nodes_;
|
||||||
|
index1 = left - right - 1;
|
||||||
|
index2 = right;
|
||||||
|
} else {
|
||||||
|
vec = &right_nodes_;
|
||||||
|
index1 = right - left;
|
||||||
|
index2 = left;
|
||||||
|
}
|
||||||
|
if (vec->size() <= index1) {
|
||||||
|
GTEST_CHECK_(vec->size() == index1)
|
||||||
|
<< "Array diagonals should only grow by one " << vec->size() << " vs "
|
||||||
|
<< index1;
|
||||||
|
vec->push_back(block_indices_.size());
|
||||||
|
// Round up
|
||||||
|
block_indices_.resize(
|
||||||
|
block_indices_.size() +
|
||||||
|
(DiagonalLength(left, right) + kBlockSize - 1) / kBlockSize,
|
||||||
|
kUnallocatedBlock);
|
||||||
|
}
|
||||||
|
const size_t bucket = index2 / kBlockSize;
|
||||||
|
const size_t pos_in_bucket = index2 % kBlockSize;
|
||||||
|
UInt32& level2 = block_indices_[(*vec)[index1] + bucket];
|
||||||
|
if (level2 == kUnallocatedBlock) {
|
||||||
|
level2 = nodes_.size();
|
||||||
|
size_t diagonal_length = DiagonalLength(left, right);
|
||||||
|
GTEST_CHECK_(diagonal_length > index2)
|
||||||
|
<< diagonal_length << " " << index2;
|
||||||
|
size_t block_size = kBlockSize;
|
||||||
|
if (diagonal_length / kBlockSize == bucket) {
|
||||||
|
// We can never get here if diagonal_length is a multiple of
|
||||||
|
// kBlockSize, which is what we want, since this would evaluate to 0.
|
||||||
|
block_size = diagonal_length % kBlockSize;
|
||||||
|
}
|
||||||
|
nodes_.resize(nodes_.size() + block_size);
|
||||||
|
}
|
||||||
|
return nodes_[level2 + pos_in_bucket];
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t MemoryUsage() const {
|
||||||
|
return nodes_.capacity() +
|
||||||
|
sizeof(UInt32) * (left_nodes_.capacity() + right_nodes_.capacity() +
|
||||||
|
block_indices_.capacity());
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
enum { kBlockSize = 1024, kUnallocatedBlock = 0xFFFFFFFFul };
|
||||||
|
|
||||||
|
size_t DiagonalLength(UInt32 left, UInt32 right) const {
|
||||||
|
return std::min(left_size_ - left, right_size_ - right) +
|
||||||
|
(left < right ? left : right);
|
||||||
|
}
|
||||||
|
|
||||||
|
// The state space is conceptually a left_size_ by right_size_ sparse matrix
|
||||||
|
// of EditSearchStates. However, due to the access pattern of the search, it
|
||||||
|
// is much better to store the nodes per diagonal rather than per row.
|
||||||
|
UInt32 left_size_;
|
||||||
|
UInt32 right_size_;
|
||||||
|
// The nodes are stored by diagonals, split in two: Those to the left of the
|
||||||
|
// main diagonal are in left_nodes_, and everything else is in right_nodes_.
|
||||||
|
// The values are indices into block_indices_.
|
||||||
|
std::vector<UInt32> left_nodes_;
|
||||||
|
std::vector<UInt32> right_nodes_;
|
||||||
|
// Every entry here is an offset into the beginning of a kBlockSize-sized
|
||||||
|
// block in nodes_. An entire diagonal is allocated together here; for a
|
||||||
|
// diagonal of length <= kBlockSize, that's just a single entry, but for
|
||||||
|
// longer diagonals multiple contiguous index entries will be reserved at
|
||||||
|
// once. Unused entries will be assigned kUnallocatedBlock; this
|
||||||
|
// double-indirect scheme is used to save memory in the cases when an entire
|
||||||
|
// diagonal isn't needed.
|
||||||
|
std::vector<UInt32> block_indices_;
|
||||||
|
// This stores the actual EditSearchState data, pointed to by block_indices_.
|
||||||
|
std::vector<unsigned char> nodes_;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct EditHeapEntry {
|
||||||
|
EditHeapEntry(UInt32 l, UInt32 r, UInt64 c, EditSearchState s)
|
||||||
|
: left(l), right(r), cost(c), state(s) {}
|
||||||
|
|
||||||
|
UInt32 left;
|
||||||
|
UInt32 right;
|
||||||
|
UInt64 cost : 61;
|
||||||
|
// The state that the node will get when this entry is settled. Therefore,
|
||||||
|
// this can never be kUnsettled.
|
||||||
|
UInt64 state : 3;
|
||||||
|
|
||||||
|
bool operator>(const EditHeapEntry& other) const { return cost > other.cost; }
|
||||||
|
};
|
||||||
|
|
||||||
|
// Need a min-queue, so invert the comparator.
|
||||||
|
typedef std::priority_queue<EditHeapEntry, std::vector<EditHeapEntry>,
|
||||||
|
std::greater<EditHeapEntry>>
|
||||||
|
EditHeap;
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
std::vector<EditType> CalculateOptimalEdits(const std::vector<size_t>& left,
|
||||||
|
const std::vector<size_t>& right,
|
||||||
|
size_t* memory_usage) {
|
||||||
|
const UInt64 kBaseCost = 100000;
|
||||||
|
// We make replace a little more expensive than add/remove to lower
|
||||||
|
// their priority.
|
||||||
|
const UInt64 kReplaceCost = 100001;
|
||||||
|
// In the common case where the vectors are the same (or almost the same)
|
||||||
|
// size, we know that an add will have to be followed by some later remove
|
||||||
|
// (or vice versa) in order to get the lengths to balance. We "borrow" some
|
||||||
|
// of the cost of the later operation and bring it forward into the earlier
|
||||||
|
// operation, to increase the cost of exploring (usually fruitlessly) around
|
||||||
|
// the beginning of the graph.
|
||||||
|
// However, there is a trade-off: This cheapens the cost of exploring around
|
||||||
|
// the beginning of the graph (in one direction) when the vectors are
|
||||||
|
// unequal in length. So we don't steal *all* the cost.
|
||||||
|
// You can view this as a form of A*, using an admissable heuristic that has
|
||||||
|
// been re-cast as a cost function that can be used in Dijkstra.
|
||||||
|
const UInt64 kTowardsGoalCost = 50003;
|
||||||
|
const UInt64 kAwayFromGoalCost = 2 * kBaseCost - kTowardsGoalCost;
|
||||||
|
|
||||||
|
EditSearchMap node_map(left.size() + 1, right.size() + 1);
|
||||||
|
EditHeap heap;
|
||||||
|
heap.push(EditHeapEntry(0, 0, 0, kReplaceParent));
|
||||||
|
|
||||||
|
while (!heap.empty()) {
|
||||||
|
const EditHeapEntry current_entry = heap.top();
|
||||||
|
heap.pop();
|
||||||
|
|
||||||
|
UInt32 left_pos = current_entry.left;
|
||||||
|
UInt32 right_pos = current_entry.right;
|
||||||
|
unsigned char& current_state = node_map.insert(left_pos, right_pos);
|
||||||
|
if (current_state != kUnsettled) {
|
||||||
|
// Node was already settled by a previous entry in the priority queue,
|
||||||
|
// this is a suboptimal path that should be ignored.
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
current_state = current_entry.state;
|
||||||
|
|
||||||
|
if (left_pos == left.size() && right_pos == right.size()) {
|
||||||
|
// This is the normal exit point; if we terminate due to the heap being
|
||||||
|
// empty, we'll fail a check later.
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Special case: Since the cost of a match is zero, we can immediately
|
||||||
|
// settle the new node without putting it in the queue, since nothing can
|
||||||
|
// have a smaller cost than it. Furthermore, we don't need to relax the
|
||||||
|
// other two edges, since we know we don't need them: Any path from this
|
||||||
|
// node that would use them has an path via the match that is at least as
|
||||||
|
// cheap. Together, this means we can loop here until we stop matching.
|
||||||
|
while (left_pos < left.size() && right_pos < right.size() &&
|
||||||
|
left[left_pos] == right[right_pos]) {
|
||||||
|
left_pos++;
|
||||||
|
right_pos++;
|
||||||
|
unsigned char& fast_forward_state = node_map.insert(left_pos, right_pos);
|
||||||
|
if (fast_forward_state != kUnsettled) {
|
||||||
|
// The search reached around and settled this node before settling the
|
||||||
|
// base node. This means we're completely done with this iteration;
|
||||||
|
// abort to the outer loop.
|
||||||
|
goto outer_loop_bottom;
|
||||||
|
// Otherwise, when can settle this node, even if it was created from
|
||||||
|
// another state - we know the cost of settling it now is optimal.
|
||||||
|
}
|
||||||
|
fast_forward_state = kMatchParent;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Relax adjacent nodes. We have no way to find or lower the cost of
|
||||||
|
// existing entries in the heap, so we just push new entries and throw
|
||||||
|
// them out at the top if the node is already settled. We *could* check to
|
||||||
|
// see if they're already settled before pushing, but it turns out to be
|
||||||
|
// ~not any faster, and more complicated to do so.
|
||||||
|
//
|
||||||
|
// If we're at an edge, there's only one node to relax.
|
||||||
|
if (left_pos >= left.size()) {
|
||||||
|
if (right_pos >= right.size()) {
|
||||||
|
break; // Can happen due to the fast-path loop above.
|
||||||
|
}
|
||||||
|
heap.push(EditHeapEntry(left_pos, right_pos + 1,
|
||||||
|
current_entry.cost + kTowardsGoalCost,
|
||||||
|
kAddParent));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (right_pos >= right.size()) {
|
||||||
|
heap.push(EditHeapEntry(left_pos + 1, right_pos,
|
||||||
|
current_entry.cost + kTowardsGoalCost,
|
||||||
|
kRemoveParent));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// General case: Relax 3 edges.
|
||||||
|
heap.push(EditHeapEntry(
|
||||||
|
left_pos, right_pos + 1,
|
||||||
|
current_entry.cost + (right.size() + left_pos > right_pos + left.size()
|
||||||
|
? kTowardsGoalCost
|
||||||
|
: kAwayFromGoalCost),
|
||||||
|
kAddParent));
|
||||||
|
heap.push(EditHeapEntry(
|
||||||
|
left_pos + 1, right_pos,
|
||||||
|
current_entry.cost + (right.size() + left_pos < right_pos + left.size()
|
||||||
|
? kTowardsGoalCost
|
||||||
|
: kAwayFromGoalCost),
|
||||||
|
kRemoveParent));
|
||||||
|
heap.push(EditHeapEntry(left_pos + 1, right_pos + 1,
|
||||||
|
current_entry.cost + kReplaceCost, kReplaceParent));
|
||||||
|
outer_loop_bottom : {} // Need the curlies to form a statement.
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reconstruct the best path. We do it in reverse order.
|
||||||
|
std::vector<EditType> best_path;
|
||||||
|
UInt32 left_pos = left.size();
|
||||||
|
UInt32 right_pos = right.size();
|
||||||
|
while (left_pos != 0 || right_pos != 0) {
|
||||||
|
GTEST_CHECK_(left_pos <= left.size() && right_pos <= right.size());
|
||||||
|
// The node must already exist, but if it somehow doesn't, it will be
|
||||||
|
// added as kUnsettled, which will crash below.
|
||||||
|
const unsigned char state = node_map.insert(left_pos, right_pos);
|
||||||
|
switch (state) {
|
||||||
|
case kAddParent:
|
||||||
|
right_pos--;
|
||||||
|
break;
|
||||||
|
case kRemoveParent:
|
||||||
|
left_pos--;
|
||||||
|
break;
|
||||||
|
case kMatchParent:
|
||||||
|
case kReplaceParent:
|
||||||
|
left_pos--;
|
||||||
|
right_pos--;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
GTEST_LOG_(FATAL) << "Unsettled node at " << left_pos << ","
|
||||||
|
<< right_pos;
|
||||||
|
}
|
||||||
|
best_path.push_back(static_cast<EditType>(state - 1));
|
||||||
|
}
|
||||||
|
std::reverse(best_path.begin(), best_path.end());
|
||||||
|
if (memory_usage != NULL) {
|
||||||
|
*memory_usage = node_map.MemoryUsage();
|
||||||
|
}
|
||||||
|
return best_path;
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
// Helper class to convert string into ids with deduplication.
|
||||||
|
class InternalStrings {
|
||||||
|
public:
|
||||||
|
size_t GetId(const std::string* str) {
|
||||||
|
IdMap::iterator it = ids_.find(str);
|
||||||
|
if (it != ids_.end()) return it->second;
|
||||||
|
size_t id = ids_.size();
|
||||||
|
return ids_[str] = id;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
struct IdMapCmp {
|
||||||
|
bool operator()(const std::string* first, const std::string* second) const {
|
||||||
|
return *first < *second;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
typedef std::map<const std::string*, size_t, IdMapCmp> IdMap;
|
||||||
|
IdMap ids_;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
std::vector<EditType> CalculateOptimalEdits(
|
||||||
|
const std::vector<std::string>& left,
|
||||||
|
const std::vector<std::string>& right) {
|
||||||
|
std::vector<size_t> left_ids, right_ids;
|
||||||
|
{
|
||||||
|
InternalStrings intern_table;
|
||||||
|
for (size_t i = 0; i < left.size(); ++i) {
|
||||||
|
left_ids.push_back(intern_table.GetId(&left[i]));
|
||||||
|
}
|
||||||
|
for (size_t i = 0; i < right.size(); ++i) {
|
||||||
|
right_ids.push_back(intern_table.GetId(&right[i]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return CalculateOptimalEdits(left_ids, right_ids);
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
// Helper class that holds the state for one hunk and prints it out to the
|
||||||
|
// stream.
|
||||||
|
// It reorders adds/removes when possible to group all removes before all
|
||||||
|
// adds. It also adds the hunk header before printing into the stream.
|
||||||
|
class Hunk {
|
||||||
|
public:
|
||||||
|
Hunk(size_t left_start, size_t right_start)
|
||||||
|
: left_start_(left_start),
|
||||||
|
right_start_(right_start),
|
||||||
|
adds_(),
|
||||||
|
removes_(),
|
||||||
|
common_() {}
|
||||||
|
|
||||||
|
void PushLine(char edit, const char* line) {
|
||||||
|
switch (edit) {
|
||||||
|
case ' ':
|
||||||
|
++common_;
|
||||||
|
FlushEdits();
|
||||||
|
hunk_.push_back(std::make_pair(' ', line));
|
||||||
|
break;
|
||||||
|
case '-':
|
||||||
|
++removes_;
|
||||||
|
hunk_removes_.push_back(std::make_pair('-', line));
|
||||||
|
break;
|
||||||
|
case '+':
|
||||||
|
++adds_;
|
||||||
|
hunk_adds_.push_back(std::make_pair('+', line));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void PrintTo(std::ostream* os) {
|
||||||
|
PrintHeader(os);
|
||||||
|
FlushEdits();
|
||||||
|
for (std::list<std::pair<char, const char*> >::const_iterator it =
|
||||||
|
hunk_.begin();
|
||||||
|
it != hunk_.end(); ++it) {
|
||||||
|
*os << it->first << it->second << "\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool has_edits() const { return adds_ || removes_; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
void FlushEdits() {
|
||||||
|
hunk_.splice(hunk_.end(), hunk_removes_);
|
||||||
|
hunk_.splice(hunk_.end(), hunk_adds_);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Print a unified diff header for one hunk.
|
||||||
|
// The format is
|
||||||
|
// "@@ -<left_start>,<left_length> +<right_start>,<right_length> @@"
|
||||||
|
// where the left/right parts are omitted if unnecessary.
|
||||||
|
void PrintHeader(std::ostream* ss) const {
|
||||||
|
*ss << "@@ ";
|
||||||
|
if (removes_) {
|
||||||
|
*ss << "-" << left_start_ << "," << (removes_ + common_);
|
||||||
|
}
|
||||||
|
if (removes_ && adds_) {
|
||||||
|
*ss << " ";
|
||||||
|
}
|
||||||
|
if (adds_) {
|
||||||
|
*ss << "+" << right_start_ << "," << (adds_ + common_);
|
||||||
|
}
|
||||||
|
*ss << " @@\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t left_start_, right_start_;
|
||||||
|
size_t adds_, removes_, common_;
|
||||||
|
std::list<std::pair<char, const char*> > hunk_, hunk_adds_, hunk_removes_;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
// Create a list of diff hunks in Unified diff format.
|
||||||
|
// Each hunk has a header generated by PrintHeader above plus a body with
|
||||||
|
// lines prefixed with ' ' for no change, '-' for deletion and '+' for
|
||||||
|
// addition.
|
||||||
|
// 'context' represents the desired unchanged prefix/suffix around the diff.
|
||||||
|
// If two hunks are close enough that their contexts overlap, then they are
|
||||||
|
// joined into one hunk.
|
||||||
|
std::string CreateUnifiedDiff(const std::vector<std::string>& left,
|
||||||
|
const std::vector<std::string>& right,
|
||||||
|
size_t context) {
|
||||||
|
const std::vector<EditType> edits = CalculateOptimalEdits(left, right);
|
||||||
|
|
||||||
|
size_t l_i = 0, r_i = 0, edit_i = 0;
|
||||||
|
std::stringstream ss;
|
||||||
|
while (edit_i < edits.size()) {
|
||||||
|
// Find first edit.
|
||||||
|
while (edit_i < edits.size() && edits[edit_i] == kEditMatch) {
|
||||||
|
++l_i;
|
||||||
|
++r_i;
|
||||||
|
++edit_i;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find the first line to include in the hunk.
|
||||||
|
const size_t prefix_context = std::min(l_i, context);
|
||||||
|
Hunk hunk(l_i - prefix_context + 1, r_i - prefix_context + 1);
|
||||||
|
for (size_t i = prefix_context; i > 0; --i) {
|
||||||
|
hunk.PushLine(' ', left[l_i - i].c_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Iterate the edits until we found enough suffix for the hunk or the input
|
||||||
|
// is over.
|
||||||
|
size_t n_suffix = 0;
|
||||||
|
for (; edit_i < edits.size(); ++edit_i) {
|
||||||
|
if (n_suffix >= context) {
|
||||||
|
// Continue only if the next hunk is very close.
|
||||||
|
std::vector<EditType>::const_iterator it = edits.begin() + edit_i;
|
||||||
|
while (it != edits.end() && *it == kEditMatch) ++it;
|
||||||
|
if (it == edits.end() || (it - edits.begin()) - edit_i >= context) {
|
||||||
|
// There is no next edit or it is too far away.
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
EditType edit = edits[edit_i];
|
||||||
|
// Reset count when a non match is found.
|
||||||
|
n_suffix = edit == kEditMatch ? n_suffix + 1 : 0;
|
||||||
|
|
||||||
|
if (edit == kEditMatch || edit == kEditRemove || edit == kEditReplace) {
|
||||||
|
hunk.PushLine(edit == kEditMatch ? ' ' : '-', left[l_i].c_str());
|
||||||
|
}
|
||||||
|
if (edit == kEditAdd || edit == kEditReplace) {
|
||||||
|
hunk.PushLine('+', right[r_i].c_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Advance indices, depending on edit type.
|
||||||
|
l_i += edit != kEditAdd;
|
||||||
|
r_i += edit != kEditRemove;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!hunk.has_edits()) {
|
||||||
|
// We are done. We don't want this hunk.
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
hunk.PrintTo(&ss);
|
||||||
|
}
|
||||||
|
return ss.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace internal
|
||||||
|
} // namespace testing
|
|
@ -46,7 +46,6 @@
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
#include <list>
|
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <ostream> // NOLINT
|
#include <ostream> // NOLINT
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
@ -1068,246 +1067,6 @@ AssertionResult AssertionFailure(const Message& message) {
|
||||||
|
|
||||||
namespace internal {
|
namespace internal {
|
||||||
|
|
||||||
namespace edit_distance {
|
|
||||||
std::vector<EditType> CalculateOptimalEdits(const std::vector<size_t>& left,
|
|
||||||
const std::vector<size_t>& right) {
|
|
||||||
std::vector<std::vector<double> > costs(
|
|
||||||
left.size() + 1, std::vector<double>(right.size() + 1));
|
|
||||||
std::vector<std::vector<EditType> > best_move(
|
|
||||||
left.size() + 1, std::vector<EditType>(right.size() + 1));
|
|
||||||
|
|
||||||
// Populate for empty right.
|
|
||||||
for (size_t l_i = 0; l_i < costs.size(); ++l_i) {
|
|
||||||
costs[l_i][0] = static_cast<double>(l_i);
|
|
||||||
best_move[l_i][0] = kRemove;
|
|
||||||
}
|
|
||||||
// Populate for empty left.
|
|
||||||
for (size_t r_i = 1; r_i < costs[0].size(); ++r_i) {
|
|
||||||
costs[0][r_i] = static_cast<double>(r_i);
|
|
||||||
best_move[0][r_i] = kAdd;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (size_t l_i = 0; l_i < left.size(); ++l_i) {
|
|
||||||
for (size_t r_i = 0; r_i < right.size(); ++r_i) {
|
|
||||||
if (left[l_i] == right[r_i]) {
|
|
||||||
// Found a match. Consume it.
|
|
||||||
costs[l_i + 1][r_i + 1] = costs[l_i][r_i];
|
|
||||||
best_move[l_i + 1][r_i + 1] = kMatch;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
const double add = costs[l_i + 1][r_i];
|
|
||||||
const double remove = costs[l_i][r_i + 1];
|
|
||||||
const double replace = costs[l_i][r_i];
|
|
||||||
if (add < remove && add < replace) {
|
|
||||||
costs[l_i + 1][r_i + 1] = add + 1;
|
|
||||||
best_move[l_i + 1][r_i + 1] = kAdd;
|
|
||||||
} else if (remove < add && remove < replace) {
|
|
||||||
costs[l_i + 1][r_i + 1] = remove + 1;
|
|
||||||
best_move[l_i + 1][r_i + 1] = kRemove;
|
|
||||||
} else {
|
|
||||||
// We make replace a little more expensive than add/remove to lower
|
|
||||||
// their priority.
|
|
||||||
costs[l_i + 1][r_i + 1] = replace + 1.00001;
|
|
||||||
best_move[l_i + 1][r_i + 1] = kReplace;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Reconstruct the best path. We do it in reverse order.
|
|
||||||
std::vector<EditType> best_path;
|
|
||||||
for (size_t l_i = left.size(), r_i = right.size(); l_i > 0 || r_i > 0;) {
|
|
||||||
EditType move = best_move[l_i][r_i];
|
|
||||||
best_path.push_back(move);
|
|
||||||
l_i -= move != kAdd;
|
|
||||||
r_i -= move != kRemove;
|
|
||||||
}
|
|
||||||
std::reverse(best_path.begin(), best_path.end());
|
|
||||||
return best_path;
|
|
||||||
}
|
|
||||||
|
|
||||||
namespace {
|
|
||||||
|
|
||||||
// Helper class to convert string into ids with deduplication.
|
|
||||||
class InternalStrings {
|
|
||||||
public:
|
|
||||||
size_t GetId(const std::string& str) {
|
|
||||||
IdMap::iterator it = ids_.find(str);
|
|
||||||
if (it != ids_.end()) return it->second;
|
|
||||||
size_t id = ids_.size();
|
|
||||||
return ids_[str] = id;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
typedef std::map<std::string, size_t> IdMap;
|
|
||||||
IdMap ids_;
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace
|
|
||||||
|
|
||||||
std::vector<EditType> CalculateOptimalEdits(
|
|
||||||
const std::vector<std::string>& left,
|
|
||||||
const std::vector<std::string>& right) {
|
|
||||||
std::vector<size_t> left_ids, right_ids;
|
|
||||||
{
|
|
||||||
InternalStrings intern_table;
|
|
||||||
for (size_t i = 0; i < left.size(); ++i) {
|
|
||||||
left_ids.push_back(intern_table.GetId(left[i]));
|
|
||||||
}
|
|
||||||
for (size_t i = 0; i < right.size(); ++i) {
|
|
||||||
right_ids.push_back(intern_table.GetId(right[i]));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return CalculateOptimalEdits(left_ids, right_ids);
|
|
||||||
}
|
|
||||||
|
|
||||||
namespace {
|
|
||||||
|
|
||||||
// Helper class that holds the state for one hunk and prints it out to the
|
|
||||||
// stream.
|
|
||||||
// It reorders adds/removes when possible to group all removes before all
|
|
||||||
// adds. It also adds the hunk header before printint into the stream.
|
|
||||||
class Hunk {
|
|
||||||
public:
|
|
||||||
Hunk(size_t left_start, size_t right_start)
|
|
||||||
: left_start_(left_start),
|
|
||||||
right_start_(right_start),
|
|
||||||
adds_(),
|
|
||||||
removes_(),
|
|
||||||
common_() {}
|
|
||||||
|
|
||||||
void PushLine(char edit, const char* line) {
|
|
||||||
switch (edit) {
|
|
||||||
case ' ':
|
|
||||||
++common_;
|
|
||||||
FlushEdits();
|
|
||||||
hunk_.push_back(std::make_pair(' ', line));
|
|
||||||
break;
|
|
||||||
case '-':
|
|
||||||
++removes_;
|
|
||||||
hunk_removes_.push_back(std::make_pair('-', line));
|
|
||||||
break;
|
|
||||||
case '+':
|
|
||||||
++adds_;
|
|
||||||
hunk_adds_.push_back(std::make_pair('+', line));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void PrintTo(std::ostream* os) {
|
|
||||||
PrintHeader(os);
|
|
||||||
FlushEdits();
|
|
||||||
for (std::list<std::pair<char, const char*> >::const_iterator it =
|
|
||||||
hunk_.begin();
|
|
||||||
it != hunk_.end(); ++it) {
|
|
||||||
*os << it->first << it->second << "\n";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool has_edits() const { return adds_ || removes_; }
|
|
||||||
|
|
||||||
private:
|
|
||||||
void FlushEdits() {
|
|
||||||
hunk_.splice(hunk_.end(), hunk_removes_);
|
|
||||||
hunk_.splice(hunk_.end(), hunk_adds_);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Print a unified diff header for one hunk.
|
|
||||||
// The format is
|
|
||||||
// "@@ -<left_start>,<left_length> +<right_start>,<right_length> @@"
|
|
||||||
// where the left/right parts are omitted if unnecessary.
|
|
||||||
void PrintHeader(std::ostream* ss) const {
|
|
||||||
*ss << "@@ ";
|
|
||||||
if (removes_) {
|
|
||||||
*ss << "-" << left_start_ << "," << (removes_ + common_);
|
|
||||||
}
|
|
||||||
if (removes_ && adds_) {
|
|
||||||
*ss << " ";
|
|
||||||
}
|
|
||||||
if (adds_) {
|
|
||||||
*ss << "+" << right_start_ << "," << (adds_ + common_);
|
|
||||||
}
|
|
||||||
*ss << " @@\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t left_start_, right_start_;
|
|
||||||
size_t adds_, removes_, common_;
|
|
||||||
std::list<std::pair<char, const char*> > hunk_, hunk_adds_, hunk_removes_;
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace
|
|
||||||
|
|
||||||
// Create a list of diff hunks in Unified diff format.
|
|
||||||
// Each hunk has a header generated by PrintHeader above plus a body with
|
|
||||||
// lines prefixed with ' ' for no change, '-' for deletion and '+' for
|
|
||||||
// addition.
|
|
||||||
// 'context' represents the desired unchanged prefix/suffix around the diff.
|
|
||||||
// If two hunks are close enough that their contexts overlap, then they are
|
|
||||||
// joined into one hunk.
|
|
||||||
std::string CreateUnifiedDiff(const std::vector<std::string>& left,
|
|
||||||
const std::vector<std::string>& right,
|
|
||||||
size_t context) {
|
|
||||||
const std::vector<EditType> edits = CalculateOptimalEdits(left, right);
|
|
||||||
|
|
||||||
size_t l_i = 0, r_i = 0, edit_i = 0;
|
|
||||||
std::stringstream ss;
|
|
||||||
while (edit_i < edits.size()) {
|
|
||||||
// Find first edit.
|
|
||||||
while (edit_i < edits.size() && edits[edit_i] == kMatch) {
|
|
||||||
++l_i;
|
|
||||||
++r_i;
|
|
||||||
++edit_i;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find the first line to include in the hunk.
|
|
||||||
const size_t prefix_context = std::min(l_i, context);
|
|
||||||
Hunk hunk(l_i - prefix_context + 1, r_i - prefix_context + 1);
|
|
||||||
for (size_t i = prefix_context; i > 0; --i) {
|
|
||||||
hunk.PushLine(' ', left[l_i - i].c_str());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Iterate the edits until we found enough suffix for the hunk or the input
|
|
||||||
// is over.
|
|
||||||
size_t n_suffix = 0;
|
|
||||||
for (; edit_i < edits.size(); ++edit_i) {
|
|
||||||
if (n_suffix >= context) {
|
|
||||||
// Continue only if the next hunk is very close.
|
|
||||||
std::vector<EditType>::const_iterator it = edits.begin() + edit_i;
|
|
||||||
while (it != edits.end() && *it == kMatch) ++it;
|
|
||||||
if (it == edits.end() || (it - edits.begin()) - edit_i >= context) {
|
|
||||||
// There is no next edit or it is too far away.
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
EditType edit = edits[edit_i];
|
|
||||||
// Reset count when a non match is found.
|
|
||||||
n_suffix = edit == kMatch ? n_suffix + 1 : 0;
|
|
||||||
|
|
||||||
if (edit == kMatch || edit == kRemove || edit == kReplace) {
|
|
||||||
hunk.PushLine(edit == kMatch ? ' ' : '-', left[l_i].c_str());
|
|
||||||
}
|
|
||||||
if (edit == kAdd || edit == kReplace) {
|
|
||||||
hunk.PushLine('+', right[r_i].c_str());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Advance indices, depending on edit type.
|
|
||||||
l_i += edit != kAdd;
|
|
||||||
r_i += edit != kRemove;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!hunk.has_edits()) {
|
|
||||||
// We are done. We don't want this hunk.
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
hunk.PrintTo(&ss);
|
|
||||||
}
|
|
||||||
return ss.str();
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace edit_distance
|
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
// The string representation of the values received in EqFailure() are already
|
// The string representation of the values received in EqFailure() are already
|
||||||
|
@ -1379,8 +1138,7 @@ AssertionResult EqFailure(const char* lhs_expression,
|
||||||
const std::vector<std::string> rhs_lines =
|
const std::vector<std::string> rhs_lines =
|
||||||
SplitEscapedString(rhs_value);
|
SplitEscapedString(rhs_value);
|
||||||
if (lhs_lines.size() > 1 || rhs_lines.size() > 1) {
|
if (lhs_lines.size() > 1 || rhs_lines.size() > 1) {
|
||||||
msg << "\nWith diff:\n"
|
msg << "\nWith diff:\n" << CreateUnifiedDiff(lhs_lines, rhs_lines);
|
||||||
<< edit_distance::CreateUnifiedDiff(lhs_lines, rhs_lines);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -215,6 +215,7 @@ using testing::GTEST_FLAG(stream_result_to);
|
||||||
using testing::GTEST_FLAG(throw_on_failure);
|
using testing::GTEST_FLAG(throw_on_failure);
|
||||||
using testing::IsNotSubstring;
|
using testing::IsNotSubstring;
|
||||||
using testing::IsSubstring;
|
using testing::IsSubstring;
|
||||||
|
using testing::kMaxStackTraceDepth;
|
||||||
using testing::Message;
|
using testing::Message;
|
||||||
using testing::ScopedFakeTestPartResultReporter;
|
using testing::ScopedFakeTestPartResultReporter;
|
||||||
using testing::StaticAssertTypeEq;
|
using testing::StaticAssertTypeEq;
|
||||||
|
@ -234,16 +235,18 @@ using testing::internal::AlwaysTrue;
|
||||||
using testing::internal::AppendUserMessage;
|
using testing::internal::AppendUserMessage;
|
||||||
using testing::internal::ArrayAwareFind;
|
using testing::internal::ArrayAwareFind;
|
||||||
using testing::internal::ArrayEq;
|
using testing::internal::ArrayEq;
|
||||||
|
using testing::internal::CalculateOptimalEdits;
|
||||||
using testing::internal::CodePointToUtf8;
|
using testing::internal::CodePointToUtf8;
|
||||||
using testing::internal::CompileAssertTypesEqual;
|
using testing::internal::CompileAssertTypesEqual;
|
||||||
using testing::internal::CopyArray;
|
using testing::internal::CopyArray;
|
||||||
using testing::internal::CountIf;
|
using testing::internal::CountIf;
|
||||||
|
using testing::internal::CreateUnifiedDiff;
|
||||||
|
using testing::internal::EditType;
|
||||||
using testing::internal::EqFailure;
|
using testing::internal::EqFailure;
|
||||||
using testing::internal::FloatingPoint;
|
using testing::internal::FloatingPoint;
|
||||||
using testing::internal::ForEach;
|
using testing::internal::ForEach;
|
||||||
using testing::internal::FormatEpochTimeInMillisAsIso8601;
|
using testing::internal::FormatEpochTimeInMillisAsIso8601;
|
||||||
using testing::internal::FormatTimeInMillisAsSeconds;
|
using testing::internal::FormatTimeInMillisAsSeconds;
|
||||||
using testing::internal::GTestFlagSaver;
|
|
||||||
using testing::internal::GetCurrentOsStackTraceExceptTop;
|
using testing::internal::GetCurrentOsStackTraceExceptTop;
|
||||||
using testing::internal::GetElementOr;
|
using testing::internal::GetElementOr;
|
||||||
using testing::internal::GetNextRandomSeed;
|
using testing::internal::GetNextRandomSeed;
|
||||||
|
@ -252,6 +255,7 @@ using testing::internal::GetTestTypeId;
|
||||||
using testing::internal::GetTimeInMillis;
|
using testing::internal::GetTimeInMillis;
|
||||||
using testing::internal::GetTypeId;
|
using testing::internal::GetTypeId;
|
||||||
using testing::internal::GetUnitTestImpl;
|
using testing::internal::GetUnitTestImpl;
|
||||||
|
using testing::internal::GTestFlagSaver;
|
||||||
using testing::internal::ImplicitlyConvertible;
|
using testing::internal::ImplicitlyConvertible;
|
||||||
using testing::internal::Int32;
|
using testing::internal::Int32;
|
||||||
using testing::internal::Int32FromEnvOrDie;
|
using testing::internal::Int32FromEnvOrDie;
|
||||||
|
@ -259,6 +263,8 @@ using testing::internal::IsAProtocolMessage;
|
||||||
using testing::internal::IsContainer;
|
using testing::internal::IsContainer;
|
||||||
using testing::internal::IsContainerTest;
|
using testing::internal::IsContainerTest;
|
||||||
using testing::internal::IsNotContainer;
|
using testing::internal::IsNotContainer;
|
||||||
|
using testing::internal::kMaxRandomSeed;
|
||||||
|
using testing::internal::kTestTypeIdInGoogleTest;
|
||||||
using testing::internal::NativeArray;
|
using testing::internal::NativeArray;
|
||||||
using testing::internal::OsStackTraceGetter;
|
using testing::internal::OsStackTraceGetter;
|
||||||
using testing::internal::OsStackTraceGetterInterface;
|
using testing::internal::OsStackTraceGetterInterface;
|
||||||
|
@ -280,12 +286,6 @@ using testing::internal::TestResultAccessor;
|
||||||
using testing::internal::UInt32;
|
using testing::internal::UInt32;
|
||||||
using testing::internal::UnitTestImpl;
|
using testing::internal::UnitTestImpl;
|
||||||
using testing::internal::WideStringToUtf8;
|
using testing::internal::WideStringToUtf8;
|
||||||
using testing::internal::edit_distance::CalculateOptimalEdits;
|
|
||||||
using testing::internal::edit_distance::CreateUnifiedDiff;
|
|
||||||
using testing::internal::edit_distance::EditType;
|
|
||||||
using testing::internal::kMaxRandomSeed;
|
|
||||||
using testing::internal::kTestTypeIdInGoogleTest;
|
|
||||||
using testing::kMaxStackTraceDepth;
|
|
||||||
|
|
||||||
#if GTEST_HAS_STREAM_REDIRECTION
|
#if GTEST_HAS_STREAM_REDIRECTION
|
||||||
using testing::internal::CaptureStdout;
|
using testing::internal::CaptureStdout;
|
||||||
|
@ -3517,14 +3517,14 @@ TEST(EditDistance, TestCases) {
|
||||||
{__LINE__, "ABCD", "abcd", "////",
|
{__LINE__, "ABCD", "abcd", "////",
|
||||||
"@@ -1,4 +1,4 @@\n-A\n-B\n-C\n-D\n+a\n+b\n+c\n+d\n"},
|
"@@ -1,4 +1,4 @@\n-A\n-B\n-C\n-D\n+a\n+b\n+c\n+d\n"},
|
||||||
// Path finding.
|
// Path finding.
|
||||||
{__LINE__, "ABCDEFGH", "ABXEGH1", " -/ - +",
|
{__LINE__, "ABCDEFGH", "ABXEGH1", " /- - +",
|
||||||
"@@ -1,8 +1,7 @@\n A\n B\n-C\n-D\n+X\n E\n-F\n G\n H\n+1\n"},
|
"@@ -1,8 +1,7 @@\n A\n B\n-C\n-D\n+X\n E\n-F\n G\n H\n+1\n"},
|
||||||
{__LINE__, "AAAABCCCC", "ABABCDCDC", " -/ + / ",
|
{__LINE__, "AAAABCCCC", "ABABCDCDC", " -/ + / ",
|
||||||
"@@ -1,9 +1,9 @@\n-A\n A\n-A\n+B\n A\n B\n C\n+D\n C\n-C\n+D\n C\n"},
|
"@@ -1,9 +1,9 @@\n A\n-A\n-A\n+B\n A\n B\n C\n+D\n C\n-C\n+D\n C\n"},
|
||||||
{__LINE__, "ABCDE", "BCDCD", "- +/",
|
{__LINE__, "ABCDE", "BCDCD", "- /+",
|
||||||
"@@ -1,5 +1,5 @@\n-A\n B\n C\n D\n-E\n+C\n+D\n"},
|
"@@ -1,5 +1,5 @@\n-A\n B\n C\n D\n-E\n+C\n+D\n"},
|
||||||
{__LINE__, "ABCDEFGHIJKL", "BCDCDEFGJKLJK", "- ++ -- ++",
|
{__LINE__, "ABCDEFGHIJKL", "BGDCDEFGJKLJK", "- ++ -- ++",
|
||||||
"@@ -1,4 +1,5 @@\n-A\n B\n+C\n+D\n C\n D\n"
|
"@@ -1,4 +1,5 @@\n-A\n B\n+G\n+D\n C\n D\n"
|
||||||
"@@ -6,7 +7,7 @@\n F\n G\n-H\n-I\n J\n K\n L\n+J\n+K\n"},
|
"@@ -6,7 +7,7 @@\n F\n G\n-H\n-I\n J\n K\n L\n+J\n+K\n"},
|
||||||
{}};
|
{}};
|
||||||
for (const Case* c = kCases; c->left; ++c) {
|
for (const Case* c = kCases; c->left; ++c) {
|
||||||
|
@ -3542,6 +3542,57 @@ TEST(EditDistance, TestCases) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Tests that we can run CalculateOptimalEdits for a large vector, i.e. we can
|
||||||
|
// compute diffs for large strings.
|
||||||
|
TEST(EditDistance, LargeVectorWithDiffs) {
|
||||||
|
const int kSize = 300000;
|
||||||
|
std::vector<size_t> left;
|
||||||
|
std::vector<size_t> right;
|
||||||
|
std::vector<EditType> expected(kSize, testing::internal::kEditMatch);
|
||||||
|
|
||||||
|
left.reserve(kSize);
|
||||||
|
right.reserve(kSize);
|
||||||
|
for (int i = 0; i < kSize; ++i) {
|
||||||
|
// Make the contents of the vectors unique. This greatly speeds up
|
||||||
|
// the algorithm, since it doesn't spend time finding matches for
|
||||||
|
// different alignments.
|
||||||
|
left.push_back(i);
|
||||||
|
right.push_back(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < 10; ++i) {
|
||||||
|
right[i] = kSize + i;
|
||||||
|
expected[i] = testing::internal::kEditReplace;
|
||||||
|
right[kSize - i - 1] = kSize * 2 + i;
|
||||||
|
expected[kSize - i - 1] = testing::internal::kEditReplace;
|
||||||
|
}
|
||||||
|
size_t memory_usage;
|
||||||
|
EXPECT_EQ(CalculateOptimalEdits(left, right, &memory_usage), expected);
|
||||||
|
EXPECT_GT(memory_usage, kSize);
|
||||||
|
EXPECT_LT(memory_usage, kSize * 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tests that we can run CalculateOptimalEdits for two vectors N and M, where
|
||||||
|
// M = N plus additional junk at the end. The current algorithm only does O(M)
|
||||||
|
// "real" work in this case, but allocates some extra memory. We test that this
|
||||||
|
// is still fast enough for common cases, and we aren't allocating an
|
||||||
|
// excessive amount of extra memory.
|
||||||
|
TEST(EditDistance, LargeVectorWithTrailingJunk) {
|
||||||
|
const int kSize = 200000;
|
||||||
|
const int kAdditionalSize = 2000;
|
||||||
|
std::vector<size_t> left(kSize, 0);
|
||||||
|
std::vector<size_t> right(kSize + kAdditionalSize, 0);
|
||||||
|
std::vector<EditType> expected(kSize + kAdditionalSize,
|
||||||
|
testing::internal::kEditMatch);
|
||||||
|
for (int i = 0; i < kAdditionalSize; ++i) {
|
||||||
|
expected[i + kSize] = testing::internal::kEditAdd;
|
||||||
|
}
|
||||||
|
size_t memory_usage;
|
||||||
|
EXPECT_EQ(CalculateOptimalEdits(left, right, &memory_usage), expected);
|
||||||
|
EXPECT_GT(memory_usage, kSize);
|
||||||
|
EXPECT_LT(memory_usage, 6000000);
|
||||||
|
}
|
||||||
|
|
||||||
// Tests EqFailure(), used for implementing *EQ* assertions.
|
// Tests EqFailure(), used for implementing *EQ* assertions.
|
||||||
TEST(AssertionTest, EqFailure) {
|
TEST(AssertionTest, EqFailure) {
|
||||||
const std::string foo_val("5"), bar_val("6");
|
const std::string foo_val("5"), bar_val("6");
|
||||||
|
|
Loading…
Reference in New Issue
Block a user