246 lines
8 KiB
Python
246 lines
8 KiB
Python
|
# Copyright 2003 Dave Abrahams
|
||
|
# Copyright 2001, 2002 Vladimir Prus
|
||
|
# Copyright 2012 Jurko Gospodnetic
|
||
|
# Distributed under the Boost Software License, Version 1.0.
|
||
|
# (See accompanying file LICENSE.txt or copy at
|
||
|
# https://www.bfgroup.xyz/b2/LICENSE.txt)
|
||
|
|
||
|
###############################################################################
|
||
|
#
|
||
|
# Based in part on an old Subversion tree.py source file (tools for comparing
|
||
|
# directory trees). See http://subversion.tigris.org for more information.
|
||
|
#
|
||
|
# Copyright (c) 2001 Sam Tobin-Hochstadt. All rights reserved.
|
||
|
#
|
||
|
# This software is licensed as described in the file COPYING, which you should
|
||
|
# have received as part of this distribution. The terms are also available at
|
||
|
# http://subversion.tigris.org/license-1.html. If newer versions of this
|
||
|
# license are posted there, you may use a newer version instead, at your
|
||
|
# option.
|
||
|
#
|
||
|
###############################################################################
|
||
|
|
||
|
from __future__ import print_function
|
||
|
|
||
|
import os
|
||
|
import os.path
|
||
|
import stat
|
||
|
import sys
|
||
|
|
||
|
|
||
|
class TreeNode:
    """
    Fundamental data type used to build file system tree structures.

    If CHILDREN is None, then the node represents a file. Otherwise, CHILDREN
    is a list of the nodes representing that directory's children.

    NAME is simply the name of the file or directory. CONTENTS holds the
    file's contents (if a file). MTIME is the node's modification timestamp;
    it starts out as 0 and is expected to be assigned by whoever builds the
    tree. PATH starts out equal to NAME and is prefixed with the parent's path
    once the node gets attached to a directory node using add_child().

    """

    def __init__(self, name, children=None, contents=None):
        # A node is either a directory (CHILDREN given) or a file (CONTENTS
        # possibly given) - never both at once.
        assert children is None or contents is None
        self.name = name
        self.mtime = 0
        self.children = children
        self.contents = contents
        self.path = name

    def add_child(self, newchild):
        """
        Attach NEWCHILD to this directory node.

        If no child named like NEWCHILD exists yet, NEWCHILD is appended and
        its path is set relative to this node. Otherwise the existing child is
        updated in place: for a file NEWCHILD its contents and modification
        time replace the existing ones, for a directory NEWCHILD each of its
        children is merged recursively into the existing child.

        """
        assert not self.is_file()
        existing = self.get_child(newchild.name)
        if existing is None:
            self.children.append(newchild)
            newchild.path = os.path.join(self.path, newchild.name)
        elif newchild.is_file():
            existing.contents = newchild.contents
            # Keep the timestamp in sync with the replaced contents, otherwise
            # the node would describe new data with a stale modification time.
            existing.mtime = newchild.mtime
            existing.path = os.path.join(self.path, newchild.name)
        else:
            for grandchild in newchild.children:
                existing.add_child(grandchild)

    def get_child(self, name):
        """
        If this node is a directory containing a child named NAME, return the
        first such child; else (including when this node is a file), return
        None.

        """
        if self.children is None:
            # File nodes have no children to look through.
            return None
        for child in self.children:
            if child.name == name:
                return child
        return None

    def is_file(self):
        """Return True if this node represents a file (has no child list)."""
        return self.children is None

    def pprint(self):
        """Print a short human readable description of this node."""
        print(" * Node name: %s" % self.name)
        print(" Path: %s" % self.path)
        print(" Contents: %s" % self.contents)
        if self.is_file():
            print(" Children: is a file.")
        else:
            print(" Children: %d" % len(self.children))
|
||
|
|
||
|
|
||
|
class TreeDifference:
    """
    Collects the differences found between two file system trees.

    Each attribute is a list of path strings: added_files, removed_files,
    modified_files and touched_files. Directory entries are recognized by a
    trailing "/" character.

    """

    def __init__(self):
        self.added_files = []
        self.removed_files = []
        self.modified_files = []
        self.touched_files = []

    def append(self, other):
        """Merge all differences recorded in OTHER into this instance."""
        self.added_files.extend(other.added_files)
        self.removed_files.extend(other.removed_files)
        self.modified_files.extend(other.modified_files)
        self.touched_files.extend(other.touched_files)

    def ignore_directories(self):
        """Removes directories from our lists of found differences."""
        # endswith() is used instead of indexing the last character so an
        # empty path string is simply kept instead of raising an IndexError.
        self.added_files = [x for x in self.added_files
            if not x.endswith("/")]
        self.removed_files = [x for x in self.removed_files
            if not x.endswith("/")]
        self.modified_files = [x for x in self.modified_files
            if not x.endswith("/")]
        self.touched_files = [x for x in self.touched_files
            if not x.endswith("/")]

    def pprint(self, file=None):
        """
        Write a four line summary of the recorded differences to FILE.

        FILE may be any object with a write() method. When it is not given,
        the sys.stdout object current at call time is used, so output
        redirection done after this module got imported is respected.

        """
        if file is None:
            file = sys.stdout
        file.write("Added files : %s\n" % self.added_files)
        file.write("Removed files : %s\n" % self.removed_files)
        file.write("Modified files: %s\n" % self.modified_files)
        file.write("Touched files : %s\n" % self.touched_files)

    def empty(self):
        """Return True if no differences have been recorded."""
        return not (self.added_files or self.removed_files or
            self.modified_files or self.touched_files)
|
||
|
|
||
|
|
||
|
def build_tree(path):
    """
    Build an in-memory tree describing the folder at PATH.

    PATH is normalized first and then handed over to the recursive worker,
    which walks everything below it. The worker's result is returned as is:
    the root tree node together with the greatest modification timestamp
    encountered under the folder.

    """
    normalized_path = os.path.normpath(path)
    return _handle_dir(normalized_path)
|
||
|
|
||
|
|
||
|
def tree_difference(a, b):
    """
    Return a TreeDifference describing how tree B differs from tree A.

    A and B are the root TreeNode instances of the two trees. The comparison
    starts with an empty parent path and with the root flag set, so the root
    node names themselves never show up in the reported paths.

    """
    return _do_tree_difference(a, b, parent_path="", root=True)
|
||
|
|
||
|
|
||
|
def _do_tree_difference(a, b, parent_path, root=False):
    """
    Recursive worker behind tree_difference().

    Compares node A (old state) with node B (new state). PARENT_PATH is the
    "/" terminated path prefix of their common parent, or an empty string.
    ROOT marks the top-level call, for which both nodes must be directories
    and their own names are left out of the reported paths.

    Returns a TreeDifference instance covering A, B and everything below.

    """
    if not root:
        assert a.name == b.name
        full_path = parent_path + a.name
    else:
        # Root node names are intentionally not listed.
        assert not parent_path
        assert not a.is_file()
        assert not b.is_file()
        full_path = ""

    diff = TreeDifference()
    a_is_file = a.is_file()
    b_is_file = b.is_file()

    # File on both sides - a content change takes precedence over a mere
    # timestamp change.
    if a_is_file and b_is_file:
        if a.contents != b.contents:
            diff.modified_files.append(full_path)
        elif a.mtime != b.mtime:
            diff.touched_files.append(full_path)
        return diff

    # A directory got replaced by a file - the whole old subtree is gone.
    if b_is_file:
        diff.removed_files.extend(_traverse_tree(a, parent_path))
        diff.added_files.append(full_path)
        return diff

    # A file got replaced by a directory - the whole new subtree is added.
    if a_is_file:
        diff.removed_files.append(full_path)
        diff.added_files.extend(_traverse_tree(b, parent_path))
        return diff

    # Directory on both sides - compare their children.
    child_prefix = full_path
    if child_prefix:
        child_prefix += "/"
    matched = []  # Nodes from B that have a same-named counterpart in A.
    for old_child in a.children:
        new_child = b.get_child(old_child.name)
        if new_child:
            matched.append(new_child)
            diff.append(
                _do_tree_difference(old_child, new_child, child_prefix))
        else:
            # Only the removed child itself gets listed, not its descendants.
            diff.removed_files.append(child_prefix + old_child.name)
    for new_child in b.children:
        if new_child not in matched:
            diff.added_files.extend(_traverse_tree(new_child, child_prefix))
    return diff
|
||
|
|
||
|
|
||
|
def _traverse_tree(t, parent_path):
    """
    Return a flat list with the full names of node T and all its descendants.

    PARENT_PATH must be either empty or terminated with "/". A file node
    yields just its own full name. A directory node yields its own full name
    with a trailing "/" followed by the names collected from each of its
    children in order.

    """
    assert not parent_path or parent_path[-1] == "/"
    node_name = parent_path + t.name
    if t.is_file():
        return [node_name]

    dir_prefix = node_name + "/"
    names = [dir_prefix]
    for child in t.children:
        names += _traverse_tree(child, dir_prefix)
    return names
|
||
|
|
||
|
|
||
|
def _get_text(path):
    """
    Return the raw contents of the file at PATH.

    The file is opened in binary mode, so no newline translation or text
    decoding takes place and the data is returned exactly as stored on disk
    (a bytes object under Python 3).

    """
    # The with statement closes the file even if reading raises an exception.
    with open(path, "rb") as fp:
        return fp.read()
|
||
|
|
||
|
|
||
|
def _handle_dir(path):
    """
    Recursive worker behind build_tree().

    PATH is an already normalized folder path. Creates a directory node for
    it, attaches one child node per regular file (holding the file's contents
    and modification time) and then one child node per subfolder (built
    recursively). Directory entries that are neither folders nor regular
    files are skipped.

    Returns a (node, max_mtime) pair where max_mtime is the greatest
    modification time seen for the folder itself or anything below it.

    """
    node = TreeNode(os.path.basename(path), children=[])
    node.mtime = os.stat(path).st_mtime
    max_mtime = node.mtime

    # Sort the folder's entries into subfolders and regular files.
    subdir_paths = []
    file_paths = []
    for entry_name in os.listdir(path):
        entry_path = os.path.join(path, entry_name)
        if os.path.isdir(entry_path):
            subdir_paths.append(entry_path)
            continue
        if os.path.isfile(entry_path):
            file_paths.append(entry_path)

    # Files get attached first, each with its contents and timestamp.
    for file_path in file_paths:
        file_contents = _get_text(file_path)
        file_node = TreeNode(os.path.basename(file_path),
            contents=file_contents)
        file_node.mtime = os.stat(file_path).st_mtime
        if file_node.mtime > max_mtime:
            max_mtime = file_node.mtime
        node.add_child(file_node)

    # Subfolders follow, each one built by a recursive call.
    for subdir_path in subdir_paths:
        subdir_node, subdir_max_mtime = _handle_dir(subdir_path)
        if subdir_max_mtime > max_mtime:
            max_mtime = subdir_max_mtime
        node.add_child(subdir_node)

    return node, max_mtime
|