I am using ElementTree to accumulate the file information of a particular directory into an XML file, and I am writing a script that checks the differences between records.
XML has the following format:
<root>
  <record author="pcname" time="2021-11-27">
    <file file_name="a.txt" md5="148991"/>
    <file file_name="b.txt" md5="148992"/>
    <file file_name="c.txt" md5="148993"/>
  </record>
  <record author="pcname" time="2021-11-28">
    <file file_name="a.txt" md5="148991"/>
    <file file_name="b.txt" md5="148992"/>
    <file file_name="c.txt" md5="148993"/>
    <file file_name="new.txt" md5="148993"/>
  </record>
</root>
I wrote the following code to take the difference between the latest Record and the previous Record tag.
(HistoryXml is my newly defined class name.)
@staticmethod
def __existSameFileName(files: list[Element], name_value: str) -> bool:
    """Return whether any element in *files* carries the given file name.

    Args:
        files: ``<file>`` elements to search.
        name_value: Value to compare against each element's ``file_name``
            attribute.

    Returns:
        True if at least one element's ``file_name`` equals *name_value*,
        otherwise False (including when *files* is empty).
    """
    return any(entry.get('file_name') == name_value for entry in files)
@staticmethod
def __diffDeleted(prev_files: list[Element], last_files: list[Element]) -> None:
    """Print ``deleted:<name>`` for each previous file missing from the latest record.

    Args:
        prev_files: ``<file>`` elements of the previous record.
        last_files: ``<file>`` elements of the latest record.
    """
    for prev_file in prev_files:
        prev_file_name = prev_file.get('file_name')
        # The double-underscore name is mangled by Python, so this lookup
        # only resolves while the method lives inside the HistoryXml class body.
        if not HistoryXml.__existSameFileName(last_files, prev_file_name):
            # Present in the previous record but absent from the latest one.
            print('deleted:' + prev_file_name)
@staticmethod
def __diffNew(prev_files: list[Element], last_files: list[Element]) -> None:
    """Print ``new:<name>`` for each latest file missing from the previous record.

    Args:
        prev_files: ``<file>`` elements of the previous record.
        last_files: ``<file>`` elements of the latest record.
    """
    for last_file in last_files:
        file_name = last_file.get('file_name')
        # The double-underscore name is mangled by Python, so this lookup
        # only resolves while the method lives inside the HistoryXml class body.
        if not HistoryXml.__existSameFileName(prev_files, file_name):
            # Present in the latest record but absent from the previous one.
            print('new:' + file_name)
def diffLast(self) -> None:
    """Print the files added and deleted between the two most recent records.

    Reads the ``record`` children of ``self.__root``, takes the last two,
    and reports new files first, then deleted files. Does nothing when
    fewer than two records exist, because there is no previous record to
    compare against.
    """
    record_list = self.__root.findall('record')
    if len(record_list) < 2:
        # Indexing [-2] would raise IndexError; there is nothing to diff yet.
        return
    prev_record, last_record = record_list[-2:]  # previous data, latest data
    prev_files = prev_record.findall('file')
    last_files = last_record.findall('file')
    HistoryXml.__diffNew(prev_files=prev_files, last_files=last_files)
    HistoryXml.__diffDeleted(prev_files=prev_files, last_files=last_files)
The expected output for the XML above is only "new:new.txt", but in fact both "new:new.txt" and "deleted:new.txt" are printed.
To investigate, I wrote the following test code, and, as suspected, 'newfile found in prev!' is printed.
def diffLast(self) -> None:
    """Debugging variant: report which of the last two records contain 'new.txt'.

    Reads the ``record`` children of ``self.__root`` and checks the previous
    and the latest record separately. Does nothing when fewer than two
    records exist.
    """
    record_list = self.__root.findall('record')
    if len(record_list) < 2:
        # Indexing [-2] would raise IndexError; there is nothing to inspect.
        return
    prev_record, last_record = record_list[-2:]
    prev_files = prev_record.findall('file')
    last_files = last_record.findall('file')
    if HistoryXml.__existSameFileName(prev_files, 'new.txt'):
        print('newfile found in prev!')
    if HistoryXml.__existSameFileName(last_files, 'new.txt'):
        print('newfile found in last!')
All of these are static methods, so I tried replacing them with plain functions (the contents have not been changed). The result is just as expected (only new:new.txt is output).
import xml.etree.ElementTree as ET
def existSameFileName(files, name_value):
    """Return whether any element in *files* carries the given file name.

    Args:
        files: Iterable of ``<file>`` elements to search.
        name_value: Value to compare against each element's ``file_name``
            attribute.

    Returns:
        True if at least one element's ``file_name`` equals *name_value*,
        otherwise False (including when *files* is empty).
    """
    return any(entry.get('file_name') == name_value for entry in files)
def diffDeleted(prev_files, last_files):
    """Print ``deleted:<name>`` for each previous file missing from the latest record.

    Args:
        prev_files: ``<file>`` elements of the previous record.
        last_files: ``<file>`` elements of the latest record.
    """
    # Collect the latest names once so each membership test is a set lookup
    # instead of a rescan of last_files.
    last_names = {last_file.get('file_name') for last_file in last_files}
    for prev_file in prev_files:
        prev_file_name = prev_file.get('file_name')
        if prev_file_name not in last_names:
            # Present in the previous record but absent from the latest one.
            print('deleted:' + prev_file_name)
def diffNew(prev_files, last_files):
    """Print ``new:<name>`` for each latest file missing from the previous record.

    Args:
        prev_files: ``<file>`` elements of the previous record.
        last_files: ``<file>`` elements of the latest record.
    """
    # Collect the previous names once so each membership test is a set lookup
    # instead of a rescan of prev_files.
    prev_names = {prev_file.get('file_name') for prev_file in prev_files}
    for last_file in last_files:
        file_name = last_file.get('file_name')
        if file_name not in prev_names:
            # Present in the latest record but absent from the previous one.
            print('new:' + file_name)
def diffLast(root):
    """Print the files added and deleted between the two most recent records.

    Args:
        root: Element whose direct ``record`` children are compared; the
            last child is treated as the latest record and the one before
            it as the previous record.

    Does nothing when fewer than two records exist, because there is no
    previous record to compare against.
    """
    record_list = root.findall('record')
    if len(record_list) < 2:
        # Indexing [-2] would raise IndexError; there is nothing to diff yet.
        return
    prev_record, last_record = record_list[-2:]  # previous data, latest data
    prev_files = prev_record.findall('file')
    last_files = last_record.findall('file')
    diffNew(prev_files=prev_files, last_files=last_files)
    diffDeleted(prev_files=prev_files, last_files=last_files)
if __name__ == '__main__':
    # Sample history with two records; the latest one adds new.txt.
    # Attributes must be separated by whitespace or the XML parser rejects
    # the document as not well-formed.
    xml_text = '''
<root>
  <record author="pcname" time="2021-11-27">
    <file file_name="a.txt" md5="148991"/>
    <file file_name="b.txt" md5="148992"/>
    <file file_name="c.txt" md5="148993"/>
  </record>
  <record author="pcname" time="2021-11-28">
    <file file_name="a.txt" md5="148991"/>
    <file file_name="b.txt" md5="148992"/>
    <file file_name="c.txt" md5="148993"/>
    <file file_name="new.txt" md5="148993"/>
  </record>
</root>
'''.strip()
    root = ET.fromstring(xml_text)
    diffLast(root)
Run Results
new:new.txt
© 2024 OneMinuteCode. All rights reserved.