Commit f0cf2e37 authored by olaf michaelis
Browse files

Add default uri prefix replacer

parent 69cb0227
import hashlib
import re
class sanitizer():
    """Replace the default URI prefix in the lines of one XML file.

    The replacement prefix is ``'https://'`` followed by the SHA-1 hex
    digest of the path component that directly follows ``/shared/`` in
    ``filename``. Only lines matching one of the patterns in
    ``things_to_replace`` are modified.
    """

    def __init__(self, content, filename):
        """Store the input lines and derive the replacement URI.

        Args:
            content: iterable of text lines to sanitize.
            filename: path of the file the lines came from; it must
                contain ``/shared/<name>/``.

        Raises:
            ValueError: if ``filename`` has no ``/shared/<name>/`` part.
        """
        self.content = content
        self.filename = filename
        # After this assignment the instance attribute ``new_uri`` holds the
        # computed string and shadows the method of the same name.
        self.new_uri = self.new_uri()
        self.default_uri_prefix = 'https://rdmorganiser.github.io/terms'
        # Regular expressions; raw strings keep ``\s`` from being treated
        # as an (invalid) string escape sequence.
        self.things_to_replace = [
            r'<uri_prefix',
            r'<catalog\sdc:uri',
            r'<question\sdc:uri',
            r'<questionset\sdc:uri',
            r'<section\sdc:uri',
            r'<option\sdc:uri',
            r'<optionset\sdc:uri',
            r'<condition\sdc:uri',
        ]

    def process(self):
        """Run every pattern over every line and store the result in ``content``."""
        new_content = []
        for line in self.content:
            for el in self.things_to_replace:
                line = self.replace_uri(el, line)
            new_content.append(line)
        self.content = new_content

    def hash_string(self, str):
        """Return the SHA-1 hex digest of the UTF-8 encoded text ``str``."""
        # The parameter name shadows the builtin but is kept so keyword
        # callers (``hash_string(str=...)``) continue to work.
        return hashlib.sha1(str.encode('utf-8')).hexdigest()

    def new_uri(self):
        """Build the replacement URI from the folder name below ``/shared/``.

        Raises:
            ValueError: if ``self.filename`` has no ``/shared/<name>/`` part.
        """
        match = re.search(r'(?<=\/shared\/).*?(?=\/)', self.filename)
        if match is None:
            raise ValueError(
                'Cannot derive URI, no "/shared/<name>/" in path: {!r}'.format(
                    self.filename
                )
            )
        return 'https://' + self.hash_string(match.group(0))

    def replace_uri(self, rxmatch, line):
        """Return ``line`` with the default prefix replaced if ``rxmatch`` matches.

        Args:
            rxmatch: regular expression deciding whether the line is touched.
            line: the text line to inspect.
        """
        if re.search(rxmatch, line):
            line = line.replace(self.default_uri_prefix, self.new_uri)
        return line
import os
import re
class util():
    """File helpers: locate XML files below a directory and read/write them."""

    def __init__(self, basedir):
        """Remember ``basedir`` and collect the XML files found below it."""
        self.basedir = basedir
        self.xml_files = self.detect_xml_files()

    def detect_xml_files(self):
        """Return the sorted paths of all ``*.xml`` files below ``basedir``.

        Files whose name starts with ``_new`` are skipped.
        """
        xml_files = []
        for root, _dirs, files in os.walk(self.basedir):
            for name in files:
                if name.endswith('.xml') and not name.startswith('_new'):
                    xml_files.append(os.path.join(root, name))
        return sorted(xml_files)

    def read_xml(self, filename):
        """Return the lines of ``filename`` without their line terminators.

        If the file cannot be opened the error is printed and an empty
        list is returned.
        """
        print('Read file ' + filename)
        try:
            filecontent = open(filename, 'r')
        except OSError as e:
            print(e)
            return []
        # The context manager closes the handle even if reading fails.
        with filecontent:
            return filecontent.read().splitlines()

    def write_xml(self, data, filename):
        """Write each item of ``data`` to ``filename`` followed by a newline."""
        print('Write file ' + filename)
        with open(filename, 'w') as fp:
            for line in data:
                fp.write(line + '\n')

    def output_filename(self, filename):
        """Return ``filename`` with ``_new_`` prefixed to its last component."""
        # os.path.split also copes with paths that have no directory part
        # and with files located directly in the root directory.
        folder, shortname = os.path.split(filename)
        return os.path.join(folder, '_new_' + shortname)
#!/usr/bin/python3
import os
import sys
from lib.sanitizer import sanitizer
from lib.util import util
if __name__ == '__main__':
    # The first command line argument names the directory to scan; without
    # one the current working directory is used.
    basedir = sys.argv[1] if len(sys.argv) > 1 else os.getcwd()

    utl = util(basedir)
    for filename in utl.xml_files:
        print('\nStart to process ' + filename)
        # Read, sanitize, then write the result next to the source file.
        san = sanitizer(utl.read_xml(filename), filename)
        san.process()
        utl.write_xml(san.content, utl.output_filename(filename))
    print('Done.')
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment