I'm using IfcOpenShell to read an .ifc file, make some changes, and then write it to a new .ifc file. But IfcOpenShell does not write Unicode characters the same way it reads them.
I'm creating a script that adds a pset with properties to each IfcElement. The values of these properties are copied from existing properties, so basically I'm creating a pset that gathers chosen information in a single place. This worked great until the existing values contained Unicode (non-ASCII, UTF-8) characters. Such a value is read and decoded correctly (the right value is shown when printed), but IfcOpenShell does not write the Unicode back the same way it read it. I tried changing the encoding used in PyCharm, with no luck. I found similar posts elsewhere, but no fix. From what I've read, it has something to do with the Unicode encoder/decoder that IfcOpenShell uses, but I can't be sure.
def mk_pset():
    """Load the parameter table from the workbook at ``b`` and build the pset.

    Reads the active sheet of the workbook and fills the module-level globals:

    * ``pset_name``  - value of cell (row 2, column 1)
    * ``param_name`` - column 2 values, rows 2..max_row
    * ``param_type`` - column 3 values, rows 2..max_row
    * ``param_map``  - column 4 values, rows 2..max_row
    * ``max_row``    - the sheet's ``max_row``

    The collected types and names are printed together with their counts,
    and ``create_pset()`` is then called.
    """
    global param_name, param_type, max_row, param_map, pset_name

    sheet = load_workbook(b).active
    max_row = sheet.max_row

    param_name, param_type, param_map = [], [], []
    pset_name = sheet.cell(row=2, column=1).value

    # Row 1 is skipped; every following row contributes one entry per list.
    for row in range(2, max_row + 1):
        name, kind, source = (sheet.cell(row, col).value for col in (2, 3, 4))
        param_name.append(name)
        param_type.append(kind)
        param_map.append(source)

    print(param_type,' - ',len(param_type))
    print(param_name,' - ',len(param_name))
    create_pset()
def create_pset():
    """Add the ``pset_name`` property set to the model's elements and save a copy.

    Opens the IFC file at ``ifc_loc``. If a property set named ``pset_name``
    already exists, an error popup is shown and nothing is written.
    Otherwise, for every ``IfcElement`` that is not an ``IfcOpeningElement``:

    * each property reachable through the element's
      ``IfcRelDefinesByProperties`` relations whose name equals an entry of
      ``param_map`` is copied into a new ``IfcPropertySingleValue`` named after
      the matching ``param_name`` entry, keeping the source value's type;
    * a new ``IfcPropertySet`` holding the copies is created and attached to
      the element.

    The modified model is written next to the input, with ``.ifc`` replaced by
    ``_Edited.ifc`` in the path.

    Reads the globals ``ifc_loc``, ``pset_name``, ``param_name``,
    ``param_map``; appends to ``type_element``; sets the globals ``index``
    (left at 0) and ``antall_parametere`` (``len(param_name)``).
    """
    global antall_parametere
    global index

    ifcfile = ifcopenshell.open(ifc_loc)
    # Computed up front so the final write cannot hit an unbound name when the
    # model contains no element that qualifies.
    ifc_loc_edit = str(ifc_loc.replace(".ifc", "_Edited.ifc"))

    def create_guid():
        return ifcopenshell.guid.compress(uuid.uuid1().hex)

    owner_history = ifcfile.by_type("IfcOwnerHistory")[0]
    element = ifcfile.by_type("IfcElement")
    existing_pset_names = {pset.Name for pset in ifcfile.by_type("IfcPropertySet")}

    index = 0
    antall_parametere = len(param_name)

    if pset_name in existing_pset_names:
        sg.PopupError("Pset er allerede oprettet i modell.")
        return

    tot_elem = len(element)
    cur_elem = 1
    for e in element:
        start_time_e = time.time()
        # NOTE(review): opening elements are skipped entirely (no pset, no
        # progress line) - confirm this matches the intended nesting.
        if e.is_a() == 'IfcOpeningElement':
            continue
        type_element.append(e.is_a())

        property_values = []
        for rel_e in e.IsDefinedBy:
            if not rel_e.is_a('IfcRelDefinesByProperties'):
                continue
            # Positional access: presumably [5] is RelatingPropertyDefinition
            # and [4] its HasProperties list - verify against the schema.
            source_props = rel_e[5][4]
            if source_props is None:
                continue

            for target_name, source_name in zip(param_name, param_map):
                try:
                    for prop in source_props:
                        # prop[0] is compared as the property name,
                        # prop[2] is used as its (wrapped) value.
                        if prop[0] != source_name:
                            continue
                        try:
                            value = prop[2]
                            if value is not None:
                                property_values.append(
                                    ifcfile.createIfcPropertySingleValue(
                                        target_name,
                                        target_name,
                                        ifcfile.create_entity(value.is_a(), value[0]),
                                        None,
                                    )
                                )
                        except TypeError:
                            # Best effort: give up on this parameter for this pset.
                            break
                except (AttributeError, IndexError):
                    # Properties that do not have the single-value layout
                    # (e.g. a value without is_a(), or too few attributes)
                    # are skipped. The tuple is required: the expression
                    # "AttributeError and IndexError" only catches IndexError.
                    pass

        property_set = ifcfile.createIfcPropertySet(
            create_guid(), owner_history, pset_name, pset_name, property_values
        )
        ifcfile.createIfcRelDefinesByProperties(
            create_guid(), owner_history, None, None, [e], property_set
        )
        print(cur_elem, ' av ', tot_elem, ' elementer ferdig. ',int(tot_elem-cur_elem),'elementer gjenstår. Det tok ',format(time.time()-start_time_e),' sekunder')
        cur_elem += 1

    ifcfile.write(ifc_loc_edit)
I expect p_verdi written to be equal to the p_verdi read.
Original read (D\X2\00F8\X0\r):
#2921= IFCBUILDINGELEMENTPROXYTYPE('3QPADpsq71CHeCe7e3GDm5',#32,'D\X2\00F8\X0\r',$,$,$,$,'DA64A373-DB41-C131-1A0C-A07A0340DC05',$,.NOTDEFINED.);
Written (D\X4\000000F8\X0\r):
#2921=IFCBUILDINGELEMENTPROXYTYPE('3QPADpsq71CHeCe7e3GDm5',#32,'D\X4\000000F8\X0\r',$,$,$,$,'DA64A373-DB41-C131-1A0C-A07A0340DC05',$,.NOTDEFINED.);
Decoded to "Dør"
This also happens with hard (non-breaking) spaces:
('2\X2\00A0\X0\090')
prints correctly as:('2 090')
gets written:
('2\X4\000000A0\X0\090')
The written form is unreadable by the IFC software I use.
Not so much an answer as a workaround.
After more research I found that most IFC-reading software does not seem to support the \X4\ encoding, so I made a workaround with regex: basically, find every occurrence of \X4\0000 and replace it with \X2\. This has worked with all the special characters I've encountered so far. But, as stated, it is just a workaround that probably won't work for everyone.
def X4trans_2(target_file, temp_fil):
    r"""Copy *temp_fil* to *target_file*, rewriting ``\X4\`` escapes as ``\X2\``.

    Every ``\X4\<8 hex digits>...\X0\`` run whose code points all fit in 16
    bits (each 8-digit group starts with ``0000``) is rewritten as
    ``\X2\<4 hex digits>...\X0\``. Runs containing a code point above
    ``U+FFFF`` cannot be expressed with ``\X2\`` and are left unchanged.

    The result is first written to a ``_dec`` sibling of *target_file*, then
    *temp_fil* is deleted and the sibling replaces *target_file* (an existing
    *target_file* is overwritten).

    Args:
        target_file: Path of the file to produce.
        temp_fil: Path of the file to read; it is removed afterwards.
    """
    import os
    import re

    x4_run = re.compile(r'\\X4\\((?:[0-9A-Fa-f]{8})+)\\X0\\')

    def to_x2(match):
        digits = match.group(1)
        code_points = [digits[i:i + 8] for i in range(0, len(digits), 8)]
        # A run may hold several code points; each one must be shortened,
        # not just the first. Anything beyond 16 bits stays in X4 form.
        if not all(cp.startswith('0000') for cp in code_points):
            return match.group(0)
        return '\\X2\\' + ''.join(cp[4:] for cp in code_points) + '\\X0\\'

    # Only touch the extension, not a '.ifc' that appears earlier in the path.
    root, ext = os.path.splitext(target_file)
    dec_file = root + '_dec' + ext

    # Same codec on both sides so bytes round-trip regardless of the platform
    # default encoding.
    with open(temp_fil, 'r', encoding='cp1252') as src, \
            open(dec_file, 'w', encoding='cp1252') as dst:
        for line in src:
            dst.write(x4_run.sub(to_x2, line))

    # temp_fil is removed first: it may be the same path as target_file.
    os.remove(temp_fil)
    os.replace(dec_file, target_file)
It basically opens the IFC file as text, finds every \X4\ escape, replaces it with \X2\, and writes the file out again.