I need a python script that reads some trees from a text file and puts them in an excel file. Go through each tree from the last node to the root and enter the data in a specified excel. The text file contains trees with nodes that contain information separated by semicolon.Each tree is separated by double space in the file. Start with the last node in the tree and enter the data up to the root node in Excel.
The script writes for each subnode in the Routing column, the part code of the parent, with the exception of the root node, which has an empty Routing.
The text file contains:
100201F, Part name, Part description, 1, False
├── C002L0, Part name, Part description, 1, False
├── V3fd1, Part name, Part description , 1, True
|-─ 1002011, Part name, Part description:, 1, True
├── TT001HU, Part name, Part description, 1, True
134545F,Part name, Part description, 1, False
├── ...
...
Here's the script I've managed to do so far, but it doesn't work as I expected:
import pandas as pd
import re
# define the column names for the excel file
column_names = ["Routing", "Part code", "Part name", "Part description", "Batch size","Is material"]
# read the trees from the text file
with open("trees.txt","r",encoding="utf-8") as file:
trees = file.read().strip().split("\n\n")
# create an empty dataframe to store the results
df = pd.DataFrame(columns=column_names)
# loop through each tree
for tree in trees:
# split the tree into nodes and reverse the order
nodes = tree.strip().split("\n")[::-1]
# loop through each node
for i, node in enumerate(nodes):
# split the node into its data
data = node.strip().split(";")
# add the data to a dictionary
row_data = {
"Routing": re.sub(r'[^a-zA-Z0-9]', '', data[0]) if i != len(nodes)-1 else "",
"Part code": re.sub(r'[^a-zA-Z0-9]', '', data[0]),
"Part name": data[1],
"Part description": data[2],
"Batch size": data[3],
"Is material": data[4]
}
# add the child node to the "Routing" column if the current node is not the root
if i < len(nodes)-1:
child_data = nodes[i+1].strip().split(";")
row_data["Routing"] = re.sub(r'[^a-zA-Z0-9]', '', child_data[0])
# add the row to the dataframe
df = df.append(row_data, ignore_index=True)
# write the dataframe to an Excel file
df.to_excel("trees.xlsx", index=False)
The script does not traverse the tree correctly. For example, for the code TT001HU, put the routing code 1002011 instead of C002L0.
I would like the excel to look like this:
Parent code Part name Part description Batch size Routing Is material
TT001HU Part name Part description 1 C002L0 True
1002011 Part name Part description 1 C002L0 True
V3fd1 Part name Part description 1 C002L0 True
C002L0 Part name Part description 1 100201F False
100201F Part name Part description 1 False
...
Let's say your tree file looks this way:
100201F;Part name;Part description;1;False
├── C002L0;Part name;Part description;1;False
├── V3fd1;Part name;Part description;1;True
|-─ 1002011;Part name;Part description;1;True
├── TT001HU;Part name;Part description;1;True
134545F;Part name;Part description;1;False
├── C002L1;Part name;Part description;1;False
├── V3fd2;Part name;Part description;1;True
├── 1002012;Part name;Part description;1;True
├── TT001HV;Part name;Part description;1;True
then the correct code to do this is
import pandas as pd
import re
column_names = ["Parent code", "Part name", "Part description", "Batch size", "Routing", "Is material"]
with open("tree.txt", "r", encoding="utf-8") as file:
trees = file.read().strip().split("\n\n")
df = pd.DataFrame(columns=column_names)
for tree in trees:
nodes = tree.strip().split("\n")[::-1]
for i, node in enumerate(nodes):
data = node.strip().split(";")
row_data = {
"Parent code": "" if i == 0 else re.sub(r'[^a-zA-Z0-9]', '', nodes[i-1].strip().split(";")[0]),
"Part name": data[1],
"Part description": data[2],
"Batch size": data[3],
"Routing": "" if i == len(nodes)-1 else re.sub(r'[^a-zA-Z0-9]', '', nodes[i+1].strip().split(";")[0]),
"Is material": data[4]
}
df = df.append(row_data, ignore_index=True)
df.to_excel("trees.xlsx", index=False)
which will give you
EDIT: Your particular case
import pandas as pd
import re
column_names = ["Parent code", "Part name", "Part description", "Batch size", "Routing", "Is material"]
with open("trees.txt","r",encoding="utf-8") as file:
trees = file.read().strip().split("\n\n")
df = pd.DataFrame(columns=column_names)
for tree in trees:
# split the tree into nodes and reverse the order
nodes = tree.strip().split("\n")[::-1]
parent = ""
for i, node in enumerate(nodes):
# split the node into its data
data = node.strip().split(", ")
row_data = {
"Parent code": parent,
"Part name": data[1],
"Part description": data[2],
"Batch size": data[3],
"Routing": re.sub(r'[^a-zA-Z0-9]', '', data[0]),
"Is material": data[4]
}
parent = row_data["Routing"]
df = df.append(row_data, ignore_index=True)
df.to_excel("output2.xlsx", index=False)