I have a dataframe that contains 1681 evenly distributed 2D grid points. Each data point has its x and y coordinates, a label representing its category (or phase), and a color for that category.
x y label color
0 -40.0 -30.0 Fe #660066
1 -40.0 -29.0 Fe #660066
2 -40.0 -28.0 FeS #ff7f50
3 -40.0 -27.0 FeS #ff7f50
4 -40.0 -26.0 FeS #ff7f50
... ... ... ... ...
1676 0.0 6.0 Fe2(SO4)3 #8a2be2
1677 0.0 7.0 Fe2(SO4)3 #8a2be2
1678 0.0 8.0 Fe2(SO4)3 #8a2be2
1679 0.0 9.0 Fe2(SO4)3 #8a2be2
1680 0.0 10.0 Fe2(SO4)3 #8a2be2
[1681 rows x 4 columns]
I want to generate a polygon diagram that shows the linear boundary of each category (in my case also known as a "phase diagram"). Sor far I can only show this kind of diagram in a simple scatter plot like this:
import matplotlib.pyplot as plt
import pandas as pd
plt.figure(figsize=(8., 8.))
for color in df.color.unique():
df_color = df[df.color==color]
plt.scatter(
x=df_color.x,
y=df_color.y,
c=color,
s=100,
label=df_color.label.iloc[0]
)
plt.xlim([-40., 0.])
plt.ylim([-30., 10.])
plt.xlabel('Log pO2(g)')
plt.ylabel('Log pSO2(g)')
plt.legend(bbox_to_anchor=(1.05, 1.))
plt.show()
However, what I want is a phase diagram with clear linear boundaries that looks something like this:
Is there any way I can generate such phase diagram using matplotlib
? Note that the boundary is not deterministic, especially when the grid points are not dense enough. Hence there needs to be some kind of heuristics, for example the boundary line should always be in the middle of two neighboring points with different categories. I imagine there will be some sort of line fitting or interpolation needed, and matplotlib.patches.Polygon
is probably useful here.
For easy testing, I attach a code snippet for generating the data, but the polygon information shown below are not supposed to be used for generating the phase diagram
import numpy as np
import pandas as pd
from shapely.geometry import Point, Polygon
labels = ['Fe', 'Fe3O4', 'FeS', 'Fe2O3', 'FeS2', 'FeSO4', 'Fe2(SO4)3']
colors = ['#660066', '#b6fcd5', '#ff7f50', '#ffb6c1', '#c6e2ff', '#d3ffce', '#8a2be2']
polygons = []
polygons.append(Polygon([(-26.7243,-14.7423), (-26.7243,-30.0000), (-40.0000,-30.0000),
(-40.0000,-28.0181)]))
polygons.append(Polygon([(-18.1347,-0.4263), (-16.6048,1.6135), (-16.6048,-30.0000),
(-26.7243,-30.0000), (-26.7243,-14.7423), (-18.1347,-0.4263)]))
polygons.append(Polygon([(-18.1347,-0.4263), (-26.7243,-14.7423),
(-40.0000,-28.0181), (-40.0000,-22.2917), (-18.1347,-0.4263)]))
polygons.append(Polygon([(0.0000,-20.2615), (0.0000,-30.0000), (-16.6048,-30.0000),
(-16.6048,1.6135), (-16.5517,1.6865), (-6.0517,-0.9385), (0.0000,-3.9643)]))
polygons.append(Polygon([(-14.2390,10.0000), (-14.5829,7.5927), (-16.5517,1.6865),
(-16.6048,1.6135), (-18.1347,-0.4263), (-40.0000,-22.2917), (-40.0000,10.0000)]))
polygons.append(Polygon([(-6.0517,-0.9385), (-16.5517,1.6865), (-14.5829,7.5927),
(-6.0517,-0.9385)]))
polygons.append(Polygon([(0.0000,-3.9643), (-6.0517,-0.9385), (-14.5829,7.5927),
(-14.2390,10.0000), (0.0000,10.0000)]))
x_grid = np.arange(-40., 0.01, 1.)
y_grid = np.arange(-30., 10.01, 1.)
xy_grid = np.array(np.meshgrid(x_grid, y_grid)).T.reshape(-1, 2).tolist()
data = []
for coords in xy_grid:
point = Point(coords)
for i, poly in enumerate(polygons):
if poly.buffer(1e-3).contains(point):
data.append({
'x': point.x,
'y': point.y,
'label': labels[i],
'color': colors[i]
})
break
df = pd.DataFrame(data)
I am not sure if you can easily get a representation with contiguous polygons, however you could easily get the bounding polygon from a set of points using shapely.convex_hull
:
import shapely
import matplotlib.pyplot as plt
f, ax = plt.subplots(figsize=(8, 8))
for (name, color), coords in df.groupby(['label', 'color'])[['x', 'y']]:
polygon = shapely.convex_hull(shapely.MultiPoint(coords.to_numpy()))
ax.fill(*polygon.exterior.xy, color=color)
ax.annotate(name, polygon.centroid.coords[0],
ha='center', va='center')
If you want the shapely polygons:
polygons = {k: shapely.convex_hull(shapely.MultiPoint(g.to_numpy()))
for k, g in df.groupby(['label', 'color'])[['x', 'y']]}
Output:
To have contiguous polygons you can use the same strategy after adding points with a greater density and assigning them to their closest counterpart with a KDTree
:
from scipy.spatial import KDTree
# interpolate points on the initial polygons
polygons = {k: shapely.convex_hull(shapely.MultiPoint(g.to_numpy()))
for k, g in df.groupby('label')[['x', 'y']]}
def interp_ext(shape):
try:
return np.c_[shape.xy].T
except NotImplementedError:
pass
e = shape.exterior if hasattr(shape, 'exterior') else shape
points = e.interpolate(np.linspace(0, e.length, 1000))
return np.c_[Polygon(points).exterior.xy].T
df2 = (pd.DataFrame([(l, *interp_ext(p)) for l, p in polygons.items()],
columns=['label', 'x', 'y'])
.merge(df[['label', 'color']], on='label')
.explode(['x', 'y'])
)
# get bounding values
xmin, ymin, xmax, ymax = df[['x', 'y']].agg(['min', 'max']).values.ravel()
# create a grid with a higher density (here 10x)
Xs = np.arange(xmin, xmax, 0.1)
Ys = np.arange(ymin, ymax, 0.1)
Xs, Ys = (x.ravel() for x in np.meshgrid(Xs, Ys))
grid = np.c_[Xs, Ys]
# indentify closest reference point
_, idx = KDTree(df2[['x', 'y']]).query(grid)
# create new DataFrame with labels/colors
df3 = pd.DataFrame(np.c_[grid, df2[['label', 'color']].to_numpy()[idx]],
columns=['x', 'y', 'label', 'color']
)
# plot
f, ax = plt.subplots(figsize=(8, 8))
for (name, color), coords in df3.groupby(['label', 'color'])[['x', 'y']]:
polygon = shapely.convex_hull(shapely.MultiPoint(coords.to_numpy()))
ax.fill(*polygon.exterior.xy, color=color)
ax.annotate(name, polygon.centroid.coords[0],
ha='center', va='center')
Output:
Another, faster, option could be to use a Voronoi diagram based on the original shapes. I found a library (voronoi-diagram-for-polygons
) that does this but requires GeoPandas:
import geopandas as gpd
from longsgis import voronoiDiagram4plg
from shapely import Polygon, convex_hull, coverage_union
# create the initial convex hulls
tmp = (df.groupby(['label', 'color'])
.apply(lambda x: convex_hull(Polygon(x[['x', 'y']].to_numpy())))
.reset_index(name='geometry')
)
# convert to geodataframe
gdf = gpd.GeoDataFrame(tmp, geometry='geometry')
# Split using a Voronoi diagram
mask = Polygon([(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin)])
tmp = voronoiDiagram4plg(gdf, mask, densify=True)
# plot
tmp.plot(color=gdf['color'])
Output: