I have a large .tif file (~24 Gb, more, than RAM could store)
(Similar question: https://rasterio.readthedocs.io/en/latest/topics/masking-by-shapefile.html)
(Method from library: https://rasterio.readthedocs.io/en/latest/topics/masking-by-shapefile.html)
But I'm searching another faster solution. Maybe I should create numpy array with zeros and add polygon areas into that array (it shouldn't be better, than with rasterio)
Here is my code to process polygons with longitude and latitude.
import rasterio
import geopandas as gpd
from tqdm import tqdm
import os
def generate_mask(raster_path, shape_path, output_path):
gdf = gpd.read_file(shape_path)
with rasterio.open(raster_path) as src:
for num in tqdm(sorted(gdf['class'].unique())):
if os.path.isfile(f'{path}/masks/{num}.tif'):
continue
shapes = gdf.loc[(gdf['class'] == num)]['geometry']
file_name = f'{num}.tif'
for shp in shapes:
bound = rasterio.features.bounds(shp)
bbox = rasterio.windows.from_bounds(*bound, src.transform)
window_transform = rasterio.windows.transform(window=bbox, transform=src.transform)
img_crop = src.read([1,2,3], window=bbox)
print(f'Image read')
mask = rasterio.features.geometry_mask([shp],out_shape=(img_crop.shape[1], img_crop.shape[2]),
transform=window_transform, invert=True)
out_image, out_transform = rasterio.mask.mask(src, shp, crop=True, filled=True)
out_meta = src.meta
print(f'Mask {fn} is done')
with rasterio.open(f'{output_path}{file_name}', "w") as dest:
dest.write(out_image)
print(f'Mask is written')`
The problem here, that it could process one polygon at a time, moreover if I process all shapes at a time it time more than 3 hours to make mask. (It's too long, server usually kill process.)
I solved problem using osgeo.gdal.Rasterize()
(see documentation: https://gdal.org/api/python/osgeo.gdal.html)
It handles large files way better than analogs (~3-5 min per one mask)
Here is my code:
from osgeo import gdal
import os
def gdal_generate_mask(raster_path, shape_path, output_path):
# open the raster layer and get its relevant properties
raster_ds = gdal.Open(raster_path, gdal.GA_ReadOnly)
xSize = raster_ds.RasterXSize
ySize = raster_ds.RasterYSize
geotransform = raster_ds.GetGeoTransform()
projection = raster_ds.GetProjection()
# create the target layer (1 band binary)
gdf = gpd.read_file(shape_path)
for num in tqdm(sorted(gdf['class'].unique())):
driver = gdal.GetDriverByName('GTiff')
target_ds = driver.Create(f'{output_path}{num}.tif', xSize, ySize, bands=1, eType = gdal.GDT_Byte, options = ["COMPRESS=DEFLATE"])
target_ds.SetGeoTransform(geotransform)
target_ds.SetProjection(projection)
# create temp file for vector layer gdal Rasterize
vector_layer = f'/home/username/GeoProj/tmp_shp/{num}.shp'
if not os.path.isfile(vector_layer):
gdf[gdf['class'] == num].to_file(vector_layer)
# rasterize the vector layer into the target one
ds = gdal.Rasterize(target_ds, vector_layer, burnValues=[1])
P.S. I should seek an answer here: https://gis.stackexchange.com/