OK, After a bit of fiddling, I've tweaked a script from the site hyperlink in the second comment line. The purpose of the script is to clip/mask a LARGE raster (i.e. that cannot fit into a 32-bit Python 2.7.5 application) in GTiff format with a shapefile with multiple polygons (each with a "Name" record) and save the clipped rasters into a "clip" sub-directory, where each masked grid is named after each polygon's "Name". Like the original script, it assumes that the GTiff and shapefile are in the same projection and overlap correctly, and it processes ~100 masks/sec. However, I have modified that script to 1) work with a float-valued elevation grid, 2) only load the window of the larger grid into memory that is bounded by the current polygon (i.e. to reduce the memory load), 2) exports GTiff's that have the right (i.e. not shifted) geo-location and value.
HOWEVER, I having trouble with each masked grid having a what I'll call a "right-sided shadow". That is for every ~vertical line in a polygon where the right side of that line is outside of the given polygon, the masked grid will includes one raster cell to the right of that polygon-side.
Thus, my question is, what am I doing wrong that gives the masked grid a right-shadow?
I'll try to figure out how to post an example shapefile and tif so that others can reproduce. The code below also has comment lines for integer-valued satellite imagery (e.g. in as in the original code from geospatialpython.com).
# RasterClipper.py - clip a geospatial image using a shapefile
# http://geospatialpython.com/2011/02/clip-raster-using-shapefile.html
# http://gis.stackexchange.com/questions/57005/python-gdal-write-new-raster-using-projection-from-old
import os, sys, time, Tkinter as Tk, tkFileDialog
import operator
from osgeo import gdal, gdalnumeric, ogr, osr
import Image, ImageDraw
def SelectFile(req = 'Please select a file:', ft='txt'):
""" Customizable file-selection dialogue window, returns list() = [full path, root path, and filename]. """
try: # Try to select a csv dataset
foptions = dict(filetypes=[(ft+' file','*.'+ft)], defaultextension='.'+ft)
root = Tk.Tk(); root.withdraw(); fname = tkFileDialog.askopenfilename(title=req, **foptions); root.destroy()
return [fname]+list(os.path.split(fname))
except: print "Error: {0}".format(sys.exc_info()[1]); time.sleep(5); sys.exit()
def rnd(v, N): return int(round(v/float(N))*N)
def rnd2(v): return int(round(v))
# Raster image to clip
rname = SelectFile('Please select your TIF DEM:',ft='tif')
raster = rname[2]
print 'DEM:', raster
os.chdir(rname[1])
# Polygon shapefile used to clip
shp = SelectFile('Please select your shapefile of catchments (requires Name field):',ft='shp')[2]
print 'shp:', shp
qs = raw_input('Do you want to stretch the image? (y/n)')
qs = True if qs == 'y' else False
# Name of base clip raster file(s)
if not os.path.exists('./clip/'): os.mkdir('./clip/')
output = "/clip/clip"
# This function will convert the rasterized clipper shapefile
# to a mask for use within GDAL.
def imageToArray(i):
"""
Converts a Python Imaging Library array to a
gdalnumeric image.
"""
a=gdalnumeric.fromstring(i.tostring(),'b')
a.shape=i.im.size[1], i.im.size[0]
return a
def arrayToImage(a):
"""
Converts a gdalnumeric array to a
Python Imaging Library Image.
"""
i=Image.fromstring('L',(a.shape[1],a.shape[0]), (a.astype('b')).tostring())
return i
def world2Pixel(geoMatrix, x, y, N= 5, r=True):
"""
Uses a gdal geomatrix (gdal.GetGeoTransform()) to calculate
the pixel location of a geospatial coordinate
"""
ulX = geoMatrix[0]
ulY = geoMatrix[3]
xDist = geoMatrix[1]
yDist = geoMatrix[5]
rtnX = geoMatrix[2]
rtnY = geoMatrix[4]
if r:
pixel = int(round(x - ulX) / xDist)
line = int(round(ulY - y) / xDist)
else:
pixel = int(rnd(x - ulX, N) / xDist)
line = int(rnd(ulY - y, N) / xDist)
return (pixel, line)
def histogram(a, bins=range(0,256)):
"""
Histogram function for multi-dimensional array.
a = array
bins = range of numbers to match
"""
fa = a.flat
n = gdalnumeric.searchsorted(gdalnumeric.sort(fa), bins)
n = gdalnumeric.concatenate([n, [len(fa)]])
hist = n[1:]-n[:-1]
return hist
def stretch(a):
"""
Performs a histogram stretch on a gdalnumeric array image.
"""
hist = histogram(a)
im = arrayToImage(a)
lut = []
for b in range(0, len(hist), 256):
# step size
step = reduce(operator.add, hist[b:b+256]) / 255
# create equalization lookup table
n = 0
for i in range(256):
lut.append(n / step)
n = n + hist[i+b]
im = im.point(lut)
return imageToArray(im)
# Also load as a gdal image to get geotransform
# (world file) info
srcImage = gdal.Open(raster)
geoTrans_src = srcImage.GetGeoTransform()
#print geoTrans_src
pxs = int(geoTrans_src[1])
srcband = srcImage.GetRasterBand(1)
ndv = -9999.0
#ndv = 0
# Create an OGR layer from a boundary shapefile
shapef = ogr.Open(shp)
lyr = shapef.GetLayer()
minXl, maxXl, minYl, maxYl = lyr.GetExtent()
ulXl, ulYl = world2Pixel(geoTrans_src, minXl, maxYl)
lrXl, lrYl = world2Pixel(geoTrans_src, maxXl, minYl)
#poly = lyr.GetNextFeature()
for poly in lyr:
pnm = poly.GetField("Name")
# Convert the layer extent to image pixel coordinates
geom = poly.GetGeometryRef()
#print geom.GetEnvelope()
minX, maxX, minY, maxY = geom.GetEnvelope()
geoTrans = geoTrans_src
ulX, ulY = world2Pixel(geoTrans, minX, maxY)
lrX, lrY = world2Pixel(geoTrans, maxX, minY)
# Calculate the pixel size of the new image
pxWidth = int(lrX - ulX)
pxHeight = int(lrY - ulY)
# Load the source data as a gdalnumeric array
#srcArray = gdalnumeric.LoadFile(raster)
clip = gdalnumeric.BandReadAsArray(srcband, xoff=ulX, yoff=ulY, win_xsize=pxWidth, win_ysize=pxHeight)
#clip = srcArray[:, ulY:lrY, ulX:lrX]
# Create a new geomatrix for the image
geoTrans = list(geoTrans)
geoTrans[0] = minX
geoTrans[3] = maxY
# Map points to pixels for drawing the
# boundary on a blank 8-bit,
# black and white, mask image.
points = []
pixels = []
#geom = poly.GetGeometryRef()
pts = geom.GetGeometryRef(0)
for p in range(pts.GetPointCount()):
points.append((pts.GetX(p), pts.GetY(p)))
for p in points:
pixels.append(world2Pixel(geoTrans, p[0], p[1]))
rasterPoly = Image.new("L", (pxWidth, pxHeight), 1)
rasterize = ImageDraw.Draw(rasterPoly)
rasterize.polygon(pixels, 0)
mask = imageToArray(rasterPoly)
# Clip the image using the mask
#clip = gdalnumeric.choose(mask, (clip, 0)).astype(gdalnumeric.uint8)
clip = gdalnumeric.choose(mask, (clip, ndv)).astype(gdalnumeric.numpy.float)
# This image has 3 bands so we stretch each one to make them
# visually brighter
#for i in range(3):
# clip[i,:,:] = stretch(clip[i,:,:])
if qs: clip[:,:] = stretch(clip[:,:])
# Save ndvi as tiff
outputi = rname[1]+output+'_'+pnm+'.tif'
#gdalnumeric.SaveArray(clip, outputi, format="GTiff", prototype=srcImage)
driver = gdal.GetDriverByName('GTiff')
DataSet = driver.Create(outputi, pxWidth, pxHeight, 1, gdal.GDT_Float64)
#DataSet = driver.Create(outputi, pxWidth, pxHeight, 1, gdal.GDT_Int32)
DataSet.SetGeoTransform(geoTrans)
Projection = osr.SpatialReference()
Projection.ImportFromWkt(srcImage.GetProjectionRef())
DataSet.SetProjection(Projection.ExportToWkt())
# Write the array
DataSet.GetRasterBand(1).WriteArray(clip)
DataSet.GetRasterBand(1).SetNoDataValue(ndv)
# Save ndvi as an 8-bit jpeg for an easy, quick preview
#clip = clip.astype(gdalnumeric.uint8)
#gdalnumeric.SaveArray(clip, rname[1]+outputi+'.jpg', format="JPEG")
#print '\t\tSaved:', outputi, '-.tif, -.jpg'
print 'Saved:', outputi
del mask, clip, geom
del driver, DataSet
del shapef, srcImage, srcband
This functionality is already incorporated into the gdal command line utilities. Given your case, I don't see any reason why you want to do this yourself in Python.
You can loop over the geometries with OGR and for each one call gdalwarp
with the appropriate parameters.
import ogr
import subprocess
inraster = 'NE1_HR_LC_SR_W_DR\NE1_HR_LC_SR_W_DR.tif'
inshape = '110m_cultural\ne_110m_admin_0_countries_lakes.shp'
ds = ogr.Open(inshape)
lyr = ds.GetLayer(0)
lyr.ResetReading()
ft = lyr.GetNextFeature()
while ft:
country_name = ft.GetFieldAsString('admin')
outraster = inraster.replace('.tif', '_%s.tif' % country_name.replace(' ', '_'))
subprocess.call(['gdalwarp', inraster, outraster, '-cutline', inshape,
'-crop_to_cutline', '-cwhere', "'admin'='%s'" % country_name])
ft = lyr.GetNextFeature()
ds = None
I have used some example data from Natural Earth in the example above, for Brazil the cutout looks like:
If you only want to crop the image to the area of the polygon and dont mask anything outside you can transform the Shapefile so it contains the envelope of the polygons. Or simply loose the shapefile and call gdal_translate
with -projwin
to specify the area of interest.