sqlsql-serverdatabase-performancesqlgeographygeographic-distance

SQL Geometry VS decimal(8,6) Lat, Long Performance


I was looking into performance of selecting closest points within certain proximity to given coordinate.

Options are to ether use two decimal(8,6) - lat, long columns or single geography column and work with that.

I am only interested which is faster?


Solution

  • TL;DR Geography is ~10 times faster.

    Ok so I have set up test:

    Couple of tables one with id,lat,long (int, decimal(8,6),decimal(8,6)) other with id,coord (int, geography).

    Then insert 47k of random data.

    For indexing first table I used nonclustered Ascending index on lat,long with fill factor of 95. for second one GRIDS =(LEVEL_1 = LOW,LEVEL_2 = MEDIUM,LEVEL_3 = LOW,LEVEL_4 = LOW with fill factor 95.

    CREATE TABLE dbo.Temp
    (
    Id int NOT NULL IDENTITY (1, 1),
    Coord geography NOT NULL
    )  ON [PRIMARY]
     TEXTIMAGE_ON [PRIMARY]
    GO
    ALTER TABLE dbo.Temp ADD CONSTRAINT
        PK_Temp PRIMARY KEY CLUSTERED 
        (
        Id
        ) WITH( STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
    
    GO
    
    
    declare @i int =0
        declare @lat decimal(8,6) =0.0
          declare @long decimal(8,6) =0.0
      while (@i < 47000)
      begin
      set @lat =(select (0.9 -Rand()*1.8)*100)
     set @long =(select (0.9 -Rand()*1.8)*100)
        insert into Temp
      select geography::Point(@lat, @long,4326)
    
    
    set @i =@i+1
    
     end
    
    go
    
    
    CREATE SPATIAL INDEX [SpatialIndex_1] ON [dbo].Temp
    (
        [coord]
    )USING  GEOGRAPHY_GRID 
    WITH (GRIDS =(LEVEL_1 = LOW,LEVEL_2 = MEDIUM,LEVEL_3 = LOW,LEVEL_4 = LOW), 
    CELLS_PER_OBJECT = 16, PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = OFF, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 95) ON [PRIMARY]
    
    GO
    
    CREATE TABLE [dbo].[Temp2](
        [Id] [int] IDENTITY(1,1) NOT NULL,
        [Lat] [decimal](8, 6) NOT NULL,
        [Long] [decimal](8, 6) NOT NULL,
     CONSTRAINT [PK_Temp2] PRIMARY KEY CLUSTERED 
    (
        [Id] ASC
    )WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]
    ) ON [PRIMARY]
    
    GO
    
    
    declare @i int = 0
    declare @lat decimal(8,6)  = 0 
    declare @long decimal(8,6)  = 0
    
    while (@i < 47000)
    begin
    set @lat = (select (0.9 - (RAND()*1.8))*100)
    set @long = (select (0.9 - (RAND()*1.8))*100)
    
    insert into Temp2
    select @lat , @long
    
    set @i = @i +1
    end
    
    go
    CREATE NONCLUSTERED INDEX [Coord_IX] ON [dbo].[Temp2] 
    (
        [Lat] ASC,
        [Long] ASC
    )WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON, FILLFACTOR = 95) ON [PRIMARY]
    GO
    

    Then I ran couple of tests:

    First is for Lat,Long.

    declare @lat decimal(8,6) = 0.0,
     @lon decimal(8,6) = 0.0,
    @i int = 0,
    @start datetime = getdate()
    
    while(@i < 100)
    begin
    
    set @lat =   (select (0.9 - Rand()*1.8)*100)
    set @lon =  (select (0.9 - (RAND()*1.8))*100.0)
    
    DECLARE @lat_s FLOAT = SIN(@lat * PI() / 180),
            @lat_c FLOAT = COS(@lat * PI() / 180)
    
    
    SELECT DISTINCT top 1000 @lat, @lon, *
    FROM (
        SELECT
            lat,
            long,
            ((ACOS(@lat_s * SIN(lat * PI() / 180) + @lat_c * COS(lat * PI() / 180) * COS((@lon - long) * PI() / 180)) * 180 / PI()) * 60 * 1.1515) AS dist
        FROM dbo.Temp2
    ) t
    ORDER BY dist
    
    set @i= @i+1
    end
    print CONVERT(varchar,(getdate()-@start),108)
    go
    

    Second is for geography.

     DECLARE @g geography;
    
    
       declare @point nvarchar(50)  =''
     declare @i int =0,
         @lat decimal(8,6) =0.0,
           @long decimal(8,6) =0.0,
           @start datetime = getdate()
      while (@i < 100)
      begin
      set @lat =(select (0.9 -Rand()*1.8)*100)
     set @long =(select (0.9 -Rand()*1.8)*100)
     set @point = (select 'POINT('+CONVERT(varchar(10), @lat)+ '  ' +CONVERT(varchar(10), @long)+')')
     SET @g = geography::STGeomFromText(@point, 4326);
        SELECT TOP 1000
        @lat,
        @long,
            @g.STDistance(st.[coord]) AS [DistanceFromPoint (in meters)] 
        ,   st.[coord]
        ,   st.id
    FROM    Temp st 
    ORDER BY @g.STDistance(st.[coord]) ASC
    
    set @i =@i+1
    
     end
    print CONVERT(varchar,(getdate()-@start),108)
     go
    

    Results:

    For those who are wondering why geography has such poor performance here's execution plan - notice that it does not use spatial index, and takes ages to sort since row size is 4047 Bytes (against 25 Bytes in decimal). Trying to force index results in runtime error

    enter image description here

    P.S I also did one for flat surface but difference from spherical one is very small ~0.5s (comes back in 9.5-10.0 seconds which does seem to be slightly faster) still to have it all in one place here's script:

    print 'flat'
    declare @lat decimal(8,6) = 0.0,
     @lon decimal(8,6) = 0.0,
    @i int = 0,
    @start datetime = getdate()
    
    while(@i < 100)
    begin
    
    set @lat =   (select (0.9 - Rand()*1.8)*100)
    set @lon =  (select (0.9 - (RAND()*1.8))*100.0)
    
    SELECT DISTINCT top 1000 @lat, @lon, *
    FROM (
        SELECT
            lat,
            long,
            sqrt(power((@lat - lat),2) + (power((@lon - long),2))) AS dist
        FROM dbo.Temp2
    ) t
    
    ORDER BY dist
    
    set @i= @i+1
    end
    print CONVERT(varchar,(getdate()-@start),108)
    go
    

    UPDATE:

    After switching to SQL 2014 and forcing to use the index with 10M records:

    geography script used:

    DECLARE @g geography;
    declare @point nvarchar(50)  =''
    declare @i int =0,
            @lat decimal(8,6) =0.0,
            @long decimal(8,6) =0.0,
            @start datetime = getdate()
    set @lat =(select (0.9 -Rand()*1.8)*100)
    set @long =(select (0.9 -Rand()*1.8)*100)
    set @point = (select 'POINT('+CONVERT(varchar(10), @lat)+ '  ' 
                 +CONVERT(varchar(10), @long)+')')
    SET @g = geography::STGeomFromText(@point, 4326);
    
    SELECT TOP 1000
        @lat,
        @long,
            @g.STDistance(st.[coord]) AS [DistanceFromPoint (in meters)] 
        ,   st.[coord]
        ,   st.id
    FROM    Temp st with(index([SpatialIndex_1]))
    WHERE @g.STDistance(st.[coord])  IS NOT NULL
    ORDER BY @g.STDistance(st.[coord]) asc