MaxMind's GeoLite2 is a wonderful database and is very useful if you want to map IP addresses to countries.
To do this efficiently, I want to import it into a MySQL database with a scheme like this:
I remember writing an import script for the CSV database long time ago, but the CSV as you can download it today has a very difficult to understand format, at least to me:
network,geoname_id,registered_country_geoname_id,represented_country_geoname_id,is_anonymous_proxy,is_satellite_provider 1.0.0.0/24,2077456,2077456,,0,0 1.0.1.0/24,1814991,1814991,,0,0 1.0.2.0/23,1814991,1814991,,0,0 1.0.4.0/22,2077456,2077456,,0,0 1.0.8.0/21,1814991,1814991,,0,0 1.0.16.0/20,1861060,1861060,,0,0 1.0.32.0/19,1814991,1814991,,0,0 1.0.64.0/18,1861060,1861060,,0,0 1.0.128.0/17,1605651,1605651,,0,0
I'm really stuck at the basics here. What is the most efficient and easiest way to import the database from its CSV representation into MySQL?
It really doesn't seem possible to do this with a simple SQL script, so I've written one in C#. And since importing MySQL databases that are so big is not that simple, I implemented a direct INSERT INTO
into the script itself.
A table structure like the one on the sketch in the question is required for it to work.
using MySql.Data.MySqlClient;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
namespace GeoIPConvert
{
public static class Program
{
public static void Main(string[] args)
{
// https://dev.maxmind.com/geoip/geoip2/geolite2/
List<Country> countries = File.ReadAllLines("Countries.csv")
.Select(line => line.Split(','))
.Where(line => line[4] != "" && line[5] != "")
.Select((line, index) => new Country
{
ID = Convert.ToInt32(line[0]),
DatabaseID = index + 1,
Flag = line[4].ToLower(),
Name = line[5].Replace("\"", "")
})
.ToList();
List<IPRange> ipRanges = File.ReadAllLines("GeoIP.csv")
.Select(line => line.Split(','))
.Where(line => line[2] != "")
.Select(line => new IPRange
{
Country = countries.First(country => country.ID == Convert.ToInt32(line[2])),
From = ConvertCidrToRange(line[0]).Item1,
To = ConvertCidrToRange(line[0]).Item2,
})
.ToList();
//string sql =
// "INSERT INTO geoip_countries(Flag, Name) VALUES\r\n" +
// string.Join(",\r\n", countries.Select(country => "(\"" + country.Flag + "\", \"" + country.Name + "\")").ToArray()) + "\r\n" +
// "INSERT INTO geoip_ipranges(CountryID, `From`, `To`) VALUES\r\n" +
// string.Join(",\r\n", ipRanges.Select(iprange => "(\"" + iprange.Country.DatabaseID + "\", \"" + iprange.From + "\", \"" + iprange.To + "\")").ToArray());
//File.WriteAllText("Import.sql", sql);
using (MySqlConnection sql = new MySqlConnection("Server=localhost;Database=test_db;Uid=root;"))
{
sql.Open();
foreach (Country country in countries)
{
new MySqlCommand("INSERT INTO geoip_countries(Flag, Name) VALUES(\"" + country.Flag + "\", \"" + country.Name + "\")", sql).ExecuteNonQuery();
}
foreach (IPRange ipRange in ipRanges)
{
new MySqlCommand("INSERT INTO geoip_ipranges(CountryID, `From`, `To`) VALUES(\"" + ipRange.Country.DatabaseID + "\", \"" + ipRange.From + "\", \"" + ipRange.To + "\")", sql).ExecuteNonQuery();
Console.WriteLine(ipRange.To);
}
sql.Close();
}
}
private static Tuple<uint, uint> ConvertCidrToRange(string cidr)
{
string[] parts = cidr.Split('.', '/');
uint ipnum = Convert.ToUInt32(parts[0]) << 24 | Convert.ToUInt32(parts[1]) << 16 | Convert.ToUInt32(parts[2]) << 8 | Convert.ToUInt32(parts[3]);
uint mask = uint.MaxValue << (32 - Convert.ToInt32(parts[4]));
return Tuple.Create(ipnum & mask, ipnum | (mask ^ uint.MaxValue));
}
}
public class Country
{
public int ID { get; set; }
public int DatabaseID { get; set; }
public string Flag { get; set; }
public string Name { get; set; }
}
public class IPRange
{
public Country Country { get; set; }
public uint From { get; set; }
public uint To { get; set; }
}
}