import http.client
import urllib.parse
def unshorten_url(url, max_redirects=10, timeout=10):
    """Follow HTTP redirects starting at *url* and return the final URL.

    A ``HEAD`` request is sent to each URL in turn. While the response has a
    3xx status and a ``Location`` header, that location becomes the next URL
    to probe, so chains of short URLs are resolved.

    Args:
        url: Absolute ``http://`` or ``https://`` URL to resolve.
        max_redirects: Maximum number of redirects to follow. Bounds the work
            done on redirect loops; when the limit is reached, the last URL
            obtained so far is returned.
        timeout: Socket timeout in seconds applied to each request.

    Returns:
        The first URL in the chain that does not redirect. URLs that cannot
        be probed over HTTP(S) (other scheme, or no host) are returned
        unchanged.

    Raises:
        OSError: If a connection cannot be established or times out.
        http.client.HTTPException: If a server sends a malformed response.
    """
    for _ in range(max_redirects):
        parsed = urllib.parse.urlparse(url)
        if parsed.scheme == "https":
            connection_class = http.client.HTTPSConnection
        elif parsed.scheme == "http":
            connection_class = http.client.HTTPConnection
        else:
            # Not something we can send a HEAD request to (mailto:, ftp:, ...).
            return url
        if not parsed.netloc:
            return url

        # An empty path would produce an invalid request line ("HEAD  HTTP/1.1").
        resource = parsed.path or "/"
        if parsed.query:
            resource += "?" + parsed.query

        connection = connection_class(parsed.netloc, timeout=timeout)
        try:
            connection.request("HEAD", resource)
            response = connection.getresponse()
            status = response.status
            location = response.getheader("Location")
        finally:
            connection.close()

        # Floor division: in Python 3, 301 / 100 is 3.01 and never equals 3.
        if status // 100 != 3 or not location:
            return url

        # Location may be relative; resolve it against the URL just requested.
        url = urllib.parse.urljoin(url, location)
    return url
# Example call with an ESCO occupation URI as the short URL; the return value
# is not captured here.
unshorten_url("http://data.europa.eu/esco/occupation/00030d09-2b3a-4efd-87cc-c4ea39d27c34")
Input: http://data.europa.eu/esco/occupation/00030d09-2b3a-4efd-87cc-c4ea39d27c34 (and yes, the same URL is returned unchanged).
Output URL that I need after unshortening: https://ec.europa.eu/esco/portal/occupation?uri=http%3A%2F%2Fdata.europa.eu%2Fesco%2Foccupation%2F00030d09-2b3a-4efd-87cc-c4ea39d27c34&conceptLanguage=en&full=true#&uri=http://data.europa.eu/esco/occupation/00030d09-2b3a-4efd-87cc-c4ea39d27c34
As you can see, I have two URLs: the short URL, which is my input, and the full URL. To produce the required output URL, I identified a pattern in a set of URLs of the same kind, wrote the following code, and achieved the required output.
# Build the ESCO portal URL for an occupation URI, following the pattern
# observed in known short/full URL pairs.
my_url = "http://data.europa.eu/esco/occupation/00030d09-2b3a-4efd-87cc-c4ea39d27c34"
# Portal prefix up to and including the percent-encoded occupation URI base.
# The "?" after "occupation" is required so "uri=..." is the query string.
a = "https://ec.europa.eu/esco/portal/occupation?uri=http%3A%2F%2Fdata.europa.eu%2Fesco%2Foccupation%2F"
# The occupation identifier is the last path segment of the short URL.
b = my_url.split("/")[-1]
URL = a + b + "&conceptLanguage=en&full=true#&uri=" + my_url
The output, i.e. the required full URL, is stored in URL: URL = "https://ec.europa.eu/esco/portal/occupation?uri=http%3A%2F%2Fdata.europa.eu%2Fesco%2Foccupation%2F00030d09-2b3a-4efd-87cc-c4ea39d27c34&conceptLanguage=en&full=true#&uri=http://data.europa.eu/esco/occupation/00030d09-2b3a-4efd-87cc-c4ea39d27c34"