I've written a command for my Discord bot that looks up a Wikipedia page from a search string:
import requests
import json
import discord
from datetime import datetime, timezone
def getWiki(search):
    """Look up the best-matching English Wikipedia page for a search string.

    Runs a MediaWiki ``generator=search`` query and builds a Discord embed
    from the top-ranked result.

    Args:
        search: Free-text search terms, sent to the API as ``gsrsearch``.

    Returns:
        A dict with the keys ``'title'``, ``'summary'`` (the intro extract),
        ``'embed'`` (a ``discord.Embed``) and, only when the page has a lead
        image, ``'imageUrl'``.

    Raises:
        KeyError: If the search returns no page, or the selected page lacks
            an expected field such as ``'extract'`` or ``'revisions'``.
        requests.RequestException: If the HTTP request fails or times out.
    """
    wikipediaLogo = (
        'https://upload.wikimedia.org/wikipedia/commons/thumb/a/a7/'
        'Wikipedia_logo_v3.svg/2048px-Wikipedia_logo_v3.svg.png'
    )
    data = {
        'action': 'query',
        'format': 'json',
        'formatversion': 2,
        'inprop': 'url',
        'prop': 'extracts|pageimages|revisions|info',
        'exintro': True,
        'explaintext': True,
        'piprop': 'original',
        'rvprop': 'timestamp',
        'generator': 'search',
        'gsrsearch': search,
    }
    # A timeout keeps a stalled connection from blocking the caller forever.
    response = requests.get(
        'https://en.wikipedia.org/w/api.php', params=data, timeout=10
    )
    jsonData = json.loads(response.text)

    # When the search has no hits the response carries no 'query' object.
    pages = jsonData.get('query', {}).get('pages')
    if not pages:
        raise KeyError(f'No Wikipedia page found for {search!r}')

    # Generator results are not returned in relevance order; each page's
    # 'index' holds its rank in the search results, so the lowest index is
    # the actual top hit. Pages without an index are ranked last.
    pageData = min(pages, key=lambda page: page.get('index', float('inf')))

    title = pageData['title']
    summary = pageData['extract']
    pageUrl = pageData.get('fullurl', '')

    # The API reports the revision time as an ISO 8601 UTC string.
    timestamp = datetime.strptime(
        pageData['revisions'][0]['timestamp'], "%Y-%m-%dT%H:%M:%SZ"
    ).replace(tzinfo=timezone.utc)

    embed = discord.Embed(
        description=summary, timestamp=timestamp, colour=0xeeeeee
    )
    embed.set_author(name=title, url=pageUrl, icon_url=wikipediaLogo)
    embed.set_footer(text='Dernière révision')

    wikiObject = {
        'title': title,
        'summary': summary,
        'embed': embed,
    }

    # The lead image is optional; only attach it when the API returned one.
    imageUrl = (pageData.get('original') or {}).get('source')
    if imageUrl:
        embed.set_thumbnail(url=imageUrl)
        wikiObject['imageUrl'] = imageUrl
    return wikiObject
However, the results are very strange. For example, when I search for "Kim Kardashian" (which is the exact title of her page), it returns the page for Robert Kardashian... Is there something I can do about it?
`jsonData['query']['pages']` is ordered by page ID, not by search relevance. This is a limitation of using generators. You need to use each page's `index` property to find the actual first search result:
# Each page's 'index' is its rank in the search results; ordering by it
# puts the most relevant hit first.
def _search_rank(page):
    return page.get('index')


results = jsonData['query']['pages']
results.sort(key=_search_rank)
pageData = results[0]