I am trying to parse a website with PyQt5. My problem is that the page opens a new tab not through a link but through a button with an onclick handler, so when I use the linkClicked signal nothing happens. The generated tab is the one I want to parse.
All the examples I find online open tabs through links, so I can't get past this.
P.S. I know QtWebKit is deprecated, but I must use it to stay compatible with Windows XP.
What I have:
from PyQt5 import QtCore, QtGui, QtWidgets
from PyQt5.QtCore import QUrl
from PyQt5.QtWidgets import QApplication, QWidget
from PyQt5.QtWebKitWidgets import QWebView , QWebPage
from PyQt5 import QtWebKit
from PyQt5.QtWebKit import QWebSettings
from PyQt5.QtNetwork import *
import sys
class MainForm(QtWidgets.QMainWindow):
    """Main window that shows every loaded page in its own tab."""

    def __init__(self, parent=None):
        """Build the tab widget and start loading the initial page.

        Args:
            parent: Optional parent widget forwarded to QMainWindow.
        """
        super(MainForm, self).__init__(parent)
        self.tabWidget = QtWidgets.QTabWidget(self)
        self.setCentralWidget(self.tabWidget)
        self.loadUrl(QtCore.QUrl('https://www.notams.faa.gov/dinsQueryWeb/'))

    def loadUrl(self, url):
        """Create a new web view in a new tab, select it, and load *url*.

        Args:
            url: The QUrl to load in the freshly created view.
        """
        # self.view always refers to the most recently created view.
        self.view = QWebView()
        self.view.loadFinished.connect(self._on_load_finished)
        self.view.linkClicked.connect(self.on_linkClicked)
        self.view.loadStarted.connect(self.on_url_changed)
        # Delegate link handling to the application so that clicked links
        # are reported through linkClicked instead of being followed.
        self.view.page().setLinkDelegationPolicy(QWebPage.DelegateAllLinks)
        self.tabWidget.setCurrentIndex(self.tabWidget.addTab(self.view, 'loading...'))
        self.view.load(url)

    def _on_load_finished(self, ok):
        """Label the tab with the page's host and drive the page's form.

        Args:
            ok: Load-success flag delivered by loadFinished (not inspected).
        """
        # Operate on the view that emitted the signal. self.view is only the
        # most recently created view, which is the wrong one as soon as more
        # than one tab exists.
        view = self.sender()
        index = self.tabWidget.indexOf(view)
        self.tabWidget.setTabText(index, view.url().host())

        frame = view.page().mainFrame()
        # Click the first <button>, fill the first 'retrieveLocId' field,
        # then click the first element named 'submit'.
        frame.evaluateJavaScript("document.getElementsByTagName('button')[0].click();")
        frame.evaluateJavaScript("document.getElementsByName('retrieveLocId')[0].value='%s';" % ('lgel lgal lggg'))
        frame.evaluateJavaScript("document.getElementsByName('submit')[0].click();")

    def on_linkClicked(self, url):
        """Open a clicked link in a new tab.

        Args:
            url: The QUrl of the link that was clicked.
        """
        self.loadUrl(url)

    def on_url_changed(self):
        """Print a debug marker; connected to each view's loadStarted signal."""
        print('xdddd')
def main():
    """Create the Qt application, show the main window, and run the event loop."""
    application = QtWidgets.QApplication(sys.argv)
    window = MainForm()
    window.show()
    # Blocks until the application quits; the exit status is not used.
    application.exec_()


# Only start the GUI when this file is executed as a script.
if __name__ == '__main__':
    main()
When a page requests the creation of a new window, the page's createWindow(WebWindowType) is called; if that method does not return a new QWeb[Engine]View (or is not implemented, which is the default), a similar method is called on the view; again, if that is not implemented either, nothing happens.
If you want to load the new window in the same view that requested it, just subclass the web view and return self from createWindow(), and enable support for opening new windows from JavaScript:
class SelfishWebView(QWebView):
    """Web view that answers every new-window request with itself."""

    def createWindow(self, windowType):
        """Return this view as the target for the requested window.

        Args:
            windowType: The kind of window being requested (ignored).
        """
        # Handing back the existing view makes the new content load here
        # instead of in a separate window.
        return self
class MainForm(QtWidgets.QMainWindow):
    # ...
    def loadUrl(self, url):
        # Use the view subclass whose createWindow() returns itself.
        self.view = SelfishWebView()
        # Allow scripts on the page to open new windows.
        self.view.page().settings().setAttribute(
            QWebSettings.JavascriptCanOpenWindows, True)
        # ...
Note that if you execute the JavaScript like that, it will run again as soon as the new page has loaded (and, since that new page also contains a button, it will try to click it); you should disconnect the signal as soon as the page has loaded for the first time.
def _on_load_finished(self, ok):
    # Disconnect this slot from the view's loadFinished signal so that it
    # is not invoked again for later page loads.
    self.view.loadFinished.disconnect(self._on_load_finished)
    # ...