diff --git a/lookupdns.py b/lookupdns.py new file mode 100644 index 0000000..d44bdda --- /dev/null +++ b/lookupdns.py @@ -0,0 +1,79 @@ +#!/usr/local/bin/python3.sh +# -*-mode: python; indent-tabs-mode: nil; py-indent-offset: 4; coding: utf-8 -* + +import sys +import os +import traceback + +from phantompy import Render + +global LOG +import logging +import warnings +warnings.filterwarnings('ignore') +LOG = logging.getLogger() + +class LookFor(Render): + + def __init__(self, url, outfile, jsfile=None): + self.uri = url + Render.__init__(self, url, outfile, jsfile) + + def ilookfor(self, html): + import json + marker = '
' + if marker not in html: return '' + i = html.find(marker) + len(marker) + html = html[i:] + assert html[0] == '{', html + i = html.find('
foo foo foo
+ + + + +... and the following file /tmp/test.js: + + document.getElementById('id2').innerHTML = "baz"; + console.log("__PHANTOM_PY_DONE__"); + +... and running this script (without attached display) ... + + xvfb-run python3 phantom.py /tmp/test.html /tmp/out.pdf /tmp/test.js + +... you will get a PDF file /tmp/out.pdf with the contents "foo bar baz". + +Note that the second occurrence of "foo" has been replaced by the web page's own +script, and the third occurrence of "foo" by the external JS file. + + +## License + +Copyright 2017 Michael Karl Franzl + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +""" + +import sys +import os +import traceback +import atexit +from PyQt5.QtCore import QUrl +from PyQt5.QtCore import QTimer +from PyQt5.QtWidgets import QApplication +from PyQt5.QtPrintSupport import QPrinter +from PyQt5.QtWebEngineWidgets import QWebEnginePage as QWebPage + +global LOG +import logging +import warnings +warnings.filterwarnings('ignore') +LOG = logging.getLogger() + +def prepare(): + sfile = '/tmp/test.js' + if not os.path.exists(sfile): + with open(sfile, 'wt') as ofd: + ofd.write(""" + document.getElementById('id2').innerHTML = "baz"; + console.log("__PHANTOM_PY_DONE__"); +""") + sys.stderr.write(f"wrote {sfile} ") + sfile = '/tmp/test.html' + if not os.path.exists(sfile): + with open(sfile, 'wt') as ofd: + ofd.write(""" + + +foo foo foo
+ + + +""") + sys.stderr.write(f"wrote {sfile} ") + sys.stderr.write("\n") + +class Render(QWebPage): + def __init__(self, url, outfile, jsfile=None): + self.app = QApplication(sys.argv) + + QWebPage.__init__(self) + + self.jsfile = jsfile + self.outfile = outfile + + qurl = QUrl.fromUserInput(url) + + LOG.debug(f"phantom.py: URL= {qurl} OUTFILE={outfile} JSFILE= {jsfile)") + + # The PDF generation only happens when the special string __PHANTOM_PY_DONE__ + # is sent to console.log(). The following JS string will be executed by + # default, when no external JavaScript file is specified. + self.js_contents = "setTimeout(function() { console.log('__PHANTOM_PY_DONE__') }, 5000);"; + + if jsfile: + try: + f = open(self.jsfile) + self.js_contents = f.read() + f.close() + except: + LOG.error(traceback.format_exc()) + self._exit(10) + + self.loadFinished.connect(self._loadFinished) + self.load(qurl) + self.javaScriptConsoleMessage = self._onConsoleMessage + + if False: + # Run for a maximum of 10 seconds + watchdog = QTimer() + watchdog.setSingleShot(True) + watchdog.timeout.connect(lambda: self._exit(9)) + watchdog.start(10000) + + self.app.exec_() + + def _onConsoleMessage(self, *args): + if len(args) > 3: + level, txt, lineno, filename = args + else: + level = 1 + txt, lineno, filename = args + LOG.debug(f"CONSOLE {lineno} {txt} {filename}") + if "__PHANTOM_PY_DONE__" in txt: + # If we get this magic string, it means that the external JS is done + self._print() + if "__PHANTOM_PY_EXIT__" in txt: + self._exit(0) + + def _loadFinished(self, result): + LOG.debug(f"phantom.py: Evaluating JS from {self.jsfile}") + self.runJavaScript("document.documentElement.contentEditable=true") + self.runJavaScript(self.js_contents) + + def _printer_callback(self, *args): + """print(self, QPrinter, Callable[[bool], None])""" + # print(f"_printer_callback {self.outfile} {args}") + if args[0] is False: + i = 1 + else: + i = 0 + self._exit(i) + + def _print(self): + printer = QPrinter() + printer.setPageMargins(10, 10, 10, 10, QPrinter.Millimeter) + printer.setPaperSize(QPrinter.A4) + printer.setCreator("phantom.py by Michael Karl Franzl") + printer.setOutputFormat(QPrinter.PdfFormat); + printer.setOutputFileName(self.outfile); + self.print(printer, self._printer_callback) + LOG.debug("phantom.py: Printed") + + def _exit(self, val): + LOG.debug(f"phantom.py: Exiting with val {val}") + + # Run for a maximum of 10 seconds + watchdog = QTimer() + watchdog.setSingleShot(True) + watchdog.timeout.connect(lambda: sys.exit(val)) + watchdog.start(10000) + self.app.exit(val) + atexit._clear() + sys.exit(val) + +def main(): + if (len(sys.argv) < 3): + LOG.info("USAGE: ./phantom.py