# This file is part of the LibreOffice project.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from bs4 import BeautifulSoup from attachment_mimetypes import mimetypes from concurrent.futures import ThreadPoolExecutor, as_completed from requests.adapters import HTTPAdapter from requests.packages.urllib3.util.retry import Retry
soup = BeautifulSoup(resp.content, 'lxml') for p in soup.find_all("p"): if'Thank you for logging in'in p.get_text(): returnTrue elif'Danke für Ihre Anmeldung'in p.get_text(): returnTrue
# Keep the index and resume from there
indexFile = os.path.join(args.outdir, forum + ".index") if os.path.isfile(indexFile): with open(indexFile) as f:
startIndex = int(f.readline().rstrip()) + 1
session = createSession()
if doLogin: ifnot login(session, url, args.config):
print("Can't log in to " + url) return
invalidCount = 0 for i in range(startIndex, 999999):
fileUrl = url + get_attachment_query(forum) + str(i)
h = session.head(fileUrl)
header = h.headers
content_type = header.get('content-type') if"html"in content_type: # Let's assume this is an invalid file link
invalidCount += 1
# Let's assume, if we get 200 invalid files, that there are no more files if invalidCount == 200:
print("No more attachments found in " + url) break else:
invalidCount = 0
r = session.get(fileUrl, allow_redirects=True) with tempfile.NamedTemporaryFile() as tmp:
tmp.write(r.content)
mimetype = magic.from_file(tmp.name, mime=True) if mimetype in mimetypes:
suffix = mimetypes[mimetype]
suffixDir = os.path.join(args.outdir, suffix) try:
os.mkdir(suffixDir) except Exception: pass
ifnot args.fileName:
processes = [] # by default, 10 at a time seems to work fine with ThreadPoolExecutor(max_workers=int(os.environ.get('PARALLELISM', 10))) as executor: for forum, config in forums.items():
processes.append(executor.submit(get_attachments_from_url, forum, config, args))
for task in as_completed(processes):
result = task.result() if result:
print(result) else:
fileNameSplit = args.fileName.split("-") if fileNameSplit[0] != "forum"or (len(fileNameSplit) != 3 and len(fileNameSplit) != 4):
print("Incorrect file name")
sys.exit(1)
forum = fileNameSplit[1]
fileId = fileNameSplit[2] if fileNameSplit[1] == "mso":
forum += "-" + fileNameSplit[2]
fileId = fileNameSplit[3]
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.