153 lines
4.6 KiB
Python
153 lines
4.6 KiB
Python
# Copyright 2017 Mycroft AI Inc.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
"""Download utility based on wget.
|
|
|
|
The utility is a very simple implementation that leverages the wget
command line application, which supports resuming failed downloads.
|
|
"""
|
|
from glob import glob
|
|
import os
|
|
from os.path import exists, dirname
|
|
import subprocess
|
|
from threading import Thread
|
|
|
|
from .file_utils import ensure_directory_exists
|
|
|
|
# Cache of running downloads, keyed by hash(url + dest). Entries are added
# by download() and removed by Downloader.run() once the transfer is over.
_running_downloads = {}
|
|
|
|
|
|
def _get_download_tmp(dest):
    """Get temporary file for download.

    The first choice is `dest + '.part'`. If that file already exists a
    numeric suffix is appended (`.part.1`, `.part.2`, ...) and the first
    path that does not exist on disk is returned.

    Candidates are probed directly on disk rather than counted through a
    glob pattern, so a gap in the numbering can never produce a path that
    is already in use, and glob metacharacters in `dest` (such as `[`)
    cannot hide existing temporary files.

    Arguments:
        dest (str): path to download location

    Returns:
        (str) path to temporary download location
    """
    tmp_base = dest + '.part'
    if not exists(tmp_base):
        return tmp_base

    index = 1
    candidate = '{}.{}'.format(tmp_base, index)
    while exists(candidate):
        index += 1
        candidate = '{}.{}'.format(tmp_base, index)
    return candidate
|
|
|
|
|
|
class Downloader(Thread):
    """Simple file downloader.

    Downloader is a thread based downloader; when instantiated it
    immediately starts downloading the provided url to a file on disk by
    running the wget command line application.

    When the download is complete or has failed the `.done` attribute is
    set to True and `.status` holds the exit code returned by wget.
    0 = Success. If the download was aborted although wget reported
    success, `.status` is set to -1. `.status` stays None if wget could not
    be executed at all.

    Arguments:
        url (str): Url to download
        dest (str): Path to save data to
        complete_action (callable): Function to run when download is complete
                                    `func(dest)`
        header: any special header needed for starting the transfer
    """

    def __init__(self, url, dest, complete_action=None, header=None):
        super(Downloader, self).__init__()
        self.url = url
        self.dest = dest
        self.complete_action = complete_action
        self.status = None  # wget exit code, set once wget has returned
        self.done = False
        self._abort = False
        self.header = header

        # Create directories as needed
        ensure_directory_exists(dirname(dest), permissions=0o775)

        # Start thread
        self.daemon = True
        self.start()

    def perform_download(self, dest):
        """Handle the download through wget.

        Arguments:
            dest (str): Save location

        Returns:
            (int) exit code of the wget process, 0 on success
        """
        cmd = ['wget', '-c', self.url, '-O', dest,
               '--tries=20', '--read-timeout=5']
        if self.header:
            cmd.append('--header={}'.format(self.header))
        return subprocess.call(cmd)

    def run(self):
        """Do the actual download.

        Downloads to a temporary file, then either moves it into place or
        cleans it up. `.done` is set and the entry in the cache of running
        downloads is dropped even if a step raises (for example when wget
        is not installed), so callers polling `.done` never wait forever.
        """
        try:
            tmp = _get_download_tmp(self.dest)
            self.status = self.perform_download(tmp)
            if not self._abort and self.status == 0:
                self.finalize(tmp)
            else:
                self.cleanup(tmp)
        finally:
            self.done = True
            # Remove from list of currently running downloads
            _running_downloads.pop(hash(self.url + self.dest), None)

    def finalize(self, tmp):
        """Move temporary download data to final location.

        Move the .part file to the final destination and perform any
        actions that should be performed at completion.

        Arguments:
            tmp(str): temporary file path
        """
        os.rename(tmp, self.dest)
        if self.complete_action:
            self.complete_action(self.dest)

    def cleanup(self, tmp):
        """Cleanup after download attempt.

        Removes the temporary file used by this attempt and makes sure an
        aborted download is not reported as successful.

        Arguments:
            tmp (str): temporary file path used for the download
        """
        if exists(tmp):
            # Remove the file this attempt wrote to; it may carry a numeric
            # suffix and is not necessarily `dest + '.part'`.
            os.remove(tmp)
        if self.status == 0:
            # wget succeeded but the result was discarded (aborted), so
            # don't leave a success code behind.
            self.status = -1

    def abort(self):
        """Abort download process.

        Only sets a flag that is checked after wget has returned; the
        downloaded data is then discarded instead of being moved to the
        final destination.
        """
        self._abort = True
|
|
|
|
|
|
def download(url, dest, complete_action=None, header=None):
    """Start a download or fetch an already running.

    Downloads are cached by the combination of url and dest. If a download
    with the same url and dest is still in progress that Downloader is
    returned and `complete_action` and `header` of this call are not used.

    Arguments:
        url (str): url to download
        dest (str): path to save download to
        complete_action (callable): Optional function to call on completion
        header (str): Optional header to use for the download

    Returns:
        Downloader object
    """
    arg_hash = hash(url + dest)
    downloader = _running_downloads.get(arg_hash)
    # A cached entry that is already done is stale (its thread finished
    # before it was registered), so start a fresh download instead.
    if downloader is None or downloader.done:
        downloader = Downloader(url, dest, complete_action, header)
        _running_downloads[arg_hash] = downloader
    # Return the local reference: the download thread may already have
    # removed its cache entry, so looking it up again could raise KeyError.
    return downloader
|