Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added

- Add support for custom number of retries and user-agent in `save_large_file` (#278)

### Fixed

- Add proper typing @overload to `zimscraperlib.image.optimize_xxx` methods (#273)
Expand Down
37 changes: 23 additions & 14 deletions src/zimscraperlib/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,21 +121,30 @@ class BestMp4(YoutubeConfig):
}


def save_large_file(url: str, fpath: pathlib.Path) -> None:
"""download a binary file from its URL, using wget"""
def save_large_file(
url: str, fpath: pathlib.Path, retries: int = 5, user_agent: str | None = None
) -> None:
"""download a binary file from its URL, using wget

Arguments -
url:
"""
command = [
"/usr/bin/env",
"wget",
"-t",
f"{retries}",
"--retry-connrefused",
"--random-wait",
"-O",
str(fpath),
"-c",
url,
]
if user_agent:
command += ["-U", user_agent]
subprocess.run(
[
"/usr/bin/env",
"wget",
"-t",
"5",
"--retry-connrefused",
"--random-wait",
"-O",
str(fpath),
"-c",
url,
],
command,
check=True,
)

Expand Down
22 changes: 22 additions & 0 deletions tests/download/test_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,28 @@ def test_large_download_https(tmp_path: pathlib.Path, valid_https_url: str):
assert_downloaded_file(valid_https_url, dest_file)


@pytest.mark.slow
def test_large_download_https_custom_retry(
    tmp_path: pathlib.Path, valid_https_url: str
):
    """save_large_file accepts a custom retry count as third positional argument"""
    target = tmp_path.joinpath("favicon.ico")
    # retries is deliberately passed positionally, as a caller might do
    save_large_file(valid_https_url, target, 1)
    assert_downloaded_file(valid_https_url, target)


@pytest.mark.slow
def test_large_download_https_custom_ua(tmp_path: pathlib.Path, valid_https_url: str):
    """save_large_file accepts a custom user-agent passed by keyword"""
    browser_ua = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    )
    target = tmp_path.joinpath("favicon.ico")
    save_large_file(valid_https_url, target, user_agent=browser_ua)
    assert_downloaded_file(valid_https_url, target)


@pytest.mark.slow
@pytest.mark.parametrize(
"url,video_id",
Expand Down