Import python venv for stability

This commit is contained in:
2026-02-15 21:24:16 -08:00
parent 1343e93a59
commit 7d784705c9
4997 changed files with 1628270 additions and 0 deletions
@@ -0,0 +1,96 @@
# Names exported by ``from <package> import *``. Each entry is bound by one of
# the import statements that follow this list in the module.
__all__ = [
    "Curl",
    "AsyncCurl",
    "CurlMime",
    "CurlError",
    "CurlInfo",
    "CurlOpt",
    "CurlMOpt",
    "CurlECode",
    "CurlHttpVersion",
    "CurlSslVersion",
    "CurlWsFlag",
    "config_warnings",
    "ffi",
    "is_pro",
    "lib",
    "Session",
    "AsyncSession",
    "BrowserType",
    "BrowserTypeLiteral",
    "request",
    "head",
    "get",
    "post",
    "put",
    "patch",
    "delete",
    "options",
    "Cookies",
    "Headers",
    "Request",
    "Response",
    "AsyncWebSocket",
    "WebSocket",
    "WebSocketError",
    "WebSocketClosed",
    "WebSocketTimeout",
    "WsCloseCode",
    "ExtraFingerprints",
    "CookieTypes",
    "HeaderTypes",
    "ProxySpec",
    "exceptions",
]
import _cffi_backend # noqa: F401 # required by _wrapper
from .__version__ import __curl_version__, __description__, __title__, __version__ # noqa: F401
# This line includes _wrapper.so into the wheel
from ._wrapper import ffi, lib
from .aio import AsyncCurl
from .const import (
CurlECode,
CurlHttpVersion,
CurlInfo,
CurlMOpt,
CurlOpt,
CurlSslVersion,
CurlWsFlag,
)
from .curl import Curl, CurlError, CurlMime
from .requests import (
AsyncSession,
AsyncWebSocket,
BrowserType,
BrowserTypeLiteral,
Cookies,
CookieTypes,
ExtraFingerprints,
Headers,
HeaderTypes,
ProxySpec,
Request,
Response,
Session,
WebSocket,
WebSocketClosed,
WebSocketError,
WebSocketTimeout,
WsCloseCode,
delete,
exceptions,
get,
head,
options,
patch,
post,
put,
request,
)
from .utils import config_warnings, is_pro
config_warnings(on=False)
@@ -0,0 +1,8 @@
from importlib import metadata
from .curl import Curl
# Distribution name; the same string is used for the metadata lookups below.
__title__ = "curl_cffi"
# "Summary" field read from the installed distribution's metadata.
__description__ = metadata.metadata("curl_cffi")["Summary"]
# Version string of the installed distribution.
__version__ = metadata.version("curl_cffi")
# Version reported by a freshly constructed Curl handle, decoded to ``str``.
__curl_version__ = Curl().version().decode()
@@ -0,0 +1,344 @@
"""Ensure asyncio selector methods (add_reader, etc.) are available
tornado 6.1 adds AddThreadSelectorEventLoop event loop,
running select in a thread and defining these methods on the running event loop.
This factors out the functionality of AddThreadSelectorEventLoop
into a standalone SelectorThread object which can be attached to any running event loop.
Vendored from tornado v6.4.0
Redistributed under license Apache-2.0
"""
import asyncio
import atexit
import errno
import functools
import select
import socket
import threading
import typing
from contextlib import suppress
from typing import (
Any,
Callable,
Optional,
Protocol,
TypeVar,
Union,
)
_T = TypeVar("_T")
class _HasFileno(Protocol):
    """Structural type for any object that exposes a ``fileno()`` method."""

    def fileno(self) -> int:
        """Return an integer; this stub body returns ``0``."""
        return 0
_FileDescriptorLike = Union[int, _HasFileno]
# Collection of selector thread event loops to shut down on exit.
_selector_loops: set["SelectorThread"] = set()
def _atexit_callback() -> None:
    """Ask every registered selector thread to stop, then wait for it.

    For each entry in ``_selector_loops`` the closing flag is set under the
    entry's condition variable, one byte is written to its waker socket, and
    its thread (if one was started) is joined. The registry is emptied at the
    end.
    """
    for selector in _selector_loops:
        condition = selector._select_cond
        with condition:
            selector._closing_selector = True
            condition.notify()
        try:
            selector._waker_w.send(b"a")
        except BlockingIOError:
            # The waker buffer is already full, so a wake-up is pending anyway.
            pass
        worker = selector._thread
        if worker is not None:
            # If we don't join our (daemon) thread here, we may get a deadlock
            # during interpreter shutdown. I don't really understand why. This
            # deadlock happens every time in CI (both travis and appveyor) but
            # I've never been able to reproduce locally.
            worker.join()
    _selector_loops.clear()
atexit.register(_atexit_callback)
class SelectorThread:
    """Define ``add_reader`` methods to be called in a background select thread.

    Instances of this class start a second thread to run a selector.
    This thread is completely hidden from the user;
    all callbacks are run on the wrapped event loop's thread.

    Typically used via ``AddThreadSelectorEventLoop``,
    but can be attached to a running asyncio loop.
    """

    # Class-level default; ``close()`` sets the instance attribute to True.
    _closed = False

    def __init__(self, real_loop: asyncio.AbstractEventLoop) -> None:
        """Attach a selector thread to ``real_loop``.

        The thread itself is not started here: a task that starts it is
        scheduled on ``real_loop`` with ``call_soon``. A socket pair used to
        wake the select call is created, the instance is added to the
        module-level ``_selector_loops`` set, and the read end of the waker is
        registered as a reader.
        """
        self._real_loop = real_loop

        # Guards ``_select_args`` and ``_closing_selector``, which are shared
        # between the event loop thread and the select thread.
        self._select_cond = threading.Condition()
        self._select_args: Optional[
            tuple[list[_FileDescriptorLike], list[_FileDescriptorLike]]
        ] = None
        self._closing_selector = False
        self._thread: Optional[threading.Thread] = None
        self._thread_manager_handle = self._thread_manager()

        async def thread_manager_anext() -> None:
            # the anext builtin wasn't added until 3.10. We just need to iterate
            # this generator one step.
            await self._thread_manager_handle.__anext__()

        # When the loop starts, start the thread. Not too soon because we can't
        # clean up if we get to this point but the event loop is closed without
        # starting.
        self._real_loop.call_soon(
            lambda: self._real_loop.create_task(thread_manager_anext())
        )

        # Registered callbacks, keyed by the fd object passed to add_reader /
        # add_writer.
        self._readers: dict[_FileDescriptorLike, Callable] = {}
        self._writers: dict[_FileDescriptorLike, Callable] = {}

        # Writing to _waker_w will wake up the selector thread, which
        # watches for _waker_r to be readable.
        self._waker_r, self._waker_w = socket.socketpair()
        self._waker_r.setblocking(False)
        self._waker_w.setblocking(False)
        _selector_loops.add(self)
        self.add_reader(self._waker_r, self._consume_waker)

    def close(self) -> None:
        """Stop the select thread and release the waker sockets.

        Does nothing if the instance is already closed. Otherwise the closing
        flag is set, the selector is woken, the thread (if started) is joined,
        the instance is removed from ``_selector_loops`` and both waker sockets
        are closed.
        """
        if self._closed:
            return
        with self._select_cond:
            self._closing_selector = True
            self._select_cond.notify()
        self._wake_selector()
        if self._thread is not None:
            self._thread.join()
        _selector_loops.discard(self)
        self.remove_reader(self._waker_r)
        self._waker_r.close()
        self._waker_w.close()
        self._closed = True

    async def _thread_manager(self) -> typing.AsyncGenerator[None, None]:
        """Start the select thread, then wait at ``yield`` until shut down.

        When the generator receives ``GeneratorExit`` it calls ``close()`` and
        re-raises.
        """
        # Create a thread to run the select system call. We manage this thread
        # manually so we can trigger a clean shutdown from an atexit hook. Note
        # that due to the order of operations at shutdown, only daemon threads
        # can be shut down in this way (non-daemon threads would require the
        # introduction of a new hook: https://bugs.python.org/issue41962)
        self._thread = threading.Thread(
            name="Tornado selector",
            daemon=True,
            target=self._run_select,
        )
        self._thread.start()
        self._start_select()
        try:
            # The presence of this yield statement means that this coroutine
            # is actually an asynchronous generator, which has a special
            # shutdown protocol. We wait at this yield point until the
            # event loop's shutdown_asyncgens method is called, at which point
            # we will get a GeneratorExit exception and can shut down the
            # selector thread.
            yield
        except GeneratorExit:
            self.close()
            raise

    def _wake_selector(self) -> None:
        """Write one byte to the waker socket unless the instance is closed.

        ``BlockingIOError`` from the non-blocking send is ignored.
        """
        if self._closed:
            return
        with suppress(BlockingIOError):
            self._waker_w.send(b"a")

    def _consume_waker(self) -> None:
        """Read up to 1024 bytes from the waker socket, ignoring ``BlockingIOError``."""
        with suppress(BlockingIOError):
            self._waker_r.recv(1024)

    def _start_select(self) -> None:
        """Hand the current reader/writer keys to the select thread and notify it."""
        # Capture reader and writer sets here in the event loop
        # thread to avoid any problems with concurrent
        # modification while the select loop uses them.
        with self._select_cond:
            assert self._select_args is None
            self._select_args = (list(self._readers.keys()), list(self._writers.keys()))
            self._select_cond.notify()

    def _run_select(self) -> None:
        """Body of the select thread.

        Repeatedly waits for ``_select_args`` to be set (or for the closing
        flag), runs ``select.select`` on the captured descriptors and passes
        the ready lists to ``_handle_select`` through
        ``call_soon_threadsafe``. Returns when the closing flag is set.
        """
        while True:
            with self._select_cond:
                while self._select_args is None and not self._closing_selector:
                    self._select_cond.wait()
                if self._closing_selector:
                    return
                assert self._select_args is not None
                to_read, to_write = self._select_args
                # Cleared so that _start_select's assertion holds on the next
                # hand-off.
                self._select_args = None

            # We use the simpler interface of the select module instead of
            # the more stateful interface in the selectors module because
            # this class is only intended for use on windows, where
            # select.select is the only option. The selector interface
            # does not have well-documented thread-safety semantics that
            # we can rely on so ensuring proper synchronization would be
            # tricky.
            try:
                # On windows, selecting on a socket for write will not
                # return the socket when there is an error (but selecting
                # for reads works). Also select for errors when selecting
                # for writes, and merge the results.
                #
                # This pattern is also used in
                # https://github.com/python/cpython/blob/v3.8.0/Lib/selectors.py#L312-L317
                rs, ws, xs = select.select(to_read, to_write, to_write)
                ws = ws + xs
            except OSError as e:
                # After remove_reader or remove_writer is called, the file
                # descriptor may subsequently be closed on the event loop
                # thread. It's possible that this select thread hasn't
                # gotten into the select system call by the time that
                # happens in which case (at least on macOS), select may
                # raise a "bad file descriptor" error. If we get that
                # error, check and see if we're also being woken up by
                # polling the waker alone. If we are, just return to the
                # event loop and we'll get the updated set of file
                # descriptors on the next iteration. Otherwise, raise the
                # original error.
                if e.errno == getattr(errno, "WSAENOTSOCK", errno.EBADF):
                    rs, _, _ = select.select([self._waker_r.fileno()], [], [], 0)
                    if rs:
                        ws = []
                    else:
                        raise
                else:
                    raise

            try:
                self._real_loop.call_soon_threadsafe(self._handle_select, rs, ws)
            except RuntimeError:
                # "Event loop is closed". Swallow the exception for
                # consistency with PollIOLoop (and logical consistency
                # with the fact that we can't guarantee that an
                # add_callback that completes without error will
                # eventually execute).
                pass
            except AttributeError:
                # ProactorEventLoop may raise this instead of RuntimeError
                # if call_soon_threadsafe races with a call to close().
                # Swallow it too for consistency.
                pass

    def _handle_select(
        self, rs: list[_FileDescriptorLike], ws: list[_FileDescriptorLike]
    ) -> None:
        """Run the callbacks for ready descriptors, then start the next select round."""
        for r in rs:
            self._handle_event(r, self._readers)
        for w in ws:
            self._handle_event(w, self._writers)
        self._start_select()

    def _handle_event(
        self,
        fd: _FileDescriptorLike,
        cb_map: dict[_FileDescriptorLike, Callable],
    ) -> None:
        """Call the callback registered for ``fd`` in ``cb_map``, if there is one."""
        try:
            callback = cb_map[fd]
        except KeyError:
            # No callback is registered for this fd any more.
            return
        callback()

    def add_reader(
        self, fd: _FileDescriptorLike, callback: Callable[..., None], *args: Any
    ) -> None:
        """Register ``callback(*args)`` as the reader for ``fd`` and wake the selector."""
        self._readers[fd] = functools.partial(callback, *args)
        self._wake_selector()

    def add_writer(
        self, fd: _FileDescriptorLike, callback: Callable[..., None], *args: Any
    ) -> None:
        """Register ``callback(*args)`` as the writer for ``fd`` and wake the selector."""
        self._writers[fd] = functools.partial(callback, *args)
        self._wake_selector()

    def remove_reader(self, fd: _FileDescriptorLike) -> bool:
        """Unregister the reader for ``fd``.

        Returns ``False`` if no reader was registered, ``True`` otherwise.
        """
        try:
            del self._readers[fd]
        except KeyError:
            return False
        self._wake_selector()
        return True

    def remove_writer(self, fd: _FileDescriptorLike) -> bool:
        """Unregister the writer for ``fd``.

        Returns ``False`` if no writer was registered, ``True`` otherwise.
        """
        try:
            del self._writers[fd]
        except KeyError:
            return False
        self._wake_selector()
        return True
class AddThreadSelectorEventLoop(asyncio.AbstractEventLoop):
    """Event loop proxy that supplies the ``add_reader`` family of methods.

    A ``SelectorThread`` is created for the wrapped loop; reader and writer
    registration is delegated to it, while every other attribute is looked up
    on the wrapped loop. Callbacks run on the wrapped event loop's thread and
    the extra thread is hidden from the user.

    This class is used automatically by Tornado; applications should not need
    to refer to it directly.

    Wrapping any event loop is safe, although it only makes sense for loops
    that lack the ``add_reader`` family themselves
    (i.e. ``WindowsProactorEventLoop``).

    Closing the ``AddThreadSelectorEventLoop`` also closes the wrapped event loop.
    """

    # Attribute names served by this wrapper itself. ``__getattribute__``
    # forwards every other name to the wrapped loop.
    MY_ATTRIBUTES = {
        "_real_loop",
        "_selector",
        "add_reader",
        "add_writer",
        "close",
        "remove_reader",
        "remove_writer",
    }

    def __getattribute__(self, name: str) -> Any:
        """Resolve ``name`` locally if listed in ``MY_ATTRIBUTES``, else on the wrapped loop."""
        if name not in AddThreadSelectorEventLoop.MY_ATTRIBUTES:
            return getattr(self._real_loop, name)
        return super().__getattribute__(name)

    def __init__(self, real_loop: asyncio.AbstractEventLoop) -> None:
        """Remember ``real_loop`` and create the selector thread helper for it."""
        self._real_loop = real_loop
        self._selector = SelectorThread(real_loop)

    def close(self) -> None:
        """Close the selector helper first, then the wrapped loop."""
        self._selector.close()
        self._real_loop.close()

    def add_reader(  # type: ignore
        self,
        fd: "_FileDescriptorLike",
        callback: Callable[..., None],
        *args: Any,
    ) -> None:
        """Delegate reader registration to the selector helper."""
        selector = self._selector
        return selector.add_reader(fd, callback, *args)

    def add_writer(  # type: ignore
        self,
        fd: "_FileDescriptorLike",
        callback: Callable[..., None],
        *args: Any,  # type: ignore
    ) -> None:
        """Delegate writer registration to the selector helper."""
        selector = self._selector
        return selector.add_writer(fd, callback, *args)

    def remove_reader(self, fd: "_FileDescriptorLike") -> bool:
        """Delegate reader removal to the selector helper and return its result."""
        selector = self._selector
        return selector.remove_reader(fd)

    def remove_writer(self, fd: "_FileDescriptorLike") -> bool:
        """Delegate writer removal to the selector helper and return its result."""
        selector = self._selector
        return selector.remove_writer(fd)
@@ -0,0 +1,343 @@
import asyncio
import sys
import warnings
from contextlib import suppress
from typing import Any, Optional
from weakref import WeakKeyDictionary
from ._wrapper import ffi, lib
from .const import CurlECode, CurlMOpt
from .curl import DEFAULT_CACERT, Curl, CurlError
from .utils import CurlCffiWarning
__all__ = ["AsyncCurl"]
if sys.platform == "win32":
    # Maps an asyncio loop to the selector wrapper created for it. Keys are
    # held weakly.
    _selectors: WeakKeyDictionary = WeakKeyDictionary()
    PROACTOR_WARNING = """
Proactor event loop does not implement add_reader family of methods required.
Registering an additional selector thread for add_reader support.
To avoid this warning use:
asyncio.set_event_loop_policy(WindowsSelectorEventLoopPolicy())
"""

    def get_selector(
        asyncio_loop: asyncio.AbstractEventLoop,
    ) -> asyncio.AbstractEventLoop:
        """Return an object offering the ``add_reader`` family for ``asyncio_loop``.

        A previously created wrapper is reused. A loop that is not a proactor
        loop is returned unchanged. Otherwise a warning is emitted, the loop is
        wrapped in ``AddThreadSelectorEventLoop``, and the loop's ``close`` is
        patched so that closing it also shuts the wrapper down.
        """
        if asyncio_loop in _selectors:
            return _selectors[asyncio_loop]

        # ``type(None)`` stands in when asyncio has no ProactorEventLoop, which
        # makes the isinstance test fail for every real loop.
        proactor_type = getattr(asyncio, "ProactorEventLoop", type(None))
        if not isinstance(asyncio_loop, proactor_type):
            return asyncio_loop

        warnings.warn(PROACTOR_WARNING, CurlCffiWarning, stacklevel=2)

        from ._asyncio_selector import AddThreadSelectorEventLoop

        wrapper = AddThreadSelectorEventLoop(asyncio_loop)  # type: ignore
        _selectors[asyncio_loop] = wrapper

        # Patch loop.close so that the selector thread is closed as well.
        original_close = asyncio_loop.close

        def _close_selector_and_loop():
            # Put the original close back before closing the wrapper, because
            # the wrapper's close in turn calls the event loop's close.
            asyncio_loop.close = original_close
            _selectors.pop(asyncio_loop, None)
            wrapper.close()

        asyncio_loop.close = _close_selector_and_loop
        return wrapper

else:

    def get_selector(loop: asyncio.AbstractEventLoop) -> asyncio.AbstractEventLoop:
        """Return ``loop`` unchanged; no wrapper is used on this platform."""
        return loop
# Values compared against the ``what`` argument in socket_function.
# CURL_POLL_NONE is also passed as the event bitmask for timeout-driven calls.
CURL_POLL_NONE = 0
CURL_POLL_IN = 1
CURL_POLL_OUT = 2
CURL_POLL_INOUT = 3
CURL_POLL_REMOVE = 4
# Passed in place of a socket fd when socket_action is driven by a timeout.
CURL_SOCKET_TIMEOUT = -1
CURL_SOCKET_BAD = -1
# Event bitmask values handed to process_data for reader/writer callbacks.
CURL_CSELECT_IN = 0x01
CURL_CSELECT_OUT = 0x02
CURL_CSELECT_ERR = 0x04
# Message type that process_data checks for on entries from curl_multi_info_read.
CURLMSG_DONE = 1
# NOTE(review): presumably values for CurlMOpt.PIPELINING; the only use visible
# here is a commented-out setopt call — confirm.
CURLPIPE_NOTHING = 0
CURLPIPE_HTTP1 = 1 # deprecated
CURLPIPE_MULTIPLEX = 2
"""
libcurl provides an event-based system for multiple handles with the following API:
- curl_multi_socket_action, for detecting events
- curl_multi_info_read, for reading the transfer status
There are 2 callbacks:
- socket_function, set by CURLMOPT_SOCKETFUNCTION, will be called for socket events.
- timer_function, set by CURLMOPT_TIMERFUNCTION, will be called when timeouts happen.
And it works like the following:
Set up handles, callbacks first.
When started, curl_multi_socket_action should be called to start everything.
If there are data in/out, libcurl calls the socket_function callback, and it sets up
`process_data` as asyncio loop reader/writer function. `process_data` will call
curl_multi_info_read to determine whether a certain `await perform` has finished.
When idle, libcurl will call the timer_function callback, which sets up a later call
for socket_action to detect events.
"""
@ffi.def_extern()
def timer_function(curlm, timeout_ms: int, clientp: Any) -> int:
    """Timer callback handed to libcurl; (re)arms the timeout timer.

    see: https://curl.se/libcurl/c/CURLMOPT_TIMERFUNCTION.html

    Parameters:
        curlm: the multi handle passed by libcurl (unused here).
        timeout_ms: delay requested by libcurl, in milliseconds.
        clientp: handle created with ``ffi.new_handle``; resolves to the
            object that owns the timer.

    Returns:
        Always ``0``.
    """
    owner = ffi.from_handle(clientp)

    # Any existing timer is cancelled first: either libcurl wants it gone, or
    # it is about to be replaced. Cancelling a timer that already fired is a
    # no-op.
    pending = owner._timer
    if pending:
        pending.cancel()
        owner._timer = None

    # NOTE(review): a timer is installed even when timeout_ms is -1 (negative
    # delay), although libcurl documents -1 as "delete the timer" — confirm
    # this is intentional before changing it.
    delay_seconds = timeout_ms / 1000
    owner._timer = owner.loop.call_later(
        delay_seconds,
        owner.process_data,
        CURL_SOCKET_TIMEOUT,  # -1
        CURL_POLL_NONE,  # 0
    )
    return 0
@ffi.def_extern()
def socket_function(curl, sockfd: int, what: int, clientp: Any, data: Any) -> int:
    """Socket callback handed to libcurl; keeps loop readers/writers in sync.

    Called when libcurl decides it is time to interact with a socket.

    Parameters:
        curl: the easy handle passed by libcurl (unused here).
        sockfd: the socket file descriptor concerned.
        what: one of the ``CURL_POLL_*`` values describing what to wait for.
        clientp: handle created with ``ffi.new_handle``; resolves to the
            object that owns the loop and the tracked socket set.
        data: per-socket pointer passed by libcurl (unused here).

    Returns:
        Always ``0``.
    """
    async_curl = ffi.from_handle(clientp)
    loop = async_curl.loop

    # Always remove and re-add fds, so stale registrations never linger.
    if sockfd in async_curl._sockfds:
        loop.remove_reader(sockfd)
        loop.remove_writer(sockfd)

    # Need to read from the socket
    if what & CURL_POLL_IN:
        loop.add_reader(sockfd, async_curl.process_data, sockfd, CURL_CSELECT_IN)
        async_curl._sockfds.add(sockfd)

    # Need to write to the socket
    if what & CURL_POLL_OUT:
        loop.add_writer(sockfd, async_curl.process_data, sockfd, CURL_CSELECT_OUT)
        async_curl._sockfds.add(sockfd)

    # Need to remove the socket. ``discard`` is used instead of ``remove``
    # because the socket may never have been tracked (it is only added for
    # IN/OUT), and an exception cannot propagate out of this C callback.
    if what == CURL_POLL_REMOVE:
        async_curl._sockfds.discard(sockfd)

    return 0
class AsyncCurl:
    """Wrapper around curl_multi handle to provide asyncio support. It uses the libcurl
    socket_action APIs."""

    def __init__(self, cacert: str = "", loop=None):
        """
        Parameters:
            cacert: CA cert path to use, by default, certs from ``certifi`` are used.
            loop: EventLoop to use.
        """
        self._curlm = lib.curl_multi_init()
        self._cacert = cacert or DEFAULT_CACERT
        self._curl2future: dict[Curl, asyncio.Future] = {}  # curl to future map
        self._curl2curl: dict[ffi.CData, Curl] = {}  # c curl to Curl
        self._sockfds: set[int] = set()  # sockfds
        self.loop = get_selector(
            loop if loop is not None else asyncio.get_running_loop()
        )
        self._timeout_checker = self.loop.create_task(self._force_timeout())
        self._timer: Optional[asyncio.TimerHandle] = None
        self._setup()

    def _setup(self):
        """Register the timer and socket callbacks on the multi handle."""
        self.setopt(CurlMOpt.TIMERFUNCTION, lib.timer_function)
        self.setopt(CurlMOpt.SOCKETFUNCTION, lib.socket_function)
        # Keep the handle on the instance so that it stays alive for as long
        # as libcurl may pass it back to the callbacks.
        self._self_handle = ffi.new_handle(self)
        self.setopt(CurlMOpt.SOCKETDATA, self._self_handle)
        self.setopt(CurlMOpt.TIMERDATA, self._self_handle)
        # self.setopt(CurlMOpt.PIPELINING, CURLPIPE_NOTHING)

    async def close(self):
        """Close and cleanup running timers, readers, writers and handles.

        Pending futures are resolved with ``None``.
        """
        # Close and wait for the force timeout checker to complete
        self._timeout_checker.cancel()
        with suppress(asyncio.CancelledError):
            await self._timeout_checker

        # Close all pending futures
        for curl, future in self._curl2future.items():
            lib.curl_multi_remove_handle(self._curlm, curl._curl)
            if not future.done() and not future.cancelled():
                future.set_result(None)

        # Cleanup curl_multi handle
        lib.curl_multi_cleanup(self._curlm)
        self._curlm = None

        # Remove added readers and writers
        for sockfd in self._sockfds:
            self.loop.remove_reader(sockfd)
            self.loop.remove_writer(sockfd)

        # Cancel all time functions
        if self._timer:
            self._timer.cancel()

    async def _force_timeout(self):
        """This coroutine is used to safeguard from any missing signals from curl, and
        put everything back on track"""
        while True:
            if not self._curlm:
                break
            self.socket_action(CURL_SOCKET_TIMEOUT, CURL_POLL_NONE)
            await asyncio.sleep(0.1)

    def add_handle(self, curl: Curl):
        """Add a curl handle to be managed by curl_multi. This is the equivalent of
        `perform` in the async world.

        Returns:
            A future created on ``self.loop`` that is completed when the
            transfer for ``curl`` is reported done.

        Raises:
            CurlError: if libcurl refuses the handle.
        """
        curl._ensure_cacert()
        errcode = lib.curl_multi_add_handle(self._curlm, curl._curl)
        self._check_error(errcode, "add_handle")
        future = self.loop.create_future()
        self._curl2future[curl] = future
        self._curl2curl[curl._curl] = curl
        return future

    def socket_action(self, sockfd: int, ev_bitmask: int) -> int:
        """wrapper for curl_multi_socket_action,
        returns the number of running curl handles.

        Raises:
            CurlError: if libcurl reports a non-OK code.
        """
        running_handle = ffi.new("int *")
        errcode = lib.curl_multi_socket_action(
            self._curlm, sockfd, ev_bitmask, running_handle
        )
        self._check_error(errcode, "socket_action")
        return running_handle[0]

    def process_data(self, sockfd: int, ev_bitmask: int):
        """Call curl_multi_info_read to read data for given socket.

        Runs ``socket_action`` for the socket first, then drains the message
        queue and completes the future of every transfer reported as done.
        """
        if not self._curlm:
            warnings.warn(
                "Curlm already closed! quitting from process_data",
                CurlCffiWarning,
                stacklevel=2,
            )
            return

        self.socket_action(sockfd, ev_bitmask)

        msg_in_queue = ffi.new("int *")
        while True:
            try:
                curl_msg = lib.curl_multi_info_read(self._curlm, msg_in_queue)
                # NULL is returned as a signal that no more to be get at this point
                if curl_msg == ffi.NULL:
                    break
                if curl_msg.msg == CURLMSG_DONE:
                    curl = self._curl2curl[curl_msg.easy_handle]
                    retcode = curl_msg.data.result
                    if retcode == 0:
                        self.set_result(curl)
                    else:
                        self.set_exception(curl, curl._get_error(retcode, "perform"))
                else:
                    print("NOT DONE")  # Will not reach, for nothing else being defined.
            except Exception:
                # Best effort: warn and carry on with the next queued message.
                warnings.warn(
                    "Unexpected curl multi state in process_data, "
                    "please open an issue on GitHub\n",
                    CurlCffiWarning,
                    stacklevel=2,
                )

    def _pop_future(self, curl: Curl):
        """Detach ``curl`` from the multi handle and return its future, if any."""
        errcode = lib.curl_multi_remove_handle(self._curlm, curl._curl)
        self._check_error(errcode, "remove_handle")
        self._curl2curl.pop(curl._curl, None)
        return self._curl2future.pop(curl, None)

    def remove_handle(self, curl: Curl):
        """Cancel a future for given curl handle."""
        future = self._pop_future(curl)
        if future and not future.done() and not future.cancelled():
            future.cancel()

    def set_result(self, curl: Curl):
        """Mark a future as done for given curl handle."""
        future = self._pop_future(curl)
        if future and not future.done() and not future.cancelled():
            future.set_result(None)

    def set_exception(self, curl: Curl, exception):
        """Raise exception of a future for given curl handle."""
        future = self._pop_future(curl)
        if future and not future.done() and not future.cancelled():
            future.set_exception(exception)

    def _check_error(self, errcode: int, *args: Any):
        """Raise ``CurlError`` unless ``errcode`` is the OK code.

        Parameters:
            errcode: code returned by a ``curl_multi_*`` call.
            *args: words describing the failed action, joined with spaces in
                the error message.
        """
        if errcode == CurlECode.OK:
            return
        # curl_multi_strerror returns a C string pointer; convert it to text so
        # the message shows the description rather than the pointer's repr.
        raw_message = lib.curl_multi_strerror(errcode)
        errmsg = ffi.string(raw_message).decode(errors="replace")
        action = " ".join([str(a) for a in args])
        raise CurlError(
            f"Failed in {action}, multi: ({errcode}) {errmsg}. "
            "See https://curl.se/libcurl/c/libcurl-errors.html first for more "
            "details. Please open an issue on GitHub to help debug this error.",
        )

    def setopt(self, option, value):
        """Wrapper around curl_multi_setopt.

        The options listed below have their value boxed in a ``long*``; every
        other value is passed through unchanged.

        Returns:
            The code returned by ``curl_multi_setopt``.
        """
        if option in (
            CurlMOpt.PIPELINING,
            CurlMOpt.MAXCONNECTS,
            CurlMOpt.MAX_HOST_CONNECTIONS,
            CurlMOpt.MAX_PIPELINE_LENGTH,
            CurlMOpt.MAX_TOTAL_CONNECTIONS,
            CurlMOpt.MAX_CONCURRENT_STREAMS,
        ):
            c_value = ffi.new("long*", value)
        else:
            c_value = value
        return lib.curl_multi_setopt(self._curlm, option, c_value)
@@ -0,0 +1,613 @@
# This file is automatically generated, do not modify it directly.
from enum import IntEnum
class CurlOpt(IntEnum):
    """``CURLOPT_`` constants extracted from libcurl,
    see: https://curl.se/libcurl/c/curl_easy_setopt.html"""

    # NOTE(review): the 0 / 10000 / 20000 / 30000 / 40000 bases presumably
    # mirror libcurl's option-type groups — confirm against curl.h.
    WRITEDATA = 10000 + 1
    URL = 10000 + 2
    PORT = 0 + 3
    PROXY = 10000 + 4
    USERPWD = 10000 + 5
    PROXYUSERPWD = 10000 + 6
    RANGE = 10000 + 7
    READDATA = 10000 + 9
    ERRORBUFFER = 10000 + 10
    WRITEFUNCTION = 20000 + 11
    READFUNCTION = 20000 + 12
    TIMEOUT = 0 + 13
    INFILESIZE = 0 + 14
    POSTFIELDS = 10000 + 15
    REFERER = 10000 + 16
    FTPPORT = 10000 + 17
    USERAGENT = 10000 + 18
    LOW_SPEED_LIMIT = 0 + 19
    LOW_SPEED_TIME = 0 + 20
    RESUME_FROM = 0 + 21
    COOKIE = 10000 + 22
    HTTPHEADER = 10000 + 23
    HTTPPOST = 10000 + 24
    SSLCERT = 10000 + 25
    KEYPASSWD = 10000 + 26
    CRLF = 0 + 27
    QUOTE = 10000 + 28
    HEADERDATA = 10000 + 29
    COOKIEFILE = 10000 + 31
    SSLVERSION = 0 + 32
    TIMECONDITION = 0 + 33
    TIMEVALUE = 0 + 34
    CUSTOMREQUEST = 10000 + 36
    STDERR = 10000 + 37
    POSTQUOTE = 10000 + 39
    VERBOSE = 0 + 41
    HEADER = 0 + 42
    NOPROGRESS = 0 + 43
    NOBODY = 0 + 44
    FAILONERROR = 0 + 45
    UPLOAD = 0 + 46
    POST = 0 + 47
    DIRLISTONLY = 0 + 48
    APPEND = 0 + 50
    NETRC = 0 + 51
    FOLLOWLOCATION = 0 + 52
    TRANSFERTEXT = 0 + 53
    PUT = 0 + 54
    PROGRESSFUNCTION = 20000 + 56
    XFERINFODATA = 10000 + 57
    AUTOREFERER = 0 + 58
    PROXYPORT = 0 + 59
    POSTFIELDSIZE = 0 + 60
    HTTPPROXYTUNNEL = 0 + 61
    INTERFACE = 10000 + 62
    KRBLEVEL = 10000 + 63
    SSL_VERIFYPEER = 0 + 64
    CAINFO = 10000 + 65
    MAXREDIRS = 0 + 68
    FILETIME = 0 + 69
    TELNETOPTIONS = 10000 + 70
    MAXCONNECTS = 0 + 71
    FRESH_CONNECT = 0 + 74
    FORBID_REUSE = 0 + 75
    RANDOM_FILE = 10000 + 76
    EGDSOCKET = 10000 + 77
    CONNECTTIMEOUT = 0 + 78
    HEADERFUNCTION = 20000 + 79
    HTTPGET = 0 + 80
    SSL_VERIFYHOST = 0 + 81
    COOKIEJAR = 10000 + 82
    SSL_CIPHER_LIST = 10000 + 83
    HTTP_VERSION = 0 + 84
    FTP_USE_EPSV = 0 + 85
    SSLCERTTYPE = 10000 + 86
    SSLKEY = 10000 + 87
    SSLKEYTYPE = 10000 + 88
    SSLENGINE = 10000 + 89
    SSLENGINE_DEFAULT = 0 + 90
    DNS_USE_GLOBAL_CACHE = 0 + 91
    DNS_CACHE_TIMEOUT = 0 + 92
    PREQUOTE = 10000 + 93
    DEBUGFUNCTION = 20000 + 94
    DEBUGDATA = 10000 + 95
    COOKIESESSION = 0 + 96
    CAPATH = 10000 + 97
    BUFFERSIZE = 0 + 98
    NOSIGNAL = 0 + 99
    SHARE = 10000 + 100
    PROXYTYPE = 0 + 101
    ACCEPT_ENCODING = 10000 + 102
    PRIVATE = 10000 + 103
    HTTP200ALIASES = 10000 + 104
    UNRESTRICTED_AUTH = 0 + 105
    FTP_USE_EPRT = 0 + 106
    HTTPAUTH = 0 + 107
    SSL_CTX_FUNCTION = 20000 + 108
    SSL_CTX_DATA = 10000 + 109
    FTP_CREATE_MISSING_DIRS = 0 + 110
    PROXYAUTH = 0 + 111
    SERVER_RESPONSE_TIMEOUT = 0 + 112
    IPRESOLVE = 0 + 113
    MAXFILESIZE = 0 + 114
    INFILESIZE_LARGE = 30000 + 115
    RESUME_FROM_LARGE = 30000 + 116
    MAXFILESIZE_LARGE = 30000 + 117
    NETRC_FILE = 10000 + 118
    USE_SSL = 0 + 119
    POSTFIELDSIZE_LARGE = 30000 + 120
    TCP_NODELAY = 0 + 121
    FTPSSLAUTH = 0 + 129
    IOCTLFUNCTION = 20000 + 130
    IOCTLDATA = 10000 + 131
    FTP_ACCOUNT = 10000 + 134
    COOKIELIST = 10000 + 135
    IGNORE_CONTENT_LENGTH = 0 + 136
    FTP_SKIP_PASV_IP = 0 + 137
    FTP_FILEMETHOD = 0 + 138
    LOCALPORT = 0 + 139
    LOCALPORTRANGE = 0 + 140
    CONNECT_ONLY = 0 + 141
    CONV_FROM_NETWORK_FUNCTION = 20000 + 142
    CONV_TO_NETWORK_FUNCTION = 20000 + 143
    CONV_FROM_UTF8_FUNCTION = 20000 + 144
    MAX_SEND_SPEED_LARGE = 30000 + 145
    MAX_RECV_SPEED_LARGE = 30000 + 146
    FTP_ALTERNATIVE_TO_USER = 10000 + 147
    SOCKOPTFUNCTION = 20000 + 148
    SOCKOPTDATA = 10000 + 149
    SSL_SESSIONID_CACHE = 0 + 150
    SSH_AUTH_TYPES = 0 + 151
    SSH_PUBLIC_KEYFILE = 10000 + 152
    SSH_PRIVATE_KEYFILE = 10000 + 153
    FTP_SSL_CCC = 0 + 154
    TIMEOUT_MS = 0 + 155
    CONNECTTIMEOUT_MS = 0 + 156
    HTTP_TRANSFER_DECODING = 0 + 157
    HTTP_CONTENT_DECODING = 0 + 158
    NEW_FILE_PERMS = 0 + 159
    NEW_DIRECTORY_PERMS = 0 + 160
    POSTREDIR = 0 + 161
    SSH_HOST_PUBLIC_KEY_MD5 = 10000 + 162
    OPENSOCKETFUNCTION = 20000 + 163
    OPENSOCKETDATA = 10000 + 164
    COPYPOSTFIELDS = 10000 + 165
    PROXY_TRANSFER_MODE = 0 + 166
    SEEKFUNCTION = 20000 + 167
    SEEKDATA = 10000 + 168
    CRLFILE = 10000 + 169
    ISSUERCERT = 10000 + 170
    ADDRESS_SCOPE = 0 + 171
    CERTINFO = 0 + 172
    USERNAME = 10000 + 173
    PASSWORD = 10000 + 174
    PROXYUSERNAME = 10000 + 175
    PROXYPASSWORD = 10000 + 176
    NOPROXY = 10000 + 177
    TFTP_BLKSIZE = 0 + 178
    SOCKS5_GSSAPI_SERVICE = 10000 + 179
    SOCKS5_GSSAPI_NEC = 0 + 180
    PROTOCOLS = 0 + 181
    REDIR_PROTOCOLS = 0 + 182
    SSH_KNOWNHOSTS = 10000 + 183
    SSH_KEYFUNCTION = 20000 + 184
    SSH_KEYDATA = 10000 + 185
    MAIL_FROM = 10000 + 186
    MAIL_RCPT = 10000 + 187
    FTP_USE_PRET = 0 + 188
    RTSP_REQUEST = 0 + 189
    RTSP_SESSION_ID = 10000 + 190
    RTSP_STREAM_URI = 10000 + 191
    RTSP_TRANSPORT = 10000 + 192
    RTSP_CLIENT_CSEQ = 0 + 193
    RTSP_SERVER_CSEQ = 0 + 194
    INTERLEAVEDATA = 10000 + 195
    INTERLEAVEFUNCTION = 20000 + 196
    WILDCARDMATCH = 0 + 197
    CHUNK_BGN_FUNCTION = 20000 + 198
    CHUNK_END_FUNCTION = 20000 + 199
    FNMATCH_FUNCTION = 20000 + 200
    CHUNK_DATA = 10000 + 201
    FNMATCH_DATA = 10000 + 202
    RESOLVE = 10000 + 203
    TLSAUTH_USERNAME = 10000 + 204
    TLSAUTH_PASSWORD = 10000 + 205
    TLSAUTH_TYPE = 10000 + 206
    TRANSFER_ENCODING = 0 + 207
    CLOSESOCKETFUNCTION = 20000 + 208
    CLOSESOCKETDATA = 10000 + 209
    GSSAPI_DELEGATION = 0 + 210
    DNS_SERVERS = 10000 + 211
    ACCEPTTIMEOUT_MS = 0 + 212
    TCP_KEEPALIVE = 0 + 213
    TCP_KEEPIDLE = 0 + 214
    TCP_KEEPINTVL = 0 + 215
    SSL_OPTIONS = 0 + 216
    MAIL_AUTH = 10000 + 217
    SASL_IR = 0 + 218
    XFERINFOFUNCTION = 20000 + 219
    XOAUTH2_BEARER = 10000 + 220
    DNS_INTERFACE = 10000 + 221
    DNS_LOCAL_IP4 = 10000 + 222
    DNS_LOCAL_IP6 = 10000 + 223
    LOGIN_OPTIONS = 10000 + 224
    SSL_ENABLE_NPN = 0 + 225
    SSL_ENABLE_ALPN = 0 + 226
    EXPECT_100_TIMEOUT_MS = 0 + 227
    PROXYHEADER = 10000 + 228
    HEADEROPT = 0 + 229
    PINNEDPUBLICKEY = 10000 + 230
    UNIX_SOCKET_PATH = 10000 + 231
    SSL_VERIFYSTATUS = 0 + 232
    SSL_FALSESTART = 0 + 233
    PATH_AS_IS = 0 + 234
    PROXY_SERVICE_NAME = 10000 + 235
    SERVICE_NAME = 10000 + 236
    PIPEWAIT = 0 + 237
    DEFAULT_PROTOCOL = 10000 + 238
    STREAM_WEIGHT = 0 + 239
    STREAM_DEPENDS = 10000 + 240
    STREAM_DEPENDS_E = 10000 + 241
    TFTP_NO_OPTIONS = 0 + 242
    CONNECT_TO = 10000 + 243
    TCP_FASTOPEN = 0 + 244
    KEEP_SENDING_ON_ERROR = 0 + 245
    PROXY_CAINFO = 10000 + 246
    PROXY_CAPATH = 10000 + 247
    PROXY_SSL_VERIFYPEER = 0 + 248
    PROXY_SSL_VERIFYHOST = 0 + 249
    PROXY_SSLVERSION = 0 + 250
    PROXY_TLSAUTH_USERNAME = 10000 + 251
    PROXY_TLSAUTH_PASSWORD = 10000 + 252
    PROXY_TLSAUTH_TYPE = 10000 + 253
    PROXY_SSLCERT = 10000 + 254
    PROXY_SSLCERTTYPE = 10000 + 255
    PROXY_SSLKEY = 10000 + 256
    PROXY_SSLKEYTYPE = 10000 + 257
    PROXY_KEYPASSWD = 10000 + 258
    PROXY_SSL_CIPHER_LIST = 10000 + 259
    PROXY_CRLFILE = 10000 + 260
    PROXY_SSL_OPTIONS = 0 + 261
    PRE_PROXY = 10000 + 262
    PROXY_PINNEDPUBLICKEY = 10000 + 263
    ABSTRACT_UNIX_SOCKET = 10000 + 264
    SUPPRESS_CONNECT_HEADERS = 0 + 265
    REQUEST_TARGET = 10000 + 266
    SOCKS5_AUTH = 0 + 267
    SSH_COMPRESSION = 0 + 268
    MIMEPOST = 10000 + 269
    TIMEVALUE_LARGE = 30000 + 270
    HAPPY_EYEBALLS_TIMEOUT_MS = 0 + 271
    RESOLVER_START_FUNCTION = 20000 + 272
    RESOLVER_START_DATA = 10000 + 273
    HAPROXYPROTOCOL = 0 + 274
    DNS_SHUFFLE_ADDRESSES = 0 + 275
    TLS13_CIPHERS = 10000 + 276
    PROXY_TLS13_CIPHERS = 10000 + 277
    DISALLOW_USERNAME_IN_URL = 0 + 278
    DOH_URL = 10000 + 279
    UPLOAD_BUFFERSIZE = 0 + 280
    UPKEEP_INTERVAL_MS = 0 + 281
    CURLU = 10000 + 282
    TRAILERFUNCTION = 20000 + 283
    TRAILERDATA = 10000 + 284
    HTTP09_ALLOWED = 0 + 285
    ALTSVC_CTRL = 0 + 286
    ALTSVC = 10000 + 287
    MAXAGE_CONN = 0 + 288
    SASL_AUTHZID = 10000 + 289
    MAIL_RCPT_ALLOWFAILS = 0 + 290
    SSLCERT_BLOB = 40000 + 291
    SSLKEY_BLOB = 40000 + 292
    PROXY_SSLCERT_BLOB = 40000 + 293
    PROXY_SSLKEY_BLOB = 40000 + 294
    ISSUERCERT_BLOB = 40000 + 295
    PROXY_ISSUERCERT = 10000 + 296
    PROXY_ISSUERCERT_BLOB = 40000 + 297
    SSL_EC_CURVES = 10000 + 298
    HSTS_CTRL = 0 + 299
    HSTS = 10000 + 300
    HSTSREADFUNCTION = 20000 + 301
    HSTSREADDATA = 10000 + 302
    HSTSWRITEFUNCTION = 20000 + 303
    HSTSWRITEDATA = 10000 + 304
    AWS_SIGV4 = 10000 + 305
    DOH_SSL_VERIFYPEER = 0 + 306
    DOH_SSL_VERIFYHOST = 0 + 307
    DOH_SSL_VERIFYSTATUS = 0 + 308
    CAINFO_BLOB = 40000 + 309
    PROXY_CAINFO_BLOB = 40000 + 310
    SSH_HOST_PUBLIC_KEY_SHA256 = 10000 + 311
    PREREQFUNCTION = 20000 + 312
    PREREQDATA = 10000 + 313
    MAXLIFETIME_CONN = 0 + 314
    MIME_OPTIONS = 0 + 315
    SSH_HOSTKEYFUNCTION = 20000 + 316
    SSH_HOSTKEYDATA = 10000 + 317
    PROTOCOLS_STR = 10000 + 318
    REDIR_PROTOCOLS_STR = 10000 + 319
    WS_OPTIONS = 0 + 320
    CA_CACHE_TIMEOUT = 0 + 321
    QUICK_EXIT = 0 + 322
    HAPROXY_CLIENT_IP = 10000 + 323
    SERVER_RESPONSE_TIMEOUT_MS = 0 + 324
    ECH = 10000 + 325
    TCP_KEEPCNT = 0 + 326
    UPLOAD_FLAGS = 0 + 327
    SSL_SIGNATURE_ALGORITHMS = 10000 + 328
    # NOTE(review): the members numbered from 1000 upward presumably come from
    # the patched libcurl build this package wraps, not stock libcurl — confirm.
    HTTPBASEHEADER = 10000 + 1000
    SSL_SIG_HASH_ALGS = 10000 + 1001
    SSL_ENABLE_ALPS = 0 + 1002
    SSL_CERT_COMPRESSION = 10000 + 1003
    SSL_ENABLE_TICKET = 0 + 1004
    HTTP2_PSEUDO_HEADERS_ORDER = 10000 + 1005
    HTTP2_SETTINGS = 10000 + 1006
    SSL_PERMUTE_EXTENSIONS = 0 + 1007
    HTTP2_WINDOW_UPDATE = 0 + 1008
    HTTP2_STREAMS = 10000 + 1010
    TLS_GREASE = 0 + 1011
    TLS_EXTENSION_ORDER = 10000 + 1012
    STREAM_EXCLUSIVE = 0 + 1013
    TLS_KEY_USAGE_NO_CHECK = 0 + 1014
    TLS_SIGNED_CERT_TIMESTAMPS = 0 + 1015
    TLS_STATUS_REQUEST = 0 + 1016
    TLS_DELEGATED_CREDENTIALS = 10000 + 1017
    TLS_RECORD_SIZE_LIMIT = 0 + 1018
    TLS_KEY_SHARES_LIMIT = 0 + 1019
    TLS_USE_NEW_ALPS_CODEPOINT = 0 + 1020
    HTTP2_NO_PRIORITY = 0 + 1021
    PROXY_CREDENTIAL_NO_REUSE = 0 + 1022

    # Additional names bound to the value of an existing member, defined only
    # when that member is present in the class namespace.
    if locals().get("WRITEDATA"):
        FILE = locals().get("WRITEDATA")
    if locals().get("READDATA"):
        INFILE = locals().get("READDATA")
    if locals().get("HEADERDATA"):
        WRITEHEADER = locals().get("HEADERDATA")
class CurlInfo(IntEnum):
    """``CURLINFO_`` constants extracted from libcurl.

    See: https://curl.se/libcurl/c/curl_easy_getinfo.html

    Every value (apart from ``TEXT`` and ``LASTONE``) is written as
    ``<type mask> + <info number>``. ``Curl.getinfo`` selects the C result
    type from ``value & 0xF00000``.
    """

    TEXT = 0
    EFFECTIVE_URL = 0x100000 + 1
    RESPONSE_CODE = 0x200000 + 2
    TOTAL_TIME = 0x300000 + 3
    NAMELOOKUP_TIME = 0x300000 + 4
    CONNECT_TIME = 0x300000 + 5
    PRETRANSFER_TIME = 0x300000 + 6
    SIZE_UPLOAD_T = 0x600000 + 7
    SIZE_DOWNLOAD_T = 0x600000 + 8
    SPEED_DOWNLOAD_T = 0x600000 + 9
    SPEED_UPLOAD_T = 0x600000 + 10
    HEADER_SIZE = 0x200000 + 11
    REQUEST_SIZE = 0x200000 + 12
    SSL_VERIFYRESULT = 0x200000 + 13
    # FILETIME and FILETIME_T share info number 14 but differ in type mask.
    FILETIME = 0x200000 + 14
    FILETIME_T = 0x600000 + 14
    CONTENT_LENGTH_DOWNLOAD_T = 0x600000 + 15
    CONTENT_LENGTH_UPLOAD_T = 0x600000 + 16
    STARTTRANSFER_TIME = 0x300000 + 17
    CONTENT_TYPE = 0x100000 + 18
    REDIRECT_TIME = 0x300000 + 19
    REDIRECT_COUNT = 0x200000 + 20
    PRIVATE = 0x100000 + 21
    HTTP_CONNECTCODE = 0x200000 + 22
    HTTPAUTH_AVAIL = 0x200000 + 23
    PROXYAUTH_AVAIL = 0x200000 + 24
    OS_ERRNO = 0x200000 + 25
    NUM_CONNECTS = 0x200000 + 26
    SSL_ENGINES = 0x400000 + 27
    COOKIELIST = 0x400000 + 28
    FTP_ENTRY_PATH = 0x100000 + 30
    REDIRECT_URL = 0x100000 + 31
    PRIMARY_IP = 0x100000 + 32
    APPCONNECT_TIME = 0x300000 + 33
    CERTINFO = 0x400000 + 34
    CONDITION_UNMET = 0x200000 + 35
    RTSP_SESSION_ID = 0x100000 + 36
    RTSP_CLIENT_CSEQ = 0x200000 + 37
    RTSP_SERVER_CSEQ = 0x200000 + 38
    RTSP_CSEQ_RECV = 0x200000 + 39
    PRIMARY_PORT = 0x200000 + 40
    LOCAL_IP = 0x100000 + 41
    LOCAL_PORT = 0x200000 + 42
    ACTIVESOCKET = 0x500000 + 44
    TLS_SSL_PTR = 0x400000 + 45
    HTTP_VERSION = 0x200000 + 46
    PROXY_SSL_VERIFYRESULT = 0x200000 + 47
    SCHEME = 0x100000 + 49
    TOTAL_TIME_T = 0x600000 + 50
    NAMELOOKUP_TIME_T = 0x600000 + 51
    CONNECT_TIME_T = 0x600000 + 52
    PRETRANSFER_TIME_T = 0x600000 + 53
    STARTTRANSFER_TIME_T = 0x600000 + 54
    REDIRECT_TIME_T = 0x600000 + 55
    APPCONNECT_TIME_T = 0x600000 + 56
    RETRY_AFTER = 0x600000 + 57
    EFFECTIVE_METHOD = 0x100000 + 58
    PROXY_ERROR = 0x200000 + 59
    REFERER = 0x100000 + 60
    CAINFO = 0x100000 + 61
    CAPATH = 0x100000 + 62
    XFER_ID = 0x600000 + 63
    CONN_ID = 0x600000 + 64
    QUEUE_TIME_T = 0x600000 + 65
    USED_PROXY = 0x200000 + 66
    POSTTRANSFER_TIME_T = 0x600000 + 67
    EARLYDATA_SENT_T = 0x600000 + 68
    HTTPAUTH_USED = 0x200000 + 69
    PROXYAUTH_USED = 0x200000 + 70
    LASTONE = 70

    # Alias: inside the class body RESPONSE_CODE is still the plain int, so
    # assigning it again registers HTTP_CODE as a second name for the member.
    HTTP_CODE = RESPONSE_CODE
class CurlMOpt(IntEnum):
    """``CURLMOPT_`` constants extracted from libcurl.

    See: https://curl.se/libcurl/c/curl_multi_setopt.html

    Each value is written as ``<type base> + <option number>``.
    """

    SOCKETFUNCTION = 20000 + 1
    SOCKETDATA = 10000 + 2
    PIPELINING = 0 + 3
    TIMERFUNCTION = 20000 + 4
    TIMERDATA = 10000 + 5
    MAXCONNECTS = 0 + 6
    MAX_HOST_CONNECTIONS = 0 + 7
    MAX_PIPELINE_LENGTH = 0 + 8
    CONTENT_LENGTH_PENALTY_SIZE = 30000 + 9
    CHUNK_LENGTH_PENALTY_SIZE = 30000 + 10
    PIPELINING_SITE_BL = 10000 + 11
    PIPELINING_SERVER_BL = 10000 + 12
    MAX_TOTAL_CONNECTIONS = 0 + 13
    PUSHFUNCTION = 20000 + 14
    PUSHDATA = 10000 + 15
    MAX_CONCURRENT_STREAMS = 0 + 16
class CurlECode(IntEnum):
    """``CURLE_`` error code constants extracted from libcurl.

    See: https://curl.se/libcurl/c/libcurl-errors.html

    ``OBSOLETE*`` and ``RESERVED*`` entries are placeholders that keep the
    numbering contiguous.
    """

    OK = 0
    UNSUPPORTED_PROTOCOL = 1
    FAILED_INIT = 2
    URL_MALFORMAT = 3
    NOT_BUILT_IN = 4
    COULDNT_RESOLVE_PROXY = 5
    COULDNT_RESOLVE_HOST = 6
    COULDNT_CONNECT = 7
    WEIRD_SERVER_REPLY = 8
    REMOTE_ACCESS_DENIED = 9
    FTP_ACCEPT_FAILED = 10
    FTP_WEIRD_PASS_REPLY = 11
    FTP_ACCEPT_TIMEOUT = 12
    FTP_WEIRD_PASV_REPLY = 13
    FTP_WEIRD_227_FORMAT = 14
    FTP_CANT_GET_HOST = 15
    HTTP2 = 16
    FTP_COULDNT_SET_TYPE = 17
    PARTIAL_FILE = 18
    FTP_COULDNT_RETR_FILE = 19
    OBSOLETE20 = 20
    QUOTE_ERROR = 21
    HTTP_RETURNED_ERROR = 22
    WRITE_ERROR = 23
    OBSOLETE24 = 24
    UPLOAD_FAILED = 25
    READ_ERROR = 26
    OUT_OF_MEMORY = 27
    OPERATION_TIMEDOUT = 28
    OBSOLETE29 = 29
    FTP_PORT_FAILED = 30
    FTP_COULDNT_USE_REST = 31
    OBSOLETE32 = 32
    RANGE_ERROR = 33
    OBSOLETE34 = 34
    SSL_CONNECT_ERROR = 35
    BAD_DOWNLOAD_RESUME = 36
    FILE_COULDNT_READ_FILE = 37
    LDAP_CANNOT_BIND = 38
    LDAP_SEARCH_FAILED = 39
    OBSOLETE40 = 40
    OBSOLETE41 = 41
    ABORTED_BY_CALLBACK = 42
    BAD_FUNCTION_ARGUMENT = 43
    OBSOLETE44 = 44
    INTERFACE_FAILED = 45
    OBSOLETE46 = 46
    TOO_MANY_REDIRECTS = 47
    UNKNOWN_OPTION = 48
    SETOPT_OPTION_SYNTAX = 49
    OBSOLETE50 = 50
    OBSOLETE51 = 51
    GOT_NOTHING = 52
    SSL_ENGINE_NOTFOUND = 53
    SSL_ENGINE_SETFAILED = 54
    SEND_ERROR = 55
    RECV_ERROR = 56
    OBSOLETE57 = 57
    SSL_CERTPROBLEM = 58
    SSL_CIPHER = 59
    PEER_FAILED_VERIFICATION = 60
    BAD_CONTENT_ENCODING = 61
    OBSOLETE62 = 62
    FILESIZE_EXCEEDED = 63
    USE_SSL_FAILED = 64
    SEND_FAIL_REWIND = 65
    SSL_ENGINE_INITFAILED = 66
    LOGIN_DENIED = 67
    TFTP_NOTFOUND = 68
    TFTP_PERM = 69
    REMOTE_DISK_FULL = 70
    TFTP_ILLEGAL = 71
    TFTP_UNKNOWNID = 72
    REMOTE_FILE_EXISTS = 73
    TFTP_NOSUCHUSER = 74
    OBSOLETE75 = 75
    OBSOLETE76 = 76
    SSL_CACERT_BADFILE = 77
    REMOTE_FILE_NOT_FOUND = 78
    SSH = 79
    SSL_SHUTDOWN_FAILED = 80
    AGAIN = 81
    SSL_CRL_BADFILE = 82
    SSL_ISSUER_ERROR = 83
    FTP_PRET_FAILED = 84
    RTSP_CSEQ_ERROR = 85
    RTSP_SESSION_ERROR = 86
    FTP_BAD_FILE_LIST = 87
    CHUNK_FAILED = 88
    NO_CONNECTION_AVAILABLE = 89
    SSL_PINNEDPUBKEYNOTMATCH = 90
    SSL_INVALIDCERTSTATUS = 91
    HTTP2_STREAM = 92
    RECURSIVE_API_CALL = 93
    AUTH_ERROR = 94
    HTTP3 = 95
    QUIC_CONNECT_ERROR = 96
    PROXY = 97
    SSL_CLIENTCERT = 98
    UNRECOVERABLE_POLL = 99
    TOO_LARGE = 100
    ECH_REQUIRED = 101
    RESERVED115115 = 102
    RESERVED116116 = 103
    RESERVED117117 = 104
    RESERVED118118 = 105
    RESERVED119119 = 106
    RESERVED120120 = 107
    RESERVED121121 = 108
    RESERVED122122 = 109
    RESERVED123123 = 110
    RESERVED124124 = 111
    RESERVED125125 = 112
    RESERVED126126 = 113
class CurlHttpVersion(IntEnum):
    """``CURL_HTTP_VERSION`` constants from libcurl, see comments for details."""

    NONE = 0
    V1_0 = 1  # please use HTTP 1.0 in the request
    V1_1 = 2  # please use HTTP 1.1 in the request
    V2_0 = 3  # please use HTTP 2 in the request
    V2TLS = 4  # use version 2 for HTTPS, version 1.1 for HTTP
    V2_PRIOR_KNOWLEDGE = 5  # please use HTTP 2 without HTTP/1.1 Upgrade
    V3 = 30  # Makes use of explicit HTTP/3 with fallback.
    V3ONLY = 31  # No fallback
class CurlWsFlag(IntEnum):
    """``CURL_WS_FLAG`` constants from libcurl.

    Each member occupies a single bit, so the values can be combined or
    tested with bitwise operators.
    """

    TEXT = 0x01  # bit 0
    BINARY = 0x02  # bit 1
    CONT = 0x04  # bit 2
    CLOSE = 0x08  # bit 3
    PING = 0x10  # bit 4
    OFFSET = 0x20  # bit 5
class CurlSslVersion(IntEnum):
    """``CURL_SSLVERSION`` constants from libcurl."""

    DEFAULT = 0
    TLSv1 = 1
    SSLv2 = 2
    SSLv3 = 3
    TLSv1_0 = 4
    TLSv1_1 = 5
    TLSv1_2 = 6
    TLSv1_3 = 7
    # Inside the class body TLSv1 is still the plain int 1, so this is 1 << 16.
    MAX_DEFAULT = TLSv1 << 16
class CurlIpResolve(IntEnum):
    """``CURL_IPRESOLVE`` constants from libcurl, see comments for details."""

    # default, uses addresses to all IP versions that your system allows
    WHATEVER = 0
    # uses only IPv4 addresses/connections
    V4 = 1
    # uses only IPv6 addresses/connections
    V6 = 2
@@ -0,0 +1,612 @@
from __future__ import annotations
import re
import struct
import sys
import warnings
from http.cookies import SimpleCookie
from pathlib import Path
from typing import TYPE_CHECKING, Any, Literal, Optional, Union, cast
import certifi
from ._wrapper import ffi, lib
from .const import CurlECode, CurlHttpVersion, CurlInfo, CurlOpt, CurlWsFlag
from .utils import CurlCffiWarning
# CA bundle path reported by ``certifi``; ``Curl`` uses it when no ``cacert``
# argument is supplied.
DEFAULT_CACERT = certifi.where()
# Bytes patterns for an HTTP status line. REASON_PHRASE_RE captures only the
# reason phrase; STATUS_LINE_RE captures (version, status code, reason phrase).
# NOTE: both require a dotted ``major.minor`` version, so a status line such
# as ``HTTP/2 200`` does not match either pattern.
REASON_PHRASE_RE = re.compile(rb"HTTP/\d\.\d [0-9]{3} (.*)")
STATUS_LINE_RE = re.compile(rb"HTTP/(\d\.\d) ([0-9]{3}) (.*)")
if TYPE_CHECKING:

    # Typing-only description of the frame metadata object that
    # ``Curl.ws_recv`` returns as the second element of its tuple. It is never
    # defined at runtime. Field meanings are presumably those of libcurl's
    # ``struct curl_ws_frame`` (https://curl.se/libcurl/c/curl_ws_meta.html)
    # -- TODO confirm against the cdef.
    class CurlWsFrame:
        age: int
        flags: int
        offset: int
        bytesleft: int
        len: int
class CurlError(Exception):
    """Root exception type of the curl_cffi package.

    Attributes:
        code: the libcurl error code attached to this error, or ``0`` when
            none was given.
    """

    def __init__(self, msg, code: Union[CurlECode, Literal[0]] = 0, *args, **kwargs):
        """Store ``code`` and forward ``msg`` plus any extras to ``Exception``."""
        self.code: Union[CurlECode, Literal[0]] = code
        super().__init__(msg, *args, **kwargs)
# Message-type codes passed as ``type_`` to the debug callback; see
# ``debug_function_default`` for how each one is rendered.
CURLINFO_TEXT = 0
CURLINFO_HEADER_IN = 1
CURLINFO_HEADER_OUT = 2
CURLINFO_DATA_IN = 3
CURLINFO_DATA_OUT = 4
CURLINFO_SSL_DATA_IN = 5
CURLINFO_SSL_DATA_OUT = 6
# Special return values of a user write callback: ``write_callback`` passes
# them back to libcurl unchanged instead of the consumed byte count.
CURL_WRITEFUNC_PAUSE = 0x10000001
CURL_WRITEFUNC_ERROR = 0xFFFFFFFF
@ffi.def_extern()
def debug_function(curl, type_: int, data, size: int, clientp) -> int:
    """ffi callback for curl debug info.

    Resolves the Python callable stored behind ``clientp`` and calls it with
    the message type and a ``bytes`` copy of the ``size`` bytes at ``data``.
    Always returns 0.
    """
    handler = ffi.from_handle(clientp)
    handler(type_, ffi.buffer(data, size)[:])
    return 0
def bytes_to_hex(b: bytes, uppercase: bool = False) -> str:
    """Render ``b`` as space-separated two-digit hex octets, e.g. ``"0a ff 3c"``.

    With ``uppercase=True`` the hex letters are ``A-F`` instead of ``a-f``.
    """
    spec = "02X" if uppercase else "02x"
    return " ".join([format(octet, spec) for octet in b])
def debug_function_default(type_: int, data: bytes) -> None:
    """Default debug callback: write one curl debug message to ``sys.stderr``.

    SSL payloads, and any payload that is not valid UTF-8, are shown as a hex
    preview of at most ``MAX_SHOW_BYTES`` bytes. Everything else is written as
    decoded text behind a prefix that identifies the message type.
    """
    MAX_SHOW_BYTES = 40
    prefix = {
        CURLINFO_TEXT: "*",
        CURLINFO_HEADER_IN: "<",
        CURLINFO_HEADER_OUT: ">",
        CURLINFO_DATA_IN: "< DATA",
        CURLINFO_DATA_OUT: "> DATA",
        CURLINFO_SSL_DATA_IN: "< SSL",
        CURLINFO_SSL_DATA_OUT: "> SSL",
    }.get(type_, "*")

    def write_hex_preview() -> None:
        hex_str = bytes_to_hex(data[:MAX_SHOW_BYTES])
        postfix = "..." if len(data) > MAX_SHOW_BYTES else ""
        sys.stderr.write(f"{prefix} [{len(data)} bytes]: {hex_str}{postfix}\n")

    # always show ssl data in binary format
    if type_ in (CURLINFO_SSL_DATA_IN, CURLINFO_SSL_DATA_OUT):
        write_hex_preview()
        return

    try:
        text = data.decode("utf-8")
    except UnicodeDecodeError:
        # Fallback to hex representation of first MAX_SHOW_BYTES bytes
        write_hex_preview()
        return

    sys.stderr.write(f"{prefix} {text}")
    # Only the data message types get an explicit trailing newline.
    if type_ not in (CURLINFO_TEXT, CURLINFO_HEADER_IN, CURLINFO_HEADER_OUT):
        sys.stderr.write("\n")
@ffi.def_extern()
def buffer_callback(ptr, size, nmemb, userdata):
    """ffi callback for curl write function, directly writes to a buffer.

    ``userdata`` is the handle of a Python object with a ``write`` method.
    Returns the number of bytes consumed.
    """
    # The payload is size * nmemb bytes long. Use the same figure for the copy
    # and for the return value so the two can never disagree.
    n_bytes = size * nmemb
    buffer = ffi.from_handle(userdata)
    buffer.write(ffi.buffer(ptr, n_bytes)[:])
    return n_bytes
def ensure_int(s):
    """Return ``int(s)``, treating any falsy value (``None``, ``""``, ``0``) as 0."""
    return int(s) if s else 0
@ffi.def_extern()
def write_callback(ptr, size, nmemb, userdata):
    """ffi callback for curl write function, calls the callback python function.

    ``userdata`` is the handle of a Python callable that receives the chunk as
    ``bytes``. If it returns ``CURL_WRITEFUNC_PAUSE`` or
    ``CURL_WRITEFUNC_ERROR``, that value is returned to libcurl unchanged.
    Otherwise the full chunk size is reported as consumed.
    """
    # although similar enough to the function above, kept here for performance reasons
    callback = ffi.from_handle(userdata)
    # The payload is size * nmemb bytes long. Use the same figure for the copy,
    # the comparison and the return value.
    n_bytes = size * nmemb
    wrote = ensure_int(callback(ffi.buffer(ptr, n_bytes)[:]))
    if wrote in (CURL_WRITEFUNC_PAUSE, CURL_WRITEFUNC_ERROR):
        return wrote
    # should make this an exception in future versions
    if wrote != n_bytes:
        warnings.warn("Wrote bytes != received bytes.", CurlCffiWarning, stacklevel=2)
    return n_bytes
# Credits: @alexio777 on https://github.com/lexiforest/curl_cffi/issues/4
def slist_to_list(head) -> list[bytes]:
    """Convert a curl slist to a python list and free the slist.

    ``head`` is always released with ``curl_slist_free_all``, even if reading
    one of the nodes raises.
    """
    result = []
    try:
        ptr = head
        while ptr:
            result.append(ffi.string(ptr.data))
            ptr = ptr.next
    finally:
        # Free in ``finally`` so a failing node does not leak the whole list.
        lib.curl_slist_free_all(head)
    return result
class Curl:
    """
    Wrapper for ``curl_easy_*`` functions of libcurl.
    """

    def __init__(self, cacert: str = "", debug: bool = False, handle=None) -> None:
        """
        Parameters:
            cacert: CA cert path to use, by default, certs from ``certifi`` are used.
            debug: whether to show curl debug messages.
            handle: a curl handle instance from ``curl_easy_init``.
        """
        self._curl = handle if handle else lib.curl_easy_init()
        # slists built up by ``setopt`` for the matching list options
        self._headers = ffi.NULL
        self._proxy_headers = ffi.NULL
        self._resolve = ffi.NULL
        self._cacert = cacert or DEFAULT_CACERT
        self._is_cert_set = False
        # References to the objects handed to libcurl by ``setopt``; they are
        # held here until ``clean_after_perform`` drops them.
        self._write_handle: Any = None
        self._header_handle: Any = None
        self._debug_handle: Any = None
        self._body_handle: Any = None
        # TODO: use CURL_ERROR_SIZE
        self._error_buffer = ffi.new("char[]", 256)
        self._debug = debug
        self._set_error_buffer()

    def _set_error_buffer(self) -> None:
        """Register the error buffer with libcurl and re-apply debug mode."""
        ret = lib._curl_easy_setopt(self._curl, CurlOpt.ERRORBUFFER, self._error_buffer)
        if ret != 0:
            warnings.warn("Failed to set error buffer", CurlCffiWarning, stacklevel=2)
        if self._debug:
            self.debug()

    def debug(self) -> None:
        """Turn on verbose mode and install the default debug callback."""
        self.setopt(CurlOpt.VERBOSE, 1)
        self.setopt(CurlOpt.DEBUGFUNCTION, True)

    def __del__(self) -> None:
        self.close()

    def _check_error(self, errcode: int, *args: Any) -> None:
        """Raise the ``CurlError`` for ``errcode`` if it is non-zero."""
        error = self._get_error(errcode, *args)
        if error is not None:
            raise error

    def _get_error(self, errcode: int, *args: Any) -> Optional[CurlError]:
        """Build a ``CurlError`` for a non-zero ``errcode``, else return ``None``.

        ``args`` describe the attempted action and are joined into the message.
        """
        if errcode == 0:
            return None
        errmsg = ffi.string(self._error_buffer).decode(errors="backslashreplace")
        action = " ".join([str(a) for a in args])
        return CurlError(
            f"Failed to {action}, curl: ({errcode}) {errmsg}. "
            "See https://curl.se/libcurl/c/libcurl-errors.html first for more "
            "details.",
            code=cast(CurlECode, errcode),
        )

    def setopt(self, option: CurlOpt, value: Any) -> int:
        """Wrapper for ``curl_easy_setopt``.

        Args:
            option: option to set, using constants from CurlOpt enum
            value: value to set, strings will be handled automatically

        Returns:
            0 if no error, see ``CurlECode``.

        Raises:
            NotImplementedError: if the option is not supported.
            CurlError: if libcurl rejects the option.
        """
        input_option = {
            # this should be int in curl, but cffi requires pointer for void*
            # it will be convert back in the glue c code.
            0: "long*",
            10000: "char*",
            20000: "void*",
            30000: "int64_t*",  # offset type
            40000: "void*",  # blob type
        }

        # Convert value
        value_type = input_option.get((option // 10000) * 10000)
        if value_type == "long*" or value_type == "int64_t*":
            c_value = ffi.new(value_type, value)
        elif option == CurlOpt.WRITEDATA:
            # value is a buffer-like object; buffer_callback writes into it
            c_value = ffi.new_handle(value)
            self._write_handle = c_value
            lib._curl_easy_setopt(
                self._curl, CurlOpt.WRITEFUNCTION, lib.buffer_callback
            )
        elif option == CurlOpt.HEADERDATA:
            c_value = ffi.new_handle(value)
            self._header_handle = c_value
            lib._curl_easy_setopt(
                self._curl, CurlOpt.HEADERFUNCTION, lib.buffer_callback
            )
        elif option == CurlOpt.WRITEFUNCTION:
            # value is a Python callable: install the C trampoline and pass the
            # callable itself as the callback's user data.
            c_value = ffi.new_handle(value)
            self._write_handle = c_value
            lib._curl_easy_setopt(self._curl, CurlOpt.WRITEFUNCTION, lib.write_callback)
            option = CurlOpt.WRITEDATA
        elif option == CurlOpt.HEADERFUNCTION:
            c_value = ffi.new_handle(value)
            self._header_handle = c_value
            lib._curl_easy_setopt(
                self._curl, CurlOpt.HEADERFUNCTION, lib.write_callback
            )
            option = CurlOpt.HEADERDATA
        elif option == CurlOpt.DEBUGFUNCTION:
            if value is True:
                value = debug_function_default
            c_value = ffi.new_handle(value)
            self._debug_handle = c_value
            lib._curl_easy_setopt(self._curl, CurlOpt.DEBUGFUNCTION, lib.debug_function)
            option = CurlOpt.DEBUGDATA
        elif value_type == "char*":
            c_value = value.encode() if isinstance(value, str) else value
            # Must keep a reference, otherwise may be GCed.
            if option == CurlOpt.POSTFIELDS:
                self._body_handle = c_value
        else:
            raise NotImplementedError(f"Option unsupported: {option}")

        if option == CurlOpt.HTTPHEADER:
            for header in value:
                self._headers = lib.curl_slist_append(self._headers, header)
            ret = lib._curl_easy_setopt(self._curl, option, self._headers)
        elif option == CurlOpt.PROXYHEADER:
            for proxy_header in value:
                self._proxy_headers = lib.curl_slist_append(
                    self._proxy_headers, proxy_header
                )
            ret = lib._curl_easy_setopt(self._curl, option, self._proxy_headers)
        elif option == CurlOpt.RESOLVE:
            for resolve in value:
                if isinstance(resolve, str):
                    resolve = resolve.encode()
                self._resolve = lib.curl_slist_append(self._resolve, resolve)
            ret = lib._curl_easy_setopt(self._curl, option, self._resolve)
        else:
            ret = lib._curl_easy_setopt(self._curl, option, c_value)
        self._check_error(ret, "setopt", option, value)

        if option == CurlOpt.CAINFO:
            self._is_cert_set = True

        return ret

    def getinfo(self, option: CurlInfo) -> Union[bytes, int, float, list]:
        """Wrapper for ``curl_easy_getinfo``. Gets information in response after
        curl.perform.

        Parameters:
            option: option to get info of, using constants from ``CurlInfo`` enum

        Returns:
            value retrieved from last perform.

        Raises:
            CurlError: if libcurl reports an error.
        """
        # The high bits of the option select the C type of the result.
        ret_option = {
            0x100000: "char**",
            0x200000: "long*",
            0x300000: "double*",
            0x400000: "struct curl_slist **",
            0x500000: "long*",
            0x600000: "int64_t*",
        }
        ret_cast_option = {
            0x100000: ffi.string,
            0x200000: int,
            0x300000: float,
            0x500000: int,
            0x600000: int,
        }
        c_value = ffi.new(ret_option[option & 0xF00000])
        ret = lib.curl_easy_getinfo(self._curl, option, c_value)
        self._check_error(ret, "getinfo", option)
        # cookielist and ssl_engines starts with 0x400000, see also: const.py
        if option & 0xF00000 == 0x400000:
            return slist_to_list(c_value[0])
        if c_value[0] == ffi.NULL:
            return b""
        return ret_cast_option[option & 0xF00000](c_value[0])

    def version(self) -> bytes:
        """Get the underlying libcurl version."""
        return ffi.string(lib.curl_version())

    def impersonate(self, target: str, default_headers: bool = True) -> int:
        """Set the browser type to impersonate.

        Parameters:
            target: browser to impersonate.
            default_headers: whether to add default headers, like User-Agent.

        Returns:
            0 if no error.
        """
        return lib.curl_easy_impersonate(
            self._curl, target.encode(), int(default_headers)
        )

    def _ensure_cacert(self) -> None:
        """Apply the configured CA bundle unless ``CAINFO`` was already set."""
        if not self._is_cert_set:
            ret = self.setopt(CurlOpt.CAINFO, self._cacert)
            self._check_error(ret, "set cacert")
            ret = self.setopt(CurlOpt.PROXY_CAINFO, self._cacert)
            self._check_error(ret, "set proxy cacert")

    def perform(self, clear_headers: bool = True) -> None:
        """Wrapper for ``curl_easy_perform``, performs a curl request.

        Parameters:
            clear_headers: clear header slist used in this perform

        Raises:
            CurlError: if the perform was not successful.
        """
        # make sure we set a cacert store
        self._ensure_cacert()

        # here we go
        ret = lib.curl_easy_perform(self._curl)

        try:
            self._check_error(ret, "perform")
        finally:
            # cleaning
            self.clean_after_perform(clear_headers)

    def upkeep(self) -> int:
        """Wrapper for ``curl_easy_upkeep``; returns its result code."""
        return lib.curl_easy_upkeep(self._curl)

    def clean_after_perform(self, clear_headers: bool = True) -> None:
        """Clean up handles and buffers after ``perform``, called at the end of
        ``perform``."""
        self._write_handle = None
        self._header_handle = None
        self._debug_handle = None
        self._body_handle = None
        if clear_headers:
            if self._headers != ffi.NULL:
                lib.curl_slist_free_all(self._headers)
            self._headers = ffi.NULL

            if self._proxy_headers != ffi.NULL:
                lib.curl_slist_free_all(self._proxy_headers)
            self._proxy_headers = ffi.NULL

    def duphandle(self) -> Curl:
        """Wrapper for ``curl_easy_duphandle``.

        This is not a full copy of entire curl object in python. For example, headers
        handle is not copied, you have to set them again."""
        new_handle = lib.curl_easy_duphandle(self._curl)
        c = Curl(cacert=self._cacert, debug=self._debug, handle=new_handle)
        return c

    def reset(self) -> None:
        """Reset all curl options, wrapper for ``curl_easy_reset``."""
        self._is_cert_set = False
        if self._curl is not None:
            lib.curl_easy_reset(self._curl)
            self._set_error_buffer()
        self._resolve = ffi.NULL

    def parse_cookie_headers(self, headers: list[bytes]) -> SimpleCookie:
        """Extract ``cookies.SimpleCookie`` from header lines.

        Parameters:
            headers: list of headers in bytes.

        Returns:
            A parsed cookies.SimpleCookie instance.
        """
        cookie: SimpleCookie = SimpleCookie()
        for header in headers:
            if header.lower().startswith(b"set-cookie: "):
                cookie.load(header[12:].decode())  # len("set-cookie: ") == 12
        return cookie

    @staticmethod
    def get_reason_phrase(status_line: bytes) -> bytes:
        """Extract reason phrase, like ``OK``, ``Not Found`` from response status
        line. Returns ``b""`` when the line does not match."""
        m = REASON_PHRASE_RE.match(status_line)
        return m.group(1) if m else b""

    @staticmethod
    def parse_status_line(status_line: bytes) -> tuple[CurlHttpVersion, int, bytes]:
        """Parse status line.

        Returns:
            http_version, status_code, and reason phrase. A line that does not
            match yields ``(CurlHttpVersion.V1_0, 0, b"")``.
        """
        m = STATUS_LINE_RE.match(status_line)
        if not m:
            return CurlHttpVersion.V1_0, 0, b""
        # STATUS_LINE_RE is a bytes pattern, so its groups are bytes. Comparing
        # them with str literals never matched and always produced NONE.
        version = m.group(1)
        if version == b"2.0":
            http_version = CurlHttpVersion.V2_0
        elif version == b"1.1":
            http_version = CurlHttpVersion.V1_1
        elif version == b"1.0":
            http_version = CurlHttpVersion.V1_0
        else:
            http_version = CurlHttpVersion.NONE
        status_code = int(m.group(2))
        reason = m.group(3)

        return http_version, status_code, reason

    def close(self) -> None:
        """Close and cleanup curl handle, wrapper for ``curl_easy_cleanup``."""
        if self._curl:
            lib.curl_easy_cleanup(self._curl)
            self._curl = None
        ffi.release(self._error_buffer)
        self._resolve = ffi.NULL

    def ws_recv(self, n: int = 1024) -> tuple[bytes, CurlWsFrame]:
        """Receive a frame from a websocket connection.

        Args:
            n: maximum data to receive.

        Returns:
            a tuple of frame content and curl frame meta struct.

        Raises:
            CurlError: if failed.
        """
        buffer = ffi.new("char[]", n)
        n_recv = ffi.new("size_t *")
        p_frame = ffi.new("struct curl_ws_frame **")

        ret = lib.curl_ws_recv(self._curl, buffer, n, n_recv, p_frame)
        self._check_error(ret, "WS_RECV")

        # Frame meta explained: https://curl.se/libcurl/c/curl_ws_meta.html
        frame = p_frame[0]

        return ffi.buffer(buffer)[: n_recv[0]], frame

    def ws_send(self, payload: bytes, flags: CurlWsFlag = CurlWsFlag.BINARY) -> int:
        """Send data to a websocket connection.

        Args:
            payload: content to send.
            flags: websocket flag to set for the frame, default: binary.

        Returns:
            the number of bytes sent, as reported by ``curl_ws_send``.

        Raises:
            CurlError: if failed.
        """
        n_sent = ffi.new("size_t *")
        buffer = ffi.from_buffer(payload)
        ret = lib.curl_ws_send(self._curl, buffer, len(payload), n_sent, 0, flags)
        self._check_error(ret, "WS_SEND")
        return n_sent[0]

    def ws_close(self, code: int = 1000, message: bytes = b"") -> int:
        """Close a websocket connection. Shorthand for :meth:`ws_send`
        with close code and message. Note that to completely close the connection,
        you must close the curl handle after this call with :meth:`close`.

        Args:
            code: close code.
            message: close message.

        Returns:
            the number of bytes sent, see :meth:`ws_send`.

        Raises:
            CurlError: if failed.
        """
        # Close frame payload: 2-byte big-endian status code, then the message.
        payload = struct.pack("!H", code) + message
        # Must be flagged as CLOSE; the ws_send default would send it as BINARY.
        return self.ws_send(payload, flags=CurlWsFlag.CLOSE)
class CurlMime:
    """Wrapper for the ``curl_mime_`` API."""

    def __init__(self, curl: Optional[Curl] = None):
        """
        Args:
            curl: Curl instance to use.
        """
        self._curl = curl if curl else Curl()
        self._form = lib.curl_mime_init(self._curl._curl)

    def addpart(
        self,
        name: str,
        *,
        content_type: Optional[str] = None,
        filename: Optional[str] = None,
        local_path: Optional[Union[str, bytes, Path]] = None,
        data: Optional[bytes] = None,
    ) -> None:
        """Add a mime part for a multipart html form.

        Note: You can only use either local_path or data, not both.

        Args:
            name: name of the field.
            content_type: content_type for the field. for example: ``image/png``.
            filename: filename for the server.
            local_path: file to upload on local disk.
            data: file content to upload.

        Raises:
            CurlError: if both ``local_path`` and ``data`` are given, or if
                libcurl rejects one of the fields.
            FileNotFoundError: if ``local_path`` does not exist.
        """
        # Reject conflicting sources before touching the form, so that an
        # invalid call does not leave a half-built part behind.
        if local_path and data:
            raise CurlError("Can not use local_path and data at the same time.")

        part = lib.curl_mime_addpart(self._form)

        ret = lib.curl_mime_name(part, name.encode())
        if ret != 0:
            raise CurlError("Add field failed.")

        # mime type
        if content_type is not None:
            ret = lib.curl_mime_type(part, content_type.encode())
            if ret != 0:
                raise CurlError("Add field failed.")

        # remote file name
        if filename is not None:
            ret = lib.curl_mime_filename(part, filename.encode())
            if ret != 0:
                raise CurlError("Add field failed.")

        # this is a filename
        if local_path is not None:
            if isinstance(local_path, Path):
                local_path_str = str(local_path)
            elif isinstance(local_path, bytes):
                local_path_str = local_path.decode()
            else:
                local_path_str = local_path

            if not Path(local_path_str).exists():
                raise FileNotFoundError(f"File not found at {local_path_str}")
            ret = lib.curl_mime_filedata(part, local_path_str.encode())
            if ret != 0:
                raise CurlError("Add field failed.")

        if data is not None:
            if not isinstance(data, bytes):
                data = str(data).encode()
            ret = lib.curl_mime_data(part, data, len(data))
            # Checked like every other curl_mime_* call above; previously a
            # failure here was silently ignored.
            if ret != 0:
                raise CurlError("Add field failed.")

    @classmethod
    def from_list(cls, files: list[dict]):
        """Create a multipart instance from a list of dict, for keys, see ``addpart``"""
        form = cls()
        for file in files:
            form.addpart(**file)
        return form

    def attach(self, curl: Optional[Curl] = None) -> None:
        """Attach the mime instance to a curl instance."""
        c = curl if curl else self._curl
        c.setopt(CurlOpt.MIMEPOST, self._form)

    def close(self) -> None:
        """Close the mime instance and underlying files. This method must be called
        after ``perform`` or ``request``."""
        # ``_form`` is missing when ``__init__`` failed before creating it;
        # ``__del__`` still lands here, so fall back to NULL.
        form = getattr(self, "_form", ffi.NULL)
        lib.curl_mime_free(form)
        self._form = ffi.NULL

    def __del__(self) -> None:
        self.close()
@@ -0,0 +1 @@
# Marker file for PEP 561.
@@ -0,0 +1,171 @@
__all__ = [
"Session",
"AsyncSession",
"BrowserType",
"BrowserTypeLiteral",
"CurlWsFlag",
"request",
"head",
"get",
"post",
"put",
"patch",
"delete",
"options",
"RequestsError",
"Cookies",
"Headers",
"Request",
"Response",
"AsyncWebSocket",
"WebSocket",
"WebSocketError",
"WebSocketClosed",
"WebSocketTimeout",
"WsCloseCode",
"ExtraFingerprints",
"CookieTypes",
"HeaderTypes",
"ProxySpec",
]
from typing import Optional, TYPE_CHECKING, TypedDict
from ..const import CurlWsFlag
from .cookies import Cookies, CookieTypes
from .errors import RequestsError
from .headers import Headers, HeaderTypes
from .impersonate import BrowserType, BrowserTypeLiteral, ExtraFingerprints
from .models import Request, Response
from .session import (
AsyncSession,
HttpMethod,
ProxySpec,
Session,
ThreadType,
RequestParams,
Unpack,
)
from .websockets import (
AsyncWebSocket,
WebSocket,
WebSocketClosed,
WebSocketError,
WebSocketTimeout,
WsCloseCode,
)
if TYPE_CHECKING:

    # Keyword arguments accepted by the module-level shortcuts below:
    # everything in ``RequestParams`` plus the three ``request`` parameters
    # that configure the temporary ``Session``.
    class SessionRequestParams(RequestParams, total=False):
        thread: Optional[ThreadType]
        curl_options: Optional[dict]
        debug: Optional[bool]

else:
    # Runtime placeholder so that ``Unpack[SessionRequestParams]`` in the
    # signatures below has a name to refer to.
    SessionRequestParams = TypedDict
def request(
    method: HttpMethod,
    url: str,
    thread: Optional[ThreadType] = None,
    curl_options: Optional[dict] = None,
    debug: Optional[bool] = None,
    **kwargs: Unpack[RequestParams],
) -> Response:
    """Send an http request using a temporary ``Session``.

    Parameters:
        method: http method for the request: GET/POST/PUT/DELETE etc.
        url: url for the requests.
        params: query string for the requests.
        data: form values(dict/list/tuple) or binary data to use in body,
            ``Content-Type: application/x-www-form-urlencoded`` will be added if a dict
            is given.
        json: json values to use in body, `Content-Type: application/json` will be added
            automatically.
        headers: headers to send.
        cookies: cookies to use.
        files: not supported, use ``multipart`` instead.
        auth: HTTP basic auth, a tuple of (username, password), only basic auth is
            supported.
        timeout: how many seconds to wait before giving up.
        allow_redirects: whether to allow redirection.
        max_redirects: max redirect counts, default 30, use -1 for unlimited.
        proxies: dict of proxies to use, prefer to use ``proxy`` if they are the same.
            format: ``{"http": proxy_url, "https": proxy_url}``.
        proxy: proxy to use, format: "http://user:pass@proxy_url".
            Can't be used with `proxies` parameter.
        proxy_auth: HTTP basic auth for proxy, a tuple of (username, password).
        verify: whether to verify https certs.
        referer: shortcut for setting referer header.
        accept_encoding: shortcut for setting accept-encoding header.
        content_callback: a callback function to receive response body.
            ``def callback(chunk: bytes) -> None:``
        impersonate: which browser version to impersonate.
        ja3: ja3 string to impersonate.
        akamai: akamai string to impersonate.
        extra_fp: extra fingerprints options, in complement to ja3 and akamai strings.
        thread: thread engine to use for working with other thread implementations.
            choices: eventlet, gevent.
        default_headers: whether to set default browser headers when impersonating.
        default_encoding: encoding for decoding response content if charset is not found
            in headers. Defaults to "utf-8". Can be set to a callable for automatic
            detection.
        quote: Set characters to be quoted, i.e. percent-encoded. Default safe string
            is ``!#$%&'()*+,/:;=?@[]~``. If set to a string, the characters will be
            removed from the safe string, thus quoted. If set to False, the url will be
            kept as is, without any automatic percent-encoding, you must encode the URL
            yourself.
        curl_options: extra curl options to use.
        http_version: limiting http version, defaults to http2.
        debug: print extra curl debug info.
        interface: which interface to use.
        cert: a tuple of (cert, key) filenames for client cert.
        stream: streaming the response, default False.
        max_recv_speed: maximum receive speed, bytes per second.
        multipart: upload files using the multipart format, see examples for details.
        discard_cookies: discard cookies from server. Default to False.

    Returns:
        A ``Response`` object.
    """
    # ``None`` means "not specified" and is treated as debug off.
    if debug is None:
        debug = False
    session = Session(thread=thread, curl_options=curl_options, debug=debug)
    with session as s:
        return s.request(method=method, url=url, **kwargs)
def head(url: str, **kwargs: Unpack[SessionRequestParams]):
    """Send a ``HEAD`` request to ``url``; keyword arguments go to ``request``."""
    return request("HEAD", url, **kwargs)
def get(url: str, **kwargs: Unpack[SessionRequestParams]):
    """Send a ``GET`` request to ``url``; keyword arguments go to ``request``."""
    return request("GET", url, **kwargs)
def post(url: str, **kwargs: Unpack[SessionRequestParams]):
    """Send a ``POST`` request to ``url``; keyword arguments go to ``request``."""
    return request("POST", url, **kwargs)
def put(url: str, **kwargs: Unpack[SessionRequestParams]):
    """Send a ``PUT`` request to ``url``; keyword arguments go to ``request``."""
    return request("PUT", url, **kwargs)
def patch(url: str, **kwargs: Unpack[SessionRequestParams]):
    """Send a ``PATCH`` request to ``url``; keyword arguments go to ``request``."""
    return request("PATCH", url, **kwargs)
def delete(url: str, **kwargs: Unpack[SessionRequestParams]):
    """Send a ``DELETE`` request to ``url``; keyword arguments go to ``request``."""
    return request("DELETE", url, **kwargs)
def options(url: str, **kwargs: Unpack[SessionRequestParams]):
    """Send an ``OPTIONS`` request to ``url``; keyword arguments go to ``request``."""
    return request("OPTIONS", url, **kwargs)
def trace(url: str, **kwargs: Unpack[SessionRequestParams]):
    """Send a ``TRACE`` request to ``url``; keyword arguments go to ``request``."""
    return request("TRACE", url, **kwargs)
def query(url: str, **kwargs: Unpack[SessionRequestParams]):
    """Send a ``QUERY`` request to ``url``; keyword arguments go to ``request``."""
    return request("QUERY", url, **kwargs)
@@ -0,0 +1,364 @@
# Adapted from: https://github.com/encode/httpx/blob/master/httpx/_models.py,
# which is licensed under the BSD License.
# See https://github.com/encode/httpx/blob/master/LICENSE.md
__all__ = ["Cookies"]
import re
import time
import warnings
from dataclasses import dataclass
from http.cookiejar import Cookie, CookieJar
from http.cookies import _unquote
from typing import Optional, Union
from collections.abc import Iterator, MutableMapping
from urllib.parse import urlparse
from ..utils import CurlCffiWarning
from .errors import CookieConflict, RequestsError
# Every input shape accepted by ``Cookies(...)``: another ``Cookies``, a
# ``CookieJar``, a name -> value dict, or a list of (name, value) pairs.
CookieTypes = Union["Cookies", CookieJar, dict[str, str], list[tuple[str, str]]]
@dataclass
class CurlMorsel:
    """One cookie, convertible between curl's tab-separated cookie line format
    and ``http.cookiejar.Cookie``."""

    name: str
    value: str
    hostname: str = ""
    subdomains: bool = False
    path: str = "/"
    secure: bool = False
    expires: int = 0
    http_only: bool = False

    @staticmethod
    def parse_bool(s):
        """Return True only for the exact string ``"TRUE"``."""
        return s == "TRUE"

    @staticmethod
    def dump_bool(s):
        """Return ``"TRUE"`` for a truthy value, otherwise ``"FALSE"``."""
        return "TRUE" if s else "FALSE"

    @classmethod
    def from_curl_format(cls, set_cookie_line: bytes):
        """Build a morsel from one tab-separated cookie line (seven fields)."""
        fields = set_cookie_line.decode().split("\t")
        raw_host, subdomains, path, secure, expires, name, value = fields

        # A leading "#" marks an http-only cookie, e.g. #HttpOnly_postman-echo.com
        http_only = raw_host.startswith("#")
        if http_only:
            raw_host = raw_host[10:]  # len("#HttpOnly_") == 10

        return cls(
            hostname=raw_host,
            subdomains=cls.parse_bool(subdomains),
            path=path,
            secure=cls.parse_bool(secure),
            expires=int(expires),
            name=name,
            value=_unquote(value),
            http_only=http_only,
        )

    def to_curl_format(self):
        """Serialize to a tab-separated cookie line.

        Raises:
            RequestsError: if the morsel has no hostname.
        """
        if not self.hostname:
            raise RequestsError(f"Domain not found for cookie {self.name}={self.value}")
        columns = (
            self.hostname,
            self.dump_bool(self.subdomains),
            self.path,
            self.dump_bool(self.secure),
            str(self.expires),
            self.name,
            self.value,
        )
        return "\t".join(columns)

    @classmethod
    def from_cookiejar_cookie(cls, cookie: Cookie):
        """Build a morsel from a ``Cookie``; ``http_only`` is always False."""
        return cls(
            cookie.name,
            cookie.value or "",
            hostname=cookie.domain,
            subdomains=cookie.domain_specified,
            path=cookie.path,
            secure=cookie.secure,
            expires=int(cookie.expires or 0),
            http_only=False,
        )

    def to_cookiejar_cookie(self) -> Cookie:
        """Convert to a ``Cookie``; ``expires == 0`` becomes a discardable
        cookie without an expiry."""
        # the leading dot actually does not mean anything nowadays
        # https://stackoverflow.com/a/20884869/1061155
        # https://github.com/python/cpython/blob/d6555abfa7384b5a40435a11bdd2aa6bbf8f5cfc/Lib/http/cookiejar.py#L1535
        session_only = self.expires == 0
        return Cookie(
            version=0,
            name=self.name,
            value=self.value,
            port=None,
            port_specified=False,
            domain=self.hostname,
            domain_specified=self.subdomains,
            domain_initial_dot=self.hostname.startswith("."),
            path=self.path,
            path_specified=bool(self.path),
            secure=self.secure,
            expires=None if session_only else self.expires,
            discard=session_only,
            comment=None,
            comment_url=None,
            rest={"http_only": f"{self.http_only}"},
            rfc2109=False,
        )
# Both patterns are used by ``Cookies._eff_request_host``.
# Matches a trailing ``:<digits>`` port suffix so it can be stripped.
cut_port_re = re.compile(r":\d+$", re.ASCII)
# Matches a host that ends in ``.<digits>``.
IPV4_RE = re.compile(r"\.\d+$", re.ASCII)
class Cookies(MutableMapping[str, str]):
    """
    HTTP Cookies, as a mutable mapping.

    Cookies are kept in an ``http.cookiejar.CookieJar`` available as ``jar``.
    Mapping-style access (``cookies[name]``) matches on the cookie name only;
    use :meth:`get`, :meth:`set` and :meth:`delete` to also match on domain
    and path.
    """

    def __init__(self, cookies: "Optional[CookieTypes]" = None) -> None:
        """Create the container.

        ``cookies`` may be ``None``, a dict, a list of ``(name, value)``
        pairs, or another ``Cookies`` (copied cookie by cookie into a new
        jar). Any other object is used directly as ``jar`` (shared, not
        copied).
        """
        if cookies is None or isinstance(cookies, dict):
            self.jar = CookieJar()
            if isinstance(cookies, dict):
                for key, value in cookies.items():
                    self.set(key, value)
        elif isinstance(cookies, list):
            self.jar = CookieJar()
            for key, value in cookies:
                self.set(key, value)
        elif isinstance(cookies, Cookies):
            self.jar = CookieJar()
            for cookie in cookies.jar:
                self.jar.set_cookie(cookie)
        else:
            self.jar = cookies

    def _eff_request_host(self, request) -> str:
        """
        Almost equivalent to the eff_request_host function in:
        https://github.com/python/cpython/blob/3.11/Lib/http/cookiejar.py#L636
        """
        host = urlparse(request.url)[1]
        if host == "":
            host = request.headers.get("Host", "")
        # remove port, if present
        host = cut_port_re.sub("", host, 1)
        host = host.lower()
        if host.find(".") == -1 and not IPV4_RE.search(host):
            host += ".local"
        return host

    def get_cookies_for_curl(self, request) -> "list[CurlMorsel]":
        """the process is similar to ``cookiejar.add_cookie_header``, but load all
        cookies"""
        morsels = []
        # ``with`` releases the jar's lock on every exit path.
        with self.jar._cookies_lock:  # type: ignore
            self.jar._policy._now = self._now = int(time.time())  # type: ignore
            for cookie in self.jar:
                morsel = CurlMorsel.from_cookiejar_cookie(cookie)
                # cookies stored without a domain fall back to the request host
                if not morsel.hostname:
                    morsel.hostname = self._eff_request_host(request)
                morsels.append(morsel)
        self.jar.clear_expired_cookies()
        return morsels

    def update_cookies_from_curl(self, morsels: "list[CurlMorsel]"):
        """Store every morsel in the jar, then drop expired cookies."""
        for morsel in morsels:
            cookie = morsel.to_cookiejar_cookie()
            self.jar.set_cookie(cookie)
        self.jar.clear_expired_cookies()

    def set(
        self, name: str, value: str, domain: str = "", path: str = "/", secure=False
    ) -> None:
        """
        Set a cookie value by name. May optionally include domain and path.

        ``__Secure-`` and ``__Host-`` prefixed names have their attributes
        forced to the values the prefix requires, with a ``CurlCffiWarning``.
        """
        # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Set-Cookie
        if name.startswith("__Secure-") and secure is False:
            warnings.warn(
                "`secure` changed to True for `__Secure-` prefixed cookies",
                CurlCffiWarning,
                stacklevel=2,
            )
            secure = True
        elif name.startswith("__Host-") and (secure is False or domain or path != "/"):
            # The attribute being forced here is ``secure`` (there is no
            # ``host`` parameter), so the message names it accordingly.
            warnings.warn(
                "`secure` changed to True, `domain` removed, `path` changed to `/` "
                "for `__Host-` prefixed cookies",
                CurlCffiWarning,
                stacklevel=2,
            )
            secure = True
            domain = ""
            path = "/"
        kwargs = {
            "version": 0,
            "name": name,
            "value": value,
            "port": None,
            "port_specified": False,
            "domain": domain,
            "domain_specified": bool(domain),
            "domain_initial_dot": domain.startswith("."),
            "path": path,
            "path_specified": bool(path),
            "secure": secure,
            "expires": None,
            "discard": True,
            "comment": None,
            "comment_url": None,
            "rest": {"HttpOnly": None},
            "rfc2109": False,
        }
        cookie = Cookie(**kwargs)
        self.jar.set_cookie(cookie)

    def get(  # type: ignore
        self,
        name: str,
        default: Optional[str] = None,
        domain: Optional[str] = None,
        path: Optional[str] = None,
    ) -> Optional[str]:
        """
        Get a cookie by name. May optionally include domain and path
        in order to specify exactly which cookie to retrieve.

        Raises:
            CookieConflict: if matching cookies on unrelated domains hold
                different values.
        """
        value = None
        matched_domain = ""
        for cookie in self.jar:
            if (
                cookie.name == name
                and (domain is None or cookie.domain == domain)
                and (path is None or cookie.path == path)
            ):
                # if cookies on two different domains do not share a same value
                if (
                    value is not None
                    and not matched_domain.endswith(cookie.domain)
                    and not str(cookie.domain).endswith(matched_domain)
                    and value != cookie.value
                ):
                    message = (
                        f"Multiple cookies exist with name={name} on "
                        f"{matched_domain} and {cookie.domain}, add domain "
                        "parameter to suppress this error."
                    )
                    raise CookieConflict(message)
                value = cookie.value
                matched_domain = cookie.domain or ""
        if value is None:
            return default
        return value

    def get_dict(
        self, domain: Optional[str] = None, path: Optional[str] = None
    ) -> dict:
        """
        Cookies with the same name on different domains may overwrite each other,
        do NOT use this function as a method of serialization.
        """
        ret = {}
        for cookie in self.jar:
            if (domain is None or cookie.domain == domain) and (
                path is None or cookie.path == path
            ):
                ret[cookie.name] = cookie.value
        return ret

    def delete(
        self,
        name: str,
        domain: Optional[str] = None,
        path: Optional[str] = None,
    ) -> None:
        """
        Delete a cookie by name. May optionally include domain and path
        in order to specify exactly which cookie to delete.
        """
        if domain is not None and path is not None:
            return self.jar.clear(domain, path, name)
        # Collect first: the jar must not be mutated while it is being iterated.
        remove = [
            cookie
            for cookie in self.jar
            if cookie.name == name
            and (domain is None or cookie.domain == domain)
            and (path is None or cookie.path == path)
        ]
        for cookie in remove:
            self.jar.clear(cookie.domain, cookie.path, cookie.name)

    def clear(self, domain: Optional[str] = None, path: Optional[str] = None) -> None:
        """
        Delete all cookies. Optionally include a domain and path in
        order to only delete a subset of all the cookies.
        """
        args = []
        if domain is not None:
            args.append(domain)
        if path is not None:
            # a path is only meaningful together with a domain
            assert domain is not None
            args.append(path)
        self.jar.clear(*args)

    def update(self, cookies: "Optional[CookieTypes]" = None) -> None:  # type: ignore
        """Copy every cookie from ``cookies`` into this jar."""
        cookies = Cookies(cookies)
        for cookie in cookies.jar:
            self.jar.set_cookie(cookie)

    def __setitem__(self, name: str, value: str) -> None:
        return self.set(name, value)

    def __getitem__(self, name: str) -> str:
        value = self.get(name)
        if value is None:
            raise KeyError(name)
        return value

    def __delitem__(self, name: str) -> None:
        return self.delete(name)

    def __len__(self) -> int:
        return len(self.jar)

    def __iter__(self) -> Iterator[str]:
        return (cookie.name for cookie in self.jar)

    def __bool__(self) -> bool:
        # True as soon as the jar yields one cookie
        for _ in self.jar:
            return True
        return False

    def __repr__(self) -> str:
        cookies_repr = ", ".join(
            [
                f"<Cookie {cookie.name}={cookie.value} for {cookie.domain} />"
                for cookie in self.jar
            ]
        )
        return f"<Cookies[{cookies_repr}]>"
@@ -0,0 +1,7 @@
# for compatibility with 0.5.x
__all__ = ["CurlError", "RequestsError", "CookieConflict", "SessionClosed"]
from ..curl import CurlError
from .exceptions import CookieConflict, SessionClosed
from .exceptions import RequestException as RequestsError
@@ -0,0 +1,227 @@
# Apache 2.0 License
# Vendored from https://github.com/psf/requests/blob/main/src/requests/exceptions.py
# With our own additions
import json
from typing import Literal, Union
from ..const import CurlECode
from ..curl import CurlError
# Note IOError is an alias of OSError in Python 3.x
class RequestException(CurlError, OSError):
    """Base exception for curl_cffi.requests package

    Args:
        msg: error message, forwarded to the parent initializer.
        code: curl error code, or ``0`` (the default); forwarded to the
            parent initializer.
        response: optional response object, kept as ``self.response``.
        *args, **kwargs: forwarded unchanged to the parent initializer.
    """

    def __init__(
        self,
        msg,
        code: Union[CurlECode, Literal[0]] = 0,
        response=None,
        *args,
        **kwargs,
    ):
        super().__init__(msg, code, *args, **kwargs)
        # set after the parent initializer has run
        self.response = response
class CookieConflict(RequestException):
    """Same cookie exists for different domains."""


class SessionClosed(RequestException):
    """The session has already been closed."""


class ImpersonateError(RequestException):
    """The impersonate config was wrong or impersonate failed."""


# not used
class InvalidJSONError(RequestException):
    """A JSON error occurred. not used"""


# not used
class JSONDecodeError(InvalidJSONError, json.JSONDecodeError):
    """Couldn't decode the text into json. not used"""


class HTTPError(RequestException):
    """An HTTP error occurred."""


class IncompleteRead(HTTPError):
    """Incomplete read of content"""


class ConnectionError(RequestException):
    """A Connection error occurred."""


class DNSError(ConnectionError):
    """Could not resolve"""


class ProxyError(RequestException):
    """A proxy error occurred."""


class SSLError(ConnectionError):
    """An SSL error occurred."""


class CertificateVerifyError(SSLError):
    """Raised when certificate validation has failed"""


class Timeout(RequestException):
    """The request timed out."""


# not used
class ConnectTimeout(ConnectionError, Timeout):
    """The request timed out while trying to connect to the remote server.

    Requests that produced this error are safe to retry.

    not used
    """


# not used
class ReadTimeout(Timeout):
    """The server did not send any data in the allotted amount of time. not used"""


# not used
class URLRequired(RequestException):
    """A valid URL is required to make a request. not used"""


class TooManyRedirects(RequestException):
    """Too many redirects."""


# not used
class MissingSchema(RequestException, ValueError):
    """The URL scheme (e.g. http or https) is missing. not used"""


class InvalidSchema(RequestException, ValueError):
    """The URL scheme provided is either invalid or unsupported. not used"""


class InvalidURL(RequestException, ValueError):
    """The URL provided was somehow invalid."""


# not used
class InvalidHeader(RequestException, ValueError):
    """The header value provided was somehow invalid. not used"""


# not used
class InvalidProxyURL(InvalidURL):
    """The proxy URL provided is invalid. not used"""


# not used
class ChunkedEncodingError(RequestException):
    """The server declared chunked encoding but sent an invalid chunk. not used"""


# not used
class ContentDecodingError(RequestException):
    """Failed to decode response content. not used"""


# not used
class StreamConsumedError(RequestException, TypeError):
    """The content for this response was already consumed. not used"""


# not supported
class RetryError(RequestException):
    """Custom retries logic failed. not used"""


# not used
class UnrewindableBodyError(RequestException):
    """Requests encountered an error when trying to rewind a body. not used"""


class InterfaceError(RequestException):
    """A specified outgoing interface could not be used."""


# Warnings


# TODO: use this warning as a base
class RequestsWarning(Warning):
    """Base warning for Requests. not used"""


# not used
class FileModeWarning(RequestsWarning, DeprecationWarning):
    """A file was opened in text mode, but Requests determined its binary length.
    not used"""


# not used
class RequestsDependencyWarning(RequestsWarning):
    """An imported dependency doesn't match the expected version range."""
# Maps a curl error code (or 0) to the exception class used for it.
# Codes that are not listed here are resolved to RequestException by code2error().
CODE2ERROR = {
    0: RequestException,
    CurlECode.UNSUPPORTED_PROTOCOL: InvalidSchema,
    CurlECode.URL_MALFORMAT: InvalidURL,
    CurlECode.COULDNT_RESOLVE_PROXY: ProxyError,
    CurlECode.COULDNT_RESOLVE_HOST: DNSError,
    CurlECode.COULDNT_CONNECT: ConnectionError,
    CurlECode.WEIRD_SERVER_REPLY: ConnectionError,
    CurlECode.REMOTE_ACCESS_DENIED: ConnectionError,
    CurlECode.HTTP2: HTTPError,
    CurlECode.HTTP_RETURNED_ERROR: HTTPError,
    CurlECode.WRITE_ERROR: RequestException,
    CurlECode.READ_ERROR: RequestException,
    CurlECode.OUT_OF_MEMORY: RequestException,
    CurlECode.OPERATION_TIMEDOUT: Timeout,
    CurlECode.SSL_CONNECT_ERROR: SSLError,
    CurlECode.INTERFACE_FAILED: InterfaceError,
    CurlECode.TOO_MANY_REDIRECTS: TooManyRedirects,
    CurlECode.UNKNOWN_OPTION: RequestException,
    CurlECode.SETOPT_OPTION_SYNTAX: RequestException,
    CurlECode.GOT_NOTHING: ConnectionError,
    CurlECode.SSL_ENGINE_NOTFOUND: SSLError,
    CurlECode.SSL_ENGINE_SETFAILED: SSLError,
    CurlECode.SEND_ERROR: ConnectionError,
    CurlECode.RECV_ERROR: ConnectionError,
    CurlECode.SSL_CERTPROBLEM: SSLError,
    CurlECode.SSL_CIPHER: SSLError,
    CurlECode.PEER_FAILED_VERIFICATION: CertificateVerifyError,
    CurlECode.BAD_CONTENT_ENCODING: HTTPError,
    CurlECode.SSL_ENGINE_INITFAILED: SSLError,
    CurlECode.SSL_CACERT_BADFILE: SSLError,
    CurlECode.SSL_CRL_BADFILE: SSLError,
    CurlECode.SSL_ISSUER_ERROR: SSLError,
    CurlECode.SSL_PINNEDPUBKEYNOTMATCH: SSLError,
    CurlECode.SSL_INVALIDCERTSTATUS: SSLError,
    CurlECode.HTTP2_STREAM: HTTPError,
    CurlECode.HTTP3: HTTPError,
    CurlECode.QUIC_CONNECT_ERROR: ConnectionError,
    CurlECode.PROXY: ProxyError,
    CurlECode.SSL_CLIENTCERT: SSLError,
    CurlECode.ECH_REQUIRED: SSLError,
    CurlECode.PARTIAL_FILE: IncompleteRead,
}
# credits: https://github.com/yt-dlp/yt-dlp/blob/master/yt_dlp/networking/_curlcffi.py#L241
# Unlicense
def code2error(code: Union[CurlECode, Literal[0]], msg: str):
    """Return the exception class to use for a curl error.

    A ``RECV_ERROR`` whose message contains ``"CONNECT"`` is reported as
    ``ProxyError``. Every other code is looked up in ``CODE2ERROR``, with
    ``RequestException`` as the fallback for unknown codes.
    """
    recv_error_on_connect = code == CurlECode.RECV_ERROR and "CONNECT" in msg
    if not recv_error_on_connect:
        return CODE2ERROR.get(code, RequestException)
    return ProxyError
@@ -0,0 +1,347 @@
# Copied from: https://github.com/encode/httpx/blob/master/httpx/_models.py,
# which is licensed under the BSD License.
# See https://github.com/encode/httpx/blob/master/LICENSE.md
from collections.abc import (
ItemsView,
Iterable,
Iterator,
KeysView,
Mapping,
MutableMapping,
Sequence,
ValuesView,
)
from typing import Any, AnyStr, Optional, Union, cast
# Input shapes accepted for headers: a Headers instance, a str or bytes mapping
# (values may be None), a sequence of (name, value) pairs, or a sequence of
# raw str/bytes header lines.
HeaderTypes = Union[
    "Headers",
    Mapping[str, Optional[str]],
    Mapping[bytes, Optional[bytes]],
    Sequence[tuple[str, str]],
    Sequence[tuple[bytes, bytes]],
    Sequence[Union[str, bytes]],
]
def to_str(value: Union[str, bytes], encoding: str = "utf-8") -> str:
    """Return ``value`` as ``str``, decoding bytes with ``encoding``."""
    if isinstance(value, str):
        return value
    return value.decode(encoding)
# Lower-cased header names whose values obfuscate_sensitive_headers() masks.
SENSITIVE_HEADERS = {"authorization", "proxy-authorization"}
def obfuscate_sensitive_headers(
    items: Iterable[tuple[AnyStr, Optional[AnyStr]]],
) -> Iterator[tuple[AnyStr, Optional[AnyStr]]]:
    """Yield the given ``(name, value)`` pairs with sensitive values masked.

    A header is sensitive when its lower-cased name is in
    ``SENSITIVE_HEADERS``. Its value is replaced by ``b"[secure]"`` when the
    value is bytes and by ``"[secure]"`` otherwise. Names are never changed.
    """
    for name, val in items:
        is_sensitive = to_str(name.lower()) in SENSITIVE_HEADERS
        if is_sensitive:
            if isinstance(val, bytes):
                val = b"[secure]"  # type: ignore
            else:
                val = "[secure]"  # type: ignore
        yield name, val
def normalize_header_key(
    value: Union[str, bytes],
    lower: bool,
    encoding: Optional[str] = None,
) -> bytes:
    """
    Coerce str/bytes into a strictly byte-wise HTTP header key.

    Bytes are used as given; a ``str`` is encoded with ``encoding`` (ASCII
    when not provided). The result is lower-cased when ``lower`` is true.
    """
    if isinstance(value, bytes):
        raw_key = value
    else:
        raw_key = value.encode(encoding or "ascii")
    if lower:
        return raw_key.lower()
    return raw_key
def normalize_header_value(
    value: Union[str, bytes, int, None], encoding: Optional[str] = None
) -> Union[bytes, None]:
    """
    Coerce str/bytes into a strictly byte-wise HTTP header value.

    ``None`` and bytes are returned unchanged. An ``int`` is rendered with
    ``str()`` and encoded with the default codec. Any other value is encoded
    with ``encoding``, or latin-1 when no encoding is given.
    """
    if value is None or isinstance(value, bytes):
        return value
    if isinstance(value, int):
        return str(value).encode()
    # The default encoding for header value should be latin-1
    # See: RFC and https://github.com/python/cpython/blob/bc264eac3ad14dab748e33b3d714c2674872791f/Lib/http/client.py#L1309
    text = cast(str, value)
    codec = encoding or "latin-1"
    return text.encode(codec)
class Headers(MutableMapping[str, Optional[str]]):
    """
    HTTP headers, as a case-insensitive multi-dict.

    Entries are stored in ``_list`` as ``(raw_key, lowercased_key, value)``
    byte tuples; a value may be ``None``.
    """

    def __init__(
        self, headers: "Optional[HeaderTypes]" = None, encoding: Optional[str] = None
    ):
        """Build the header list.

        ``headers`` may be another ``Headers`` (copied), a mapping, or an
        iterable of either ``(name, value)`` pairs or ``"Name: Value"`` lines.
        Falsy input gives an empty instance.

        Raises:
            TypeError: if ``headers`` is a bare str/bytes or is not iterable.
            ValueError: if a header line has no ``:`` separator.
        """
        self._list: list[tuple[bytes, bytes, Optional[bytes]]]
        if isinstance(headers, Headers):
            self._list = list(headers._list)
            encoding = encoding or headers.encoding
        elif not headers:
            self._list = []
        elif isinstance(headers, (str, bytes)):
            # A bare string is not a header collection; fail loudly instead of
            # leaving the instance half-initialized.
            raise TypeError(
                f"Unsupported headers type: {type(headers).__name__}"
            )
        else:
            if isinstance(headers, Mapping):
                pairs = list(headers.items())
            else:
                pairs = self._pairs_from_iterable(headers)
            self._list = [
                (
                    normalize_header_key(k, lower=False, encoding=encoding),
                    normalize_header_key(k, lower=True, encoding=encoding),
                    normalize_header_value(v, encoding),
                )
                for k, v in pairs
            ]
        self._encoding = encoding

    @staticmethod
    def _pairs_from_iterable(headers: Any) -> list:
        """Return ``(name, value)`` pairs from header lines or from pairs."""
        entries = list(headers)  # raises TypeError for non-iterables
        if not entries or not isinstance(entries[0], (str, bytes)):
            # list of (Name, Value) pairs
            return entries
        # list of "Name: Value" lines
        sep = ":" if isinstance(entries[0], str) else b":"
        pairs = []
        for line in entries:
            k, v = line.split(sep, maxsplit=1)  # pyright: ignore
            pairs.append((k, v.strip()))
        return pairs

    @property
    def encoding(self) -> str:
        """
        Header encoding is mandated as ascii, but we allow fallbacks to utf-8
        or iso-8859-1.
        """
        if self._encoding is None:
            for encoding in ["ascii", "utf-8"]:
                for key, value in self.raw:
                    try:
                        key.decode(encoding)
                        value.decode(encoding) if value is not None else value
                    except UnicodeDecodeError:
                        break
                else:
                    # The else block runs if 'break' did not occur, meaning
                    # all values fitted the encoding.
                    self._encoding = encoding
                    break
            else:
                # The ISO-8859-1 encoding covers all 256 code points in a byte,
                # so will never raise decode errors.
                self._encoding = "iso-8859-1"
        return self._encoding

    @encoding.setter
    def encoding(self, value: str) -> None:
        self._encoding = value

    @property
    def raw(self) -> list[tuple[bytes, Optional[bytes]]]:
        """
        Returns a list of the raw header items, as byte pairs.
        """
        return [(raw_key, value) for raw_key, _, value in self._list]

    def _merged(self) -> dict[str, str]:
        """Return ``{lowercased_key: value}`` with repeated keys comma-joined.

        A ``None`` value is rendered as the string ``"None"``.
        """
        merged: dict[str, str] = {}
        for _, key, value in self._list:
            str_key = key.decode(self.encoding)
            str_value = value.decode(self.encoding) if value is not None else "None"
            if str_key in merged:
                merged[str_key] += f", {str_value}"
            else:
                merged[str_key] = str_value
        return merged

    def keys(self) -> KeysView[str]:
        return {key.decode(self.encoding): None for _, key, _ in self._list}.keys()

    def values(self) -> ValuesView[Optional[str]]:
        return self._merged().values()

    def items(self) -> ItemsView[str, Optional[str]]:
        """
        Return `(key, value)` items of headers. Concatenate headers
        into a single comma separated value when a key occurs multiple times.
        """
        return self._merged().items()

    def multi_items(self) -> list[tuple[str, Optional[str]]]:
        """
        Return a list of `(key, value)` pairs of headers. Allow multiple
        occurrences of the same key without concatenating into a single
        comma separated value.
        """
        return [
            (
                key.decode(self.encoding),
                value.decode(self.encoding) if value is not None else value,
            )
            for key, _, value in self._list
        ]

    def get(self, key: str, default: Any = None) -> Any:
        """
        Return a header value. If multiple occurrences of the header occur
        then concatenate them together with commas.
        """
        try:
            return self[key]
        except KeyError:
            return default

    def get_list(self, key: str, split_commas: bool = False) -> list[Optional[str]]:
        """
        Return a list of all header values for a given key.
        If `split_commas=True` is passed, then any comma separated header
        values are split into multiple return strings. ``None`` values are
        returned as ``None`` in both modes.
        """
        get_header_key = key.lower().encode(self.encoding)
        values = [
            item_value.decode(self.encoding) if item_value is not None else item_value
            for _, item_key, item_value in self._list
            if item_key.lower() == get_header_key
        ]
        if not split_commas:
            return values
        split_values: list[Optional[str]] = []
        for value in values:
            if value is None:
                # nothing to split; keep the placeholder
                split_values.append(None)
            else:
                split_values.extend([item.strip() for item in value.split(",")])
        return split_values

    def update(self, headers: "Optional[HeaderTypes]" = None) -> None:  # type: ignore
        """Replace every key present in ``headers``, then append its entries."""
        headers = Headers(headers)
        for key in headers:
            if key in self:
                del self[key]
        self._list.extend(headers._list)

    def copy(self) -> "Headers":
        return Headers(self, encoding=self.encoding)

    def __getitem__(self, key: str) -> Optional[str]:
        """
        Return a single header value.
        If there are multiple headers with the same key, then we concatenate
        them with commas. See: https://tools.ietf.org/html/rfc7230#section-3.2.2
        """
        normalized_key = key.lower().encode(self.encoding)
        items = [
            header_value.decode(self.encoding)
            if header_value is not None
            else header_value
            for _, header_key, header_value in self._list
            if header_key == normalized_key
        ]
        if items == [None]:
            return None
        if items:
            return ", ".join([str(item) for item in items])
        raise KeyError(key)

    def __setitem__(self, key: str, value: Optional[str]) -> None:
        """
        Set the header `key` to `value`, removing any duplicate entries.
        Retains insertion order.
        """
        set_key = key.encode(self._encoding or "utf-8")
        set_value = (
            value.encode(self._encoding or "utf-8") if value is not None else value
        )
        lookup_key = set_key.lower()
        found_indexes = [
            idx
            for idx, (_, item_key, _) in enumerate(self._list)
            if item_key == lookup_key
        ]
        # delete from the back so earlier indexes stay valid
        for idx in reversed(found_indexes[1:]):
            del self._list[idx]
        if found_indexes:
            idx = found_indexes[0]
            self._list[idx] = (set_key, lookup_key, set_value)
        else:
            self._list.append((set_key, lookup_key, set_value))

    def __delitem__(self, key: str) -> None:
        """
        Remove the header `key`.
        """
        del_key = key.lower().encode(self.encoding)
        pop_indexes = [
            idx
            for idx, (_, item_key, _) in enumerate(self._list)
            if item_key.lower() == del_key
        ]
        if not pop_indexes:
            raise KeyError(key)
        for idx in reversed(pop_indexes):
            del self._list[idx]

    def __contains__(self, key: Any) -> bool:
        header_key = key.lower().encode(self.encoding)
        return any(item_key == header_key for _, item_key, _ in self._list)

    def __iter__(self) -> Iterator[Any]:
        return iter(self.keys())

    def __len__(self) -> int:
        return len(self._list)

    def __eq__(self, other: Any) -> bool:
        """Compare as unordered collections of ``(lowercased_key, value)``.

        Objects that cannot be interpreted as headers compare unequal.
        """
        try:
            other_headers = Headers(other)
        except (TypeError, ValueError):
            return False

        def sort_key(item: tuple) -> tuple:
            # None cannot be ordered against bytes, so sort on a surrogate
            key, value = item
            return (key, value is not None, value or b"")

        self_list = sorted(
            ((key, value) for _, key, value in self._list), key=sort_key
        )
        other_list = sorted(
            ((key, value) for _, key, value in other_headers._list), key=sort_key
        )
        return self_list == other_list

    def __repr__(self) -> str:
        class_name = self.__class__.__name__
        encoding_str = ""
        if self.encoding != "ascii":
            encoding_str = f", encoding={self.encoding!r}"
        as_list = list(obfuscate_sensitive_headers(self.multi_items()))
        as_dict = dict(as_list)
        no_duplicate_keys = len(as_dict) == len(as_list)
        if no_duplicate_keys:
            return f"{class_name}({as_dict!r}{encoding_str})"
        return f"{class_name}({as_list!r}{encoding_str})"
@@ -0,0 +1,435 @@
import warnings
from dataclasses import dataclass
from enum import Enum
from typing import Literal, Optional, TypedDict
from ..const import CurlOpt, CurlSslVersion
from ..utils import CurlCffiWarning
# Every impersonation target name accepted as a plain string: versioned
# targets, version-less aliases, and deprecated spellings.
BrowserTypeLiteral = Literal[
    # Edge
    "edge99",
    "edge101",
    # Chrome
    "chrome99",
    "chrome100",
    "chrome101",
    "chrome104",
    "chrome107",
    "chrome110",
    "chrome116",
    "chrome119",
    "chrome120",
    "chrome123",
    "chrome124",
    "chrome131",
    "chrome133a",
    "chrome136",
    "chrome99_android",
    "chrome131_android",
    # Safari
    "safari153",
    "safari155",
    "safari170",
    "safari172_ios",
    "safari180",
    "safari180_ios",
    "safari184",
    "safari184_ios",
    "safari260",
    "safari260_ios",
    # Firefox
    "firefox133",
    "firefox135",
    # Tor
    "tor145",
    # alias
    "chrome",
    "edge",
    "safari",
    "safari_ios",
    "safari_beta",
    "safari_ios_beta",
    "chrome_android",
    "firefox",
    # "tor" is resolved by normalize_browser_type() and REAL_TARGET_MAP, so
    # the type has to accept it as well.
    "tor",
    # deprecated aliases
    "safari15_3",
    "safari15_5",
    "safari17_0",
    "safari17_2_ios",
    "safari18_0",
    "safari18_0_ios",
    "safari18_4",
    "safari18_4_ios",
    # Canonical names
    # "edge_99",
    # "edge_101",
    # "safari_15.3_macos",
    # "safari_15.5_macos",
    # "safari_17.2_ios",
    # "safari_17.0_macos",
    # "safari_18.0_ios",
    # "safari_18.0_macos",
]
# Concrete impersonation target behind each version-less alias.
DEFAULT_CHROME = "chrome136"
DEFAULT_EDGE = "edge101"
DEFAULT_SAFARI = "safari184"
DEFAULT_SAFARI_IOS = "safari184_ios"
DEFAULT_SAFARI_BETA = "safari260"
DEFAULT_SAFARI_IOS_BETA = "safari260_ios"
DEFAULT_CHROME_ANDROID = "chrome131_android"
DEFAULT_FIREFOX = "firefox135"
DEFAULT_TOR = "tor145"

# Alias -> concrete target, expressed through the constants above.
REAL_TARGET_MAP = {
    "chrome": DEFAULT_CHROME,
    "edge": DEFAULT_EDGE,
    "safari": DEFAULT_SAFARI,
    "safari_ios": DEFAULT_SAFARI_IOS,
    "safari_beta": DEFAULT_SAFARI_BETA,
    "safari_ios_beta": DEFAULT_SAFARI_IOS_BETA,
    "chrome_android": DEFAULT_CHROME_ANDROID,
    "firefox": DEFAULT_FIREFOX,
    "tor": DEFAULT_TOR,
}
def normalize_browser_type(item):
    """Resolve a version-less alias to its default concrete target.

    Any value that is not one of the known aliases is returned unchanged.
    """
    if item == "chrome":
        return DEFAULT_CHROME
    if item == "edge":
        return DEFAULT_EDGE
    if item == "safari":
        return DEFAULT_SAFARI
    if item == "safari_ios":
        return DEFAULT_SAFARI_IOS
    if item == "safari_beta":
        return DEFAULT_SAFARI_BETA
    if item == "safari_ios_beta":
        return DEFAULT_SAFARI_IOS_BETA
    if item == "chrome_android":
        return DEFAULT_CHROME_ANDROID
    if item == "firefox":
        return DEFAULT_FIREFOX
    if item == "tor":
        return DEFAULT_TOR
    # not an alias: hand the caller's value back untouched
    return item
class BrowserType(str, Enum):  # TODO: remove in version 1.x
    """String-valued enum of impersonation target names.

    Every member's value equals its name. Version-less aliases such as
    ``chrome`` are not members.
    """

    edge99 = "edge99"
    edge101 = "edge101"
    chrome99 = "chrome99"
    chrome100 = "chrome100"
    chrome101 = "chrome101"
    chrome104 = "chrome104"
    chrome107 = "chrome107"
    chrome110 = "chrome110"
    chrome116 = "chrome116"
    chrome119 = "chrome119"
    chrome120 = "chrome120"
    chrome123 = "chrome123"
    chrome124 = "chrome124"
    chrome131 = "chrome131"
    chrome133a = "chrome133a"
    chrome136 = "chrome136"
    chrome99_android = "chrome99_android"
    chrome131_android = "chrome131_android"
    safari153 = "safari153"
    safari155 = "safari155"
    safari170 = "safari170"
    safari172_ios = "safari172_ios"
    safari180 = "safari180"
    safari180_ios = "safari180_ios"
    safari184 = "safari184"
    safari184_ios = "safari184_ios"
    safari260 = "safari260"
    safari260_ios = "safari260_ios"
    firefox133 = "firefox133"
    firefox135 = "firefox135"
    tor145 = "tor145"
    # deprecated aliases
    safari15_3 = "safari15_3"
    safari15_5 = "safari15_5"
    safari17_0 = "safari17_0"
    safari17_2_ios = "safari17_2_ios"
    safari18_0 = "safari18_0"
    safari18_0_ios = "safari18_0_ios"
@dataclass
class ExtraFingerprints:
    """Extra TLS and HTTP/2 fingerprint settings, with their default values.

    NOTE(review): how each field is applied is not visible in this block;
    confirm the exact semantics against the code that consumes this class.
    """

    tls_min_version: int = CurlSslVersion.TLSv1_2
    tls_grease: bool = False
    tls_permute_extensions: bool = False
    # only "zlib" and "brotli" are allowed by the annotation
    tls_cert_compression: Literal["zlib", "brotli"] = "brotli"
    tls_signature_algorithms: Optional[list[str]] = None
    tls_delegated_credential: str = ""
    tls_record_size_limit: int = 0
    http2_stream_weight: int = 256
    http2_stream_exclusive: int = 1
    http2_no_priority: bool = False
class ExtraFpDict(TypedDict, total=False):
    """Dict form of the ``ExtraFingerprints`` fields.

    ``total=False`` makes every key optional.
    """

    tls_min_version: int
    tls_grease: bool
    tls_permute_extensions: bool
    tls_cert_compression: Literal["zlib", "brotli"]
    tls_signature_algorithms: Optional[list[str]]
    tls_delegated_credential: str
    tls_record_size_limit: int
    http2_stream_weight: int
    http2_stream_exclusive: int
    http2_no_priority: bool
# TLS versions are in the format 0xAABB, where AA is the major version and BB is
# the minor version. As of today, the major version is always 03.
# Maps each wire value to the matching CurlSslVersion member.
TLS_VERSION_MAP = {
    0x0301: CurlSslVersion.TLSv1_0,  # 769
    0x0302: CurlSslVersion.TLSv1_1,  # 770
    0x0303: CurlSslVersion.TLSv1_2,  # 771
    0x0304: CurlSslVersion.TLSv1_3,  # 772
}
# A list of the possible cipher suite ids. Taken from
# http://www.iana.org/assignments/tls-parameters/tls-parameters.xml
# via BoringSSL
# Maps each numeric cipher suite id to its name.
TLS_CIPHER_NAME_MAP = {
    0x000A: "TLS_RSA_WITH_3DES_EDE_CBC_SHA",
    0x002F: "TLS_RSA_WITH_AES_128_CBC_SHA",
    0x0033: "TLS_DHE_RSA_WITH_AES_128_CBC_SHA",
    0x0035: "TLS_RSA_WITH_AES_256_CBC_SHA",
    0x0039: "TLS_DHE_RSA_WITH_AES_256_CBC_SHA",
    0x003C: "TLS_RSA_WITH_AES_128_CBC_SHA256",
    0x003D: "TLS_RSA_WITH_AES_256_CBC_SHA256",
    0x0067: "TLS_DHE_RSA_WITH_AES_128_CBC_SHA256",
    0x006B: "TLS_DHE_RSA_WITH_AES_256_CBC_SHA256",
    0x008C: "TLS_PSK_WITH_AES_128_CBC_SHA",
    0x008D: "TLS_PSK_WITH_AES_256_CBC_SHA",
    0x009C: "TLS_RSA_WITH_AES_128_GCM_SHA256",
    0x009D: "TLS_RSA_WITH_AES_256_GCM_SHA384",
    0x009E: "TLS_DHE_RSA_WITH_AES_128_GCM_SHA256",
    0x009F: "TLS_DHE_RSA_WITH_AES_256_GCM_SHA384",
    0x1301: "TLS_AES_128_GCM_SHA256",
    0x1302: "TLS_AES_256_GCM_SHA384",
    0x1303: "TLS_CHACHA20_POLY1305_SHA256",
    0xC008: "TLS_ECDHE_ECDSA_WITH_3DES_EDE_CBC_SHA",
    0xC009: "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA",
    0xC00A: "TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA",
    0xC012: "TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA",
    0xC013: "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA",
    0xC014: "TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA",
    0xC023: "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256",
    0xC024: "TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384",
    0xC027: "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256",
    0xC028: "TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384",
    0xC02B: "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256",
    0xC02C: "TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384",
    0xC02F: "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256",
    0xC030: "TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384",
    0xC035: "TLS_ECDHE_PSK_WITH_AES_128_CBC_SHA",
    0xC036: "TLS_ECDHE_PSK_WITH_AES_256_CBC_SHA",
    0xCCA8: "TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256",
    0xCCA9: "TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256",
    0xCCAC: "TLS_ECDHE_PSK_WITH_CHACHA20_POLY1305_SHA256",
}
# RFC tls extensions: https://datatracker.ietf.org/doc/html/rfc6066
# IANA list: https://www.iana.org/assignments/tls-extensiontype-values/tls-extensiontype-values.xhtml
# Maps each numeric TLS extension id to its name. Commented-out entries
# record reserved, deprecated or unassigned ranges and are not in the dict.
TLS_EXTENSION_NAME_MAP = {
    0: "server_name",
    1: "max_fragment_length",
    2: "client_certificate_url",
    3: "trusted_ca_keys",
    4: "truncated_hmac",
    5: "status_request",
    6: "user_mapping",
    7: "client_authz",
    8: "server_authz",
    9: "cert_type",
    10: "supported_groups",  # (renamed from "elliptic_curves")
    11: "ec_point_formats",
    12: "srp",
    13: "signature_algorithms",
    14: "use_srtp",
    15: "heartbeat",
    16: "application_layer_protocol_negotiation",
    17: "status_request_v2",
    18: "signed_certificate_timestamp",
    19: "client_certificate_type",
    20: "server_certificate_type",
    21: "padding",
    22: "encrypt_then_mac",
    23: "extended_master_secret",
    24: "token_binding",
    25: "cached_info",
    26: "tls_lts",
    27: "compress_certificate",
    28: "record_size_limit",
    29: "pwd_protect",
    30: "pwd_clear",
    31: "password_salt",
    32: "ticket_pinning",
    33: "tls_cert_with_extern_psk",
    34: "delegated_credential",
    35: "session_ticket",  # (renamed from "SessionTicket TLS")
    36: "TLMSP",
    37: "TLMSP_proxying",
    38: "TLMSP_delegate",
    39: "supported_ekt_ciphers",
    # 40:"Reserved",
    41: "pre_shared_key",
    42: "early_data",
    43: "supported_versions",
    44: "cookie",
    45: "psk_key_exchange_modes",
    # 46:"Reserved",
    47: "certificate_authorities",
    48: "oid_filters",
    49: "post_handshake_auth",
    50: "signature_algorithms_cert",
    51: "key_share",
    52: "transparency_info",
    # 53:"connection_id", # (deprecated)
    54: "connection_id",
    55: "external_id_hash",
    56: "external_session_id",
    57: "quic_transport_parameters",
    58: "ticket_request",
    59: "dnssec_chain",
    60: "sequence_number_encryption_algorithms",
    61: "rrc",
    17513: "application_settings",  # BoringSSL private usage
    17613: "application_settings new",  # BoringSSL private usage
    # 62-2569:"Unassigned
    # 2570:"Reserved
    # 2571-6681:"Unassigned
    # 6682:"Reserved
    # 6683-10793:"Unassigned
    # 10794:"Reserved
    # 10795-14905:"Unassigned
    # 14906:"Reserved
    # 14907-19017:"Unassigned
    # 19018:"Reserved
    # 19019-23129:"Unassigned
    # 23130:"Reserved
    # 23131-27241:"Unassigned
    # 27242:"Reserved
    # 27243-31353:"Unassigned
    # 31354:"Reserved
    # 31355-35465:"Unassigned
    # 35466:"Reserved
    # 35467-39577:"Unassigned
    # 39578:"Reserved
    # 39579-43689:"Unassigned
    # 43690:"Reserved
    # 43691-47801:"Unassigned
    # 47802:"Reserved
    # 47803-51913:"Unassigned
    # 51914:"Reserved
    # 51915-56025:"Unassigned
    # 56026:"Reserved
    # 56027-60137:"Unassigned
    # 60138:"Reserved
    # 60139-64249:"Unassigned
    # 64250:"Reserved
    # 64251-64767:"Unassigned
    64768: "ech_outer_extensions",
    # 64769-65036:"Unassigned
    65037: "encrypted_client_hello",
    # 65038-65279:"Unassigned
    # 65280:"Reserved for Private Use
    65281: "renegotiation_info",
    # 65282-65535:"Reserved for Private Use
}
# Maps each numeric group/curve id to its name.
TLS_EC_CURVES_MAP = {
    19: "P-192",
    21: "P-224",
    23: "P-256",
    24: "P-384",
    25: "P-521",
    29: "X25519",
    256: "ffdhe2048",
    257: "ffdhe3072",
    4588: "X25519MLKEM768",
    25497: "X25519Kyber768Draft00",
}
def toggle_extension(curl, extension_id: int, enable: bool):
    """Switch one TLS extension on or off on ``curl`` via ``setopt``.

    Args:
        curl: object exposing ``setopt(option, value)``.
        extension_id: numeric TLS extension id.
        enable: whether the extension should be turned on.

    Extensions 21, 34 and 28 are accepted but nothing is set for them.
    Extensions 5 and 18 are only ever switched on; disabling them sets
    nothing.

    Raises:
        NotImplementedError: for ``server_name`` (0) and for any extension
            id this function has no handling for.
    """
    on_off = 1 if enable else 0

    # server_name
    if extension_id == 0:
        raise NotImplementedError(
            "It's unlikely that the server_name(0) extension being changed."
        )

    # padding (21) should be ignored; 34 and 28 are firefox extensions,
    # toggled by extra_fp
    if extension_id in (21, 34, 28):
        return

    # ECH
    if extension_id == 65037:
        curl.setopt(CurlOpt.ECH, "grease" if enable else "")
    # compress certificate
    elif extension_id == 27:
        if not enable:
            curl.setopt(CurlOpt.SSL_CERT_COMPRESSION, "")
            return
        warnings.warn(
            "Cert compression setting to brotli, "
            "you had better specify which to use: zlib/brotli",
            CurlCffiWarning,
            stacklevel=1,
        )
        curl.setopt(CurlOpt.SSL_CERT_COMPRESSION, "brotli")
    # ALPS: application settings
    elif extension_id == 17513:
        curl.setopt(CurlOpt.SSL_ENABLE_ALPS, on_off)
    elif extension_id == 17613:
        curl.setopt(CurlOpt.SSL_ENABLE_ALPS, on_off)
        curl.setopt(CurlOpt.TLS_USE_NEW_ALPS_CODEPOINT, on_off)
    # ALPN
    elif extension_id == 16:
        curl.setopt(CurlOpt.SSL_ENABLE_ALPN, on_off)
    # status_request
    elif extension_id == 5:
        if enable:
            curl.setopt(CurlOpt.TLS_STATUS_REQUEST, 1)
    # signed_certificate_timestamps
    elif extension_id == 18:
        if enable:
            curl.setopt(CurlOpt.TLS_SIGNED_CERT_TIMESTAMPS, 1)
    # session_ticket
    elif extension_id == 35:
        curl.setopt(CurlOpt.SSL_ENABLE_TICKET, on_off)
    else:
        raise NotImplementedError(
            f"This extension({extension_id}) can not be toggled for now, it may be "
            "updated later."
        )
@@ -0,0 +1,314 @@
from contextlib import suppress
import queue
import re
import warnings
from concurrent.futures import Future
from typing import Any, Callable, Optional, Union
from collections.abc import Awaitable
from ..curl import Curl
from ..utils import CurlCffiWarning
from .cookies import Cookies
from .exceptions import HTTPError, RequestException
from .headers import Headers
# Use orjson if present
try:
from orjson import loads
except ImportError:
from json import loads
with suppress(ImportError):
from markdownify import markdownify as md
import readability as rd
# Captures the value of a "charset=<name>" parameter.
CHARSET_RE = re.compile(r"charset=([\w-]+)")
# Unique sentinel object; presumably marks the end of a streamed body (confirm at its use sites).
STREAM_END = object()
def clear_queue(q: queue.Queue):
    """Drop every pending item from ``q`` and reset its task accounting.

    All changes happen while holding the queue's own mutex. Afterwards the
    queue is empty, ``unfinished_tasks`` is ``0``, and threads blocked in
    ``q.join()`` or, for a bounded queue, in ``q.put()`` are woken up.
    """
    with q.mutex:
        q.queue.clear()
        q.unfinished_tasks = 0
        # Nothing is left to wait for, so release anyone blocked in join().
        q.all_tasks_done.notify_all()
        # The queue has room again; without this, producers blocked in put()
        # on a full bounded queue would keep waiting until some later get().
        q.not_full.notify_all()
class Request:
    """Representing a sent request.

    Attributes:
        url: url of the request.
        headers: headers of the request.
        method: http method of the request.
    """

    def __init__(self, url: str, headers: Headers, method: str):
        self.method = method
        self.headers = headers
        self.url = url
class Response:
    """Contains information the server sends.

    Attributes:
        url: url used in the request.
        content: response body in bytes.
        text: response body in str.
        status_code: http status code.
        reason: http response reason, such as OK, Not Found.
        ok: is status_code in [200, 400)?
        headers: response headers.
        cookies: response cookies.
        elapsed: how many seconds the request cost.
        encoding: http body encoding.
        charset: alias for encoding.
        primary_ip: primary ip of the server.
        primary_port: primary port of the server.
        local_ip: local ip used in this connection.
        local_port: local port used in this connection.
        charset_encoding: encoding specified by the Content-Type header.
        default_encoding: encoding for decoding response content if charset is not found
            in headers. Defaults to "utf-8". Can be set to a callable for automatic
            detection.
        redirect_count: how many redirects happened.
        redirect_url: the final redirected url.
        http_version: http version used.
        history: history redirections, only headers are available.
    """

    def __init__(self, curl: Optional[Curl] = None, request: Optional[Request] = None):
        """Create a response with neutral defaults; callers fill in the fields.

        Args:
            curl: the curl handle that performed the transfer, if any.
            request: the request this response belongs to, if any.
        """
        self.curl = curl
        self.request = request
        self.url = ""
        self.content = b""
        self.status_code = 200
        self.reason = "OK"
        self.ok = True
        self.headers = Headers()
        self.cookies = Cookies()
        self.elapsed = 0.0
        self.default_encoding: Union[str, Callable[[bytes], str]] = "utf-8"
        self.redirect_count = 0
        self.redirect_url = ""
        self.http_version = 0
        self.primary_ip: str = ""
        self.primary_port: int = 0
        self.local_ip: str = ""
        self.local_port: int = 0
        self.history: list[dict[str, Any]] = []
        self.infos: dict[str, Any] = {}
        # Streaming state; these stay ``None`` unless stream mode is set up.
        self.queue: Optional[queue.Queue] = None
        self.stream_task: Optional[Future] = None
        self.astream_task: Optional[Awaitable] = None
        self.quit_now = None

    @property
    def charset(self) -> str:
        """Alias for encoding."""
        return self.encoding

    @property
    def encoding(self) -> str:
        """
        Determines the encoding to decode byte content into text.

        The method follows a specific priority to decide the encoding:

        1. If ``.encoding`` has been explicitly set, it is used.
        2. The encoding specified by the ``charset`` parameter in the ``Content-Type``
           header.
        3. The encoding specified by the ``default_encoding`` attribute. This can either
           be a string (e.g., "utf-8") or a callable for charset autodetection.

        The result is computed once and cached in ``_encoding``.
        """
        if not hasattr(self, "_encoding"):
            encoding = self.charset_encoding
            if encoding is None:
                if isinstance(self.default_encoding, str):
                    encoding = self.default_encoding
                elif callable(self.default_encoding):
                    encoding = self.default_encoding(self.content)
            self._encoding = encoding or "utf-8"
        return self._encoding

    @encoding.setter
    def encoding(self, value: str) -> None:
        # Once ``text`` has been decoded and cached, changing the encoding
        # would silently have no effect, so refuse instead.
        if hasattr(self, "_text"):
            raise ValueError("Cannot set encoding after text has been accessed")
        self._encoding = value

    @property
    def charset_encoding(self) -> Optional[str]:
        """Return the encoding, as specified by the Content-Type header."""
        content_type = self.headers.get("Content-Type")
        if content_type:
            charset_match = CHARSET_RE.search(content_type)
            return charset_match.group(1) if charset_match else None
        return None

    @property
    def text(self) -> str:
        """Response body decoded with ``encoding``; decoded once, then cached."""
        if not hasattr(self, "_text"):
            if not self.content:
                self._text = ""
            else:
                self._text = self._decode(self.content)
        return self._text

    def markdown(self) -> str:
        """Return the readable part of the HTML body converted to Markdown.

        Relies on the module-level optional imports of ``readability`` and
        ``markdownify``; when those imports were suppressed because the
        packages are missing, calling this method fails with ``NameError``.
        """
        doc = rd.Document(self.content)
        title = doc.title()
        summary = doc.summary(html_partial=True)
        body_as_md = md(f"<h1>{title}</h1><main>{summary}</main>")
        return body_as_md

    def _decode(self, content: bytes) -> str:
        """Decode ``content`` with ``encoding``, never failing on bad bytes.

        Undecodable bytes become U+FFFD. If ``encoding`` names a codec Python
        does not know (``LookupError``), fall back to ``utf-8-sig`` with the
        same lenient error handling, so an unknown charset combined with a
        non-UTF-8 body still yields text instead of raising.
        """
        try:
            return content.decode(self.encoding, errors="replace")
        except (UnicodeDecodeError, LookupError):
            return content.decode("utf-8-sig", errors="replace")

    def raise_for_status(self):
        """Raise an error if status code is not in [200, 400)"""
        if not self.ok:
            raise HTTPError(f"HTTP Error {self.status_code}: {self.reason}", 0, self)

    def iter_lines(self, chunk_size=None, decode_unicode=False, delimiter=None):
        """
        iterate streaming content line by line, separated by ``\\n``.

        Copied from: https://requests.readthedocs.io/en/latest/_modules/requests/models/
        which is under the License: Apache 2.0
        """
        pending = None

        for chunk in self.iter_content(
            chunk_size=chunk_size, decode_unicode=decode_unicode
        ):
            if pending is not None:
                chunk = pending + chunk
            lines = chunk.split(delimiter) if delimiter else chunk.splitlines()
            # A last piece that ends exactly where the chunk ends may be an
            # incomplete line: hold it back and prepend it to the next chunk.
            pending = (
                lines.pop()
                if lines and lines[-1] and chunk and lines[-1][-1] == chunk[-1]
                else None
            )

            yield from lines

        if pending is not None:
            yield pending

    def iter_content(self, chunk_size=None, decode_unicode=False):
        """
        iterate streaming content chunk by chunk in bytes.

        ``chunk_size`` is accepted but ignored (a warning is emitted), and
        ``decode_unicode`` is not implemented.
        """
        if chunk_size:
            warnings.warn(
                "chunk_size is ignored, there is no way to tell curl that.",
                CurlCffiWarning,
                stacklevel=2,
            )
        if decode_unicode:
            raise NotImplementedError()

        assert self.queue and self.curl, "stream mode is not enabled."

        while True:
            chunk = self.queue.get()

            # re-raise the exception if something wrong happened.
            if isinstance(chunk, RequestException):
                self.curl.reset()
                raise chunk

            # end of stream.
            if chunk is STREAM_END:
                break

            yield chunk

    def json(self, **kw):
        """return a parsed json object of the content."""
        return loads(self.content, **kw)

    def close(self):
        """Close the streaming connection, only valid in stream mode."""
        if self.quit_now:
            self.quit_now.set()
        if self.stream_task:
            self.stream_task.result()

    async def aiter_lines(self, chunk_size=None, decode_unicode=False, delimiter=None):
        """
        iterate streaming content line by line, separated by ``\\n``.

        Copied from: https://requests.readthedocs.io/en/latest/_modules/requests/models/
        which is under the License: Apache 2.0
        """
        pending = None

        async for chunk in self.aiter_content(
            chunk_size=chunk_size, decode_unicode=decode_unicode
        ):
            if pending is not None:
                chunk = pending + chunk
            lines = chunk.split(delimiter) if delimiter else chunk.splitlines()
            # Same hold-back rule as ``iter_lines`` for a possibly partial line.
            pending = (
                lines.pop()
                if lines and lines[-1] and chunk and lines[-1][-1] == chunk[-1]
                else None
            )

            for line in lines:
                yield line

        if pending is not None:
            yield pending

    async def aiter_content(self, chunk_size=None, decode_unicode=False):
        """
        iterate streaming content chunk by chunk in bytes.

        ``chunk_size`` is accepted but ignored (a warning is emitted), and
        ``decode_unicode`` is not implemented.
        """
        if chunk_size:
            warnings.warn(
                "chunk_size is ignored, there is no way to tell curl that.",
                CurlCffiWarning,
                stacklevel=2,
            )
        if decode_unicode:
            raise NotImplementedError()

        assert self.queue and self.curl, "stream mode is not enabled."

        while True:
            chunk = await self.queue.get()

            # re-raise the exception if something wrong happened.
            if isinstance(chunk, RequestException):
                await self.aclose()
                raise chunk

            # end of stream.
            if chunk is STREAM_END:
                await self.aclose()
                return

            yield chunk

    async def atext(self) -> str:
        """
        Return a decoded string.
        """
        return self._decode(await self.acontent())

    async def acontent(self) -> bytes:
        """wait and read the streaming content in one bytes object."""
        chunks = []
        async for chunk in self.aiter_content():
            chunks.append(chunk)
        return b"".join(chunks)

    async def aclose(self):
        """Close the streaming connection, only valid in stream mode."""
        if self.astream_task:
            await self.astream_task

    # It prints the status code of the response instead of the object's memory location.
    def __repr__(self) -> str:
        return f"<Response [{self.status_code}]>"
@@ -0,0 +1,698 @@
from __future__ import annotations
__all__ = ["HttpVersionLiteral", "set_curl_options", "not_set"]
import asyncio
import math
import queue
import warnings
from collections import Counter
from io import BytesIO
from json import dumps
from typing import TYPE_CHECKING, Any, Callable, Final, Literal, Optional, Union, cast
from urllib.parse import ParseResult, parse_qsl, quote, urlencode, urljoin, urlparse
from ..const import CurlHttpVersion, CurlOpt, CurlSslVersion
from ..curl import CURL_WRITEFUNC_ERROR, CurlMime
from ..utils import CurlCffiWarning
from .cookies import Cookies
from .exceptions import ImpersonateError, InvalidURL
from .headers import Headers
from .impersonate import (
TLS_CIPHER_NAME_MAP,
TLS_EC_CURVES_MAP,
TLS_VERSION_MAP,
ExtraFingerprints,
normalize_browser_type,
toggle_extension,
)
from .models import Request
if TYPE_CHECKING:
from ..curl import Curl
from .cookies import CookieTypes
from .headers import HeaderTypes
from .impersonate import BrowserTypeLiteral, ExtraFpDict
from .session import ProxySpec
# HTTP method names accepted in type hints for the ``method`` argument.
HttpMethod = Literal[
"GET", "POST", "PUT", "DELETE", "OPTIONS", "HEAD", "TRACE", "PATCH", "QUERY"
]
# String shorthands that normalize_http_version() maps to CurlHttpVersion members.
HttpVersionLiteral = Literal["v1", "v2", "v2tls", "v2_prior_knowledge", "v3", "v3only"]
# Characters left unescaped by quote_path_and_params() unless the caller asks
# for them to be quoted.
SAFE_CHARS = set("!#$%&'()*+,/:;=?@[]~")
# Unique sentinel used as a default where ``None`` is itself a meaningful value.
not_set: Final[Any] = object()
# ruff: noqa: SIM116
def normalize_http_version(
    version: Union[CurlHttpVersion, HttpVersionLiteral],
) -> CurlHttpVersion:
    """Translate an HTTP version shorthand into a ``CurlHttpVersion`` member.

    Args:
        version: one of the string shorthands (``"v1"``, ``"v2"``, ``"v2tls"``,
            ``"v2_prior_knowledge"``, ``"v3"``, ``"v3only"``) or a value that
            is already a curl HTTP version.

    Returns:
        The matching ``CurlHttpVersion`` member; any value that is not a known
        shorthand is handed back unchanged.
    """
    if not isinstance(version, str):
        return version  # type: ignore
    shorthand_to_member = {
        "v1": CurlHttpVersion.V1_1,
        "v2": CurlHttpVersion.V2_0,
        "v2tls": CurlHttpVersion.V2TLS,
        "v2_prior_knowledge": CurlHttpVersion.V2_PRIOR_KNOWLEDGE,
        "v3": CurlHttpVersion.V3,
        "v3only": CurlHttpVersion.V3ONLY,
    }
    return shorthand_to_member.get(version, version)  # type: ignore
def is_absolute_url(url: str) -> bool:
    """Tell whether ``url`` carries both a scheme and a hostname."""
    parts = urlparse(url)
    if not parts.scheme:
        return False
    return bool(parts.hostname)
def quote_path_and_params(url: str, quote_str: str = ""):
    """Percent-encode the path and query string of ``url``.

    Characters in ``SAFE_CHARS`` are left as-is, except those listed in
    ``quote_str``, which are removed from the safe set and therefore encoded.

    Args:
        url: the URL to re-encode.
        quote_str: characters that must be percent-encoded even though they
            are normally considered safe.

    Returns:
        The URL with its path and query re-encoded; the other components are
        carried over untouched.
    """
    allowed = "".join(SAFE_CHARS - set(quote_str))
    parts = urlparse(url)
    query_pairs = parse_qsl(parts.query, keep_blank_values=True)
    return parts._replace(
        path=quote(parts.path, safe=allowed),
        query=urlencode(query_pairs, doseq=True, safe=allowed),
    ).geturl()
def update_url_params(url: str, params: Union[dict, list, tuple]) -> str:
    """Merge ``params`` into the query string of ``url``.

    A key that occurs exactly once in the existing query and exactly once in
    ``params`` has its value replaced in place. Every other pair is appended
    after the existing arguments. ``bool`` and ``dict`` values are serialized
    with ``json.dumps`` before encoding.

    Args:
        url: string of target URL, possibly already carrying a query string.
        params: a dict, or a list/tuple of ``(key, value)`` pairs, to be added.

    Returns:
        string with updated URL

    >> url = 'http://stackoverflow.com/test?answers=true'
    >> new_params = {'answers': False, 'data': ['some','values']}
    >> update_url_params(url, new_params)
    'http://stackoverflow.com/test?answers=false&data=some&data=values'
    """
    # No unquoting here: requote_uri is expected to run on the result later.
    split_url = urlparse(url)
    # parse_qsl yields a list of pairs, so repeated keys survive.
    query_pairs = parse_qsl(split_url.query, keep_blank_values=True)
    existing_counts = Counter(pair[0] for pair in query_pairs)

    items = list(params.items()) if isinstance(params, dict) else params
    incoming_counts = Counter(pair[0] for pair in items)

    for name, raw_value in items:
        # Bool and dict values are converted to their JSON text form.
        encoded = dumps(raw_value) if isinstance(raw_value, (bool, dict)) else raw_value
        one_to_one = (
            existing_counts.get(name) == 1 and incoming_counts.get(name) == 1
        )
        if not one_to_one:
            query_pairs.append((name, encoded))
            continue
        # Unambiguous single match: overwrite it where it already sits.
        query_pairs = [
            (name, encoded) if pair[0] == name else pair for pair in query_pairs
        ]

    # Rebuild the URL with only the query component swapped out.
    merged_query = urlencode(query_pairs, doseq=True)
    return split_url._replace(query=merged_query).geturl()
# Adapted from: https://github.com/psf/requests/blob/1ae6fc3137a11e11565ed22436aa1e77277ac98c/src%2Frequests%2Futils.py#L633-L682
# License: Apache 2.0
# The unreserved URI characters (RFC 3986)
UNRESERVED_SET = frozenset(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789-._~"
)


def unquote_unreserved(uri: str) -> str:
    """Decode only those percent-escapes that stand for unreserved characters.

    Escapes of reserved, illegal and non-ASCII bytes are kept encoded, as are
    ``%`` signs that are not followed by two alphanumeric characters.

    Raises:
        InvalidURL: when a ``%`` is followed by two alphanumeric characters
            that do not form a hexadecimal number.
    """
    leading, *escaped = uri.split("%")
    pieces = [leading]
    for segment in escaped:
        hex_pair = segment[:2]
        if len(hex_pair) == 2 and hex_pair.isalnum():
            try:
                char = chr(int(hex_pair, 16))
            except ValueError as e:
                raise InvalidURL(
                    f"Invalid percent-escape sequence: '{hex_pair}'"
                ) from e
            if char in UNRESERVED_SET:
                pieces.append(char + segment[2:])
                continue
        # Not a decodable unreserved escape: put the '%' back untouched.
        pieces.append("%" + segment)
    return "".join(pieces)
def requote_uri(uri: str) -> str:
    """Normalize the percent-encoding of ``uri``.

    The URI goes through an unquote/quote cycle so that it ends up fully and
    consistently quoted.
    """
    try:
        # First undo the escapes of unreserved characters only.
        partially_unquoted = unquote_unreserved(uri)
    except InvalidURL:
        # The URI could not be unquoted, so it may contain stray '%' signs.
        # Quote it as-is with '%' excluded from the safe set so those get
        # escaped and do not cause issues elsewhere.
        return quote(uri, safe="!#$&'()*+,/:;=?@[]~|")
    # Then quote only illegal characters; reserved, unreserved and '%' stay.
    return quote(partially_unquoted, safe="!#$%&'()*+,/:;=?@[]~|")
# TODO: should we move this function to headers.py?
def update_header_line(
    header_lines: list[str], key: str, value: str, replace: bool = False
):
    """Make sure ``header_lines`` contains a ``key: value`` line.

    The first line whose name matches ``key`` case-insensitively counts as the
    existing header. It is overwritten only when ``replace`` is true; when no
    line matches, a new one is appended. The list is modified in place.
    """
    prefix = key.lower() + ":"
    rendered = f"{key}: {value}"
    position = next(
        (
            idx
            for idx, line in enumerate(header_lines)
            if line.lower().startswith(prefix)
        ),
        None,
    )
    if position is None:
        header_lines.append(rendered)
    elif replace:
        header_lines[position] = rendered
def peek_queue(q: queue.Queue, default=None):
    """Return the item at the head of ``q`` without removing it.

    ``default`` is returned when the queue holds nothing.
    """
    try:
        head = q.queue[0]
    except IndexError:
        return default
    return head
def peek_aio_queue(q: asyncio.Queue, default=None):
    """Return the item at the head of an asyncio queue without removing it.

    Reads the queue's private ``_queue`` storage directly; ``default`` is
    returned when the queue holds nothing.
    """
    try:
        head = q._queue[0]  # type: ignore
    except IndexError:
        return default
    return head
def toggle_extensions_by_ids(curl: Curl, extension_ids):
    """Toggle TLS extensions so that exactly ``extension_ids`` are requested.

    Extensions wanted but not in the default-enabled set are switched on
    first; default-enabled extensions that are not wanted are switched off
    afterwards.

    Args:
        curl: the handle to configure.
        extension_ids: set of numeric TLS extension ids that should be on.
    """
    # TODO: find a better representation, rather than magic numbers
    default_enabled = {0, 10, 11, 13, 16, 23, 35, 43, 45, 51, 65281}

    for ext_id in extension_ids - default_enabled:
        toggle_extension(curl, ext_id, enable=True)

    for ext_id in default_enabled - extension_ids:
        toggle_extension(curl, ext_id, enable=False)
def set_ja3_options(curl: Curl, ja3: str, permute: bool = False):
    """Configure the TLS options of ``curl`` from a JA3 fingerprint string.

    The string has five comma-separated fields: TLS version, ciphers,
    extensions, curves and curve formats. Multi-valued fields are separated
    by ``-``.

    Detailed explanation: https://engineering.salesforce.com/tls-fingerprinting-with-ja3-and-ja3s-247362855967/

    Args:
        curl: the handle to configure.
        ja3: the JA3 string.
        permute: when true, the extension order from ``ja3`` is not enforced.

    Raises:
        ImpersonateError: if a cipher id has no known name.
    """
    tls_version, ciphers, extensions, curves, curve_formats = ja3.split(",")

    # TLS version
    ssl_version = TLS_VERSION_MAP[int(tls_version)]
    curl.setopt(CurlOpt.SSLVERSION, ssl_version | CurlSslVersion.MAX_DEFAULT)
    assert ssl_version == CurlSslVersion.TLSv1_2, "Only TLS v1.2 works for now."

    # Ciphers: numeric ids are translated to names, then joined with ':'.
    cipher_names = []
    for raw_cipher in ciphers.split("-"):
        cipher_id = int(raw_cipher)
        cipher_name = TLS_CIPHER_NAME_MAP.get(cipher_id)
        if cipher_name:
            cipher_names.append(cipher_name)
            continue
        raise ImpersonateError(f"Cipher {hex(cipher_id)} is not found")
    curl.setopt(CurlOpt.SSL_CIPHER_LIST, ":".join(cipher_names))

    # Extensions: a trailing padding(21) entry is dropped with a warning.
    padding_suffix = "-21"
    if extensions.endswith(padding_suffix):
        extensions = extensions[: -len(padding_suffix)]
        warnings.warn(
            "Padding(21) extension found in ja3 string, whether to add it should "
            "be managed by the SSL engine. The TLS client hello packet may contain "
            "or not contain this extension, any of which should be correct.",
            CurlCffiWarning,
            stacklevel=1,
        )
    toggle_extensions_by_ids(curl, {int(ext) for ext in extensions.split("-")})
    if not permute:
        curl.setopt(CurlOpt.TLS_EXTENSION_ORDER, extensions)

    # Curves: numeric ids are translated to names, then joined with ':'.
    curve_names = [TLS_EC_CURVES_MAP[int(curve)] for curve in curves.split("-")]
    curl.setopt(CurlOpt.SSL_EC_CURVES, ":".join(curve_names))

    assert int(curve_formats) == 0, "Only curve_formats == 0 is supported."
def set_akamai_options(curl: Curl, akamai: str):
    """Configure the HTTP/2 options of ``curl`` from an Akamai fingerprint.

    The string has four ``|``-separated fields: settings, window update,
    streams and pseudo-header order.

    Detailed explanation: https://www.blackhat.com/docs/eu-17/materials/eu-17-Shuster-Passive-Fingerprinting-Of-HTTP2-Clients-wp.pdf
    """
    settings, window_update, streams, header_order = akamai.split("|")

    curl.setopt(CurlOpt.HTTP_VERSION, CurlHttpVersion.V2_0)

    # For compatibility with tls.peet.ws, commas between settings are
    # rewritten as semicolons.
    curl.setopt(CurlOpt.HTTP2_SETTINGS, ";".join(settings.split(",")))
    curl.setopt(CurlOpt.HTTP2_WINDOW_UPDATE, int(window_update))

    if streams != "0":
        curl.setopt(CurlOpt.HTTP2_STREAMS, streams)

    # m,a,s,p -> masp
    # curl-impersonate only accepts masp format, without commas.
    curl.setopt(CurlOpt.HTTP2_PSEUDO_HEADERS_ORDER, "".join(header_order.split(",")))
def set_extra_fp(curl: Curl, fp: ExtraFingerprints):
    """Apply the settings of an ``ExtraFingerprints`` object to ``curl``.

    The signature-algorithm list and the last three options are applied only
    when their value is truthy; the rest are always set.
    """
    if fp.tls_signature_algorithms:
        curl.setopt(CurlOpt.SSL_SIG_HASH_ALGS, ",".join(fp.tls_signature_algorithms))

    always_applied = (
        (CurlOpt.SSLVERSION, fp.tls_min_version | CurlSslVersion.MAX_DEFAULT),
        (CurlOpt.TLS_GREASE, int(fp.tls_grease)),
        (CurlOpt.SSL_PERMUTE_EXTENSIONS, int(fp.tls_permute_extensions)),
        (CurlOpt.SSL_CERT_COMPRESSION, fp.tls_cert_compression),
        (CurlOpt.STREAM_WEIGHT, fp.http2_stream_weight),
        (CurlOpt.STREAM_EXCLUSIVE, fp.http2_stream_exclusive),
    )
    for option, value in always_applied:
        curl.setopt(option, value)

    applied_when_truthy = (
        (CurlOpt.TLS_DELEGATED_CREDENTIALS, fp.tls_delegated_credential),
        (CurlOpt.TLS_RECORD_SIZE_LIMIT, fp.tls_record_size_limit),
        (CurlOpt.HTTP2_NO_PRIORITY, fp.http2_no_priority),
    )
    for option, value in applied_when_truthy:
        if value:
            curl.setopt(option, value)
def set_curl_options(
curl: Curl,
method: HttpMethod,
url: str,
*,
params_list: list[Union[dict, list, tuple, None]] = [], # noqa: B006
base_url: Optional[str] = None,
data: Optional[Union[dict[str, str], list[tuple], str, BytesIO, bytes]] = None,
json: Optional[dict | list] = None,
headers_list: list[Optional[HeaderTypes]] = [], # noqa: B006
cookies_list: list[Optional[CookieTypes]] = [], # noqa: B006
files: Optional[dict] = None,
auth: Optional[tuple[str, str]] = None,
timeout: Optional[Union[float, tuple[float, float], object]] = not_set,
allow_redirects: Optional[bool] = True,
max_redirects: Optional[int] = 30,
proxies_list: list[Optional[ProxySpec]] = [], # noqa: B006
proxy: Optional[str] = None,
proxy_auth: Optional[tuple[str, str]] = None,
verify_list: list[Union[bool, str, None]] = [], # noqa: B006
referer: Optional[str] = None,
accept_encoding: Optional[str] = "gzip, deflate, br, zstd",
content_callback: Optional[Callable] = None,
impersonate: Optional[Union[BrowserTypeLiteral, str]] = None,
ja3: Optional[str] = None,
akamai: Optional[str] = None,
extra_fp: Optional[Union[ExtraFingerprints, ExtraFpDict]] = None,
default_headers: bool = True,
quote: Union[str, Literal[False]] = "",
http_version: Optional[Union[CurlHttpVersion, HttpVersionLiteral]] = None,
interface: Optional[str] = None,
cert: Optional[Union[str, tuple[str, str]]] = None,
stream: Optional[bool] = None,
max_recv_speed: int = 0,
multipart: Optional[CurlMime] = None,
queue_class: Any = None,
event_class: Any = None,
curl_options: Optional[dict[CurlOpt, str]] = None,
):
"""Translate request parameters into options on the ``curl`` handle.

The ``*_list`` arguments (``params_list``, ``headers_list``, ``cookies_list``,
``proxies_list``, ``verify_list``) are each unpacked into a
``(base value, per-request value)`` pair, so they must contain exactly two
items even though their default is an empty list.

Options are applied in a fixed order; in particular ``impersonate`` comes
before ``extra_fp``/``ja3``/``akamai``/``http_version`` and ``curl_options`` is
applied last, so later settings can override earlier ones.

Returns:
A tuple ``(req, buffer, header_buffer, q, header_recved, quit_now)``:
the ``Request`` record, the body buffer (``None`` when streaming or when
``content_callback`` is used), the header buffer, and the queue and two
event objects created for stream mode (``None`` otherwise).

Raises:
TypeError: if ``data`` has an unsupported type, or both ``proxy`` and
per-request proxies are given.
NotImplementedError: if ``files`` is given.
ImpersonateError: if the impersonation target is not supported.
"""
c = curl
method = method.upper() # type: ignore
# method
if method == "POST":
c.setopt(CurlOpt.POST, 1)
elif method != "GET":
c.setopt(CurlOpt.CUSTOMREQUEST, method.encode())
if method == "HEAD":
c.setopt(CurlOpt.NOBODY, 1)
# url
base_params, params = params_list
if base_params:
url = update_url_params(url, base_params)
if params:
url = update_url_params(url, params)
if base_url:
url = urljoin(base_url, url)
# ``quote`` is either a string of extra characters to escape or ``False``;
# only ``False`` turns the final requote_uri() pass off.
if quote:
url = quote_path_and_params(url, quote_str=quote)
if quote is not False:
url = requote_uri(url)
c.setopt(CurlOpt.URL, url.encode())
# data/body/json
if isinstance(data, (dict, list, tuple)):
body = urlencode(data).encode()
elif isinstance(data, str):
body = data.encode()
elif isinstance(data, BytesIO):
body = data.read()
elif isinstance(data, bytes):
body = data
elif data is None:
body = b""
else:
raise TypeError("data must be dict/list/tuple, str, BytesIO or bytes")
# ``json`` takes precedence over ``data`` when both are given.
if json is not None:
body = dumps(json, separators=(",", ":")).encode()
# Tell libcurl to be aware of bodies and related headers when,
# 1. POST/PUT/PATCH, even if the body is empty, it's up to curl to decide what to do
# 2. GET/DELETE with body, although it's against the RFC, some applications.
# e.g. Elasticsearch, use this.
if body or method in ("POST", "PUT", "PATCH"):
c.setopt(CurlOpt.POSTFIELDS, body)
# necessary if body contains '\0'
c.setopt(CurlOpt.POSTFIELDSIZE, len(body))
if method == "GET":
c.setopt(CurlOpt.CUSTOMREQUEST, method)
# headers
base_headers, headers = headers_list
# let headers encoding take precedence over base headers encoding
encoding = headers.encoding if isinstance(headers, Headers) else None
h = Headers(base_headers, encoding=encoding)
h.update(headers)
# remove Host header if it's unnecessary, otherwise curl may get confused.
# Host header will be automatically added by curl if it's not present.
# https://github.com/lexiforest/curl_cffi/issues/119
host_header = h.get("Host")
if host_header is not None:
u = urlparse(url)
if host_header == u.netloc or host_header == u.hostname:
h.pop("Host", None)
# Make curl always include empty headers.
# See: https://stackoverflow.com/a/32911474/1061155
header_lines = []
for k, v in h.multi_items():
if v is None:
header_lines.append(f"{k}:") # Explicitly disable this header
elif v == "":
header_lines.append(f"{k};") # Add an empty valued header
else:
header_lines.append(f"{k}: {v}")
# Add content-type if missing
if json is not None:
update_header_line(header_lines, "Content-Type", "application/json")
if isinstance(data, dict) and method != "POST":
update_header_line(
header_lines, "Content-Type", "application/x-www-form-urlencoded"
)
if isinstance(data, (str, bytes)):
update_header_line(header_lines, "Content-Type", "application/octet-stream")
# Never send `Expect` header.
update_header_line(header_lines, "Expect", "", replace=True)
c.setopt(CurlOpt.HTTPHEADER, [h.encode() for h in header_lines])
req = Request(url, h, method)
# cookies
c.setopt(CurlOpt.COOKIEFILE, b"") # always enable the curl cookie engine first
c.setopt(CurlOpt.COOKIELIST, "ALL") # remove all the old cookies first.
base_cookies, cookies = cookies_list
if base_cookies:
for morsel in base_cookies.get_cookies_for_curl(req): # type: ignore
curl.setopt(CurlOpt.COOKIELIST, morsel.to_curl_format())
if cookies:
temp_cookies = Cookies(cookies)
for morsel in temp_cookies.get_cookies_for_curl(req):
curl.setopt(CurlOpt.COOKIELIST, morsel.to_curl_format())
# files
if files:
raise NotImplementedError(
"files is not supported, use `multipart`. See examples here: "
"https://github.com/lexiforest/curl_cffi/blob/main/examples/upload.py"
)
# multipart
if multipart:
# multipart overrides postfields; ``data`` items are added as extra parts
for k, v in cast(dict, data or {}).items():
multipart.addpart(name=k, data=v.encode() if isinstance(v, str) else v)
c.setopt(CurlOpt.MIMEPOST, multipart._form)
# auth
if auth:
username, password = auth
c.setopt(CurlOpt.USERNAME, username.encode()) # pyright: ignore [reportPossiblyUnboundVariable=none]
c.setopt(CurlOpt.PASSWORD, password.encode()) # pyright: ignore [reportPossiblyUnboundVariable=none]
# timeout
# NOTE: when ``timeout`` is left at the ``not_set`` sentinel, none of the
# branches below match and no timeout option is set here.
if timeout is None:
timeout = 0 # indefinitely
if isinstance(timeout, tuple):
connect_timeout, read_timeout = timeout
all_timeout = connect_timeout + read_timeout
c.setopt(CurlOpt.CONNECTTIMEOUT_MS, int(connect_timeout * 1000))
if not stream:
c.setopt(CurlOpt.TIMEOUT_MS, int(all_timeout * 1000))
else:
# trick from: https://github.com/lexiforest/curl_cffi/issues/156
c.setopt(CurlOpt.LOW_SPEED_LIMIT, 1)
c.setopt(CurlOpt.LOW_SPEED_TIME, math.ceil(all_timeout))
elif isinstance(timeout, (int, float)):
if not stream:
c.setopt(CurlOpt.TIMEOUT_MS, int(timeout * 1000))
else:
c.setopt(CurlOpt.CONNECTTIMEOUT_MS, int(timeout * 1000))
c.setopt(CurlOpt.LOW_SPEED_LIMIT, 1)
c.setopt(CurlOpt.LOW_SPEED_TIME, math.ceil(timeout))
# allow_redirects
c.setopt(CurlOpt.FOLLOWLOCATION, int(allow_redirects)) # type: ignore
# max_redirects
c.setopt(CurlOpt.MAXREDIRS, max_redirects)
# proxies
base_proxies, proxies = proxies_list
if proxy and proxies:
raise TypeError("Cannot specify both 'proxy' and 'proxies'")
if proxy:
proxies = {"all": proxy}
if proxies is None:
proxies = base_proxies
if proxies:
# Turn on proxy_credential_no_reuse, which has the following benefits:
# 1. New connection will be made when proxy username changed
# 2. New TLS session will be created based on proxy address, i.e. when accessing
# the same site with different proxies, TLS session won't leak previous IP.
c.setopt(CurlOpt.PROXY_CREDENTIAL_NO_REUSE, 1)
parts = urlparse(url)
# Lookup order: host-specific entry first, then the scheme entry, then "all".
proxy = cast(Optional[str], proxies.get(parts.scheme, proxies.get("all")))
if parts.hostname:
proxy = (
proxies.get( # type: ignore
f"{parts.scheme}://{parts.hostname}",
proxies.get(f"all://{parts.hostname}"),
)
or proxy
)
if proxy is not None:
c.setopt(CurlOpt.PROXY, proxy)
if parts.scheme == "https":
if proxy.startswith("https://"):
warnings.warn(
"Make sure you are using https over https proxy, otherwise, "
"the proxy prefix should be 'http://' not 'https://', "
"see: https://github.com/lexiforest/curl_cffi/issues/6",
CurlCffiWarning,
stacklevel=2,
)
# For https site with http tunnel proxy, tell curl to enable tunneling
if not proxy.startswith("socks"):
c.setopt(CurlOpt.HTTPPROXYTUNNEL, 1)
# proxy_auth
if proxy_auth:
username, password = proxy_auth
c.setopt(CurlOpt.PROXYUSERNAME, username.encode())
c.setopt(CurlOpt.PROXYPASSWORD, password.encode())
# verify
base_verify, verify = verify_list
# Verification is off when the request says ``False``, or when the request
# says nothing and the base value is falsy.
if verify is False or not base_verify and verify is None:
c.setopt(CurlOpt.SSL_VERIFYPEER, 0)
c.setopt(CurlOpt.SSL_VERIFYHOST, 0)
# cert for this single request
if isinstance(verify, str):
c.setopt(CurlOpt.CAINFO, verify)
# cert for the session
if verify in (None, True) and isinstance(base_verify, str):
c.setopt(CurlOpt.CAINFO, base_verify)
# referer
if referer:
c.setopt(CurlOpt.REFERER, referer.encode())
# accept_encoding
if accept_encoding is not None:
c.setopt(CurlOpt.ACCEPT_ENCODING, accept_encoding.encode())
# cert
if cert:
if isinstance(cert, str):
c.setopt(CurlOpt.SSLCERT, cert)
else:
cert, key = cert
c.setopt(CurlOpt.SSLCERT, cert)
c.setopt(CurlOpt.SSLKEY, key)
# impersonate
if impersonate:
impersonate = normalize_browser_type(impersonate)
ret = c.impersonate(impersonate, default_headers=default_headers) # type: ignore
if ret != 0:
raise ImpersonateError(f"Impersonating {impersonate} is not supported")
# extra_fp options
if extra_fp:
if isinstance(extra_fp, dict):
extra_fp = ExtraFingerprints(**extra_fp)
if impersonate:
warnings.warn(
"Extra fingerprints was altered after impersonated version was set.",
CurlCffiWarning,
stacklevel=1,
)
set_extra_fp(c, extra_fp)
# ja3 string
if ja3:
if impersonate:
warnings.warn(
"JA3 fingerprint was altered after impersonated version was set.",
CurlCffiWarning,
stacklevel=1,
)
permute = False
if isinstance(extra_fp, ExtraFingerprints) and extra_fp.tls_permute_extensions:
permute = True
if isinstance(extra_fp, dict) and extra_fp.get("tls_permute_extensions"):
permute = True
set_ja3_options(c, ja3, permute=permute)
# akamai string
if akamai:
if impersonate:
warnings.warn(
"Akamai fingerprint was altered after impersonated version was set.",
CurlCffiWarning,
stacklevel=1,
)
set_akamai_options(c, akamai)
# http_version, after impersonate, which will change this to http2
if http_version:
http_version = normalize_http_version(http_version)
c.setopt(CurlOpt.HTTP_VERSION, http_version)
# Response body destination: a queue in stream mode, the caller's callback
# if one is given, otherwise an in-memory buffer.
buffer = None
q = None
header_recved = None
quit_now = None
if stream:
q = queue_class()
header_recved = event_class()
quit_now = event_class()
def qput(chunk):
# The first body chunk also signals that the headers have been received.
if not header_recved.is_set():
header_recved.set()
# Returning the error marker makes curl abort the transfer.
if quit_now.is_set():
return CURL_WRITEFUNC_ERROR
q.put_nowait(chunk)
return len(chunk)
c.setopt(CurlOpt.WRITEFUNCTION, qput)
elif content_callback is not None:
c.setopt(CurlOpt.WRITEFUNCTION, content_callback)
else:
buffer = BytesIO()
c.setopt(CurlOpt.WRITEDATA, buffer)
header_buffer = BytesIO()
c.setopt(CurlOpt.HEADERDATA, header_buffer)
# interface
if interface:
c.setopt(CurlOpt.INTERFACE, interface.encode())
# max_recv_speed
# do not check, since 0 is a valid value to disable it
c.setopt(CurlOpt.MAX_RECV_SPEED_LARGE, max_recv_speed)
# set extra options, after all others, because it will alter some options
if curl_options:
for option, setting in curl_options.items():
c.setopt(option, setting)
return req, buffer, header_buffer, q, header_recved, quit_now
@@ -0,0 +1,839 @@
from __future__ import annotations
import asyncio
import struct
from enum import IntEnum
from functools import partial
from json import dumps, loads
from select import select
from typing import (
TYPE_CHECKING,
Any,
Callable,
Literal,
Optional,
TypeVar,
Union,
)
import warnings
from curl_cffi.utils import CurlCffiWarning
from ..aio import CURL_SOCKET_BAD, get_selector
from ..const import CurlECode, CurlInfo, CurlOpt, CurlWsFlag
from ..curl import Curl, CurlError
from .exceptions import SessionClosed, Timeout
from .utils import not_set, set_curl_options
if TYPE_CHECKING:
from typing_extensions import Self
from ..const import CurlHttpVersion
from ..curl import CurlWsFrame
from .cookies import CookieTypes
from .headers import HeaderTypes
from .impersonate import BrowserTypeLiteral, ExtraFingerprints, ExtraFpDict
from .session import AsyncSession, ProxySpec
T = TypeVar("T")
ON_DATA_T = Callable[["WebSocket", bytes, CurlWsFrame], None]
ON_MESSAGE_T = Callable[["WebSocket", Union[bytes, str]], None]
ON_ERROR_T = Callable[["WebSocket", CurlError], None]
ON_OPEN_T = Callable[["WebSocket"], None]
ON_CLOSE_T = Callable[["WebSocket", int, str], None]
# We need a partial for dumps() because a custom function may not accept the parameter
dumps = partial(dumps, separators=(",", ":"))
class WsCloseCode(IntEnum):
"""WebSocket close status codes.

See: https://www.iana.org/assignments/websocket/websocket.xhtml
"""
# Being an IntEnum, every member compares equal to its plain integer value.
# Codes in the 1000 range.
OK = 1000
GOING_AWAY = 1001
PROTOCOL_ERROR = 1002
UNSUPPORTED_DATA = 1003
# Used by _unpack_close_frame() when a close frame carries no status code.
UNKNOWN = 1005
ABNORMAL_CLOSURE = 1006
INVALID_DATA = 1007
POLICY_VIOLATION = 1008
MESSAGE_TOO_BIG = 1009
MANDATORY_EXTENSION = 1010
INTERNAL_ERROR = 1011
SERVICE_RESTART = 1012
TRY_AGAIN_LATER = 1013
BAD_GATEWAY = 1014
TLS_HANDSHAKE = 1015
# Codes in the 3000 range.
UNAUTHORIZED = 3000
FORBIDDEN = 3003
TIMEOUT = 3008
class WebSocketError(CurlError):
"""WebSocket-specific error.

Args:
message: human-readable description of the failure.
code: a WebSocket close code or a curl error code; defaults to 0.
"""
def __init__(
self, message: str, code: Union[WsCloseCode, CurlECode, Literal[0]] = 0
):
# Message and code are handed to the next __init__ in the MRO unchanged.
super().__init__(message, code) # type: ignore
class WebSocketClosed(WebSocketError, SessionClosed):
"""WebSocket is already closed.

Derives from both WebSocketError and SessionClosed, so handlers written
for either base class catch it.
"""
class WebSocketTimeout(WebSocketError, Timeout):
"""WebSocket operation timed out.

Derives from both WebSocketError and Timeout, so handlers written for
either base class catch it.
"""
async def aselect(
    fd,
    mode: Literal["read", "write"] = "read",
    *,
    loop: asyncio.AbstractEventLoop,
    timeout: Optional[float] = None,
) -> bool:
    """Wait until ``fd`` becomes readable or writable on ``loop``.

    Args:
        fd: the file descriptor to watch.
        mode: ``"read"`` to wait for readability, ``"write"`` for writability.
        loop: the event loop on which the watcher is registered.
        timeout: seconds to wait; ``None`` waits without a limit.

    Returns:
        ``True`` if the descriptor became ready, ``False`` on timeout.

    Raises:
        ValueError: if ``mode`` is neither ``"read"`` nor ``"write"``.
    """
    if mode == "read":
        watch, unwatch = loop.add_reader, loop.remove_reader
    elif mode == "write":
        watch, unwatch = loop.add_writer, loop.remove_writer
    else:
        raise ValueError(f"Invalid mode: {mode}. Must be 'read' or 'write'")

    ready = loop.create_future()
    watch(fd, ready.set_result, None)
    # Whatever completes the future (readiness or cancellation on timeout),
    # the watcher is removed from the loop afterwards.
    ready.add_done_callback(lambda _: unwatch(fd))

    try:
        await asyncio.wait_for(ready, timeout)
    except asyncio.TimeoutError:
        return False
    return True
class BaseWebSocket:
    """Base class holding the curl handle and close state of a WebSocket.

    Attributes:
        autoclose: whether to close the WebSocket after receiving a close frame.
        debug: passed to ``Curl`` when the handle is created lazily.
        closed: set to ``True`` by ``terminate()``.
    """

    def __init__(self, curl: Curl, *, autoclose: bool = True, debug: bool = False):
        """
        Args:
            curl: the curl handle to use, or the ``not_set`` sentinel to have
                one created on first access of ``curl``.
            autoclose: whether to close the WebSocket after receiving a close
                frame.
            debug: print extra curl debug info.
        """
        self._curl: Curl = curl
        self.autoclose: bool = autoclose
        self._close_code: Optional[int] = None
        self._close_reason: Optional[str] = None
        self.debug = debug
        self.closed = False

    @property
    def curl(self):
        """The underlying curl handle, created on first use if none was given."""
        if self._curl is not_set:
            self._curl = Curl(debug=self.debug)
        return self._curl

    @property
    def close_code(self) -> Optional[int]:
        """The WebSocket close code, if the connection has been closed."""
        return self._close_code

    @property
    def close_reason(self) -> Optional[str]:
        """The WebSocket close reason, if the connection has been closed."""
        return self._close_reason

    @staticmethod
    def _pack_close_frame(code: int, reason: bytes) -> bytes:
        """Build a close frame payload: 2-byte big-endian code, then reason."""
        return struct.pack("!H", code) + reason

    @staticmethod
    def _unpack_close_frame(frame: bytes) -> tuple[int, str]:
        """Split a close frame payload into ``(code, reason)``.

        A payload shorter than two bytes carries no status code and yields
        ``(WsCloseCode.UNKNOWN, "")``.

        Codes in the range 3000-4999 are accepted as-is: RFC 6455 reserves
        3000-3999 for libraries and frameworks and 4000-4999 for private use,
        so peers may legitimately send values that are not ``WsCloseCode``
        members. Below 3000 the code must be a known member other than
        ``UNKNOWN``.

        Raises:
            WebSocketError: with ``INVALID_DATA`` if the reason is not valid
                UTF-8, or with ``PROTOCOL_ERROR`` if the payload cannot be
                parsed or the close code is not acceptable.
        """
        if len(frame) < 2:
            return WsCloseCode.UNKNOWN, ""

        try:
            code = struct.unpack_from("!H", frame)[0]
            reason = frame[2:].decode()
        except UnicodeDecodeError as e:
            raise WebSocketError(
                "Invalid close message", WsCloseCode.INVALID_DATA
            ) from e
        except Exception as e:
            raise WebSocketError(
                "Invalid close frame", WsCloseCode.PROTOCOL_ERROR
            ) from e

        # Library-registered and private-use codes need not be enum members.
        if 3000 <= code <= 4999:
            return code, reason

        if code not in WsCloseCode._value2member_map_ or code == WsCloseCode.UNKNOWN:
            raise WebSocketError(
                f"Invalid close code: {code}", WsCloseCode.PROTOCOL_ERROR
            )
        return code, reason

    def terminate(self):
        """Terminate the underlying connection."""
        self.closed = True
        self.curl.close()
# Event names used as keys for the callbacks registered on ``WebSocket``
# (one per ``on_<event>`` constructor argument).
EventTypeLiteral = Literal["open", "close", "data", "message", "error"]
class WebSocket(BaseWebSocket):
    """A WebSocket implementation using libcurl."""

    def __init__(
        self,
        curl: Union[Curl, Any] = not_set,
        *,
        autoclose: bool = True,
        skip_utf8_validation: bool = False,
        debug: bool = False,
        on_open: Optional[ON_OPEN_T] = None,
        on_close: Optional[ON_CLOSE_T] = None,
        on_data: Optional[ON_DATA_T] = None,
        on_message: Optional[ON_MESSAGE_T] = None,
        on_error: Optional[ON_ERROR_T] = None,
    ):
        """
        Args:
            curl: curl handle to use; when left unset, a handle is created on
                first use.
            autoclose: whether to close the WebSocket after receiving a close frame.
            skip_utf8_validation: whether to skip UTF-8 validation for text frames in
                run_forever().
            debug: print extra curl debug info.
            on_open: open callback, ``def on_open(ws)``
            on_close: close callback, ``def on_close(ws, code, reason)``
            on_data: raw data receive callback, ``def on_data(ws, data, frame)``
            on_message: message receive callback, ``def on_message(ws, message)``
            on_error: error callback, ``def on_error(ws, exception)``
        """
        super().__init__(curl=curl, autoclose=autoclose, debug=debug)
        self.skip_utf8_validation = skip_utf8_validation
        # Loop flag of run_forever(); defined up front so the attribute exists
        # even when send() or run_forever() has not been called yet.
        self.keep_running = False
        callbacks: dict[EventTypeLiteral, Optional[Callable]] = {
            "open": on_open,
            "close": on_close,
            "data": on_data,
            "message": on_message,
            "error": on_error,
        }
        # Only callbacks that were actually supplied are registered.
        self._emitters: dict[EventTypeLiteral, Callable] = {
            event: callback for event, callback in callbacks.items() if callback
        }

    def __iter__(self) -> WebSocket:
        """Return ``self`` as an iterator over received messages.

        Raises:
            WebSocketClosed: if the WebSocket is already closed.
        """
        if self.closed:
            raise WebSocketClosed("WebSocket is closed")
        return self

    def __next__(self) -> bytes:
        """Return the next message; iteration stops when a close frame arrives."""
        msg, flags = self.recv()
        if flags & CurlWsFlag.CLOSE:
            raise StopIteration
        return msg

    def _emit(self, event_type: EventTypeLiteral, *args) -> None:
        """Invoke the callback registered for ``event_type``, if there is one.

        An exception raised by the callback is handed to the ``error`` callback
        when one is registered; otherwise a ``CurlCffiWarning`` is issued.
        """
        callback = self._emitters.get(event_type)
        if not callback:
            return
        try:
            callback(self, *args)
        except Exception as e:
            error_callback = self._emitters.get("error")
            if error_callback:
                error_callback(self, e)
            else:
                warnings.warn(
                    f"WebSocket callback '{event_type}' failed",
                    CurlCffiWarning,
                    stacklevel=2,
                )

    def connect(
        self,
        url: str,
        params: Optional[Union[dict, list, tuple]] = None,
        headers: Optional[HeaderTypes] = None,
        cookies: Optional[CookieTypes] = None,
        auth: Optional[tuple[str, str]] = None,
        timeout: Optional[Union[float, tuple[float, float], object]] = not_set,
        allow_redirects: bool = True,
        max_redirects: int = 30,
        proxies: Optional[ProxySpec] = None,
        proxy: Optional[str] = None,
        proxy_auth: Optional[tuple[str, str]] = None,
        verify: Optional[bool] = None,
        referer: Optional[str] = None,
        accept_encoding: Optional[str] = "gzip, deflate, br",
        impersonate: Optional[BrowserTypeLiteral] = None,
        ja3: Optional[str] = None,
        akamai: Optional[str] = None,
        extra_fp: Optional[Union[ExtraFingerprints, ExtraFpDict]] = None,
        default_headers: bool = True,
        quote: Union[str, Literal[False]] = "",
        http_version: Optional[CurlHttpVersion] = None,
        interface: Optional[str] = None,
        cert: Optional[Union[str, tuple[str, str]]] = None,
        max_recv_speed: int = 0,
        curl_options: Optional[dict[CurlOpt, str]] = None,
    ):
        """Connect to the WebSocket.

        libcurl automatically handles pings and pongs.
        ref: https://curl.se/libcurl/c/libcurl-ws.html

        Args:
            url: url for the requests.
            params: query string for the requests.
            headers: headers to send.
            cookies: cookies to use.
            auth: HTTP basic auth, a tuple of (username, password), only basic auth is
                supported.
            timeout: how many seconds to wait before giving up.
            allow_redirects: whether to allow redirection.
            max_redirects: max redirect counts, default 30, use -1 for unlimited.
            proxies: dict of proxies to use, prefer to use ``proxy`` if they are the
                same. format: ``{"http": proxy_url, "https": proxy_url}``.
            proxy: proxy to use, format: "http://user:pass@proxy_url".
                Can't be used with `proxies` parameter.
            proxy_auth: HTTP basic auth for proxy, a tuple of (username, password).
            verify: whether to verify https certs.
            referer: shortcut for setting referer header.
            accept_encoding: shortcut for setting accept-encoding header.
            impersonate: which browser version to impersonate.
            ja3: ja3 string to impersonate.
            akamai: akamai string to impersonate.
            extra_fp: extra fingerprints options, in complement to ja3 and akamai str.
            default_headers: whether to set default browser headers.
            quote: Set characters to be quoted, i.e. percent-encoded. Default safe
                string is ``!#$%&'()*+,/:;=?@[]~``. If set to a string, the character
                will be removed from the safe string, thus quoted. If set to False, the
                url will be kept as is, without any automatic percent-encoding, you must
                encode the URL yourself.
            http_version: limiting http version, defaults to http2.
            interface: which interface to use.
            cert: a tuple of (cert, key) filenames for client cert.
            max_recv_speed: maximum receive speed, bytes per second.
            curl_options: extra curl options to use.

        Returns:
            This WebSocket instance.
        """
        curl = self.curl

        # The per-call values are passed as the second item of each *_list
        # argument; the first slot is left as None here.
        set_curl_options(
            curl=curl,
            method="GET",
            url=url,
            params_list=[None, params],
            headers_list=[None, headers],
            cookies_list=[None, cookies],
            auth=auth,
            timeout=timeout,
            allow_redirects=allow_redirects,
            max_redirects=max_redirects,
            proxies_list=[None, proxies],
            proxy=proxy,
            proxy_auth=proxy_auth,
            verify_list=[None, verify],
            referer=referer,
            accept_encoding=accept_encoding,
            impersonate=impersonate,
            ja3=ja3,
            akamai=akamai,
            extra_fp=extra_fp,
            default_headers=default_headers,
            quote=quote,
            http_version=http_version,
            interface=interface,
            max_recv_speed=max_recv_speed,
            cert=cert,
            curl_options=curl_options,
        )

        # Magic number defined in: https://curl.se/docs/websocket.html
        curl.setopt(CurlOpt.CONNECT_ONLY, 2)
        curl.perform()
        return self

    def recv_fragment(self) -> tuple[bytes, CurlWsFrame]:
        """Receive a single curl websocket fragment as bytes.

        A received close frame is parsed into :attr:`close_code` and
        :attr:`close_reason`; the connection is then closed when ``autoclose``
        is set, or unconditionally when the close frame is invalid.

        Raises:
            WebSocketClosed: if the WebSocket is already closed.
            WebSocketError: if a received close frame is invalid.
        """
        if self.closed:
            raise WebSocketClosed("WebSocket is already closed")

        chunk, frame = self.curl.ws_recv()
        if frame.flags & CurlWsFlag.CLOSE:
            try:
                self._close_code, self._close_reason = self._unpack_close_frame(chunk)
            except WebSocketError as e:
                # Follow the spec to close the connection
                # Errors do not respect autoclose
                self._close_code = e.code
                self.close(e.code)
                raise
            if self.autoclose:
                self.close()

        return chunk, frame

    def recv(self) -> tuple[bytes, int]:
        """
        Receive a frame as bytes. libcurl splits frames into fragments, so we have to
        collect all the chunks for a frame.

        Returns:
            The joined payload and the flags of the last fragment received.

        Raises:
            WebSocketError: if curl reports no valid active socket.
        """
        chunks = []
        flags = 0

        sock_fd = self.curl.getinfo(CurlInfo.ACTIVESOCKET)
        if sock_fd == CURL_SOCKET_BAD:
            raise WebSocketError(
                "Invalid active socket", CurlECode.NO_CONNECTION_AVAILABLE
            )

        while True:
            try:
                # Try to receive the first fragment first
                chunk, frame = self.recv_fragment()
                flags = frame.flags
                chunks.append(chunk)
                # Done once nothing is left of this fragment and no
                # continuation follows.
                if frame.bytesleft == 0 and flags & CurlWsFlag.CONT == 0:
                    break
            except CurlError as e:
                if e.code == CurlECode.AGAIN:
                    # According to https://curl.se/libcurl/c/curl_ws_recv.html
                    # > in real application: wait for socket here, e.g. using select()
                    _, _, _ = select([sock_fd], [], [], 0.5)
                else:
                    raise

        return b"".join(chunks), flags

    def recv_str(self) -> str:
        """Receive a text frame.

        Raises:
            WebSocketError: if the received frame is not flagged as text.
        """
        data, flags = self.recv()
        if not (flags & CurlWsFlag.TEXT):
            raise WebSocketError("Not valid text frame", WsCloseCode.INVALID_DATA)
        return data.decode()

    def recv_json(self, *, loads: Callable[[str], T] = loads) -> T:
        """Receive a JSON frame.

        Args:
            loads: JSON decoder, default is json.loads.
        """
        data = self.recv_str()
        return loads(data)

    def send(self, payload: Union[str, bytes], flags: CurlWsFlag = CurlWsFlag.BINARY):
        """Send a data frame.

        Args:
            payload: data to send.
            flags: flags for the frame.

        Returns:
            The number of bytes sent.

        Raises:
            WebSocketClosed: if the WebSocket is already closed.
            WebSocketError: if there is no valid active socket, or the socket
                does not become writable within 0.5 seconds after curl asked
                to retry.
        """
        if flags & CurlWsFlag.CLOSE:
            # Sending a close frame also stops the run_forever() loop.
            self.keep_running = False

        if self.closed:
            raise WebSocketClosed("WebSocket is already closed")

        # curl expects bytes
        if isinstance(payload, str):
            payload = payload.encode()

        sock_fd = self.curl.getinfo(CurlInfo.ACTIVESOCKET)
        if sock_fd == CURL_SOCKET_BAD:
            raise WebSocketError(
                "Invalid active socket", CurlECode.NO_CONNECTION_AVAILABLE
            )

        # Loop checks for CurlECode.Again
        # https://curl.se/libcurl/c/curl_ws_send.html
        offset = 0
        while offset < len(payload):
            current_buffer = payload[offset:]
            try:
                n_sent = self.curl.ws_send(current_buffer, flags)
            except CurlError as e:
                if e.code == CurlECode.AGAIN:
                    _, writeable, _ = select([], [sock_fd], [], 0.5)
                    if not writeable:
                        raise WebSocketError("Socket write timeout") from e
                    continue
                raise
            offset += n_sent

        return offset

    def send_binary(self, payload: bytes):
        """Send a binary frame.

        Args:
            payload: binary data to send.
        """
        return self.send(payload, CurlWsFlag.BINARY)

    def send_bytes(self, payload: bytes):
        """Send a binary frame, alias of :meth:`send_binary`.

        Args:
            payload: binary data to send.
        """
        return self.send(payload, CurlWsFlag.BINARY)

    def send_str(self, payload: str):
        """Send a text frame.

        Args:
            payload: text data to send.
        """
        return self.send(payload, CurlWsFlag.TEXT)

    def send_json(self, payload: Any, *, dumps: Callable[[Any], str] = dumps):
        """Send a JSON frame.

        Args:
            payload: data to send.
            dumps: JSON encoder, default is json.dumps.
        """
        return self.send_str(dumps(payload))

    def ping(self, payload: Union[str, bytes]):
        """Send a ping frame.

        Args:
            payload: data to send.
        """
        return self.send(payload, CurlWsFlag.PING)

    def run_forever(self, url: str = "", **kwargs):
        """Run the WebSocket forever. See :meth:`connect` for details on parameters.

        libcurl automatically handles pings and pongs.
        ref: https://curl.se/libcurl/c/libcurl-ws.html

        Args:
            url: when non-empty, :meth:`connect` is called with it (and
                ``kwargs``) before the receive loop starts.

        Raises:
            WebSocketError: if there is no valid active socket, or a text
                message is not valid UTF-8.
            CurlError: any receive error other than "try again" is reported to
                the ``error`` callback and then re-raised.
        """
        if url:
            self.connect(url, **kwargs)

        sock_fd = self.curl.getinfo(CurlInfo.ACTIVESOCKET)
        if sock_fd == CURL_SOCKET_BAD:
            raise WebSocketError(
                "Invalid active socket", CurlECode.NO_CONNECTION_AVAILABLE
            )

        self._emit("open")

        # Keep reading the messages and invoke callbacks
        # TODO: Reconnect logic
        chunks = []
        self.keep_running = True
        while self.keep_running:
            try:
                chunk, frame = self.recv_fragment()
                flags = frame.flags
                self._emit("data", chunk, frame)

                chunks.append(chunk)
                # Keep collecting until the message is complete.
                if not (frame.bytesleft == 0 and flags & CurlWsFlag.CONT == 0):
                    continue

                # Avoid unnecessary computation
                if "message" in self._emitters:
                    # Concatenate collected chunks with the final message
                    msg = b"".join(chunks)

                    if (flags & CurlWsFlag.TEXT) and not self.skip_utf8_validation:
                        try:
                            msg = msg.decode()  # type: ignore
                        except UnicodeDecodeError as e:
                            self._close_code = WsCloseCode.INVALID_DATA
                            self.close(WsCloseCode.INVALID_DATA)
                            raise WebSocketError(
                                "Invalid UTF-8", WsCloseCode.INVALID_DATA
                            ) from e

                    if (flags & CurlWsFlag.BINARY) or (flags & CurlWsFlag.TEXT):
                        self._emit("message", msg)

                chunks = []  # Reset chunks for next message

                if flags & CurlWsFlag.CLOSE:
                    self.keep_running = False
                    self._emit("close", self._close_code or 0, self._close_reason or "")

            except CurlError as e:
                if e.code == CurlECode.AGAIN:
                    # Nothing to read yet: wait up to 0.5 s for the socket.
                    _, _, _ = select([sock_fd], [], [], 0.5)
                else:
                    self._emit("error", e)
                    if not self.closed:
                        code = WsCloseCode.UNKNOWN
                        if isinstance(e, WebSocketError):
                            code = e.code
                        self.close(code)
                    raise

    def close(self, code: int = WsCloseCode.OK, message: bytes = b""):
        """Close the connection.

        Does nothing when no curl handle has been set or created yet.

        Args:
            code: close code.
            message: close reason.
        """
        # Check the raw attribute: the ``curl`` property would lazily create a
        # handle, so it can never be ``not_set``.
        if self._curl is not_set:
            return

        # TODO: As per spec, we should wait for the server to close the connection
        # But this is not a requirement
        msg = self._pack_close_frame(code, message)
        self.send(msg, CurlWsFlag.CLOSE)
        # The only way to close the connection appears to be curl_easy_cleanup
        self.terminate()
class AsyncWebSocket(BaseWebSocket):
    """An async WebSocket implementation using libcurl."""

    def __init__(
        self,
        session: AsyncSession,
        curl: Curl,
        *,
        autoclose: bool = True,
        debug: bool = False,
    ):
        """
        Args:
            session: the session this WebSocket belongs to; it is notified
                when the connection is terminated.
            curl: curl handle used for the connection.
            autoclose: whether to close the WebSocket after receiving a close frame.
            debug: print extra curl debug info.
        """
        super().__init__(curl=curl, autoclose=autoclose, debug=debug)
        self.session = session
        self._loop: Optional[asyncio.AbstractEventLoop] = None
        # Separate locks so that one receive and one send can be in flight.
        self._recv_lock = asyncio.Lock()
        self._send_lock = asyncio.Lock()

    @property
    def loop(self):
        """Loop used for socket waits and executor calls, resolved on first access.

        It is the result of ``get_selector()`` applied to the running loop.
        """
        if self._loop is None:
            self._loop = get_selector(asyncio.get_running_loop())
        return self._loop

    def __aiter__(self) -> Self:
        """Return ``self`` as an async iterator over received messages.

        Raises:
            WebSocketClosed: if the WebSocket is already closed.
        """
        if self.closed:
            raise WebSocketClosed("WebSocket has been closed")
        return self

    async def __anext__(self) -> bytes:
        """Return the next message; iteration stops when a close frame arrives."""
        msg, flags = await self.recv()
        if flags & CurlWsFlag.CLOSE:
            raise StopAsyncIteration
        return msg

    async def recv_fragment(
        self, *, timeout: Optional[float] = None
    ) -> tuple[bytes, CurlWsFrame]:
        """Receive a single frame as bytes.

        A received close frame is parsed into :attr:`close_code` and
        :attr:`close_reason`; the connection is then closed when ``autoclose``
        is set, or unconditionally when the close frame is invalid.

        Args:
            timeout: how many seconds to wait before giving up.

        Raises:
            WebSocketClosed: if the WebSocket is already closed.
            TypeError: if another ``recv_fragment()`` call is still running.
            WebSocketTimeout: if the receive call does not finish in time.
            WebSocketError: if a received close frame is invalid.
        """
        if self.closed:
            raise WebSocketClosed("WebSocket is closed")
        if self._recv_lock.locked():
            raise TypeError("Concurrent call to recv_fragment() is not allowed")

        async with self._recv_lock:
            try:
                chunk, frame = await asyncio.wait_for(
                    self.loop.run_in_executor(None, self.curl.ws_recv), timeout
                )
            except asyncio.TimeoutError as e:
                raise WebSocketTimeout("WebSocket recv_fragment() timed out") from e

            if frame.flags & CurlWsFlag.CLOSE:
                try:
                    code, message = self._unpack_close_frame(chunk)
                    self._close_code, self._close_reason = code, message
                except WebSocketError as e:
                    # Follow the spec to close the connection
                    # Errors do not respect autoclose
                    self._close_code = e.code
                    await self.close(e.code)
                    raise
                if self.autoclose:
                    # Reply with the code and reason that were received.
                    await self.close(code, message.encode())

        return chunk, frame

    async def recv(self, *, timeout: Optional[float] = None) -> tuple[bytes, int]:
        """
        Receive a frame as bytes. libcurl splits frames into fragments, so we have to
        collect all the chunks for a frame.

        Args:
            timeout: how many seconds to wait before giving up.

        Returns:
            The joined payload and the flags of the last fragment received.

        Raises:
            WebSocketError: if curl reports no valid active socket.
            WebSocketTimeout: if no data arrives within ``timeout`` seconds.
        """
        loop = self.loop
        chunks = []
        flags = 0

        sock_fd = await loop.run_in_executor(
            None, self.curl.getinfo, CurlInfo.ACTIVESOCKET
        )
        if sock_fd == CURL_SOCKET_BAD:
            raise WebSocketError(
                "Invalid active socket", CurlECode.NO_CONNECTION_AVAILABLE
            )

        while True:
            try:
                chunk, frame = await self.recv_fragment(timeout=timeout)
                flags = frame.flags
                chunks.append(chunk)
                # Done once nothing is left of this fragment and no
                # continuation follows.
                if frame.bytesleft == 0 and flags & CurlWsFlag.CONT == 0:
                    break
            except CurlError as e:
                if e.code != CurlECode.AGAIN:
                    raise
                # Nothing to read yet: wait for the socket. Without the check
                # below, an expired wait would just retry and the timeout
                # would never take effect on an idle connection.
                readable = await aselect(sock_fd, loop=loop, timeout=timeout)
                if not readable:
                    raise WebSocketTimeout("WebSocket recv() timed out") from e

        return b"".join(chunks), flags

    async def recv_str(self, *, timeout: Optional[float] = None) -> str:
        """Receive a text frame.

        Args:
            timeout: how many seconds to wait before giving up.

        Raises:
            WebSocketError: if the received frame is not flagged as text.
        """
        data, flags = await self.recv(timeout=timeout)
        if not (flags & CurlWsFlag.TEXT):
            # Same message as the sync WebSocket: the flag check says nothing
            # about the encoding of the payload.
            raise WebSocketError("Not valid text frame", WsCloseCode.INVALID_DATA)
        return data.decode()

    async def recv_json(
        self, *, loads: Callable[[str], T] = loads, timeout: Optional[float] = None
    ) -> T:
        """Receive a JSON frame.

        Args:
            loads: JSON decoder, default is json.loads.
            timeout: how many seconds to wait before giving up.
        """
        data = await self.recv_str(timeout=timeout)
        return loads(data)

    async def send(
        self, payload: Union[str, bytes], flags: CurlWsFlag = CurlWsFlag.BINARY
    ):
        """Send a data frame.

        Args:
            payload: data to send.
            flags: flags for the frame.

        Returns:
            The number of bytes sent.

        Raises:
            WebSocketClosed: if the WebSocket is already closed.
            WebSocketError: if there is no valid active socket, or the socket
                does not become writable within 0.5 seconds after curl asked
                to retry.
        """
        if self.closed:
            raise WebSocketClosed("WebSocket is closed")

        # curl expects bytes
        if isinstance(payload, str):
            payload = payload.encode()

        sock_fd = await self.loop.run_in_executor(
            None, self.curl.getinfo, CurlInfo.ACTIVESOCKET
        )
        if sock_fd == CURL_SOCKET_BAD:
            raise WebSocketError(
                "Invalid active socket", CurlECode.NO_CONNECTION_AVAILABLE
            )

        # TODO: Why does concurrently sending fail
        async with self._send_lock:
            offset = 0
            # Loop checks for CurlECode.Again
            # https://curl.se/libcurl/c/curl_ws_send.html
            while offset < len(payload):
                current_buffer = payload[offset:]
                try:
                    n_sent = await self.loop.run_in_executor(
                        None, self.curl.ws_send, current_buffer, flags
                    )
                except CurlError as e:
                    if e.code == CurlECode.AGAIN:
                        writeable = await aselect(
                            sock_fd, mode="write", loop=self.loop, timeout=0.5
                        )
                        if not writeable:
                            raise WebSocketError("Socket write timeout") from e
                        continue
                    raise
                offset += n_sent

        return offset

    async def send_binary(self, payload: bytes):
        """Send a binary frame.

        Args:
            payload: binary data to send.
        """
        return await self.send(payload, CurlWsFlag.BINARY)

    async def send_bytes(self, payload: bytes):
        """Send a binary frame, alias of :meth:`send_binary`.

        Args:
            payload: binary data to send.
        """
        return await self.send(payload, CurlWsFlag.BINARY)

    async def send_str(self, payload: str):
        """Send a text frame.

        Args:
            payload: text data to send.
        """
        return await self.send(payload, CurlWsFlag.TEXT)

    async def send_json(self, payload: Any, *, dumps: Callable[[Any], str] = dumps):
        """Send a JSON frame.

        Args:
            payload: data to send.
            dumps: JSON encoder, default is json.dumps.
        """
        return await self.send_str(dumps(payload))

    async def ping(self, payload: Union[str, bytes]):
        """Send a ping frame.

        Args:
            payload: data to send.
        """
        return await self.send(payload, CurlWsFlag.PING)

    async def close(self, code: int = WsCloseCode.OK, message: bytes = b""):
        """Close the connection.

        Args:
            code: close code.
            message: close reason.
        """
        # TODO: As per spec, we should wait for the server to close the connection
        # But this is not a requirement
        msg = self._pack_close_frame(code, message)
        await self.send(msg, CurlWsFlag.CLOSE)
        # The only way to close the connection appears to be curl_easy_cleanup
        self.terminate()

    def terminate(self):
        """Terminate the underlying connection."""
        super().terminate()
        if not self.session._closed:
            # WebSocket curls CANNOT be reused
            self.session.push_curl(None)
@@ -0,0 +1,16 @@
import warnings
class CurlCffiWarning(UserWarning, RuntimeWarning):
    """Warning category for warnings issued by curl_cffi.

    Inherits from both ``UserWarning`` and ``RuntimeWarning``, so filters
    targeting either category also match it.
    """
def config_warnings(on: bool = False):
    """Turn ``CurlCffiWarning`` reporting on or off.

    Args:
        on: when true, install the ``"default"`` filter action for
            ``CurlCffiWarning``; otherwise install ``"ignore"``.
    """
    action = "default" if on else "ignore"
    warnings.simplefilter(action, category=CurlCffiWarning)
def is_pro():
    """Return ``False`` unconditionally.

    NOTE(review): presumably distinguishes a "pro" edition of the package;
    confirm against the callers.
    """
    pro = False
    return pro