Import python venv for stability
This commit is contained in:
@@ -0,0 +1,96 @@
|
||||
# Public API of the package. Every name listed here is imported further down
# in this module, so ``from <package> import *`` re-exports exactly this set.
__all__ = [
    "Curl",
    "AsyncCurl",
    "CurlMime",
    "CurlError",
    "CurlInfo",
    "CurlOpt",
    "CurlMOpt",
    "CurlECode",
    "CurlHttpVersion",
    "CurlSslVersion",
    "CurlWsFlag",
    "config_warnings",
    "ffi",
    "is_pro",
    "lib",
    "Session",
    "AsyncSession",
    "BrowserType",
    "BrowserTypeLiteral",
    "request",
    "head",
    "get",
    "post",
    "put",
    "patch",
    "delete",
    "options",
    "Cookies",
    "Headers",
    "Request",
    "Response",
    "AsyncWebSocket",
    "WebSocket",
    "WebSocketError",
    "WebSocketClosed",
    "WebSocketTimeout",
    "WsCloseCode",
    "ExtraFingerprints",
    "CookieTypes",
    "HeaderTypes",
    "ProxySpec",
    "exceptions",
]

import _cffi_backend  # noqa: F401 # required by _wrapper

from .__version__ import __curl_version__, __description__, __title__, __version__  # noqa: F401

# This line includes _wrapper.so into the wheel
from ._wrapper import ffi, lib
from .aio import AsyncCurl
from .const import (
    CurlECode,
    CurlHttpVersion,
    CurlInfo,
    CurlMOpt,
    CurlOpt,
    CurlSslVersion,
    CurlWsFlag,
)
from .curl import Curl, CurlError, CurlMime

from .requests import (
    AsyncSession,
    AsyncWebSocket,
    BrowserType,
    BrowserTypeLiteral,
    Cookies,
    CookieTypes,
    ExtraFingerprints,
    Headers,
    HeaderTypes,
    ProxySpec,
    Request,
    Response,
    Session,
    WebSocket,
    WebSocketClosed,
    WebSocketError,
    WebSocketTimeout,
    WsCloseCode,
    delete,
    exceptions,
    get,
    head,
    options,
    patch,
    post,
    put,
    request,
)

from .utils import config_warnings, is_pro

# Import-time side effect: configure package warnings with ``on=False``.
# NOTE(review): presumably this silences the package's own warnings by
# default -- confirm against ``utils.config_warnings``.
config_warnings(on=False)
|
||||
@@ -0,0 +1,8 @@
|
||||
from importlib import metadata

from .curl import Curl

# Package metadata, looked up at import time from the installed
# ``curl_cffi`` distribution.
__title__ = "curl_cffi"
__description__ = metadata.metadata("curl_cffi")["Summary"]
__version__ = metadata.version("curl_cffi")
# ``Curl().version()`` returns bytes; store it as text.
__curl_version__ = Curl().version().decode()
|
||||
+344
@@ -0,0 +1,344 @@
|
||||
"""Ensure asyncio selector methods (add_reader, etc.) are available
|
||||
tornado 6.1 adds AddThreadSelectorEventLoop event loop,
|
||||
running select in a thread and defining these methods on the running event loop.
|
||||
This factors out the functionality of AddThreadSelectorEventLoop
|
||||
into a standalone SelectorThread object which can be attached to any running event loop.
|
||||
Vendored from tornado v6.4.0
|
||||
Redistributed under license Apache-2.0
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import atexit
|
||||
import errno
|
||||
import functools
|
||||
import select
|
||||
import socket
|
||||
import threading
|
||||
import typing
|
||||
from contextlib import suppress
|
||||
from typing import (
|
||||
Any,
|
||||
Callable,
|
||||
Optional,
|
||||
Protocol,
|
||||
TypeVar,
|
||||
Union,
|
||||
)
|
||||
|
||||
_T = TypeVar("_T")


class _HasFileno(Protocol):
    """Structural type for any object exposing a ``fileno()`` method."""

    def fileno(self) -> int:
        return 0


# Either a raw file descriptor number or an object with ``fileno()``.
_FileDescriptorLike = Union[int, _HasFileno]


# Collection of selector thread event loops to shut down on exit.
_selector_loops: set["SelectorThread"] = set()


def _atexit_callback() -> None:
    """Ask every registered selector thread to stop, then forget them all.

    For each registered ``SelectorThread`` this sets its closing flag under
    the condition lock, notifies the condition, writes one byte to the waker
    socket so a blocked ``select`` call returns, and joins the thread if one
    was started. The registry is emptied afterwards.
    """
    for loop in _selector_loops:
        with loop._select_cond:
            loop._closing_selector = True
            loop._select_cond.notify()
        # The waker socket is non-blocking; a full buffer means a wake-up is
        # already pending, so the failed send can be ignored.
        with suppress(BlockingIOError):
            loop._waker_w.send(b"a")
        if loop._thread is not None:
            # If we don't join our (daemon) thread here, we may get a deadlock
            # during interpreter shutdown. I don't really understand why. This
            # deadlock happens every time in CI (both travis and appveyor) but
            # I've never been able to reproduce locally.
            loop._thread.join()
    _selector_loops.clear()


atexit.register(_atexit_callback)
|
||||
|
||||
|
||||
class SelectorThread:
    """Define ``add_reader`` methods to be called in a background select thread.

    Instances of this class start a second thread to run a selector.
    This thread is completely hidden from the user;
    all callbacks are run on the wrapped event loop's thread.

    Typically used via ``AddThreadSelectorEventLoop``,
    but can be attached to a running asyncio loop.
    """

    # Class-level default so ``close()`` and ``_wake_selector()`` can test the
    # flag before an instance ever sets it.
    _closed = False

    def __init__(self, real_loop: asyncio.AbstractEventLoop) -> None:
        """Attach a selector thread to ``real_loop``.

        The thread itself is only started once ``real_loop`` begins running
        (see the ``call_soon`` below).
        """
        self._real_loop = real_loop

        # Guards ``_select_args`` and ``_closing_selector``, which are shared
        # between the event loop thread and the selector thread.
        self._select_cond = threading.Condition()
        self._select_args: Optional[
            tuple[list[_FileDescriptorLike], list[_FileDescriptorLike]]
        ] = None
        self._closing_selector = False
        self._thread: Optional[threading.Thread] = None
        self._thread_manager_handle = self._thread_manager()

        async def thread_manager_anext() -> None:
            # the anext builtin wasn't added until 3.10. We just need to iterate
            # this generator one step.
            await self._thread_manager_handle.__anext__()

        # When the loop starts, start the thread. Not too soon because we can't
        # clean up if we get to this point but the event loop is closed without
        # starting.
        self._real_loop.call_soon(
            lambda: self._real_loop.create_task(thread_manager_anext())
        )

        self._readers: dict[_FileDescriptorLike, Callable] = {}
        self._writers: dict[_FileDescriptorLike, Callable] = {}

        # Writing to _waker_w will wake up the selector thread, which
        # watches for _waker_r to be readable.
        self._waker_r, self._waker_w = socket.socketpair()
        self._waker_r.setblocking(False)
        self._waker_w.setblocking(False)
        _selector_loops.add(self)
        self.add_reader(self._waker_r, self._consume_waker)

    def close(self) -> None:
        """Stop the selector thread and close the waker sockets.

        Calling it again after a completed close is a no-op.
        """
        if self._closed:
            return
        with self._select_cond:
            self._closing_selector = True
            self._select_cond.notify()
        self._wake_selector()
        if self._thread is not None:
            self._thread.join()
        _selector_loops.discard(self)
        self.remove_reader(self._waker_r)
        self._waker_r.close()
        self._waker_w.close()
        self._closed = True

    async def _thread_manager(self) -> typing.AsyncGenerator[None, None]:
        """Start the selector thread, then wait to be shut down.

        Written as an async generator so that closing it (``GeneratorExit``)
        triggers ``close()``.
        """
        # Create a thread to run the select system call. We manage this thread
        # manually so we can trigger a clean shutdown from an atexit hook. Note
        # that due to the order of operations at shutdown, only daemon threads
        # can be shut down in this way (non-daemon threads would require the
        # introduction of a new hook: https://bugs.python.org/issue41962)
        self._thread = threading.Thread(
            name="Tornado selector",
            daemon=True,
            target=self._run_select,
        )
        self._thread.start()
        self._start_select()
        try:
            # The presence of this yield statement means that this coroutine
            # is actually an asynchronous generator, which has a special
            # shutdown protocol. We wait at this yield point until the
            # event loop's shutdown_asyncgens method is called, at which point
            # we will get a GeneratorExit exception and can shut down the
            # selector thread.
            yield
        except GeneratorExit:
            self.close()
            raise

    def _wake_selector(self) -> None:
        """Write one byte to the waker socket so a blocked ``select`` returns."""
        if self._closed:
            return
        # Non-blocking socket: if its buffer is full a wake-up is already
        # pending, so the failed send is ignored.
        with suppress(BlockingIOError):
            self._waker_w.send(b"a")

    def _consume_waker(self) -> None:
        """Drain pending wake-up bytes from the read end of the waker pair."""
        with suppress(BlockingIOError):
            self._waker_r.recv(1024)

    def _start_select(self) -> None:
        """Hand the current reader/writer fd lists to the selector thread."""
        # Capture reader and writer sets here in the event loop
        # thread to avoid any problems with concurrent
        # modification while the select loop uses them.
        with self._select_cond:
            assert self._select_args is None
            self._select_args = (list(self._readers.keys()), list(self._writers.keys()))
            self._select_cond.notify()

    def _run_select(self) -> None:
        """Body of the selector thread.

        Repeatedly waits for a set of fds from ``_start_select``, runs
        ``select.select`` on them and schedules ``_handle_select`` on the real
        loop with the result. Returns once ``_closing_selector`` is set.
        """
        while True:
            with self._select_cond:
                while self._select_args is None and not self._closing_selector:
                    self._select_cond.wait()
                if self._closing_selector:
                    return
                assert self._select_args is not None
                to_read, to_write = self._select_args
                self._select_args = None

            # We use the simpler interface of the select module instead of
            # the more stateful interface in the selectors module because
            # this class is only intended for use on windows, where
            # select.select is the only option. The selector interface
            # does not have well-documented thread-safety semantics that
            # we can rely on so ensuring proper synchronization would be
            # tricky.
            try:
                # On windows, selecting on a socket for write will not
                # return the socket when there is an error (but selecting
                # for reads works). Also select for errors when selecting
                # for writes, and merge the results.
                #
                # This pattern is also used in
                # https://github.com/python/cpython/blob/v3.8.0/Lib/selectors.py#L312-L317
                rs, ws, xs = select.select(to_read, to_write, to_write)
                ws = ws + xs
            except OSError as e:
                # After remove_reader or remove_writer is called, the file
                # descriptor may subsequently be closed on the event loop
                # thread. It's possible that this select thread hasn't
                # gotten into the select system call by the time that
                # happens in which case (at least on macOS), select may
                # raise a "bad file descriptor" error. If we get that
                # error, check and see if we're also being woken up by
                # polling the waker alone. If we are, just return to the
                # event loop and we'll get the updated set of file
                # descriptors on the next iteration. Otherwise, raise the
                # original error.
                if e.errno == getattr(errno, "WSAENOTSOCK", errno.EBADF):
                    rs, _, _ = select.select([self._waker_r.fileno()], [], [], 0)
                    if rs:
                        ws = []
                    else:
                        raise
                else:
                    raise

            try:
                self._real_loop.call_soon_threadsafe(self._handle_select, rs, ws)
            except RuntimeError:
                # "Event loop is closed". Swallow the exception for
                # consistency with PollIOLoop (and logical consistency
                # with the fact that we can't guarantee that an
                # add_callback that completes without error will
                # eventually execute).
                pass
            except AttributeError:
                # ProactorEventLoop may raise this instead of RuntimeError
                # if call_soon_threadsafe races with a call to close().
                # Swallow it too for consistency.
                pass

    def _handle_select(
        self, rs: list[_FileDescriptorLike], ws: list[_FileDescriptorLike]
    ) -> None:
        """Run callbacks for ready fds, then queue the next select round.

        Scheduled onto the real loop by ``_run_select``.
        """
        for r in rs:
            self._handle_event(r, self._readers)
        for w in ws:
            self._handle_event(w, self._writers)
        self._start_select()

    def _handle_event(
        self,
        fd: _FileDescriptorLike,
        cb_map: dict[_FileDescriptorLike, Callable],
    ) -> None:
        """Invoke the callback registered for ``fd`` in ``cb_map``, if any."""
        try:
            callback = cb_map[fd]
        except KeyError:
            # The fd was unregistered after select reported it; nothing to do.
            return
        callback()

    def add_reader(
        self, fd: _FileDescriptorLike, callback: Callable[..., None], *args: Any
    ) -> None:
        """Register ``callback(*args)`` for when ``fd`` is readable."""
        self._readers[fd] = functools.partial(callback, *args)
        self._wake_selector()

    def add_writer(
        self, fd: _FileDescriptorLike, callback: Callable[..., None], *args: Any
    ) -> None:
        """Register ``callback(*args)`` for when ``fd`` is writable."""
        self._writers[fd] = functools.partial(callback, *args)
        self._wake_selector()

    def remove_reader(self, fd: _FileDescriptorLike) -> bool:
        """Unregister the read callback for ``fd``; return False if none existed."""
        try:
            del self._readers[fd]
        except KeyError:
            return False
        self._wake_selector()
        return True

    def remove_writer(self, fd: _FileDescriptorLike) -> bool:
        """Unregister the write callback for ``fd``; return False if none existed."""
        try:
            del self._writers[fd]
        except KeyError:
            return False
        self._wake_selector()
        return True
|
||||
|
||||
|
||||
class AddThreadSelectorEventLoop(asyncio.AbstractEventLoop):
    """Wrap an event loop to add implementations of the ``add_reader`` method family.

    Instances of this class start a second thread to run a selector.
    This thread is completely hidden from the user; all callbacks are
    run on the wrapped event loop's thread.

    This class is used automatically by Tornado; applications should not need
    to refer to it directly.

    It is safe to wrap any event loop with this class, although it only makes sense
    for event loops that do not implement the ``add_reader`` family of methods
    themselves (i.e. ``WindowsProactorEventLoop``)

    Closing the ``AddThreadSelectorEventLoop`` also closes the wrapped event loop.

    """

    # This class is a __getattribute__-based proxy. All attributes other than those
    # in this set are proxied through to the underlying loop.
    MY_ATTRIBUTES = {
        "_real_loop",
        "_selector",
        "add_reader",
        "add_writer",
        "close",
        "remove_reader",
        "remove_writer",
    }

    def __getattribute__(self, name: str) -> Any:
        """Serve names in ``MY_ATTRIBUTES`` locally; proxy the rest to the real loop."""
        if name in AddThreadSelectorEventLoop.MY_ATTRIBUTES:
            return super().__getattribute__(name)
        return getattr(self._real_loop, name)

    def __init__(self, real_loop: asyncio.AbstractEventLoop) -> None:
        """Wrap ``real_loop`` and create the ``SelectorThread`` serving it."""
        self._real_loop = real_loop
        self._selector = SelectorThread(real_loop)

    def close(self) -> None:
        """Close the selector thread first, then the wrapped loop."""
        self._selector.close()
        self._real_loop.close()

    def add_reader(  # type: ignore
        self,
        fd: "_FileDescriptorLike",
        callback: Callable[..., None],
        *args: Any,
    ) -> None:
        """Delegate to ``SelectorThread.add_reader``."""
        return self._selector.add_reader(fd, callback, *args)

    def add_writer(  # type: ignore
        self,
        fd: "_FileDescriptorLike",
        callback: Callable[..., None],
        *args: Any,  # type: ignore
    ) -> None:
        """Delegate to ``SelectorThread.add_writer``."""
        return self._selector.add_writer(fd, callback, *args)

    def remove_reader(self, fd: "_FileDescriptorLike") -> bool:
        """Delegate to ``SelectorThread.remove_reader``."""
        return self._selector.remove_reader(fd)

    def remove_writer(self, fd: "_FileDescriptorLike") -> bool:
        """Delegate to ``SelectorThread.remove_writer``."""
        return self._selector.remove_writer(fd)
|
||||
BIN
Binary file not shown.
@@ -0,0 +1,343 @@
|
||||
import asyncio
|
||||
import sys
|
||||
import warnings
|
||||
from contextlib import suppress
|
||||
from typing import Any, Optional
|
||||
from weakref import WeakKeyDictionary
|
||||
|
||||
from ._wrapper import ffi, lib
|
||||
from .const import CurlECode, CurlMOpt
|
||||
from .curl import DEFAULT_CACERT, Curl, CurlError
|
||||
from .utils import CurlCffiWarning
|
||||
|
||||
__all__ = ["AsyncCurl"]
|
||||
|
||||
if sys.platform == "win32":
    # registry of asyncio loop : selector thread
    _selectors: WeakKeyDictionary = WeakKeyDictionary()
    PROACTOR_WARNING = """
Proactor event loop does not implement add_reader family of methods required.
Registering an additional selector thread for add_reader support.
To avoid this warning use:
asyncio.set_event_loop_policy(WindowsSelectorEventLoopPolicy())
"""

    def get_selector(
        asyncio_loop: asyncio.AbstractEventLoop,
    ) -> asyncio.AbstractEventLoop:
        """Get selector-compatible loop

        Returns an object with ``add_reader`` family of methods,
        either the loop itself or a SelectorThread instance.

        Workaround Windows proactor removal of *reader methods.
        """

        # Reuse the wrapper already created for this loop, if any.
        if asyncio_loop in _selectors:
            return _selectors[asyncio_loop]

        # Anything that is not a proactor loop is returned unchanged. The
        # ``type(None)`` fallback makes the check false for every loop when
        # ``asyncio.ProactorEventLoop`` does not exist.
        if not isinstance(
            asyncio_loop, getattr(asyncio, "ProactorEventLoop", type(None))
        ):
            return asyncio_loop

        warnings.warn(PROACTOR_WARNING, CurlCffiWarning, stacklevel=2)

        from ._asyncio_selector import AddThreadSelectorEventLoop

        selector_loop = _selectors[asyncio_loop] = AddThreadSelectorEventLoop(
            asyncio_loop
        )  # type: ignore

        # patch loop.close to also close the selector thread
        loop_close = asyncio_loop.close

        def _close_selector_and_loop():
            # restore original before calling selector.close,
            # which in turn calls eventloop.close!
            asyncio_loop.close = loop_close
            _selectors.pop(asyncio_loop, None)
            selector_loop.close()

        asyncio_loop.close = _close_selector_and_loop
        return selector_loop

else:

    def get_selector(loop: asyncio.AbstractEventLoop) -> asyncio.AbstractEventLoop:
        """Return ``loop`` unchanged; no workaround is applied off Windows."""
        return loop
|
||||
|
||||
|
||||
# Values of the ``what`` argument passed to the socket callback.
CURL_POLL_NONE = 0
CURL_POLL_IN = 1
CURL_POLL_OUT = 2
CURL_POLL_INOUT = 3
CURL_POLL_REMOVE = 4

# Special socket values; both share the value -1.
CURL_SOCKET_TIMEOUT = -1
CURL_SOCKET_BAD = -1

# Event bitmask flags passed to ``curl_multi_socket_action``.
CURL_CSELECT_IN = 0x01
CURL_CSELECT_OUT = 0x02
CURL_CSELECT_ERR = 0x04

# ``msg`` value compared against in ``AsyncCurl.process_data``.
CURLMSG_DONE = 1

# Values for the multi ``PIPELINING`` option.
CURLPIPE_NOTHING = 0
CURLPIPE_HTTP1 = 1  # deprecated
CURLPIPE_MULTIPLEX = 2
|
||||
|
||||
|
||||
"""
|
||||
libcurl provides an event-based system for multiple handles with the following API:
|
||||
|
||||
- curl_multi_socket_action, for detecting events
|
||||
- curl_multi_info_read, for reading the transfer status
|
||||
|
||||
There are 2 callbacks:
|
||||
|
||||
- socket_function, set by CURLMOPT_SOCKETFUNCTION, will be called for socket events.
|
||||
- timer_function, set by CURLMOPT_TIMERFUNCTION, will be called when timeouts happen.
|
||||
|
||||
And it works like the following:
|
||||
|
||||
Set up handles, callbacks first.
|
||||
|
||||
When started, curl_multi_socket_action should be called to start everything.
|
||||
|
||||
If there are data in/out, libcurl calls the socket_function callback, and it sets up
|
||||
`process_data` as asyncio loop reader/writer function. `process_data` will call
|
||||
curl_multi_info_read to determine whether a certain `await perform` has finished.
|
||||
|
||||
When idle, libcurl will call the timer_function callback, which sets up a later call
|
||||
for socket_action to detect events.
|
||||
"""
|
||||
|
||||
|
||||
@ffi.def_extern()
def timer_function(curlm, timeout_ms: int, clientp: Any) -> int:
    """Timer callback invoked by libcurl to (re)schedule its timeout.

    see: https://curl.se/libcurl/c/CURLMOPT_TIMERFUNCTION.html

    Parameters:
        curlm: the multi handle the callback fires for (unused here).
        timeout_ms: delay in milliseconds requested by libcurl.
        clientp: handle created with ``ffi.new_handle``; resolved back to the
            owning object, which must expose ``_timer``, ``loop`` and
            ``process_data``.

    Returns:
        Always 0.
    """
    async_curl = ffi.from_handle(clientp)

    # Cancel the timer anyway, if it's -1, yes, libcurl says it should be cancelled.
    # If not, to add a new timer, we need to cancel the old timer.
    if async_curl._timer:
        async_curl._timer.cancel()  # If already called, cancel does nothing.
        async_curl._timer = None

    # libcurl says to install a timer which calls socket_action on fire.
    # NOTE(review): a timer is installed even when ``timeout_ms`` is -1; with a
    # negative delay asyncio runs the callback as soon as possible. This looks
    # like it doubles as a prompt ``process_data`` pass -- confirm it is
    # intentional before making the install conditional.
    async_curl._timer = async_curl.loop.call_later(
        timeout_ms / 1000,
        async_curl.process_data,
        CURL_SOCKET_TIMEOUT,  # -1
        CURL_POLL_NONE,  # 0
    )

    return 0
|
||||
|
||||
|
||||
@ffi.def_extern()
def socket_function(curl, sockfd: int, what: int, clientp: Any, data: Any) -> int:
    """This callback is called when libcurl decides it's time to interact with certain
    sockets

    Parameters:
        curl: the easy handle the socket belongs to (unused here).
        sockfd: the socket file descriptor concerned.
        what: one of the ``CURL_POLL_*`` values.
        clientp: handle created with ``ffi.new_handle``; resolved back to the
            owning object, which must expose ``loop``, ``_sockfds`` and
            ``process_data``.
        data: per-socket pointer supplied by libcurl (unused here).

    Returns:
        Always 0.
    """

    async_curl = ffi.from_handle(clientp)
    loop = async_curl.loop

    # Always remove and re-add fds
    if sockfd in async_curl._sockfds:
        loop.remove_reader(sockfd)
        loop.remove_writer(sockfd)

    # Need to read from the socket
    if what & CURL_POLL_IN:
        loop.add_reader(sockfd, async_curl.process_data, sockfd, CURL_CSELECT_IN)
        async_curl._sockfds.add(sockfd)

    # Need to write to the socket
    if what & CURL_POLL_OUT:
        loop.add_writer(sockfd, async_curl.process_data, sockfd, CURL_CSELECT_OUT)
        async_curl._sockfds.add(sockfd)

    # Need to remove the socket
    if what == CURL_POLL_REMOVE:
        # ``discard`` rather than ``remove``: a REMOVE for a socket that was
        # never tracked must not raise KeyError inside this C callback.
        async_curl._sockfds.discard(sockfd)

    return 0
|
||||
|
||||
|
||||
class AsyncCurl:
    """Wrapper around curl_multi handle to provide asyncio support. It uses the libcurl
    socket_action APIs."""

    def __init__(self, cacert: str = "", loop=None):
        """
        Parameters:
            cacert: CA cert path to use; when empty, ``DEFAULT_CACERT`` is used.
            loop: EventLoop to use; defaults to the currently running loop.
        """
        self._curlm = lib.curl_multi_init()
        self._cacert = cacert or DEFAULT_CACERT
        self._curl2future: dict[Curl, asyncio.Future] = {}  # curl to future map
        self._curl2curl: dict[ffi.CData, Curl] = {}  # c curl to Curl
        self._sockfds: set[int] = set()  # sockfds
        self.loop = get_selector(
            loop if loop is not None else asyncio.get_running_loop()
        )
        self._timeout_checker = self.loop.create_task(self._force_timeout())
        self._timer: Optional[asyncio.TimerHandle] = None
        self._setup()

    def _setup(self):
        """Install the timer/socket callbacks and pass ``self`` to them."""
        self.setopt(CurlMOpt.TIMERFUNCTION, lib.timer_function)
        self.setopt(CurlMOpt.SOCKETFUNCTION, lib.socket_function)
        # Keep the handle on the instance so it stays alive for as long as
        # libcurl may hand it back to the callbacks.
        self._self_handle = ffi.new_handle(self)
        self.setopt(CurlMOpt.SOCKETDATA, self._self_handle)
        self.setopt(CurlMOpt.TIMERDATA, self._self_handle)
        # self.setopt(CurlMOpt.PIPELINING, CURLPIPE_NOTHING)

    async def close(self):
        """Close and cleanup running timers, readers, writers and handles.

        Calling it again after a completed close is a no-op.
        """

        # Close and wait for the force timeout checker to complete
        self._timeout_checker.cancel()
        with suppress(asyncio.CancelledError):
            await self._timeout_checker

        # Already closed: the multi handle is gone, so there is nothing left
        # to clean up and passing ``None`` to libcurl must be avoided.
        if not self._curlm:
            return

        # Close all pending futures
        for curl, future in self._curl2future.items():
            lib.curl_multi_remove_handle(self._curlm, curl._curl)
            if not future.done() and not future.cancelled():
                future.set_result(None)
        self._curl2future.clear()
        self._curl2curl.clear()

        # Cleanup curl_multi handle
        lib.curl_multi_cleanup(self._curlm)
        self._curlm = None

        # Remove add readers and writers
        for sockfd in self._sockfds:
            self.loop.remove_reader(sockfd)
            self.loop.remove_writer(sockfd)
        self._sockfds.clear()

        # Cancel all time functions
        if self._timer:
            self._timer.cancel()
            self._timer = None

    async def _force_timeout(self):
        """This coroutine is used to safeguard from any missing signals from curl, and
        put everything back on track"""
        while True:
            if not self._curlm:
                break
            self.socket_action(CURL_SOCKET_TIMEOUT, CURL_POLL_NONE)
            await asyncio.sleep(0.1)

    def add_handle(self, curl: Curl):
        """Add a curl handle to be managed by curl_multi. This is the equivalent of
        `perform` in the async world.

        Returns:
            A future that ``set_result``/``set_exception``/``remove_handle``
            later complete for this handle.

        Raises:
            CurlError: if ``curl_multi_add_handle`` reports an error.
        """

        curl._ensure_cacert()
        errcode = lib.curl_multi_add_handle(self._curlm, curl._curl)
        self._check_error(errcode, "add_handle")
        future = self.loop.create_future()
        self._curl2future[curl] = future
        self._curl2curl[curl._curl] = curl
        return future

    def socket_action(self, sockfd: int, ev_bitmask: int) -> int:
        """wrapper for curl_multi_socket_action,
        returns the number of running curl handles.

        Raises:
            CurlError: if ``curl_multi_socket_action`` reports an error.
        """
        running_handle = ffi.new("int *")
        errcode = lib.curl_multi_socket_action(
            self._curlm, sockfd, ev_bitmask, running_handle
        )
        self._check_error(errcode, "socket_action")
        return running_handle[0]

    def process_data(self, sockfd: int, ev_bitmask: int):
        """Drive libcurl for ``sockfd`` and complete any finished transfers.

        Calls ``socket_action`` and then drains ``curl_multi_info_read``,
        resolving or failing the future of every handle reported as done.
        """
        if not self._curlm:
            warnings.warn(
                "Curlm already closed! quitting from process_data",
                CurlCffiWarning,
                stacklevel=2,
            )
            return

        self.socket_action(sockfd, ev_bitmask)

        msg_in_queue = ffi.new("int *")
        while True:
            try:
                curl_msg = lib.curl_multi_info_read(self._curlm, msg_in_queue)
                # NULL is returned as a signal that no more to be get at this point
                if curl_msg == ffi.NULL:
                    break
                if curl_msg.msg == CURLMSG_DONE:
                    curl = self._curl2curl[curl_msg.easy_handle]
                    retcode = curl_msg.data.result
                    if retcode == 0:
                        self.set_result(curl)
                    else:
                        self.set_exception(curl, curl._get_error(retcode, "perform"))
                else:
                    # Will not reach, for nothing else being defined. Reported
                    # as a warning instead of printing to stdout.
                    warnings.warn(
                        f"Unexpected curl message type {curl_msg.msg} in process_data",
                        CurlCffiWarning,
                        stacklevel=2,
                    )
            except Exception:
                warnings.warn(
                    "Unexpected curl multi state in process_data, "
                    "please open an issue on GitHub\n",
                    CurlCffiWarning,
                    stacklevel=2,
                )

    def _pop_future(self, curl: Curl):
        """Detach ``curl`` from the multi handle and return its future, if any.

        Raises:
            CurlError: if ``curl_multi_remove_handle`` reports an error.
        """
        errcode = lib.curl_multi_remove_handle(self._curlm, curl._curl)
        self._check_error(errcode, "remove_handle")
        self._curl2curl.pop(curl._curl, None)
        return self._curl2future.pop(curl, None)

    def remove_handle(self, curl: Curl):
        """Cancel a future for given curl handle."""
        future = self._pop_future(curl)
        if future and not future.done() and not future.cancelled():
            future.cancel()

    def set_result(self, curl: Curl):
        """Mark a future as done for given curl handle."""
        future = self._pop_future(curl)
        if future and not future.done() and not future.cancelled():
            future.set_result(None)

    def set_exception(self, curl: Curl, exception):
        """Raise exception of a future for given curl handle."""
        future = self._pop_future(curl)
        if future and not future.done() and not future.cancelled():
            future.set_exception(exception)

    def _check_error(self, errcode: int, *args: Any):
        """Raise ``CurlError`` when ``errcode`` is not ``CurlECode.OK``.

        Parameters:
            errcode: return code of a ``curl_multi_*`` call.
            *args: words describing the failed action, joined with spaces
                into the error message.
        """
        if errcode == CurlECode.OK:
            return
        # ``curl_multi_strerror`` returns a C string; convert it to text so
        # the message shows the description instead of a cdata repr.
        errmsg = ffi.string(lib.curl_multi_strerror(errcode)).decode(
            errors="backslashreplace"
        )
        action = " ".join([str(a) for a in args]) or "curl_multi operation"
        raise CurlError(
            f"Failed in {action}, multi: ({errcode}) {errmsg}. "
            "See https://curl.se/libcurl/c/libcurl-errors.html first for more "
            "details. Please open an issue on GitHub to help debug this error.",
        )

    def setopt(self, option, value):
        """Wrapper around curl_multi_setopt.

        The integer options listed below are boxed into a ``long*`` before
        the call; every other value is passed through unchanged.
        """
        if option in (
            CurlMOpt.PIPELINING,
            CurlMOpt.MAXCONNECTS,
            CurlMOpt.MAX_HOST_CONNECTIONS,
            CurlMOpt.MAX_PIPELINE_LENGTH,
            CurlMOpt.MAX_TOTAL_CONNECTIONS,
            CurlMOpt.MAX_CONCURRENT_STREAMS,
        ):
            c_value = ffi.new("long*", value)
        else:
            c_value = value
        return lib.curl_multi_setopt(self._curlm, option, c_value)
|
||||
@@ -0,0 +1,613 @@
|
||||
# This file is automatically generated, do not modify it directly.
|
||||
|
||||
from enum import IntEnum
|
||||
|
||||
|
||||
class CurlOpt(IntEnum):
    """``CURLOPT_`` constants extracted from libcurl,
    see: https://curl.se/libcurl/c/curl_easy_setopt.html

    Each value is written as ``<type offset> + <option id>``. ``Curl.setopt``
    uses the offset (the value rounded down to a multiple of 10000) to pick
    the C type of the argument: 0 -> long, 10000 -> char*, 20000 -> void*
    (callbacks), 30000 -> int64_t (offsets), 40000 -> blob.
    """

    WRITEDATA = 10000 + 1
    URL = 10000 + 2
    PORT = 0 + 3
    PROXY = 10000 + 4
    USERPWD = 10000 + 5
    PROXYUSERPWD = 10000 + 6
    RANGE = 10000 + 7
    READDATA = 10000 + 9
    ERRORBUFFER = 10000 + 10
    WRITEFUNCTION = 20000 + 11
    READFUNCTION = 20000 + 12
    TIMEOUT = 0 + 13
    INFILESIZE = 0 + 14
    POSTFIELDS = 10000 + 15
    REFERER = 10000 + 16
    FTPPORT = 10000 + 17
    USERAGENT = 10000 + 18
    LOW_SPEED_LIMIT = 0 + 19
    LOW_SPEED_TIME = 0 + 20
    RESUME_FROM = 0 + 21
    COOKIE = 10000 + 22
    HTTPHEADER = 10000 + 23
    HTTPPOST = 10000 + 24
    SSLCERT = 10000 + 25
    KEYPASSWD = 10000 + 26
    CRLF = 0 + 27
    QUOTE = 10000 + 28
    HEADERDATA = 10000 + 29
    COOKIEFILE = 10000 + 31
    SSLVERSION = 0 + 32
    TIMECONDITION = 0 + 33
    TIMEVALUE = 0 + 34
    CUSTOMREQUEST = 10000 + 36
    STDERR = 10000 + 37
    POSTQUOTE = 10000 + 39
    VERBOSE = 0 + 41
    HEADER = 0 + 42
    NOPROGRESS = 0 + 43
    NOBODY = 0 + 44
    FAILONERROR = 0 + 45
    UPLOAD = 0 + 46
    POST = 0 + 47
    DIRLISTONLY = 0 + 48
    APPEND = 0 + 50
    NETRC = 0 + 51
    FOLLOWLOCATION = 0 + 52
    TRANSFERTEXT = 0 + 53
    PUT = 0 + 54
    PROGRESSFUNCTION = 20000 + 56
    XFERINFODATA = 10000 + 57
    AUTOREFERER = 0 + 58
    PROXYPORT = 0 + 59
    POSTFIELDSIZE = 0 + 60
    HTTPPROXYTUNNEL = 0 + 61
    INTERFACE = 10000 + 62
    KRBLEVEL = 10000 + 63
    SSL_VERIFYPEER = 0 + 64
    CAINFO = 10000 + 65
    MAXREDIRS = 0 + 68
    FILETIME = 0 + 69
    TELNETOPTIONS = 10000 + 70
    MAXCONNECTS = 0 + 71
    FRESH_CONNECT = 0 + 74
    FORBID_REUSE = 0 + 75
    RANDOM_FILE = 10000 + 76
    EGDSOCKET = 10000 + 77
    CONNECTTIMEOUT = 0 + 78
    HEADERFUNCTION = 20000 + 79
    HTTPGET = 0 + 80
    SSL_VERIFYHOST = 0 + 81
    COOKIEJAR = 10000 + 82
    SSL_CIPHER_LIST = 10000 + 83
    HTTP_VERSION = 0 + 84
    FTP_USE_EPSV = 0 + 85
    SSLCERTTYPE = 10000 + 86
    SSLKEY = 10000 + 87
    SSLKEYTYPE = 10000 + 88
    SSLENGINE = 10000 + 89
    SSLENGINE_DEFAULT = 0 + 90
    DNS_USE_GLOBAL_CACHE = 0 + 91
    DNS_CACHE_TIMEOUT = 0 + 92
    PREQUOTE = 10000 + 93
    DEBUGFUNCTION = 20000 + 94
    DEBUGDATA = 10000 + 95
    COOKIESESSION = 0 + 96
    CAPATH = 10000 + 97
    BUFFERSIZE = 0 + 98
    NOSIGNAL = 0 + 99
    SHARE = 10000 + 100
    PROXYTYPE = 0 + 101
    ACCEPT_ENCODING = 10000 + 102
    PRIVATE = 10000 + 103
    HTTP200ALIASES = 10000 + 104
    UNRESTRICTED_AUTH = 0 + 105
    FTP_USE_EPRT = 0 + 106
    HTTPAUTH = 0 + 107
    SSL_CTX_FUNCTION = 20000 + 108
    SSL_CTX_DATA = 10000 + 109
    FTP_CREATE_MISSING_DIRS = 0 + 110
    PROXYAUTH = 0 + 111
    SERVER_RESPONSE_TIMEOUT = 0 + 112
    IPRESOLVE = 0 + 113
    MAXFILESIZE = 0 + 114
    INFILESIZE_LARGE = 30000 + 115
    RESUME_FROM_LARGE = 30000 + 116
    MAXFILESIZE_LARGE = 30000 + 117
    NETRC_FILE = 10000 + 118
    USE_SSL = 0 + 119
    POSTFIELDSIZE_LARGE = 30000 + 120
    TCP_NODELAY = 0 + 121
    FTPSSLAUTH = 0 + 129
    IOCTLFUNCTION = 20000 + 130
    IOCTLDATA = 10000 + 131
    FTP_ACCOUNT = 10000 + 134
    COOKIELIST = 10000 + 135
    IGNORE_CONTENT_LENGTH = 0 + 136
    FTP_SKIP_PASV_IP = 0 + 137
    FTP_FILEMETHOD = 0 + 138
    LOCALPORT = 0 + 139
    LOCALPORTRANGE = 0 + 140
    CONNECT_ONLY = 0 + 141
    CONV_FROM_NETWORK_FUNCTION = 20000 + 142
    CONV_TO_NETWORK_FUNCTION = 20000 + 143
    CONV_FROM_UTF8_FUNCTION = 20000 + 144
    MAX_SEND_SPEED_LARGE = 30000 + 145
    MAX_RECV_SPEED_LARGE = 30000 + 146
    FTP_ALTERNATIVE_TO_USER = 10000 + 147
    SOCKOPTFUNCTION = 20000 + 148
    SOCKOPTDATA = 10000 + 149
    SSL_SESSIONID_CACHE = 0 + 150
    SSH_AUTH_TYPES = 0 + 151
    SSH_PUBLIC_KEYFILE = 10000 + 152
    SSH_PRIVATE_KEYFILE = 10000 + 153
    FTP_SSL_CCC = 0 + 154
    TIMEOUT_MS = 0 + 155
    CONNECTTIMEOUT_MS = 0 + 156
    HTTP_TRANSFER_DECODING = 0 + 157
    HTTP_CONTENT_DECODING = 0 + 158
    NEW_FILE_PERMS = 0 + 159
    NEW_DIRECTORY_PERMS = 0 + 160
    POSTREDIR = 0 + 161
    SSH_HOST_PUBLIC_KEY_MD5 = 10000 + 162
    OPENSOCKETFUNCTION = 20000 + 163
    OPENSOCKETDATA = 10000 + 164
    COPYPOSTFIELDS = 10000 + 165
    PROXY_TRANSFER_MODE = 0 + 166
    SEEKFUNCTION = 20000 + 167
    SEEKDATA = 10000 + 168
    CRLFILE = 10000 + 169
    ISSUERCERT = 10000 + 170
    ADDRESS_SCOPE = 0 + 171
    CERTINFO = 0 + 172
    USERNAME = 10000 + 173
    PASSWORD = 10000 + 174
    PROXYUSERNAME = 10000 + 175
    PROXYPASSWORD = 10000 + 176
    NOPROXY = 10000 + 177
    TFTP_BLKSIZE = 0 + 178
    SOCKS5_GSSAPI_SERVICE = 10000 + 179
    SOCKS5_GSSAPI_NEC = 0 + 180
    PROTOCOLS = 0 + 181
    REDIR_PROTOCOLS = 0 + 182
    SSH_KNOWNHOSTS = 10000 + 183
    SSH_KEYFUNCTION = 20000 + 184
    SSH_KEYDATA = 10000 + 185
    MAIL_FROM = 10000 + 186
    MAIL_RCPT = 10000 + 187
    FTP_USE_PRET = 0 + 188
    RTSP_REQUEST = 0 + 189
    RTSP_SESSION_ID = 10000 + 190
    RTSP_STREAM_URI = 10000 + 191
    RTSP_TRANSPORT = 10000 + 192
    RTSP_CLIENT_CSEQ = 0 + 193
    RTSP_SERVER_CSEQ = 0 + 194
    INTERLEAVEDATA = 10000 + 195
    INTERLEAVEFUNCTION = 20000 + 196
    WILDCARDMATCH = 0 + 197
    CHUNK_BGN_FUNCTION = 20000 + 198
    CHUNK_END_FUNCTION = 20000 + 199
    FNMATCH_FUNCTION = 20000 + 200
    CHUNK_DATA = 10000 + 201
    FNMATCH_DATA = 10000 + 202
    RESOLVE = 10000 + 203
    TLSAUTH_USERNAME = 10000 + 204
    TLSAUTH_PASSWORD = 10000 + 205
    TLSAUTH_TYPE = 10000 + 206
    TRANSFER_ENCODING = 0 + 207
    CLOSESOCKETFUNCTION = 20000 + 208
    CLOSESOCKETDATA = 10000 + 209
    GSSAPI_DELEGATION = 0 + 210
    DNS_SERVERS = 10000 + 211
    ACCEPTTIMEOUT_MS = 0 + 212
    TCP_KEEPALIVE = 0 + 213
    TCP_KEEPIDLE = 0 + 214
    TCP_KEEPINTVL = 0 + 215
    SSL_OPTIONS = 0 + 216
    MAIL_AUTH = 10000 + 217
    SASL_IR = 0 + 218
    XFERINFOFUNCTION = 20000 + 219
    XOAUTH2_BEARER = 10000 + 220
    DNS_INTERFACE = 10000 + 221
    DNS_LOCAL_IP4 = 10000 + 222
    DNS_LOCAL_IP6 = 10000 + 223
    LOGIN_OPTIONS = 10000 + 224
    SSL_ENABLE_NPN = 0 + 225
    SSL_ENABLE_ALPN = 0 + 226
    EXPECT_100_TIMEOUT_MS = 0 + 227
    PROXYHEADER = 10000 + 228
    HEADEROPT = 0 + 229
    PINNEDPUBLICKEY = 10000 + 230
    UNIX_SOCKET_PATH = 10000 + 231
    SSL_VERIFYSTATUS = 0 + 232
    SSL_FALSESTART = 0 + 233
    PATH_AS_IS = 0 + 234
    PROXY_SERVICE_NAME = 10000 + 235
    SERVICE_NAME = 10000 + 236
    PIPEWAIT = 0 + 237
    DEFAULT_PROTOCOL = 10000 + 238
    STREAM_WEIGHT = 0 + 239
    STREAM_DEPENDS = 10000 + 240
    STREAM_DEPENDS_E = 10000 + 241
    TFTP_NO_OPTIONS = 0 + 242
    CONNECT_TO = 10000 + 243
    TCP_FASTOPEN = 0 + 244
    KEEP_SENDING_ON_ERROR = 0 + 245
    PROXY_CAINFO = 10000 + 246
    PROXY_CAPATH = 10000 + 247
    PROXY_SSL_VERIFYPEER = 0 + 248
    PROXY_SSL_VERIFYHOST = 0 + 249
    PROXY_SSLVERSION = 0 + 250
    PROXY_TLSAUTH_USERNAME = 10000 + 251
    PROXY_TLSAUTH_PASSWORD = 10000 + 252
    PROXY_TLSAUTH_TYPE = 10000 + 253
    PROXY_SSLCERT = 10000 + 254
    PROXY_SSLCERTTYPE = 10000 + 255
    PROXY_SSLKEY = 10000 + 256
    PROXY_SSLKEYTYPE = 10000 + 257
    PROXY_KEYPASSWD = 10000 + 258
    PROXY_SSL_CIPHER_LIST = 10000 + 259
    PROXY_CRLFILE = 10000 + 260
    PROXY_SSL_OPTIONS = 0 + 261
    PRE_PROXY = 10000 + 262
    PROXY_PINNEDPUBLICKEY = 10000 + 263
    ABSTRACT_UNIX_SOCKET = 10000 + 264
    SUPPRESS_CONNECT_HEADERS = 0 + 265
    REQUEST_TARGET = 10000 + 266
    SOCKS5_AUTH = 0 + 267
    SSH_COMPRESSION = 0 + 268
    MIMEPOST = 10000 + 269
    TIMEVALUE_LARGE = 30000 + 270
    HAPPY_EYEBALLS_TIMEOUT_MS = 0 + 271
    RESOLVER_START_FUNCTION = 20000 + 272
    RESOLVER_START_DATA = 10000 + 273
    HAPROXYPROTOCOL = 0 + 274
    DNS_SHUFFLE_ADDRESSES = 0 + 275
    TLS13_CIPHERS = 10000 + 276
    PROXY_TLS13_CIPHERS = 10000 + 277
    DISALLOW_USERNAME_IN_URL = 0 + 278
    DOH_URL = 10000 + 279
    UPLOAD_BUFFERSIZE = 0 + 280
    UPKEEP_INTERVAL_MS = 0 + 281
    CURLU = 10000 + 282
    TRAILERFUNCTION = 20000 + 283
    TRAILERDATA = 10000 + 284
    HTTP09_ALLOWED = 0 + 285
    ALTSVC_CTRL = 0 + 286
    ALTSVC = 10000 + 287
    MAXAGE_CONN = 0 + 288
    SASL_AUTHZID = 10000 + 289
    MAIL_RCPT_ALLOWFAILS = 0 + 290
    SSLCERT_BLOB = 40000 + 291
    SSLKEY_BLOB = 40000 + 292
    PROXY_SSLCERT_BLOB = 40000 + 293
    PROXY_SSLKEY_BLOB = 40000 + 294
    ISSUERCERT_BLOB = 40000 + 295
    PROXY_ISSUERCERT = 10000 + 296
    PROXY_ISSUERCERT_BLOB = 40000 + 297
    SSL_EC_CURVES = 10000 + 298
    HSTS_CTRL = 0 + 299
    HSTS = 10000 + 300
    HSTSREADFUNCTION = 20000 + 301
    HSTSREADDATA = 10000 + 302
    HSTSWRITEFUNCTION = 20000 + 303
    HSTSWRITEDATA = 10000 + 304
    AWS_SIGV4 = 10000 + 305
    DOH_SSL_VERIFYPEER = 0 + 306
    DOH_SSL_VERIFYHOST = 0 + 307
    DOH_SSL_VERIFYSTATUS = 0 + 308
    CAINFO_BLOB = 40000 + 309
    PROXY_CAINFO_BLOB = 40000 + 310
    SSH_HOST_PUBLIC_KEY_SHA256 = 10000 + 311
    PREREQFUNCTION = 20000 + 312
    PREREQDATA = 10000 + 313
    MAXLIFETIME_CONN = 0 + 314
    MIME_OPTIONS = 0 + 315
    SSH_HOSTKEYFUNCTION = 20000 + 316
    SSH_HOSTKEYDATA = 10000 + 317
    PROTOCOLS_STR = 10000 + 318
    REDIR_PROTOCOLS_STR = 10000 + 319
    WS_OPTIONS = 0 + 320
    CA_CACHE_TIMEOUT = 0 + 321
    QUICK_EXIT = 0 + 322
    HAPROXY_CLIENT_IP = 10000 + 323
    SERVER_RESPONSE_TIMEOUT_MS = 0 + 324
    ECH = 10000 + 325
    TCP_KEEPCNT = 0 + 326
    UPLOAD_FLAGS = 0 + 327
    SSL_SIGNATURE_ALGORITHMS = 10000 + 328
    # NOTE(review): ids >= 1000 look like curl-impersonate extensions rather
    # than upstream libcurl options -- confirm against the generator script.
    HTTPBASEHEADER = 10000 + 1000
    SSL_SIG_HASH_ALGS = 10000 + 1001
    SSL_ENABLE_ALPS = 0 + 1002
    SSL_CERT_COMPRESSION = 10000 + 1003
    SSL_ENABLE_TICKET = 0 + 1004
    HTTP2_PSEUDO_HEADERS_ORDER = 10000 + 1005
    HTTP2_SETTINGS = 10000 + 1006
    SSL_PERMUTE_EXTENSIONS = 0 + 1007
    HTTP2_WINDOW_UPDATE = 0 + 1008
    HTTP2_STREAMS = 10000 + 1010
    TLS_GREASE = 0 + 1011
    TLS_EXTENSION_ORDER = 10000 + 1012
    STREAM_EXCLUSIVE = 0 + 1013
    TLS_KEY_USAGE_NO_CHECK = 0 + 1014
    TLS_SIGNED_CERT_TIMESTAMPS = 0 + 1015
    TLS_STATUS_REQUEST = 0 + 1016
    TLS_DELEGATED_CREDENTIALS = 10000 + 1017
    TLS_RECORD_SIZE_LIMIT = 0 + 1018
    TLS_KEY_SHARES_LIMIT = 0 + 1019
    TLS_USE_NEW_ALPS_CODEPOINT = 0 + 1020
    HTTP2_NO_PRIORITY = 0 + 1021
    PROXY_CREDENTIAL_NO_REUSE = 0 + 1022

    # Alternative names for existing members. Each one is only defined when
    # the canonical member exists in the class namespace; because it reuses
    # that member's value, the enum machinery registers it as an alias.
    if locals().get("WRITEDATA"):
        FILE = locals().get("WRITEDATA")
    if locals().get("READDATA"):
        INFILE = locals().get("READDATA")
    if locals().get("HEADERDATA"):
        WRITEHEADER = locals().get("HEADERDATA")
|
||||
|
||||
|
||||
class CurlInfo(IntEnum):
    """``CURLINFO_`` constants extracted from libcurl,
    see: https://curl.se/libcurl/c/curl_easy_getinfo.html

    Each value is written as ``<type mask> + <info id>``. ``Curl.getinfo``
    masks the value with ``0xF00000`` to choose the C output type:
    0x100000 -> char**, 0x200000 -> long*, 0x300000 -> double*,
    0x400000 -> struct curl_slist **, 0x500000 -> long*, 0x600000 -> int64_t*.
    """

    TEXT = 0
    EFFECTIVE_URL = 0x100000 + 1
    RESPONSE_CODE = 0x200000 + 2
    TOTAL_TIME = 0x300000 + 3
    NAMELOOKUP_TIME = 0x300000 + 4
    CONNECT_TIME = 0x300000 + 5
    PRETRANSFER_TIME = 0x300000 + 6
    SIZE_UPLOAD_T = 0x600000 + 7
    SIZE_DOWNLOAD_T = 0x600000 + 8
    SPEED_DOWNLOAD_T = 0x600000 + 9
    SPEED_UPLOAD_T = 0x600000 + 10
    HEADER_SIZE = 0x200000 + 11
    REQUEST_SIZE = 0x200000 + 12
    SSL_VERIFYRESULT = 0x200000 + 13
    FILETIME = 0x200000 + 14
    FILETIME_T = 0x600000 + 14
    CONTENT_LENGTH_DOWNLOAD_T = 0x600000 + 15
    CONTENT_LENGTH_UPLOAD_T = 0x600000 + 16
    STARTTRANSFER_TIME = 0x300000 + 17
    CONTENT_TYPE = 0x100000 + 18
    REDIRECT_TIME = 0x300000 + 19
    REDIRECT_COUNT = 0x200000 + 20
    PRIVATE = 0x100000 + 21
    HTTP_CONNECTCODE = 0x200000 + 22
    HTTPAUTH_AVAIL = 0x200000 + 23
    PROXYAUTH_AVAIL = 0x200000 + 24
    OS_ERRNO = 0x200000 + 25
    NUM_CONNECTS = 0x200000 + 26
    SSL_ENGINES = 0x400000 + 27
    COOKIELIST = 0x400000 + 28
    FTP_ENTRY_PATH = 0x100000 + 30
    REDIRECT_URL = 0x100000 + 31
    PRIMARY_IP = 0x100000 + 32
    APPCONNECT_TIME = 0x300000 + 33
    CERTINFO = 0x400000 + 34
    CONDITION_UNMET = 0x200000 + 35
    RTSP_SESSION_ID = 0x100000 + 36
    RTSP_CLIENT_CSEQ = 0x200000 + 37
    RTSP_SERVER_CSEQ = 0x200000 + 38
    RTSP_CSEQ_RECV = 0x200000 + 39
    PRIMARY_PORT = 0x200000 + 40
    LOCAL_IP = 0x100000 + 41
    LOCAL_PORT = 0x200000 + 42
    ACTIVESOCKET = 0x500000 + 44
    TLS_SSL_PTR = 0x400000 + 45
    HTTP_VERSION = 0x200000 + 46
    PROXY_SSL_VERIFYRESULT = 0x200000 + 47
    SCHEME = 0x100000 + 49
    TOTAL_TIME_T = 0x600000 + 50
    NAMELOOKUP_TIME_T = 0x600000 + 51
    CONNECT_TIME_T = 0x600000 + 52
    PRETRANSFER_TIME_T = 0x600000 + 53
    STARTTRANSFER_TIME_T = 0x600000 + 54
    REDIRECT_TIME_T = 0x600000 + 55
    APPCONNECT_TIME_T = 0x600000 + 56
    RETRY_AFTER = 0x600000 + 57
    EFFECTIVE_METHOD = 0x100000 + 58
    PROXY_ERROR = 0x200000 + 59
    REFERER = 0x100000 + 60
    CAINFO = 0x100000 + 61
    CAPATH = 0x100000 + 62
    XFER_ID = 0x600000 + 63
    CONN_ID = 0x600000 + 64
    QUEUE_TIME_T = 0x600000 + 65
    USED_PROXY = 0x200000 + 66
    POSTTRANSFER_TIME_T = 0x600000 + 67
    EARLYDATA_SENT_T = 0x600000 + 68
    HTTPAUTH_USED = 0x200000 + 69
    PROXYAUTH_USED = 0x200000 + 70
    # Highest info id in the table above (carries no type mask).
    LASTONE = 70

    # Alternative name for RESPONSE_CODE; same value, so it becomes an alias.
    if locals().get("RESPONSE_CODE"):
        HTTP_CODE = locals().get("RESPONSE_CODE")
|
||||
|
||||
|
||||
class CurlMOpt(IntEnum):
    """``CURLMOPT_`` constants extracted from libcurl,
    see: https://curl.se/libcurl/c/curl_multi_setopt.html

    Values are written as ``<type offset> + <option id>``, using the same
    0 / 10000 / 20000 / 30000 offsets that appear in ``CurlOpt``.
    """

    SOCKETFUNCTION = 20000 + 1
    SOCKETDATA = 10000 + 2
    PIPELINING = 0 + 3
    TIMERFUNCTION = 20000 + 4
    TIMERDATA = 10000 + 5
    MAXCONNECTS = 0 + 6
    MAX_HOST_CONNECTIONS = 0 + 7
    MAX_PIPELINE_LENGTH = 0 + 8
    CONTENT_LENGTH_PENALTY_SIZE = 30000 + 9
    CHUNK_LENGTH_PENALTY_SIZE = 30000 + 10
    PIPELINING_SITE_BL = 10000 + 11
    PIPELINING_SERVER_BL = 10000 + 12
    MAX_TOTAL_CONNECTIONS = 0 + 13
    PUSHFUNCTION = 20000 + 14
    PUSHDATA = 10000 + 15
    MAX_CONCURRENT_STREAMS = 0 + 16
|
||||
|
||||
|
||||
class CurlECode(IntEnum):
    """``CURLE_`` error code constants extracted from libcurl,
    see: https://curl.se/libcurl/c/libcurl-errors.html"""

    OK = 0
    UNSUPPORTED_PROTOCOL = 1
    FAILED_INIT = 2
    URL_MALFORMAT = 3
    NOT_BUILT_IN = 4
    COULDNT_RESOLVE_PROXY = 5
    COULDNT_RESOLVE_HOST = 6
    COULDNT_CONNECT = 7
    WEIRD_SERVER_REPLY = 8
    REMOTE_ACCESS_DENIED = 9
    FTP_ACCEPT_FAILED = 10
    FTP_WEIRD_PASS_REPLY = 11
    FTP_ACCEPT_TIMEOUT = 12
    FTP_WEIRD_PASV_REPLY = 13
    FTP_WEIRD_227_FORMAT = 14
    FTP_CANT_GET_HOST = 15
    HTTP2 = 16
    FTP_COULDNT_SET_TYPE = 17
    PARTIAL_FILE = 18
    FTP_COULDNT_RETR_FILE = 19
    OBSOLETE20 = 20
    QUOTE_ERROR = 21
    HTTP_RETURNED_ERROR = 22
    WRITE_ERROR = 23
    OBSOLETE24 = 24
    UPLOAD_FAILED = 25
    READ_ERROR = 26
    OUT_OF_MEMORY = 27
    OPERATION_TIMEDOUT = 28
    OBSOLETE29 = 29
    FTP_PORT_FAILED = 30
    FTP_COULDNT_USE_REST = 31
    OBSOLETE32 = 32
    RANGE_ERROR = 33
    OBSOLETE34 = 34
    SSL_CONNECT_ERROR = 35
    BAD_DOWNLOAD_RESUME = 36
    FILE_COULDNT_READ_FILE = 37
    LDAP_CANNOT_BIND = 38
    LDAP_SEARCH_FAILED = 39
    OBSOLETE40 = 40
    OBSOLETE41 = 41
    ABORTED_BY_CALLBACK = 42
    BAD_FUNCTION_ARGUMENT = 43
    OBSOLETE44 = 44
    INTERFACE_FAILED = 45
    OBSOLETE46 = 46
    TOO_MANY_REDIRECTS = 47
    UNKNOWN_OPTION = 48
    SETOPT_OPTION_SYNTAX = 49
    OBSOLETE50 = 50
    OBSOLETE51 = 51
    GOT_NOTHING = 52
    SSL_ENGINE_NOTFOUND = 53
    SSL_ENGINE_SETFAILED = 54
    SEND_ERROR = 55
    RECV_ERROR = 56
    OBSOLETE57 = 57
    SSL_CERTPROBLEM = 58
    SSL_CIPHER = 59
    PEER_FAILED_VERIFICATION = 60
    BAD_CONTENT_ENCODING = 61
    OBSOLETE62 = 62
    FILESIZE_EXCEEDED = 63
    USE_SSL_FAILED = 64
    SEND_FAIL_REWIND = 65
    SSL_ENGINE_INITFAILED = 66
    LOGIN_DENIED = 67
    TFTP_NOTFOUND = 68
    TFTP_PERM = 69
    REMOTE_DISK_FULL = 70
    TFTP_ILLEGAL = 71
    TFTP_UNKNOWNID = 72
    REMOTE_FILE_EXISTS = 73
    TFTP_NOSUCHUSER = 74
    OBSOLETE75 = 75
    OBSOLETE76 = 76
    SSL_CACERT_BADFILE = 77
    REMOTE_FILE_NOT_FOUND = 78
    SSH = 79
    SSL_SHUTDOWN_FAILED = 80
    AGAIN = 81
    SSL_CRL_BADFILE = 82
    SSL_ISSUER_ERROR = 83
    FTP_PRET_FAILED = 84
    RTSP_CSEQ_ERROR = 85
    RTSP_SESSION_ERROR = 86
    FTP_BAD_FILE_LIST = 87
    CHUNK_FAILED = 88
    NO_CONNECTION_AVAILABLE = 89
    SSL_PINNEDPUBKEYNOTMATCH = 90
    SSL_INVALIDCERTSTATUS = 91
    HTTP2_STREAM = 92
    RECURSIVE_API_CALL = 93
    AUTH_ERROR = 94
    HTTP3 = 95
    QUIC_CONNECT_ERROR = 96
    PROXY = 97
    SSL_CLIENTCERT = 98
    UNRECOVERABLE_POLL = 99
    TOO_LARGE = 100
    ECH_REQUIRED = 101
    # NOTE(review): the doubled numbers in the RESERVED names below do not
    # match their values; this looks like a generator artifact -- confirm
    # before relying on these names.
    RESERVED115115 = 102
    RESERVED116116 = 103
    RESERVED117117 = 104
    RESERVED118118 = 105
    RESERVED119119 = 106
    RESERVED120120 = 107
    RESERVED121121 = 108
    RESERVED122122 = 109
    RESERVED123123 = 110
    RESERVED124124 = 111
    RESERVED125125 = 112
    RESERVED126126 = 113
|
||||
|
||||
|
||||
class CurlHttpVersion(IntEnum):
    """``CURL_HTTP_VERSION`` constants from libcurl, see comments for details."""

    NONE = 0
    V1_0 = 1  # please use HTTP 1.0 in the request
    V1_1 = 2  # please use HTTP 1.1 in the request
    V2_0 = 3  # please use HTTP 2 in the request
    V2TLS = 4  # use version 2 for HTTPS, version 1.1 for HTTP
    V2_PRIOR_KNOWLEDGE = 5  # please use HTTP 2 without HTTP/1.1 Upgrade
    V3 = 30  # Makes use of explicit HTTP/3 with fallback.
    V3ONLY = 31  # No fallback
|
||||
|
||||
|
||||
class CurlWsFlag(IntEnum):
    """``CURL_WS_FLAG`` constants from libcurl.

    Every member is a distinct single bit, so members can be combined with
    bitwise OR.
    """

    TEXT = 1 << 0
    BINARY = 1 << 1
    CONT = 1 << 2
    CLOSE = 1 << 3
    PING = 1 << 4
    OFFSET = 1 << 5
|
||||
|
||||
|
||||
class CurlSslVersion(IntEnum):
    """``CURL_SSLVERSION`` constants from libcurl."""

    DEFAULT = 0
    TLSv1 = 1
    SSLv2 = 2
    SSLv3 = 3
    TLSv1_0 = 4
    TLSv1_1 = 5
    TLSv1_2 = 6
    TLSv1_3 = 7
    # Lives in the upper 16 bits, unlike the small version numbers above.
    MAX_DEFAULT = 1 << 16
|
||||
|
||||
|
||||
class CurlIpResolve(IntEnum):
    """``CURL_IPRESOLVE`` constants from libcurl, see comments for details."""

    WHATEVER = 0  # default, uses addresses to all IP versions that your system allows
    V4 = 1  # uses only IPv4 addresses/connections
    V6 = 2  # uses only IPv6 addresses/connections
|
||||
@@ -0,0 +1,612 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import struct
|
||||
import sys
|
||||
import warnings
|
||||
from http.cookies import SimpleCookie
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Any, Literal, Optional, Union, cast
|
||||
|
||||
import certifi
|
||||
|
||||
from ._wrapper import ffi, lib
|
||||
from .const import CurlECode, CurlHttpVersion, CurlInfo, CurlOpt, CurlWsFlag
|
||||
from .utils import CurlCffiWarning
|
||||
|
||||
# CA bundle path reported by ``certifi``; ``Curl`` falls back to it when no
# ``cacert`` argument is given.
DEFAULT_CACERT = certifi.where()
# Matches an HTTP status line (bytes) and captures only the reason phrase.
REASON_PHRASE_RE = re.compile(rb"HTTP/\d\.\d [0-9]{3} (.*)")
# Matches an HTTP status line (bytes) and captures version, status code and
# reason phrase.
STATUS_LINE_RE = re.compile(rb"HTTP/(\d\.\d) ([0-9]{3}) (.*)")
|
||||
|
||||
if TYPE_CHECKING:

    # Typing-only stub: defined for static type checkers, never at runtime.
    # NOTE(review): the fields presumably mirror libcurl's
    # ``struct curl_ws_frame`` -- confirm against the cffi definitions.
    class CurlWsFrame:
        age: int
        flags: int
        offset: int
        bytesleft: int
        len: int
|
||||
|
||||
|
||||
class CurlError(Exception):
    """Root exception type of the curl_cffi package.

    Carries the numeric curl error code in ``code``; ``0`` means that no
    specific code was supplied.
    """

    def __init__(self, msg, code: Union[CurlECode, Literal[0]] = 0, *args, **kwargs):
        # Only ``msg`` and the extra arguments are handed to ``Exception``;
        # ``code`` is kept as an attribute and stays out of ``self.args``.
        self.code: Union[CurlECode, Literal[0]] = code
        super().__init__(msg, *args, **kwargs)
|
||||
|
||||
|
||||
# Message categories that the debug callback receives as ``type_``;
# ``debug_function_default`` maps each of them to an output prefix.
CURLINFO_TEXT = 0
CURLINFO_HEADER_IN = 1
CURLINFO_HEADER_OUT = 2
CURLINFO_DATA_IN = 3
CURLINFO_DATA_OUT = 4
CURLINFO_SSL_DATA_IN = 5
CURLINFO_SSL_DATA_OUT = 6

# Special return values of a write callback; ``write_callback`` passes them
# through to libcurl unchanged instead of comparing them with the chunk size.
CURL_WRITEFUNC_PAUSE = 0x10000001
CURL_WRITEFUNC_ERROR = 0xFFFFFFFF
|
||||
|
||||
|
||||
@ffi.def_extern()
def debug_function(curl, type_: int, data, size: int, clientp) -> int:
    """cffi-side debug callback: forward a debug message to the Python handler.

    ``clientp`` is a handle to the Python callable; it is invoked with the
    message type and a bytes copy of the ``size`` bytes at ``data``.
    Always returns 0.
    """
    ffi.from_handle(clientp)(type_, ffi.buffer(data, size)[:])
    return 0
|
||||
|
||||
|
||||
def bytes_to_hex(b: bytes, uppercase: bool = False) -> str:
    """Render ``b`` as two-digit hex pairs separated by single spaces.

    For example ``b"\\x0a\\xff\\x3c"`` becomes ``"0a ff 3c"``, or
    ``"0A FF 3C"`` when ``uppercase`` is true. Empty input gives ``""``.
    """
    spec = "02X" if uppercase else "02x"
    pairs = [format(octet, spec) for octet in b]
    return " ".join(pairs)
|
||||
|
||||
|
||||
def debug_function_default(type_: int, data: bytes) -> None:
    """Default debug handler: print one curl debug message to ``sys.stderr``.

    Every message gets a prefix that depends on its type. TLS payloads, and
    any payload that is not valid UTF-8, are shown as a hex dump of at most
    the first 40 bytes; everything else is written as decoded text.
    """
    byte_limit = 40
    markers = {
        CURLINFO_TEXT: "*",
        CURLINFO_HEADER_IN: "<",
        CURLINFO_HEADER_OUT: ">",
        CURLINFO_DATA_IN: "< DATA",
        CURLINFO_DATA_OUT: "> DATA",
        CURLINFO_SSL_DATA_IN: "< SSL",
        CURLINFO_SSL_DATA_OUT: "> SSL",
    }
    marker = markers.get(type_, "*")

    def write_hex_dump() -> None:
        # An ellipsis marks a dump that was cut off at ``byte_limit`` bytes.
        shown = bytes_to_hex(data[:byte_limit])
        tail = "..." if len(data) > byte_limit else ""
        sys.stderr.write(f"{marker} [{len(data)} bytes]: {shown}{tail}\n")

    # always show ssl data in binary format
    if type_ in (CURLINFO_SSL_DATA_IN, CURLINFO_SSL_DATA_OUT):
        write_hex_dump()
        return

    try:
        decoded = data.decode("utf-8")
        sys.stderr.write(f"{marker} {decoded}")
        # Text and header messages are written as-is; other kinds get a
        # newline appended.
        if type_ not in (CURLINFO_TEXT, CURLINFO_HEADER_IN, CURLINFO_HEADER_OUT):
            sys.stderr.write("\n")
    except UnicodeDecodeError:
        # Not valid UTF-8: fall back to the hex representation.
        write_hex_dump()
|
||||
|
||||
|
||||
@ffi.def_extern()
def buffer_callback(ptr, size, nmemb, userdata):
    """cffi-side write callback that appends the received bytes to a buffer.

    ``userdata`` is a handle to an object with a ``write`` method. Returns
    ``nmemb * size``, the byte count reported back to libcurl.
    """
    # assert size == 1
    sink = ffi.from_handle(userdata)
    chunk = ffi.buffer(ptr, nmemb)[:]
    sink.write(chunk)
    return nmemb * size
|
||||
|
||||
|
||||
def ensure_int(s):
    """Convert ``s`` with ``int()``; any falsy value (``None``, ``""``, ``0``) gives 0."""
    return int(s) if s else 0
|
||||
|
||||
|
||||
@ffi.def_extern()
def write_callback(ptr, size, nmemb, userdata):
    """cffi-side write callback that hands the received bytes to a Python callable.

    ``userdata`` is a handle to the callable. If it returns the pause or
    error sentinel, that value is returned to libcurl unchanged. Otherwise
    the full chunk size is reported, with a warning when the callable's
    return value disagrees with it.
    """
    # although similar enough to the function above, kept here for performance reasons
    handler = ffi.from_handle(userdata)
    consumed = ensure_int(handler(ffi.buffer(ptr, nmemb)[:]))
    if consumed in (CURL_WRITEFUNC_PAUSE, CURL_WRITEFUNC_ERROR):
        return consumed
    expected = nmemb * size
    # should make this an exception in future versions
    if consumed != expected:
        warnings.warn("Wrote bytes != received bytes.", CurlCffiWarning, stacklevel=2)
    return expected
|
||||
|
||||
|
||||
# Credits: @alexio777 on https://github.com/lexiforest/curl_cffi/issues/4
|
||||
def slist_to_list(head) -> list[bytes]:
    """Copy the entries of a curl slist into a Python list, then free the slist.

    ``head`` is the first node of the list (or a null pointer). The whole
    list is released with ``curl_slist_free_all`` before returning.
    """

    def walk(node):
        # Follow the ``next`` pointers until a null pointer ends the list.
        while node:
            yield ffi.string(node.data)
            node = node.next

    items = list(walk(head))
    lib.curl_slist_free_all(head)
    return items
|
||||
|
||||
|
||||
class Curl:
|
||||
"""
|
||||
Wrapper for ``curl_easy_*`` functions of libcurl.
|
||||
"""
|
||||
|
||||
def __init__(self, cacert: str = "", debug: bool = False, handle=None) -> None:
    """
    Parameters:
        cacert: path of the CA bundle to use; the ``certifi`` bundle is
            used when empty.
        debug: whether to enable curl debug messages.
        handle: an existing handle from ``curl_easy_init``; a new one is
            created when omitted.
    """
    self._curl = handle or lib.curl_easy_init()

    # slists owned by this object; NULL means "nothing set yet".
    self._headers = ffi.NULL
    self._proxy_headers = ffi.NULL
    self._resolve = ffi.NULL

    self._cacert = cacert if cacert else DEFAULT_CACERT
    self._is_cert_set = False

    # References to cffi handles and bodies handed to libcurl.
    self._write_handle: Any = None
    self._header_handle: Any = None
    self._debug_handle: Any = None
    self._body_handle: Any = None

    # TODO: use CURL_ERROR_SIZE
    self._error_buffer = ffi.new("char[]", 256)
    self._debug = debug
    self._set_error_buffer()
|
||||
|
||||
def _set_error_buffer(self) -> None:
    """Register the error buffer with libcurl and apply the debug setting.

    A failure to register the buffer only produces a warning.
    """
    status = lib._curl_easy_setopt(self._curl, CurlOpt.ERRORBUFFER, self._error_buffer)
    if status != 0:
        warnings.warn("Failed to set error buffer", CurlCffiWarning, stacklevel=2)
    if not self._debug:
        return
    self.debug()
|
||||
|
||||
def debug(self) -> None:
    """Turn on verbose output and install the default debug handler."""
    for opt, val in ((CurlOpt.VERBOSE, 1), (CurlOpt.DEBUGFUNCTION, True)):
        self.setopt(opt, val)
|
||||
|
||||
def __del__(self) -> None:
    """Call ``close()`` when the object is garbage-collected."""
    self.close()
|
||||
|
||||
def _check_error(self, errcode: int, *args: Any) -> None:
    """Raise the error built by ``_get_error`` for ``errcode``, if there is one.

    ``args`` describe the attempted action and end up in the error message.
    """
    failure = self._get_error(errcode, *args)
    if failure is None:
        return
    raise failure
|
||||
|
||||
def _get_error(self, errcode: int, *args: Any):
    """Build (without raising) the ``CurlError`` for a non-zero ``errcode``.

    The message combines the action words in ``args`` with the text in the
    error buffer. Returns ``None`` when ``errcode`` is 0.
    """
    if errcode == 0:
        return None
    detail = ffi.string(self._error_buffer).decode(errors="backslashreplace")
    what = " ".join(str(part) for part in args)
    message = (
        f"Failed to {what}, curl: ({errcode}) {detail}. "
        "See https://curl.se/libcurl/c/libcurl-errors.html first for more "
        "details."
    )
    return CurlError(message, code=cast(CurlECode, errcode))
|
||||
|
||||
def setopt(self, option: CurlOpt, value: Any) -> int:
    """Wrapper for ``curl_easy_setopt``.

    Args:
        option: option to set, using constants from CurlOpt enum
        value: value to set, strings will be handled automatically

    Returns:
        0 if no error, see ``CurlECode``.

    Raises:
        NotImplementedError: if the option's type group is not handled below.
        CurlError: if libcurl reports a non-zero code for the option.
    """
    input_option = {
        # this should be int in curl, but cffi requires pointer for void*
        # it will be converted back in the glue c code.
        0: "long*",
        10000: "char*",
        20000: "void*",
        30000: "int64_t*",  # offset type
        40000: "void*",  # blob type
    }
    # print("option", option, "value", value)

    # Convert value
    # The option's type group is its value rounded down to a multiple of 10000.
    value_type = input_option.get((option // 10000) * 10000)
    if value_type == "long*" or value_type == "int64_t*":
        c_value = ffi.new(value_type, value)
    elif option == CurlOpt.WRITEDATA:
        # ``value`` is wrapped in a cffi handle; the handle is stored on
        # ``self`` so that it stays referenced, and ``buffer_callback`` is
        # installed as the write function.
        c_value = ffi.new_handle(value)
        self._write_handle = c_value
        lib._curl_easy_setopt(
            self._curl, CurlOpt.WRITEFUNCTION, lib.buffer_callback
        )
    elif option == CurlOpt.HEADERDATA:
        c_value = ffi.new_handle(value)
        self._header_handle = c_value
        lib._curl_easy_setopt(
            self._curl, CurlOpt.HEADERFUNCTION, lib.buffer_callback
        )
    elif option == CurlOpt.WRITEFUNCTION:
        # The Python callable travels as the *DATA pointer, while the C-level
        # function is always ``write_callback``; ``option`` is rewritten so
        # that the final setopt call below sets WRITEDATA.
        c_value = ffi.new_handle(value)
        self._write_handle = c_value
        lib._curl_easy_setopt(self._curl, CurlOpt.WRITEFUNCTION, lib.write_callback)
        option = CurlOpt.WRITEDATA
    elif option == CurlOpt.HEADERFUNCTION:
        c_value = ffi.new_handle(value)
        self._header_handle = c_value
        lib._curl_easy_setopt(
            self._curl, CurlOpt.HEADERFUNCTION, lib.write_callback
        )
        option = CurlOpt.HEADERDATA
    elif option == CurlOpt.DEBUGFUNCTION:
        # ``True`` selects the built-in handler that prints to stderr.
        if value is True:
            value = debug_function_default
        c_value = ffi.new_handle(value)
        self._debug_handle = c_value
        lib._curl_easy_setopt(self._curl, CurlOpt.DEBUGFUNCTION, lib.debug_function)
        option = CurlOpt.DEBUGDATA
    elif value_type == "char*":
        c_value = value.encode() if isinstance(value, str) else value
        # Must keep a reference, otherwise may be GCed.
        if option == CurlOpt.POSTFIELDS:
            self._body_handle = c_value
    else:
        raise NotImplementedError(f"Option unsupported: {option}")

    # List-valued options: each item is appended to the slist kept on
    # ``self`` (repeated calls keep extending the same list), and that slist
    # is what gets passed to libcurl.
    if option == CurlOpt.HTTPHEADER:
        for header in value:
            self._headers = lib.curl_slist_append(self._headers, header)
        ret = lib._curl_easy_setopt(self._curl, option, self._headers)
    elif option == CurlOpt.PROXYHEADER:
        for proxy_header in value:
            self._proxy_headers = lib.curl_slist_append(
                self._proxy_headers, proxy_header
            )
        ret = lib._curl_easy_setopt(self._curl, option, self._proxy_headers)
    elif option == CurlOpt.RESOLVE:
        for resolve in value:
            if isinstance(resolve, str):
                resolve = resolve.encode()
            self._resolve = lib.curl_slist_append(self._resolve, resolve)
        ret = lib._curl_easy_setopt(self._curl, option, self._resolve)
    else:
        ret = lib._curl_easy_setopt(self._curl, option, c_value)
    self._check_error(ret, "setopt", option, value)

    # Remember an explicit CA bundle so ``_ensure_cacert`` does not set the
    # default one afterwards.
    if option == CurlOpt.CAINFO:
        self._is_cert_set = True

    return ret
|
||||
|
||||
def getinfo(self, option: CurlInfo) -> Union[bytes, int, float, list]:
    """Wrapper for ``curl_easy_getinfo``; reads a value describing the last
    transfer, typically after ``perform``.

    Parameters:
        option: which value to read, a member of the ``CurlInfo`` enum

    Returns:
        the value as bytes, int or float depending on the option's type
        group, a list for slist-typed options, or ``b""`` when libcurl
        returned a null pointer.
    """
    c_types = {
        0x100000: "char**",
        0x200000: "long*",
        0x300000: "double*",
        0x400000: "struct curl_slist **",
        0x500000: "long*",
        0x600000: "int64_t*",
    }
    converters = {
        0x100000: ffi.string,
        0x200000: int,
        0x300000: float,
        0x500000: int,
        0x600000: int,
    }
    # The type group is encoded in the high bits of the option value.
    group = option & 0xF00000
    out = ffi.new(c_types[group])
    status = lib.curl_easy_getinfo(self._curl, option, out)
    self._check_error(status, "getinfo", option)
    # cookielist and ssl_engines starts with 0x400000, see also: const.py
    if group == 0x400000:
        return slist_to_list(out[0])
    if out[0] == ffi.NULL:
        return b""
    return converters[group](out[0])
|
||||
|
||||
def version(self) -> bytes:
    """Return the version string reported by ``curl_version()``, as bytes."""
    raw_version = lib.curl_version()
    return ffi.string(raw_version)
|
||||
|
||||
def impersonate(self, target: str, default_headers: bool = True) -> int:
    """Choose the browser that this handle should impersonate.

    Parameters:
        target: name of the browser to impersonate.
        default_headers: whether to add default headers, like User-Agent.

    Returns:
        0 if no error.
    """
    encoded_target = target.encode()
    headers_flag = int(default_headers)
    return lib.curl_easy_impersonate(self._curl, encoded_target, headers_flag)
|
||||
|
||||
def _ensure_cacert(self) -> None:
    """Set ``self._cacert`` as ``CAINFO`` and ``PROXY_CAINFO`` unless a
    ``CAINFO`` option has already been set on this handle."""
    if self._is_cert_set:
        return
    pending = (
        (CurlOpt.CAINFO, "set cacert"),
        (CurlOpt.PROXY_CAINFO, "set proxy cacert"),
    )
    for opt, action in pending:
        ret = self.setopt(opt, self._cacert)
        self._check_error(ret, action)
|
||||
|
||||
def perform(self, clear_headers: bool = True) -> None:
    """Wrapper for ``curl_easy_perform``, performs a curl request.

    Parameters:
        clear_headers: clear header slist used in this perform

    Raises:
        CurlError: if the perform was not successful.
    """
    # a cacert store must be configured before the transfer starts
    self._ensure_cacert()

    code = lib.curl_easy_perform(self._curl)
    try:
        self._check_error(code, "perform")
    finally:
        # the cleanup runs whether or not the transfer succeeded
        self.clean_after_perform(clear_headers)
|
||||
|
||||
def upkeep(self) -> int:
    """Wrapper for ``curl_easy_upkeep``.

    Returns:
        the code returned by ``curl_easy_upkeep``.
    """
    code = lib.curl_easy_upkeep(self._curl)
    return code
|
||||
|
||||
def clean_after_perform(self, clear_headers: bool = True) -> None:
    """Clean up handles and buffers after ``perform``, called at the end of
    ``perform``.

    Parameters:
        clear_headers: also free the header and proxy-header slists and reset
            them to NULL.
    """
    # Forget the write/header/debug/body handles.
    for attr in ("_write_handle", "_header_handle", "_debug_handle", "_body_handle"):
        setattr(self, attr, None)

    if not clear_headers:
        return

    for attr in ("_headers", "_proxy_headers"):
        slist = getattr(self, attr)
        if slist != ffi.NULL:
            lib.curl_slist_free_all(slist)
        setattr(self, attr, ffi.NULL)
|
||||
|
||||
def duphandle(self) -> Curl:
    """Wrapper for ``curl_easy_duphandle``.

    This is not a full copy of entire curl object in python. For example, headers
    handle is not copied, you have to set them again.

    Returns:
        a new ``Curl`` built around the duplicated handle, with the same
        ``cacert`` and ``debug`` settings as this one.
    """
    duplicate = lib.curl_easy_duphandle(self._curl)
    return Curl(cacert=self._cacert, debug=self._debug, handle=duplicate)
|
||||
|
||||
def reset(self) -> None:
    """Reset all curl options, wrapper for ``curl_easy_reset``.

    Also marks the CA cert as not set and clears ``self._resolve``.
    """
    self._is_cert_set = False
    handle = self._curl
    if handle is not None:
        lib.curl_easy_reset(handle)
        # the error buffer is set again after the reset
        self._set_error_buffer()
    self._resolve = ffi.NULL
|
||||
|
||||
def parse_cookie_headers(self, headers: list[bytes]) -> SimpleCookie:
    """Extract ``cookies.SimpleCookie`` from header lines.

    Only lines starting with ``set-cookie: `` (compared case-insensitively) are
    loaded; every other line is ignored.

    Parameters:
        headers: list of headers in bytes.

    Returns:
        A parsed cookies.SimpleCookie instance.
    """
    prefix = b"set-cookie: "
    jar: SimpleCookie = SimpleCookie()
    set_cookie_lines = (line for line in headers if line.lower().startswith(prefix))
    for line in set_cookie_lines:
        # strip the header name, keep only the cookie definition
        jar.load(line[len(prefix) :].decode())
    return jar
|
||||
|
||||
@staticmethod
def get_reason_phrase(status_line: bytes) -> bytes:
    """Extract reason phrase, like ``OK``, ``Not Found`` from response status
    line.

    Returns:
        the first group of ``REASON_PHRASE_RE``, or ``b""`` when the line does not
        match.
    """
    match = REASON_PHRASE_RE.match(status_line)
    if not match:
        return b""
    return match.group(1)
|
||||
|
||||
@staticmethod
def parse_status_line(status_line: bytes) -> tuple[CurlHttpVersion, int, bytes]:
    """Parse status line.

    Parameters:
        status_line: the raw status line of a response, as bytes.

    Returns:
        http_version, status_code, and reason phrase. A line that does not match
        ``STATUS_LINE_RE`` gives ``(CurlHttpVersion.V1_0, 0, b"")``; a matched
        version other than 2.0, 1.1 or 1.0 gives ``CurlHttpVersion.NONE``.
    """
    m = STATUS_LINE_RE.match(status_line)
    if not m:
        return CurlHttpVersion.V1_0, 0, b""

    # The line is bytes, so the match groups are bytes as well. They must be
    # looked up with bytes keys: comparing them with str never matches.
    known_versions = {
        b"2.0": CurlHttpVersion.V2_0,
        b"1.1": CurlHttpVersion.V1_1,
        b"1.0": CurlHttpVersion.V1_0,
    }
    http_version = known_versions.get(m.group(1), CurlHttpVersion.NONE)
    status_code = int(m.group(2))
    reason = m.group(3)

    return http_version, status_code, reason
|
||||
|
||||
def close(self) -> None:
    """Close and cleanup curl handle, wrapper for ``curl_easy_cleanup``.

    The error buffer is released and ``self._resolve`` cleared as well.
    """
    handle = self._curl
    if handle:
        lib.curl_easy_cleanup(handle)
        self._curl = None
    ffi.release(self._error_buffer)
    self._resolve = ffi.NULL
|
||||
|
||||
def ws_recv(self, n: int = 1024) -> tuple[bytes, CurlWsFrame]:
    """Receive a frame from a websocket connection.

    Args:
        n: maximum data to receive.

    Returns:
        a tuple of frame content and curl frame meta struct.

    Raises:
        CurlError: if failed.
    """
    chunk = ffi.new("char[]", n)
    received = ffi.new("size_t *")
    frame_ptr = ffi.new("struct curl_ws_frame **")

    ret = lib.curl_ws_recv(self._curl, chunk, n, received, frame_ptr)
    self._check_error(ret, "WS_RECV")

    # Only the first ``received[0]`` bytes of the buffer were filled.
    content = ffi.buffer(chunk)[: received[0]]
    # Frame meta explained: https://curl.se/libcurl/c/curl_ws_meta.html
    return content, frame_ptr[0]
|
||||
|
||||
def ws_send(self, payload: bytes, flags: CurlWsFlag = CurlWsFlag.BINARY) -> int:
    """Send data to a websocket connection.

    Args:
        payload: content to send.
        flags: websocket flag to set for the frame, default: binary.

    Returns:
        the sent count that ``curl_ws_send`` wrote to its out-parameter.

    Raises:
        CurlError: if failed.
    """
    sent = ffi.new("size_t *")
    data = ffi.from_buffer(payload)
    size = len(payload)
    ret = lib.curl_ws_send(self._curl, data, size, sent, 0, flags)
    self._check_error(ret, "WS_SEND")
    return sent[0]
|
||||
|
||||
def ws_close(self, code: int = 1000, message: bytes = b"") -> int:
    """Close a websocket connection. Shorthand for :meth:`ws_send`
    with close code and message. Note that to completely close the connection,
    you must close the curl handle after this call with :meth:`close`.

    Args:
        code: close code.
        message: close message.

    Returns:
        the value returned by :meth:`ws_send`.

    Raises:
        CurlError: if failed.
    """
    # A close payload is the status code as an unsigned 16-bit big-endian
    # integer followed by the optional message.
    payload = struct.pack("!H", code) + message
    # The frame has to be flagged as CLOSE; with the default flag of ws_send it
    # would go out as an ordinary binary data frame.
    return self.ws_send(payload, flags=CurlWsFlag.CLOSE)
|
||||
|
||||
|
||||
class CurlMime:
    """Wrapper for the ``curl_mime_`` API."""

    def __init__(self, curl: Optional[Curl] = None):
        """
        Args:
            curl: Curl instance to use. A new ``Curl`` is created when omitted.
        """
        self._curl = curl if curl else Curl()
        self._form = lib.curl_mime_init(self._curl._curl)

    def addpart(
        self,
        name: str,
        *,
        content_type: Optional[str] = None,
        filename: Optional[str] = None,
        local_path: Optional[Union[str, bytes, Path]] = None,
        data: Optional[bytes] = None,
    ) -> None:
        """Add a mime part for a multipart html form.

        Note: You can only use either local_path or data, not both.

        Args:
            name: name of the field.
            content_type: content_type for the field. for example: ``image/png``.
            filename: filename for the server.
            local_path: file to upload on local disk.
            data: file content to upload.

        Raises:
            CurlError: if both ``local_path`` and ``data`` are given, or if any of
                the ``curl_mime_*`` calls reports an error.
            FileNotFoundError: if ``local_path`` does not exist.
        """
        # Validate the arguments before touching the form, so that a rejected
        # call does not leave a half-configured part attached to it.
        if local_path and data:
            raise CurlError("Can not use local_path and data at the same time.")

        local_path_str: Optional[str] = None
        if local_path is not None:
            if isinstance(local_path, Path):
                local_path_str = str(local_path)
            elif isinstance(local_path, bytes):
                local_path_str = local_path.decode()
            else:
                local_path_str = local_path

            if not Path(local_path_str).exists():
                raise FileNotFoundError(f"File not found at {local_path_str}")

        part = lib.curl_mime_addpart(self._form)

        ret = lib.curl_mime_name(part, name.encode())
        if ret != 0:
            raise CurlError("Add field failed.")

        # mime type
        if content_type is not None:
            ret = lib.curl_mime_type(part, content_type.encode())
            if ret != 0:
                raise CurlError("Add field failed.")

        # remote file name
        if filename is not None:
            ret = lib.curl_mime_filename(part, filename.encode())
            if ret != 0:
                raise CurlError("Add field failed.")

        # content read from a file on disk
        if local_path_str is not None:
            ret = lib.curl_mime_filedata(part, local_path_str.encode())
            if ret != 0:
                raise CurlError("Add field failed.")

        # content given directly
        if data is not None:
            if not isinstance(data, bytes):
                data = str(data).encode()
            ret = lib.curl_mime_data(part, data, len(data))
            # checked like every other curl_mime_* call above
            if ret != 0:
                raise CurlError("Add field failed.")

    @classmethod
    def from_list(cls, files: list[dict]):
        """Create a multipart instance from a list of dict, for keys, see ``addpart``"""
        form = cls()
        for file in files:
            form.addpart(**file)
        return form

    def attach(self, curl: Optional[Curl] = None) -> None:
        """Attach the mime instance to a curl instance.

        Args:
            curl: Curl instance to attach to; the instance given to the
                constructor is used when omitted.
        """
        c = curl if curl else self._curl
        c.setopt(CurlOpt.MIMEPOST, self._form)

    def close(self) -> None:
        """Close the mime instance and underlying files. This method must be called
        after ``perform`` or ``request``."""
        # ``__del__`` also runs on an instance whose ``__init__`` raised before
        # ``_form`` was assigned, so the attribute may be missing.
        form = getattr(self, "_form", ffi.NULL)
        if form != ffi.NULL:
            lib.curl_mime_free(form)
        self._form = ffi.NULL

    def __del__(self) -> None:
        self.close()
|
||||
@@ -0,0 +1 @@
|
||||
# Marker file for PEP 561.
|
||||
+171
@@ -0,0 +1,171 @@
|
||||
# Public names of the package. ``trace`` and ``query`` are listed because they
# are defined below next to the other per-method helpers.
__all__ = [
    "Session",
    "AsyncSession",
    "BrowserType",
    "BrowserTypeLiteral",
    "CurlWsFlag",
    "request",
    "head",
    "get",
    "post",
    "put",
    "patch",
    "delete",
    "options",
    "trace",
    "query",
    "RequestsError",
    "Cookies",
    "Headers",
    "Request",
    "Response",
    "AsyncWebSocket",
    "WebSocket",
    "WebSocketError",
    "WebSocketClosed",
    "WebSocketTimeout",
    "WsCloseCode",
    "ExtraFingerprints",
    "CookieTypes",
    "HeaderTypes",
    "ProxySpec",
]
|
||||
|
||||
from typing import Optional, TYPE_CHECKING, TypedDict
|
||||
|
||||
from ..const import CurlWsFlag
|
||||
from .cookies import Cookies, CookieTypes
|
||||
from .errors import RequestsError
|
||||
from .headers import Headers, HeaderTypes
|
||||
from .impersonate import BrowserType, BrowserTypeLiteral, ExtraFingerprints
|
||||
from .models import Request, Response
|
||||
from .session import (
|
||||
AsyncSession,
|
||||
HttpMethod,
|
||||
ProxySpec,
|
||||
Session,
|
||||
ThreadType,
|
||||
RequestParams,
|
||||
Unpack,
|
||||
)
|
||||
from .websockets import (
|
||||
AsyncWebSocket,
|
||||
WebSocket,
|
||||
WebSocketClosed,
|
||||
WebSocketError,
|
||||
WebSocketTimeout,
|
||||
WsCloseCode,
|
||||
)
|
||||
|
||||
if TYPE_CHECKING:

    # Seen by static type checkers only: ``RequestParams`` extended with the three
    # keyword arguments that ``request`` declares explicitly.
    class SessionRequestParams(RequestParams, total=False):
        thread: Optional[ThreadType]
        curl_options: Optional[dict]
        debug: Optional[bool]

else:
    # At runtime the name only has to exist for the ``Unpack[...]`` annotations
    # of the helpers below, so it is bound to ``TypedDict`` itself.
    SessionRequestParams = TypedDict
|
||||
|
||||
|
||||
def request(
    method: HttpMethod,
    url: str,
    thread: Optional[ThreadType] = None,
    curl_options: Optional[dict] = None,
    debug: Optional[bool] = None,
    **kwargs: Unpack[RequestParams],
) -> Response:
    """Send an http request with a ``Session`` that lives only for this call.

    Parameters:
        method: http method for the request: GET/POST/PUT/DELETE etc.
        url: url for the requests.
        params: query string for the requests.
        data: form values(dict/list/tuple) or binary data to use in body,
            ``Content-Type: application/x-www-form-urlencoded`` will be added if a dict
            is given.
        json: json values to use in body, `Content-Type: application/json` will be added
            automatically.
        headers: headers to send.
        cookies: cookies to use.
        files: not supported, use ``multipart`` instead.
        auth: HTTP basic auth, a tuple of (username, password), only basic auth is
            supported.
        timeout: how many seconds to wait before giving up.
        allow_redirects: whether to allow redirection.
        max_redirects: max redirect counts, default 30, use -1 for unlimited.
        proxies: dict of proxies to use, prefer to use ``proxy`` if they are the same.
            format: ``{"http": proxy_url, "https": proxy_url}``.
        proxy: proxy to use, format: "http://user:pass@proxy_url".
            Can't be used with `proxies` parameter.
        proxy_auth: HTTP basic auth for proxy, a tuple of (username, password).
        verify: whether to verify https certs.
        referer: shortcut for setting referer header.
        accept_encoding: shortcut for setting accept-encoding header.
        content_callback: a callback function to receive response body.
            ``def callback(chunk: bytes) -> None:``
        impersonate: which browser version to impersonate.
        ja3: ja3 string to impersonate.
        akamai: akamai string to impersonate.
        extra_fp: extra fingerprints options, in complement to ja3 and akamai strings.
        thread: thread engine to use for working with other thread implementations.
            choices: eventlet, gevent.
        default_headers: whether to set default browser headers when impersonating.
        default_encoding: encoding for decoding response content if charset is not found
            in headers. Defaults to "utf-8". Can be set to a callable for automatic
            detection.
        quote: Set characters to be quoted, i.e. percent-encoded. Default safe string
            is ``!#$%&'()*+,/:;=?@[]~``. If set to a string, the character will be
            removed from the safe string, thus quoted. If set to False, the url will be
            kept as is, without any automatic percent-encoding, you must encode the URL
            yourself.
        curl_options: extra curl options to use.
        http_version: limiting http version, defaults to http2.
        debug: print extra curl debug info.
        interface: which interface to use.
        cert: a tuple of (cert, key) filenames for client cert.
        stream: streaming the response, default False.
        max_recv_speed: maximum receive speed, bytes per second.
        multipart: upload files using the multipart format, see examples for details.
        discard_cookies: discard cookies from server. Default to False.

    Returns:
        A ``Response`` object.
    """
    # ``None`` means "not specified" and is treated as False.
    if debug is None:
        debug = False
    session = Session(thread=thread, curl_options=curl_options, debug=debug)
    with session as s:
        return s.request(method=method, url=url, **kwargs)
|
||||
|
||||
|
||||
def head(url: str, **kwargs: Unpack[SessionRequestParams]):
    """Send a ``HEAD`` request to ``url``; keyword arguments are passed on to
    :func:`request`."""
    return request("HEAD", url, **kwargs)
|
||||
|
||||
|
||||
def get(url: str, **kwargs: Unpack[SessionRequestParams]):
    """Send a ``GET`` request to ``url``; keyword arguments are passed on to
    :func:`request`."""
    return request("GET", url, **kwargs)
|
||||
|
||||
|
||||
def post(url: str, **kwargs: Unpack[SessionRequestParams]):
    """Send a ``POST`` request to ``url``; keyword arguments are passed on to
    :func:`request`."""
    return request("POST", url, **kwargs)
|
||||
|
||||
|
||||
def put(url: str, **kwargs: Unpack[SessionRequestParams]):
    """Send a ``PUT`` request to ``url``; keyword arguments are passed on to
    :func:`request`."""
    return request("PUT", url, **kwargs)
|
||||
|
||||
|
||||
def patch(url: str, **kwargs: Unpack[SessionRequestParams]):
    """Send a ``PATCH`` request to ``url``; keyword arguments are passed on to
    :func:`request`."""
    return request("PATCH", url, **kwargs)
|
||||
|
||||
|
||||
def delete(url: str, **kwargs: Unpack[SessionRequestParams]):
    """Send a ``DELETE`` request to ``url``; keyword arguments are passed on to
    :func:`request`."""
    return request("DELETE", url, **kwargs)
|
||||
|
||||
|
||||
def options(url: str, **kwargs: Unpack[SessionRequestParams]):
    """Send an ``OPTIONS`` request to ``url``; keyword arguments are passed on to
    :func:`request`."""
    return request("OPTIONS", url, **kwargs)
|
||||
|
||||
|
||||
def trace(url: str, **kwargs: Unpack[SessionRequestParams]):
    """Send a ``TRACE`` request to ``url``; keyword arguments are passed on to
    :func:`request`."""
    return request("TRACE", url, **kwargs)
|
||||
|
||||
|
||||
def query(url: str, **kwargs: Unpack[SessionRequestParams]):
    """Send a ``QUERY`` request to ``url``; keyword arguments are passed on to
    :func:`request`."""
    return request("QUERY", url, **kwargs)
|
||||
@@ -0,0 +1,364 @@
|
||||
# Adapted from: https://github.com/encode/httpx/blob/master/httpx/_models.py,
|
||||
# which is licensed under the BSD License.
|
||||
# See https://github.com/encode/httpx/blob/master/LICENSE.md
|
||||
|
||||
__all__ = ["Cookies"]
|
||||
|
||||
import re
|
||||
import time
|
||||
import warnings
|
||||
from dataclasses import dataclass
|
||||
from http.cookiejar import Cookie, CookieJar
|
||||
from http.cookies import _unquote
|
||||
from typing import Optional, Union
|
||||
from collections.abc import Iterator, MutableMapping
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from ..utils import CurlCffiWarning
|
||||
from .errors import CookieConflict, RequestsError
|
||||
|
||||
# Every form of cookies accepted by ``Cookies.__init__``: another ``Cookies``, a
# ``CookieJar``, a name -> value dict, or a list of (name, value) pairs.
CookieTypes = Union["Cookies", CookieJar, dict[str, str], list[tuple[str, str]]]
|
||||
|
||||
|
||||
@dataclass
class CurlMorsel:
    """A single cookie in the tab-separated format handled by
    ``from_curl_format`` / ``to_curl_format``, convertible from and to
    ``http.cookiejar.Cookie``."""

    name: str
    value: str
    hostname: str = ""
    subdomains: bool = False
    path: str = "/"
    secure: bool = False
    expires: int = 0
    http_only: bool = False

    @staticmethod
    def parse_bool(s):
        """Return True only for the exact string ``"TRUE"``."""
        return s == "TRUE"

    @staticmethod
    def dump_bool(s):
        """Return ``"TRUE"`` for a truthy value, ``"FALSE"`` otherwise."""
        if s:
            return "TRUE"
        return "FALSE"

    @classmethod
    def from_curl_format(cls, set_cookie_line: bytes):
        """Build a morsel from one line of seven tab-separated fields: hostname,
        subdomains, path, secure, expires, name, value."""
        fields = set_cookie_line.decode().split("\t")
        hostname, subdomains, path, secure, expires, name, value = fields
        # e.g. #HttpOnly_postman-echo.com
        http_only = hostname.startswith("#")
        # len("#HttpOnly_") == 10
        domain = hostname[10:] if http_only else hostname
        return cls(
            name=name,
            value=_unquote(value),
            hostname=domain,
            subdomains=cls.parse_bool(subdomains),
            path=path,
            secure=cls.parse_bool(secure),
            expires=int(expires),
            http_only=http_only,
        )

    def to_curl_format(self):
        """Serialize to the seven tab-separated fields read by
        ``from_curl_format``.

        Raises:
            RequestsError: if the morsel has no hostname.
        """
        if not self.hostname:
            raise RequestsError(f"Domain not found for cookie {self.name}={self.value}")
        columns = (
            self.hostname,
            self.dump_bool(self.subdomains),
            self.path,
            self.dump_bool(self.secure),
            str(self.expires),
            self.name,
            self.value,
        )
        return "\t".join(columns)

    @classmethod
    def from_cookiejar_cookie(cls, cookie: Cookie):
        """Build a morsel from a ``http.cookiejar.Cookie``; ``http_only`` is always
        False and a missing value or expiry becomes ``""`` / ``0``."""
        value = cookie.value or ""
        expires = int(cookie.expires or 0)
        return cls(
            name=cookie.name,
            value=value,
            hostname=cookie.domain,
            subdomains=cookie.domain_specified,
            path=cookie.path,
            secure=cookie.secure,
            expires=expires,
            http_only=False,
        )

    def to_cookiejar_cookie(self) -> Cookie:
        """Convert to a ``http.cookiejar.Cookie``; an ``expires`` of 0 becomes a
        cookie without expiry that is marked ``discard``."""
        # the leading dot actually does not mean anything nowadays
        # https://stackoverflow.com/a/20884869/1061155
        # https://github.com/python/cpython/blob/d6555abfa7384b5a40435a11bdd2aa6bbf8f5cfc/Lib/http/cookiejar.py#L1535
        never_expires = self.expires == 0
        return Cookie(
            version=0,
            name=self.name,
            value=self.value,
            port=None,
            port_specified=False,
            domain=self.hostname,
            domain_specified=self.subdomains,
            domain_initial_dot=bool(self.hostname.startswith(".")),
            path=self.path,
            path_specified=bool(self.path),
            secure=self.secure,
            expires=None if never_expires else self.expires,
            discard=never_expires,
            comment=None,
            comment_url=None,
            rest=dict(http_only=f"{self.http_only}"),
            rfc2109=False,
        )
|
||||
|
||||
|
||||
# Matches a trailing ``:<digits>`` port; used to strip the port from a host.
cut_port_re = re.compile(r":\d+$", re.ASCII)
# Matches a trailing ``.<digits>`` group, as found at the end of an IPv4 address.
IPV4_RE = re.compile(r"\.\d+$", re.ASCII)
|
||||
|
||||
|
||||
class Cookies(MutableMapping[str, str]):
    """
    HTTP Cookies, as a mutable mapping.

    The cookies live in a ``http.cookiejar.CookieJar`` available as ``self.jar``.
    Mapping access works on cookie names only; use :meth:`get`, :meth:`set` and
    :meth:`delete` to narrow by domain and path.
    """

    def __init__(self, cookies: Optional[CookieTypes] = None) -> None:
        """
        Args:
            cookies: initial cookies. A dict or a list of (name, value) pairs is
                added through :meth:`set`; another ``Cookies`` has its cookies
                copied into a new jar; anything else is used as the jar itself.
        """
        if cookies is None or isinstance(cookies, dict):
            self.jar = CookieJar()
            if isinstance(cookies, dict):
                for key, value in cookies.items():
                    self.set(key, value)
        elif isinstance(cookies, list):
            self.jar = CookieJar()
            for key, value in cookies:
                self.set(key, value)
        elif isinstance(cookies, Cookies):
            self.jar = CookieJar()
            for cookie in cookies.jar:
                self.jar.set_cookie(cookie)
        else:
            # taken as is, not copied
            self.jar = cookies

    def _eff_request_host(self, request) -> str:
        """
        Almost equivalent to the eff_request_host function in:
        https://github.com/python/cpython/blob/3.11/Lib/http/cookiejar.py#L636
        """
        host = urlparse(request.url)[1]
        if host == "":
            host = request.headers.get("Host", "")

        # remove port, if present
        host = cut_port_re.sub("", host, 1)
        host = host.lower()
        # a host without any dot gets the ``.local`` suffix
        if host.find(".") == -1 and not IPV4_RE.search(host):
            host += ".local"
        return host

    def get_cookies_for_curl(self, request) -> list[CurlMorsel]:
        """the process is similar to ``cookiejar.add_cookie_header``, but load all
        cookies"""
        # The jar's own lock is held while iterating it.
        with self.jar._cookies_lock:  # type: ignore
            morsels = []
            self.jar._policy._now = self._now = int(time.time())  # type: ignore
            for cookie in self.jar:
                morsel = CurlMorsel.from_cookiejar_cookie(cookie)
                # cookies stored without a domain get the host of the request
                if not morsel.hostname:
                    morsel.hostname = self._eff_request_host(request)
                morsels.append(morsel)

        self.jar.clear_expired_cookies()
        return morsels

    def update_cookies_from_curl(self, morsels: list[CurlMorsel]):
        """Store the given morsels in the jar, then drop expired cookies."""
        for morsel in morsels:
            cookie = morsel.to_cookiejar_cookie()
            self.jar.set_cookie(cookie)
        self.jar.clear_expired_cookies()

    def set(
        self, name: str, value: str, domain: str = "", path: str = "/", secure=False
    ) -> None:
        """
        Set a cookie value by name. May optionally include domain and path.

        For ``__Secure-`` and ``__Host-`` prefixed names the arguments are adjusted
        to what the prefix requires, and a ``CurlCffiWarning`` is issued.
        """
        # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Set-Cookie
        if name.startswith("__Secure-") and secure is False:
            warnings.warn(
                "`secure` changed to True for `__Secure-` prefixed cookies",
                CurlCffiWarning,
                stacklevel=2,
            )
            secure = True
        elif name.startswith("__Host-") and (secure is False or domain or path != "/"):
            # The message names `secure`, which is the argument actually changed.
            warnings.warn(
                "`secure` changed to True, `domain` removed, `path` changed to `/` "
                "for `__Host-` prefixed cookies",
                CurlCffiWarning,
                stacklevel=2,
            )
            secure = True
            domain = ""
            path = "/"
        kwargs = {
            "version": 0,
            "name": name,
            "value": value,
            "port": None,
            "port_specified": False,
            "domain": domain,
            "domain_specified": bool(domain),
            "domain_initial_dot": domain.startswith("."),
            "path": path,
            "path_specified": bool(path),
            "secure": secure,
            "expires": None,
            "discard": True,
            "comment": None,
            "comment_url": None,
            "rest": {"HttpOnly": None},
            "rfc2109": False,
        }
        cookie = Cookie(**kwargs)
        self.jar.set_cookie(cookie)

    def get(  # type: ignore
        self,
        name: str,
        default: Optional[str] = None,
        domain: Optional[str] = None,
        path: Optional[str] = None,
    ) -> Optional[str]:
        """
        Get a cookie by name. May optionally include domain and path
        in order to specify exactly which cookie to retrieve.

        Raises:
            CookieConflict: if matching cookies with different values exist on
                domains where neither is a suffix of the other.
        """
        value = None
        matched_domain = ""
        for cookie in self.jar:
            if (
                cookie.name == name
                and (domain is None or cookie.domain == domain)
                and (path is None or cookie.path == path)
            ):
                # if cookies on two different domains do not share a same value
                if (
                    value is not None
                    and not matched_domain.endswith(cookie.domain)
                    and not str(cookie.domain).endswith(matched_domain)
                    and value != cookie.value
                ):
                    message = (
                        f"Multiple cookies exist with name={name} on "
                        f"{matched_domain} and {cookie.domain}, add domain "
                        "parameter to suppress this error."
                    )
                    raise CookieConflict(message)
                value = cookie.value
                matched_domain = cookie.domain or ""

        if value is None:
            return default
        return value

    def get_dict(
        self, domain: Optional[str] = None, path: Optional[str] = None
    ) -> dict:
        """
        Cookies with the same name on different domains may overwrite each other,
        do NOT use this function as a method of serialization.
        """
        return {
            cookie.name: cookie.value
            for cookie in self.jar
            if (domain is None or cookie.domain == domain)
            and (path is None or cookie.path == path)
        }

    def delete(
        self,
        name: str,
        domain: Optional[str] = None,
        path: Optional[str] = None,
    ) -> None:
        """
        Delete a cookie by name. May optionally include domain and path
        in order to specify exactly which cookie to delete.
        """
        if domain is not None and path is not None:
            return self.jar.clear(domain, path, name)

        # collect first: the jar must not change while it is being iterated
        remove = [
            cookie
            for cookie in self.jar
            if cookie.name == name
            and (domain is None or cookie.domain == domain)
            and (path is None or cookie.path == path)
        ]

        for cookie in remove:
            self.jar.clear(cookie.domain, cookie.path, cookie.name)

    def clear(self, domain: Optional[str] = None, path: Optional[str] = None) -> None:
        """
        Delete all cookies. Optionally include a domain and path in
        order to only delete a subset of all the cookies.
        """
        args = []
        if domain is not None:
            args.append(domain)
        if path is not None:
            # a path can only be given together with a domain
            assert domain is not None
            args.append(path)
        self.jar.clear(*args)

    def update(self, cookies: Optional[CookieTypes] = None) -> None:  # type: ignore
        """Add every cookie of ``cookies`` to this jar."""
        cookies = Cookies(cookies)
        for cookie in cookies.jar:
            self.jar.set_cookie(cookie)

    def __setitem__(self, name: str, value: str) -> None:
        return self.set(name, value)

    def __getitem__(self, name: str) -> str:
        value = self.get(name)
        if value is None:
            raise KeyError(name)
        return value

    def __delitem__(self, name: str) -> None:
        return self.delete(name)

    def __len__(self) -> int:
        return len(self.jar)

    def __iter__(self) -> Iterator[str]:
        return (cookie.name for cookie in self.jar)

    def __bool__(self) -> bool:
        # True as soon as the jar yields one cookie
        for _ in self.jar:
            return True
        return False

    def __repr__(self) -> str:
        cookies_repr = ", ".join(
            [
                f"<Cookie {cookie.name}={cookie.value} for {cookie.domain} />"
                for cookie in self.jar
            ]
        )

        return f"<Cookies[{cookies_repr}]>"
|
||||
@@ -0,0 +1,7 @@
|
||||
# for compatibility with 0.5.x
|
||||
|
||||
__all__ = ["CurlError", "RequestsError", "CookieConflict", "SessionClosed"]
|
||||
|
||||
from ..curl import CurlError
|
||||
from .exceptions import CookieConflict, SessionClosed
|
||||
from .exceptions import RequestException as RequestsError
|
||||
+227
@@ -0,0 +1,227 @@
|
||||
# Apache 2.0 License
|
||||
# Vendored from https://github.com/psf/requests/blob/main/src/requests/exceptions.py
|
||||
# With our own additions
|
||||
|
||||
import json
|
||||
from typing import Literal, Union
|
||||
|
||||
from ..const import CurlECode
|
||||
from ..curl import CurlError
|
||||
|
||||
|
||||
# Note IOError is an alias of OSError in Python 3.x
|
||||
class RequestException(CurlError, OSError):
    """Base exception for curl_cffi.requests package.

    The ``response`` given to the constructor is kept as ``self.response``.
    """

    def __init__(
        self,
        msg,
        code: Union[CurlECode, Literal[0]] = 0,
        response=None,
        *args,
        **kwargs,
    ):
        """
        Args:
            msg: error message, passed on to the parent constructor.
            code: curl error code or 0, passed on to the parent constructor.
            response: stored on the instance, ``None`` by default.
        """
        super().__init__(msg, code, *args, **kwargs)
        self.response = response
|
||||
|
||||
|
||||
class CookieConflict(RequestException):
    """The same cookie exists for different domains."""
|
||||
|
||||
|
||||
class SessionClosed(RequestException):
    """The session has already been closed."""
|
||||
|
||||
|
||||
class ImpersonateError(RequestException):
    """The impersonate config was wrong, or impersonating failed."""
|
||||
|
||||
|
||||
# not used
|
||||
class InvalidJSONError(RequestException):
    """A JSON error occurred. Currently not used."""
|
||||
|
||||
|
||||
# not used
|
||||
class JSONDecodeError(InvalidJSONError, json.JSONDecodeError):
    """Couldn't decode the text into json. Currently not used."""
|
||||
|
||||
|
||||
class HTTPError(RequestException):
    """An HTTP error occurred.

    ``CODE2ERROR`` maps e.g. ``CurlECode.HTTP_RETURNED_ERROR`` to this class.
    """
|
||||
|
||||
|
||||
class IncompleteRead(HTTPError):
    """The content was read incompletely."""
|
||||
|
||||
|
||||
class ConnectionError(RequestException):
    """A Connection error occurred.

    ``CODE2ERROR`` maps e.g. ``CurlECode.COULDNT_CONNECT`` to this class.
    """
|
||||
|
||||
|
||||
class DNSError(ConnectionError):
    """Could not resolve the host.

    ``CODE2ERROR`` maps ``CurlECode.COULDNT_RESOLVE_HOST`` to this class.
    """
|
||||
|
||||
|
||||
class ProxyError(RequestException):
    """A proxy error occurred.

    ``CODE2ERROR`` maps e.g. ``CurlECode.COULDNT_RESOLVE_PROXY`` to this class.
    """
|
||||
|
||||
|
||||
class SSLError(ConnectionError):
    """An SSL error occurred.

    ``CODE2ERROR`` maps e.g. ``CurlECode.SSL_CONNECT_ERROR`` to this class.
    """
|
||||
|
||||
|
||||
class CertificateVerifyError(SSLError):
    """Raised when certificate validation has failed.

    ``CODE2ERROR`` maps ``CurlECode.PEER_FAILED_VERIFICATION`` to this class.
    """
|
||||
|
||||
|
||||
class Timeout(RequestException):
    """The request timed out.

    ``CODE2ERROR`` maps ``CurlECode.OPERATION_TIMEDOUT`` to this class.
    """
|
||||
|
||||
|
||||
# not used
|
||||
class ConnectTimeout(ConnectionError, Timeout):
    """The request timed out while trying to connect to the remote server.

    Requests that produced this error are safe to retry.

    Currently not used.
    """
|
||||
|
||||
|
||||
# not used
|
||||
class ReadTimeout(Timeout):
    """The server did not send any data in the allotted amount of time.

    Currently not used.
    """
|
||||
|
||||
|
||||
# not used
|
||||
class URLRequired(RequestException):
    """A valid URL is required to make a request. Currently not used."""
|
||||
|
||||
|
||||
class TooManyRedirects(RequestException):
    """Too many redirects.

    ``CODE2ERROR`` maps ``CurlECode.TOO_MANY_REDIRECTS`` to this class.
    """
|
||||
|
||||
|
||||
# not used
|
||||
class MissingSchema(RequestException, ValueError):
    """The URL scheme (e.g. http or https) is missing. Currently not used."""
|
||||
|
||||
|
||||
class InvalidSchema(RequestException, ValueError):
    """The URL scheme provided is either invalid or unsupported.

    ``CODE2ERROR`` maps ``CurlECode.UNSUPPORTED_PROTOCOL`` to this class.
    """
|
||||
|
||||
|
||||
class InvalidURL(RequestException, ValueError):
    """The URL provided was somehow invalid.

    ``CODE2ERROR`` maps ``CurlECode.URL_MALFORMAT`` to this class.
    """
|
||||
|
||||
|
||||
# not used
|
||||
class InvalidHeader(RequestException, ValueError):
    """The header value provided was somehow invalid. Currently not used."""
|
||||
|
||||
|
||||
# not used
|
||||
class InvalidProxyURL(InvalidURL):
    """The proxy URL provided is invalid. Currently not used."""
|
||||
|
||||
|
||||
# not used
|
||||
class ChunkedEncodingError(RequestException):
    """The server declared chunked encoding but sent an invalid chunk.

    Currently not used.
    """
|
||||
|
||||
|
||||
# not used
|
||||
class ContentDecodingError(RequestException):
    """Failed to decode response content. Currently not used."""
|
||||
|
||||
|
||||
# not used
|
||||
class StreamConsumedError(RequestException, TypeError):
    """The content for this response was already consumed. Currently not used."""
|
||||
|
||||
|
||||
# does not support
|
||||
class RetryError(RequestException):
    """Custom retries logic failed. Currently not used."""
|
||||
|
||||
|
||||
# not used
|
||||
class UnrewindableBodyError(RequestException):
    """Requests encountered an error when trying to rewind a body.

    Currently not used.
    """
|
||||
|
||||
|
||||
class InterfaceError(RequestException):
    """A specified outgoing interface could not be used.

    ``CODE2ERROR`` maps ``CurlECode.INTERFACE_FAILED`` to this class.
    """
|
||||
|
||||
|
||||
# Warnings
|
||||
|
||||
|
||||
# TODO: use this warning as a base
|
||||
class RequestsWarning(Warning):
    """Base warning for Requests. Currently not used."""
|
||||
|
||||
|
||||
# not used
|
||||
class FileModeWarning(RequestsWarning, DeprecationWarning):
    """A file was opened in text mode, but Requests determined its binary length.

    Currently not used.
    """
|
||||
|
||||
|
||||
# not used
class RequestsDependencyWarning(RequestsWarning):
    """Warn that an imported dependency is outside the expected version range.

    Not emitted anywhere by this library at present.
    """
|
||||
|
||||
|
||||
CODE2ERROR = {
|
||||
0: RequestException,
|
||||
CurlECode.UNSUPPORTED_PROTOCOL: InvalidSchema,
|
||||
CurlECode.URL_MALFORMAT: InvalidURL,
|
||||
CurlECode.COULDNT_RESOLVE_PROXY: ProxyError,
|
||||
CurlECode.COULDNT_RESOLVE_HOST: DNSError,
|
||||
CurlECode.COULDNT_CONNECT: ConnectionError,
|
||||
CurlECode.WEIRD_SERVER_REPLY: ConnectionError,
|
||||
CurlECode.REMOTE_ACCESS_DENIED: ConnectionError,
|
||||
CurlECode.HTTP2: HTTPError,
|
||||
CurlECode.HTTP_RETURNED_ERROR: HTTPError,
|
||||
CurlECode.WRITE_ERROR: RequestException,
|
||||
CurlECode.READ_ERROR: RequestException,
|
||||
CurlECode.OUT_OF_MEMORY: RequestException,
|
||||
CurlECode.OPERATION_TIMEDOUT: Timeout,
|
||||
CurlECode.SSL_CONNECT_ERROR: SSLError,
|
||||
CurlECode.INTERFACE_FAILED: InterfaceError,
|
||||
CurlECode.TOO_MANY_REDIRECTS: TooManyRedirects,
|
||||
CurlECode.UNKNOWN_OPTION: RequestException,
|
||||
CurlECode.SETOPT_OPTION_SYNTAX: RequestException,
|
||||
CurlECode.GOT_NOTHING: ConnectionError,
|
||||
CurlECode.SSL_ENGINE_NOTFOUND: SSLError,
|
||||
CurlECode.SSL_ENGINE_SETFAILED: SSLError,
|
||||
CurlECode.SEND_ERROR: ConnectionError,
|
||||
CurlECode.RECV_ERROR: ConnectionError,
|
||||
CurlECode.SSL_CERTPROBLEM: SSLError,
|
||||
CurlECode.SSL_CIPHER: SSLError,
|
||||
CurlECode.PEER_FAILED_VERIFICATION: CertificateVerifyError,
|
||||
CurlECode.BAD_CONTENT_ENCODING: HTTPError,
|
||||
CurlECode.SSL_ENGINE_INITFAILED: SSLError,
|
||||
CurlECode.SSL_CACERT_BADFILE: SSLError,
|
||||
CurlECode.SSL_CRL_BADFILE: SSLError,
|
||||
CurlECode.SSL_ISSUER_ERROR: SSLError,
|
||||
CurlECode.SSL_PINNEDPUBKEYNOTMATCH: SSLError,
|
||||
CurlECode.SSL_INVALIDCERTSTATUS: SSLError,
|
||||
CurlECode.HTTP2_STREAM: HTTPError,
|
||||
CurlECode.HTTP3: HTTPError,
|
||||
CurlECode.QUIC_CONNECT_ERROR: ConnectionError,
|
||||
CurlECode.PROXY: ProxyError,
|
||||
CurlECode.SSL_CLIENTCERT: SSLError,
|
||||
CurlECode.ECH_REQUIRED: SSLError,
|
||||
CurlECode.PARTIAL_FILE: IncompleteRead,
|
||||
}
|
||||
|
||||
|
||||
# credits: https://github.com/yt-dlp/yt-dlp/blob/master/yt_dlp/networking/_curlcffi.py#L241
|
||||
# Unlicense
|
||||
def code2error(code: Union[CurlECode, Literal[0]], msg: str):
    """Pick the exception class that corresponds to a curl error.

    Args:
        code: curl error code, or ``0``.
        msg: error message accompanying the code.

    Returns:
        ``ProxyError`` for a ``RECV_ERROR`` whose message contains
        ``"CONNECT"``; otherwise the ``CODE2ERROR`` entry for ``code``,
        defaulting to ``RequestException`` for unmapped codes.
    """
    # The message is only inspected for RECV_ERROR (short-circuit), as before.
    is_proxy_failure = code == CurlECode.RECV_ERROR and "CONNECT" in msg
    if is_proxy_failure:
        return ProxyError
    return CODE2ERROR.get(code, RequestException)
|
||||
@@ -0,0 +1,347 @@
|
||||
# Copied from: https://github.com/encode/httpx/blob/master/httpx/_models.py,
|
||||
# which is licensed under the BSD License.
|
||||
# See https://github.com/encode/httpx/blob/master/LICENSE.md
|
||||
|
||||
|
||||
from collections.abc import (
|
||||
ItemsView,
|
||||
Iterable,
|
||||
Iterator,
|
||||
KeysView,
|
||||
Mapping,
|
||||
MutableMapping,
|
||||
Sequence,
|
||||
ValuesView,
|
||||
)
|
||||
from typing import Any, AnyStr, Optional, Union, cast
|
||||
|
||||
HeaderTypes = Union[
|
||||
"Headers",
|
||||
Mapping[str, Optional[str]],
|
||||
Mapping[bytes, Optional[bytes]],
|
||||
Sequence[tuple[str, str]],
|
||||
Sequence[tuple[bytes, bytes]],
|
||||
Sequence[Union[str, bytes]],
|
||||
]
|
||||
|
||||
|
||||
def to_str(value: Union[str, bytes], encoding: str = "utf-8") -> str:
    """Return *value* as text, decoding it with *encoding* if it is bytes."""
    if isinstance(value, str):
        return value
    return value.decode(encoding)
|
||||
|
||||
|
||||
# Lower-cased header names whose values must never be shown in reprs.
SENSITIVE_HEADERS = {"authorization", "proxy-authorization"}


def obfuscate_sensitive_headers(
    items: Iterable[tuple[AnyStr, Optional[AnyStr]]],
) -> Iterator[tuple[AnyStr, Optional[AnyStr]]]:
    """Yield header pairs with the values of sensitive headers masked.

    A pair whose lower-cased name is in ``SENSITIVE_HEADERS`` has its value
    replaced by ``b"[secure]"`` when the value is bytes and by ``"[secure]"``
    otherwise (including when the value is ``None``). Names are yielded
    unchanged.
    """
    for name, value in items:
        lowered = name.lower()
        # Bytes names are decoded as UTF-8 so they can be matched as text.
        if not isinstance(lowered, str):
            lowered = lowered.decode("utf-8")
        if lowered in SENSITIVE_HEADERS:
            value = b"[secure]" if isinstance(value, bytes) else "[secure]"  # type: ignore
        yield name, value
|
||||
|
||||
|
||||
def normalize_header_key(
    value: Union[str, bytes],
    lower: bool,
    encoding: Optional[str] = None,
) -> bytes:
    """
    Return a header name as bytes, optionally lower-cased.

    ``str`` input is encoded with ``encoding`` (``"ascii"`` when none is
    given); ``bytes`` input is used as-is.
    """
    if not isinstance(value, bytes):
        value = value.encode(encoding or "ascii")
    if lower:
        return value.lower()
    return value
|
||||
|
||||
|
||||
def normalize_header_value(
    value: Union[str, bytes, int, None], encoding: Optional[str] = None
) -> Union[bytes, None]:
    """
    Return a header value as bytes, or ``None`` when the value is ``None``.

    ``bytes`` pass through untouched, ``int`` values are rendered in decimal,
    and ``str`` values are encoded with ``encoding``.
    """
    if value is None or isinstance(value, bytes):
        return value

    if isinstance(value, int):
        return str(value).encode()

    # The default encoding for header value should be latin-1
    # See: RFC and https://github.com/python/cpython/blob/bc264eac3ad14dab748e33b3d714c2674872791f/Lib/http/client.py#L1309
    return cast(str, value).encode(encoding or "latin-1")
|
||||
|
||||
|
||||
class Headers(MutableMapping[str, Optional[str]]):
    """
    HTTP headers, as a case-insensitive multi-dict.

    Entries are kept in insertion order as ``(raw_key, lowercased_key, value)``
    byte triples. A ``None`` value is stored as ``None`` rather than encoded.
    """

    def __init__(
        self, headers: Optional[HeaderTypes] = None, encoding: Optional[str] = None
    ):
        """
        Build the header list from another ``Headers``, a mapping, or an
        iterable of ``"Name: Value"`` lines and/or ``(name, value)`` pairs.

        Raises:
            TypeError: if ``headers`` is a bare ``str``/``bytes`` or is not
                iterable.
            ValueError: if a header line contains no ``:`` or a pair does not
                hold exactly two items.
        """
        self._list: list[tuple[bytes, bytes, Optional[bytes]]]

        if isinstance(headers, Headers):
            self._list = list(headers._list)
            encoding = encoding or headers.encoding
        elif not headers:
            self._list = []
        elif isinstance(headers, Mapping):
            self._list = [
                self._normalize_item(k, v, encoding) for k, v in headers.items()
            ]
        elif isinstance(headers, (str, bytes)) or not isinstance(headers, Iterable):
            # Previously such input left ``_list`` unset and failed much later
            # with an AttributeError; fail early with a clear message instead.
            raise TypeError(
                "headers must be a Headers, a mapping or a sequence of header "
                f"lines/pairs, not {type(headers).__name__}"
            )
        else:
            # Any sequence (list, tuple, ...) of lines and/or pairs.
            self._list = [
                self._normalize_item(k, v, encoding)
                for k, v in self._iter_pairs(headers)
            ]

        self._encoding = encoding

    @staticmethod
    def _normalize_item(
        key: Union[str, bytes],
        value: Union[str, bytes, int, None],
        encoding: Optional[str],
    ) -> tuple[bytes, bytes, Optional[bytes]]:
        """Return the ``(raw_key, lowercased_key, value)`` triple for one header."""
        return (
            normalize_header_key(key, lower=False, encoding=encoding),
            normalize_header_key(key, lower=True, encoding=encoding),
            normalize_header_value(value, encoding),
        )

    @staticmethod
    def _iter_pairs(headers: Iterable[Any]) -> Iterator[tuple[Any, Any]]:
        """Yield ``(name, value)`` from ``"Name: Value"`` lines or 2-item pairs."""
        for item in headers:
            if isinstance(item, (str, bytes)):
                sep = ":" if isinstance(item, str) else b":"
                name, found, value = item.partition(sep)  # pyright: ignore
                if not found:
                    raise ValueError(f"Invalid header line, missing ':': {item!r}")
                # Only the value is stripped, as before; the name is kept as-is.
                yield name, value.strip()
            else:
                name, value = item
                yield name, value

    @property
    def encoding(self) -> str:
        """
        Header encoding is mandated as ascii, but we allow fallbacks to utf-8
        or iso-8859-1.

        When no encoding was set explicitly, the first of ascii / utf-8 that
        decodes every stored key and value is chosen and remembered.
        """
        if self._encoding is None:
            for encoding in ["ascii", "utf-8"]:
                for key, value in self.raw:
                    try:
                        key.decode(encoding)
                        if value is not None:
                            value.decode(encoding)
                    except UnicodeDecodeError:
                        break
                else:
                    # The else block runs if 'break' did not occur, meaning
                    # all values fitted the encoding.
                    self._encoding = encoding
                    break
            else:
                # The ISO-8859-1 encoding covers all 256 code points in a byte,
                # so will never raise decode errors.
                self._encoding = "iso-8859-1"
        return self._encoding

    @encoding.setter
    def encoding(self, value: str) -> None:
        self._encoding = value

    @property
    def raw(self) -> list[tuple[bytes, Optional[bytes]]]:
        """
        Returns a list of the raw header items, as byte pairs.
        """
        return [(raw_key, value) for raw_key, _, value in self._list]

    def _merged(self) -> dict[str, str]:
        """
        Return ``{lowercased_key: value}`` with repeated keys joined by ", ".

        NOTE: a ``None`` value is rendered as the string ``"None"`` here. This
        is the historical behaviour of ``values()``/``items()`` and is kept
        for compatibility, although ``__getitem__`` returns a real ``None``
        for a header whose only value is ``None``.
        """
        encoding = self.encoding
        merged: dict[str, str] = {}
        for _, key, value in self._list:
            str_key = key.decode(encoding)
            str_value = value.decode(encoding) if value is not None else "None"
            if str_key in merged:
                merged[str_key] += f", {str_value}"
            else:
                merged[str_key] = str_value
        return merged

    def keys(self) -> KeysView[str]:
        """Return the distinct lower-cased header names, in first-seen order."""
        encoding = self.encoding
        return dict.fromkeys(key.decode(encoding) for _, key, _ in self._list).keys()

    def values(self) -> ValuesView[Optional[str]]:
        """Return one comma-joined value per distinct header name."""
        return self._merged().values()

    def items(self) -> ItemsView[str, Optional[str]]:
        """
        Return `(key, value)` items of headers. Concatenate headers
        into a single comma separated value when a key occurs multiple times.
        """
        return self._merged().items()

    def multi_items(self) -> list[tuple[str, Optional[str]]]:
        """
        Return a list of `(key, value)` pairs of headers. Allow multiple
        occurrences of the same key without concatenating into a single
        comma separated value. Keys keep their original casing.
        """
        encoding = self.encoding
        return [
            (
                key.decode(encoding),
                value.decode(encoding) if value is not None else value,
            )
            for key, _, value in self._list
        ]

    def get(self, key: str, default: Any = None) -> Any:
        """
        Return a header value. If multiple occurrences of the header occur
        then concatenate them together with commas.
        """
        try:
            return self[key]
        except KeyError:
            return default

    def get_list(self, key: str, split_commas: bool = False) -> list[Optional[str]]:
        """
        Return a list of all header values for a given key.
        If `split_commas=True` is passed, then any comma separated header
        values are split into multiple return strings. ``None`` values are
        passed through unchanged.
        """
        encoding = self.encoding
        get_header_key = key.lower().encode(encoding)

        values = [
            item_value.decode(encoding) if item_value is not None else item_value
            for _, item_key, item_value in self._list
            if item_key.lower() == get_header_key
        ]

        if not split_commas:
            return values

        split_values: list[Optional[str]] = []
        for value in values:
            if value is None:
                # Nothing to split; previously this raised AttributeError.
                split_values.append(None)
            else:
                split_values.extend(item.strip() for item in value.split(","))
        return split_values

    def update(self, headers: Optional[HeaderTypes] = None) -> None:  # type: ignore
        """Replace every header named in ``headers`` with its new value(s)."""
        headers = Headers(headers)
        for key in headers:
            if key in self:
                self.pop(key)
        self._list.extend(headers._list)

    def copy(self) -> "Headers":
        """Return a new ``Headers`` with the same entries and encoding."""
        return Headers(self, encoding=self.encoding)

    def __getitem__(self, key: str) -> Optional[str]:
        """
        Return a single header value.
        If there are multiple headers with the same key, then we concatenate
        them with commas. See: https://tools.ietf.org/html/rfc7230#section-3.2.2

        Returns ``None`` when the header's only value is ``None``.

        Raises:
            KeyError: if no header with that name is stored.
        """
        encoding = self.encoding
        normalized_key = key.lower().encode(encoding)

        items = [
            header_value.decode(encoding) if header_value is not None else header_value
            for _, header_key, header_value in self._list
            if header_key == normalized_key
        ]

        if items == [None]:
            return None

        if items:
            return ", ".join([str(item) for item in items])

        raise KeyError(key)

    def __setitem__(self, key: str, value: Optional[str]) -> None:
        """
        Set the header `key` to `value`, removing any duplicate entries.
        Retains insertion order.
        """
        set_key = key.encode(self._encoding or "utf-8")
        set_value = (
            value.encode(self._encoding or "utf-8") if value is not None else value
        )
        lookup_key = set_key.lower()

        found_indexes = [
            idx
            for idx, (_, item_key, _) in enumerate(self._list)
            if item_key == lookup_key
        ]

        # Drop all but the first occurrence, back to front so indexes stay valid.
        for idx in reversed(found_indexes[1:]):
            del self._list[idx]

        if found_indexes:
            idx = found_indexes[0]
            self._list[idx] = (set_key, lookup_key, set_value)
        else:
            self._list.append((set_key, lookup_key, set_value))

    def __delitem__(self, key: str) -> None:
        """
        Remove the header `key`.

        Raises:
            KeyError: if no header with that name is stored.
        """
        del_key = key.lower().encode(self.encoding)

        pop_indexes = [
            idx
            for idx, (_, item_key, _) in enumerate(self._list)
            if item_key.lower() == del_key
        ]

        if not pop_indexes:
            raise KeyError(key)

        for idx in reversed(pop_indexes):
            del self._list[idx]

    def __contains__(self, key: Any) -> bool:
        """Return whether a header named ``key`` (case-insensitive) is stored."""
        # Membership tests must not raise for foreign or un-encodable keys.
        if not isinstance(key, str):
            return False
        try:
            header_key = key.lower().encode(self.encoding)
        except UnicodeEncodeError:
            return False
        return any(item_key == header_key for _, item_key, _ in self._list)

    def __iter__(self) -> Iterator[Any]:
        return iter(self.keys())

    def __len__(self) -> int:
        # Counts stored entries, so a repeated header name counts once per entry.
        return len(self._list)

    def __eq__(self, other: Any) -> bool:
        """Compare as unordered collections of ``(lowercased_key, value)`` pairs."""
        try:
            other_headers = Headers(other)
        except (AttributeError, TypeError, ValueError):
            # ``other`` cannot be interpreted as headers at all.
            return False

        def sort_key(pair: tuple[bytes, Optional[bytes]]):
            # ``None`` and bytes are not orderable against each other, so sort
            # ``None`` first via an explicit flag.
            key, value = pair
            return key, value is not None, value or b""

        self_list = sorted(((k, v) for _, k, v in self._list), key=sort_key)
        other_list = sorted(
            ((k, v) for _, k, v in other_headers._list), key=sort_key
        )
        return self_list == other_list

    def __repr__(self) -> str:
        class_name = self.__class__.__name__

        encoding_str = ""
        if self.encoding != "ascii":
            encoding_str = f", encoding={self.encoding!r}"

        as_list = list(obfuscate_sensitive_headers(self.multi_items()))
        as_dict = dict(as_list)

        # Use the compact dict form only when it loses no duplicate entries.
        no_duplicate_keys = len(as_dict) == len(as_list)
        if no_duplicate_keys:
            return f"{class_name}({as_dict!r}{encoding_str})"
        return f"{class_name}({as_list!r}{encoding_str})"
|
||||
+435
@@ -0,0 +1,435 @@
|
||||
import warnings
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import Literal, Optional, TypedDict
|
||||
|
||||
from ..const import CurlOpt, CurlSslVersion
|
||||
from ..utils import CurlCffiWarning
|
||||
|
||||
BrowserTypeLiteral = Literal[
|
||||
# Edge
|
||||
"edge99",
|
||||
"edge101",
|
||||
# Chrome
|
||||
"chrome99",
|
||||
"chrome100",
|
||||
"chrome101",
|
||||
"chrome104",
|
||||
"chrome107",
|
||||
"chrome110",
|
||||
"chrome116",
|
||||
"chrome119",
|
||||
"chrome120",
|
||||
"chrome123",
|
||||
"chrome124",
|
||||
"chrome131",
|
||||
"chrome133a",
|
||||
"chrome136",
|
||||
"chrome99_android",
|
||||
"chrome131_android",
|
||||
# Safari
|
||||
"safari153",
|
||||
"safari155",
|
||||
"safari170",
|
||||
"safari172_ios",
|
||||
"safari180",
|
||||
"safari180_ios",
|
||||
"safari184",
|
||||
"safari184_ios",
|
||||
"safari260",
|
||||
"safari260_ios",
|
||||
# Firefox
|
||||
"firefox133",
|
||||
"firefox135",
|
||||
"tor145",
|
||||
# alias
|
||||
"chrome",
|
||||
"edge",
|
||||
"safari",
|
||||
"safari_ios",
|
||||
"safari_beta",
|
||||
"safari_ios_beta",
|
||||
"chrome_android",
|
||||
"firefox",
|
||||
# deprecated aliases
|
||||
"safari15_3",
|
||||
"safari15_5",
|
||||
"safari17_0",
|
||||
"safari17_2_ios",
|
||||
"safari18_0",
|
||||
"safari18_0_ios",
|
||||
"safari18_4",
|
||||
"safari18_4_ios",
|
||||
# Canonical names
|
||||
# "edge_99",
|
||||
# "edge_101",
|
||||
# "safari_15.3_macos",
|
||||
# "safari_15.5_macos",
|
||||
# "safari_17.2_ios",
|
||||
# "safari_17.0_macos",
|
||||
# "safari_18.0_ios",
|
||||
# "safari_18.0_macos",
|
||||
]
|
||||
|
||||
|
||||
# Concrete fingerprint that each generic browser alias currently resolves to.
DEFAULT_CHROME = "chrome136"
DEFAULT_EDGE = "edge101"
DEFAULT_SAFARI = "safari184"
DEFAULT_SAFARI_IOS = "safari184_ios"
DEFAULT_SAFARI_BETA = "safari260"
DEFAULT_SAFARI_IOS_BETA = "safari260_ios"
DEFAULT_CHROME_ANDROID = "chrome131_android"
DEFAULT_FIREFOX = "firefox135"
DEFAULT_TOR = "tor145"


# Alias -> concrete target. Built from the constants above so the alias table
# exists in exactly one place and cannot drift out of sync.
REAL_TARGET_MAP = {
    "chrome": DEFAULT_CHROME,
    "edge": DEFAULT_EDGE,
    "safari": DEFAULT_SAFARI,
    "safari_ios": DEFAULT_SAFARI_IOS,
    "safari_beta": DEFAULT_SAFARI_BETA,
    "safari_ios_beta": DEFAULT_SAFARI_IOS_BETA,
    "chrome_android": DEFAULT_CHROME_ANDROID,
    "firefox": DEFAULT_FIREFOX,
    "tor": DEFAULT_TOR,
}


def normalize_browser_type(item):
    """Resolve a generic browser alias to its concrete target name.

    Args:
        item: a browser name such as ``"chrome"`` or ``"chrome120"``.

    Returns:
        The ``REAL_TARGET_MAP`` entry when ``item`` is a known alias;
        otherwise ``item`` itself, unchanged.
    """
    try:
        return REAL_TARGET_MAP.get(item, item)
    except TypeError:
        # Unhashable input cannot be an alias; pass it through as before.
        return item
|
||||
|
||||
|
||||
class BrowserType(str, Enum):  # TODO: remove in version 1.x
    """String-valued enum of browser target names; each member equals its name."""

    # Edge
    edge99 = "edge99"
    edge101 = "edge101"

    # Chrome
    chrome99 = "chrome99"
    chrome100 = "chrome100"
    chrome101 = "chrome101"
    chrome104 = "chrome104"
    chrome107 = "chrome107"
    chrome110 = "chrome110"
    chrome116 = "chrome116"
    chrome119 = "chrome119"
    chrome120 = "chrome120"
    chrome123 = "chrome123"
    chrome124 = "chrome124"
    chrome131 = "chrome131"
    chrome133a = "chrome133a"
    chrome136 = "chrome136"
    chrome99_android = "chrome99_android"
    chrome131_android = "chrome131_android"

    # Safari
    safari153 = "safari153"
    safari155 = "safari155"
    safari170 = "safari170"
    safari172_ios = "safari172_ios"
    safari180 = "safari180"
    safari180_ios = "safari180_ios"
    safari184 = "safari184"
    safari184_ios = "safari184_ios"
    safari260 = "safari260"
    safari260_ios = "safari260_ios"

    # Firefox and Tor
    firefox133 = "firefox133"
    firefox135 = "firefox135"
    tor145 = "tor145"

    # deprecated aliases
    safari15_3 = "safari15_3"
    safari15_5 = "safari15_5"
    safari17_0 = "safari17_0"
    safari17_2_ios = "safari17_2_ios"
    safari18_0 = "safari18_0"
    safari18_0_ios = "safari18_0_ios"
|
||||
|
||||
|
||||
@dataclass
class ExtraFingerprints:
    """Bundle of TLS and HTTP/2 fingerprint settings with their defaults.

    NOTE(review): the exact effect of each field is decided where the values
    are consumed, which is outside this block — confirm there.
    """

    # TLS-related settings
    tls_min_version: int = CurlSslVersion.TLSv1_2
    tls_grease: bool = False
    tls_permute_extensions: bool = False
    tls_cert_compression: Literal["zlib", "brotli"] = "brotli"
    tls_signature_algorithms: Optional[list[str]] = None
    tls_delegated_credential: str = ""
    tls_record_size_limit: int = 0

    # HTTP/2-related settings
    http2_stream_weight: int = 256
    http2_stream_exclusive: int = 1
    http2_no_priority: bool = False
|
||||
|
||||
|
||||
class ExtraFpDict(TypedDict, total=False):
    """Dict form of the extra fingerprint settings; every key is optional."""

    # TLS-related settings
    tls_min_version: int
    tls_grease: bool
    tls_permute_extensions: bool
    tls_cert_compression: Literal["zlib", "brotli"]
    tls_signature_algorithms: Optional[list[str]]
    tls_delegated_credential: str
    tls_record_size_limit: int

    # HTTP/2-related settings
    http2_stream_weight: int
    http2_stream_exclusive: int
    http2_no_priority: bool
|
||||
|
||||
|
||||
# TLS versions are in the format 0xAABB, where AA is the major version and BB is the minor
|
||||
# version. As of today, the major version is always 03.
|
||||
TLS_VERSION_MAP = {
|
||||
0x0301: CurlSslVersion.TLSv1_0, # 769
|
||||
0x0302: CurlSslVersion.TLSv1_1, # 770
|
||||
0x0303: CurlSslVersion.TLSv1_2, # 771
|
||||
0x0304: CurlSslVersion.TLSv1_3, # 772
|
||||
}
|
||||
|
||||
# A list of the possible cipher suite ids. Taken from
|
||||
# http://www.iana.org/assignments/tls-parameters/tls-parameters.xml
|
||||
# via BoringSSL
|
||||
TLS_CIPHER_NAME_MAP = {
|
||||
0x000A: "TLS_RSA_WITH_3DES_EDE_CBC_SHA",
|
||||
0x002F: "TLS_RSA_WITH_AES_128_CBC_SHA",
|
||||
0x0033: "TLS_DHE_RSA_WITH_AES_128_CBC_SHA",
|
||||
0x0035: "TLS_RSA_WITH_AES_256_CBC_SHA",
|
||||
0x0039: "TLS_DHE_RSA_WITH_AES_256_CBC_SHA",
|
||||
0x003C: "TLS_RSA_WITH_AES_128_CBC_SHA256",
|
||||
0x003D: "TLS_RSA_WITH_AES_256_CBC_SHA256",
|
||||
0x0067: "TLS_DHE_RSA_WITH_AES_128_CBC_SHA256",
|
||||
0x006B: "TLS_DHE_RSA_WITH_AES_256_CBC_SHA256",
|
||||
0x008C: "TLS_PSK_WITH_AES_128_CBC_SHA",
|
||||
0x008D: "TLS_PSK_WITH_AES_256_CBC_SHA",
|
||||
0x009C: "TLS_RSA_WITH_AES_128_GCM_SHA256",
|
||||
0x009D: "TLS_RSA_WITH_AES_256_GCM_SHA384",
|
||||
0x009E: "TLS_DHE_RSA_WITH_AES_128_GCM_SHA256",
|
||||
0x009F: "TLS_DHE_RSA_WITH_AES_256_GCM_SHA384",
|
||||
0x1301: "TLS_AES_128_GCM_SHA256",
|
||||
0x1302: "TLS_AES_256_GCM_SHA384",
|
||||
0x1303: "TLS_CHACHA20_POLY1305_SHA256",
|
||||
0xC008: "TLS_ECDHE_ECDSA_WITH_3DES_EDE_CBC_SHA",
|
||||
0xC009: "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA",
|
||||
0xC00A: "TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA",
|
||||
0xC012: "TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA",
|
||||
0xC013: "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA",
|
||||
0xC014: "TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA",
|
||||
0xC023: "TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256",
|
||||
0xC024: "TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384",
|
||||
0xC027: "TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256",
|
||||
0xC028: "TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384",
|
||||
0xC02B: "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256",
|
||||
0xC02C: "TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384",
|
||||
0xC02F: "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256",
|
||||
0xC030: "TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384",
|
||||
0xC035: "TLS_ECDHE_PSK_WITH_AES_128_CBC_SHA",
|
||||
0xC036: "TLS_ECDHE_PSK_WITH_AES_256_CBC_SHA",
|
||||
0xCCA8: "TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256",
|
||||
0xCCA9: "TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256",
|
||||
0xCCAC: "TLS_ECDHE_PSK_WITH_CHACHA20_POLY1305_SHA256",
|
||||
}
|
||||
|
||||
|
||||
# RFC tls extensions: https://datatracker.ietf.org/doc/html/rfc6066
|
||||
# IANA list: https://www.iana.org/assignments/tls-extensiontype-values/tls-extensiontype-values.xhtml
|
||||
TLS_EXTENSION_NAME_MAP = {
|
||||
0: "server_name",
|
||||
1: "max_fragment_length",
|
||||
2: "client_certificate_url",
|
||||
3: "trusted_ca_keys",
|
||||
4: "truncated_hmac",
|
||||
5: "status_request",
|
||||
6: "user_mapping",
|
||||
7: "client_authz",
|
||||
8: "server_authz",
|
||||
9: "cert_type",
|
||||
10: "supported_groups", # (renamed from "elliptic_curves")
|
||||
11: "ec_point_formats",
|
||||
12: "srp",
|
||||
13: "signature_algorithms",
|
||||
14: "use_srtp",
|
||||
15: "heartbeat",
|
||||
16: "application_layer_protocol_negotiation",
|
||||
17: "status_request_v2",
|
||||
18: "signed_certificate_timestamp",
|
||||
19: "client_certificate_type",
|
||||
20: "server_certificate_type",
|
||||
21: "padding",
|
||||
22: "encrypt_then_mac",
|
||||
23: "extended_master_secret",
|
||||
24: "token_binding",
|
||||
25: "cached_info",
|
||||
26: "tls_lts",
|
||||
27: "compress_certificate",
|
||||
28: "record_size_limit",
|
||||
29: "pwd_protect",
|
||||
30: "pwd_clear",
|
||||
31: "password_salt",
|
||||
32: "ticket_pinning",
|
||||
33: "tls_cert_with_extern_psk",
|
||||
34: "delegated_credential",
|
||||
35: "session_ticket", # (renamed from "SessionTicket TLS")
|
||||
36: "TLMSP",
|
||||
37: "TLMSP_proxying",
|
||||
38: "TLMSP_delegate",
|
||||
39: "supported_ekt_ciphers",
|
||||
# 40:"Reserved",
|
||||
41: "pre_shared_key",
|
||||
42: "early_data",
|
||||
43: "supported_versions",
|
||||
44: "cookie",
|
||||
45: "psk_key_exchange_modes",
|
||||
# 46:"Reserved",
|
||||
47: "certificate_authorities",
|
||||
48: "oid_filters",
|
||||
49: "post_handshake_auth",
|
||||
50: "signature_algorithms_cert",
|
||||
51: "key_share",
|
||||
52: "transparency_info",
|
||||
# 53:"connection_id", # (deprecated)
|
||||
54: "connection_id",
|
||||
55: "external_id_hash",
|
||||
56: "external_session_id",
|
||||
57: "quic_transport_parameters",
|
||||
58: "ticket_request",
|
||||
59: "dnssec_chain",
|
||||
60: "sequence_number_encryption_algorithms",
|
||||
61: "rrc",
|
||||
17513: "application_settings", # BoringSSL private usage
|
||||
17613: "application_settings new", # BoringSSL private usage
|
||||
# 62-2569:"Unassigned
|
||||
# 2570:"Reserved
|
||||
# 2571-6681:"Unassigned
|
||||
# 6682:"Reserved
|
||||
# 6683-10793:"Unassigned
|
||||
# 10794:"Reserved
|
||||
# 10795-14905:"Unassigned
|
||||
# 14906:"Reserved
|
||||
# 14907-19017:"Unassigned
|
||||
# 19018:"Reserved
|
||||
# 19019-23129:"Unassigned
|
||||
# 23130:"Reserved
|
||||
# 23131-27241:"Unassigned
|
||||
# 27242:"Reserved
|
||||
# 27243-31353:"Unassigned
|
||||
# 31354:"Reserved
|
||||
# 31355-35465:"Unassigned
|
||||
# 35466:"Reserved
|
||||
# 35467-39577:"Unassigned
|
||||
# 39578:"Reserved
|
||||
# 39579-43689:"Unassigned
|
||||
# 43690:"Reserved
|
||||
# 43691-47801:"Unassigned
|
||||
# 47802:"Reserved
|
||||
# 47803-51913:"Unassigned
|
||||
# 51914:"Reserved
|
||||
# 51915-56025:"Unassigned
|
||||
# 56026:"Reserved
|
||||
# 56027-60137:"Unassigned
|
||||
# 60138:"Reserved
|
||||
# 60139-64249:"Unassigned
|
||||
# 64250:"Reserved
|
||||
# 64251-64767:"Unassigned
|
||||
64768: "ech_outer_extensions",
|
||||
# 64769-65036:"Unassigned
|
||||
65037: "encrypted_client_hello",
|
||||
# 65038-65279:"Unassigned
|
||||
# 65280:"Reserved for Private Use
|
||||
65281: "renegotiation_info",
|
||||
# 65282-65535:"Reserved for Private Use
|
||||
}
|
||||
|
||||
|
||||
TLS_EC_CURVES_MAP = {
|
||||
19: "P-192",
|
||||
21: "P-224",
|
||||
23: "P-256",
|
||||
24: "P-384",
|
||||
25: "P-521",
|
||||
29: "X25519",
|
||||
256: "ffdhe2048",
|
||||
257: "ffdhe3072",
|
||||
4588: "X25519MLKEM768",
|
||||
25497: "X25519Kyber768Draft00",
|
||||
}
|
||||
|
||||
|
||||
def toggle_extension(curl, extension_id: int, enable: bool):
    """Switch a TLS extension on or off by setting the matching curl option(s).

    Args:
        curl: handle whose ``setopt`` method is called.
        extension_id: numeric TLS extension id.
        enable: whether the extension should be turned on.

    Raises:
        NotImplementedError: for ``server_name`` (0) and for any extension id
            that has no branch below.

    Extensions 5 and 18 are only ever switched on; asking to disable them
    does nothing. Extensions 21, 28 and 34 are accepted and ignored.
    """
    flag = 1 if enable else 0

    # ECH
    if extension_id == 65037:
        curl.setopt(CurlOpt.ECH, "grease" if enable else "")
    # compress certificate
    elif extension_id == 27:
        if not enable:
            curl.setopt(CurlOpt.SSL_CERT_COMPRESSION, "")
        else:
            warnings.warn(
                "Cert compression setting to brotli, "
                "you had better specify which to use: zlib/brotli",
                CurlCffiWarning,
                stacklevel=1,
            )
            curl.setopt(CurlOpt.SSL_CERT_COMPRESSION, "brotli")
    # ALPS: application settings
    elif extension_id == 17513:
        curl.setopt(CurlOpt.SSL_ENABLE_ALPS, flag)
    # ALPS with the new code point
    elif extension_id == 17613:
        curl.setopt(CurlOpt.SSL_ENABLE_ALPS, flag)
        curl.setopt(CurlOpt.TLS_USE_NEW_ALPS_CODEPOINT, flag)
    # server_name
    elif extension_id == 0:
        raise NotImplementedError(
            "It's unlikely that the server_name(0) extension being changed."
        )
    # ALPN
    elif extension_id == 16:
        curl.setopt(CurlOpt.SSL_ENABLE_ALPN, flag)
    # status_request
    elif extension_id == 5:
        if enable:
            curl.setopt(CurlOpt.TLS_STATUS_REQUEST, 1)
    # signed_certificate_timestamps
    elif extension_id == 18:
        if enable:
            curl.setopt(CurlOpt.TLS_SIGNED_CERT_TIMESTAMPS, 1)
    # session_ticket
    elif extension_id == 35:
        curl.setopt(CurlOpt.SSL_ENABLE_TICKET, flag)
    # padding (21) should be ignored; 34 and 28 are firefox extensions that
    # are toggled by extra_fp instead
    elif extension_id in (21, 34, 28):
        pass
    else:
        raise NotImplementedError(
            f"This extension({extension_id}) can not be toggled for now, it may be "
            "updated later."
        )
|
||||
@@ -0,0 +1,314 @@
|
||||
from contextlib import suppress
|
||||
import queue
|
||||
import re
|
||||
import warnings
|
||||
from concurrent.futures import Future
|
||||
from typing import Any, Callable, Optional, Union
|
||||
from collections.abc import Awaitable
|
||||
|
||||
from ..curl import Curl
|
||||
from ..utils import CurlCffiWarning
|
||||
from .cookies import Cookies
|
||||
from .exceptions import HTTPError, RequestException
|
||||
from .headers import Headers
|
||||
|
||||
# Use orjson if present
|
||||
try:
|
||||
from orjson import loads
|
||||
except ImportError:
|
||||
from json import loads
|
||||
|
||||
with suppress(ImportError):
|
||||
from markdownify import markdownify as md
|
||||
import readability as rd
|
||||
|
||||
CHARSET_RE = re.compile(r"charset=([\w-]+)")
|
||||
STREAM_END = object()
|
||||
|
||||
|
||||
def clear_queue(q: queue.Queue):
    """Discard every pending item in ``q`` and reset its task accounting.

    Everything happens under the queue's own mutex. Afterwards the queue is
    empty, ``unfinished_tasks`` is zero, and both kinds of blocked callers
    are woken.
    """
    with q.mutex:
        q.queue.clear()
        q.unfinished_tasks = 0
        # join() waiters: no outstanding work is left.
        q.all_tasks_done.notify_all()
        # put() waiters: a bounded queue that was full now has room again.
        # Without this they would stay blocked until some later get().
        q.not_full.notify_all()
|
||||
|
||||
|
||||
class Request:
    """Plain record describing a request that was sent.

    Attributes:
        url: the request URL.
        headers: the request headers.
        method: the HTTP method.
    """

    def __init__(self, url: str, headers: Headers, method: str):
        # The arguments are stored as given; nothing is copied or validated.
        self.url, self.headers, self.method = url, headers, method
|
||||
|
||||
|
||||
class Response:
|
||||
"""Contains information the server sends.
|
||||
|
||||
Attributes:
|
||||
url: url used in the request.
|
||||
content: response body in bytes.
|
||||
text: response body in str.
|
||||
status_code: http status code.
|
||||
reason: http response reason, such as OK, Not Found.
|
||||
ok: is status_code in [200, 400)?
|
||||
headers: response headers.
|
||||
cookies: response cookies.
|
||||
elapsed: how many seconds the request cost.
|
||||
encoding: http body encoding.
|
||||
charset: alias for encoding.
|
||||
primary_ip: primary ip of the server.
|
||||
primary_port: primary port of the server.
|
||||
local_ip: local ip used in this connection.
|
||||
local_port: local port used in this connection.
|
||||
charset_encoding: encoding specified by the Content-Type header.
|
||||
default_encoding: encoding for decoding response content if charset is not found
|
||||
in headers. Defaults to "utf-8". Can be set to a callable for automatic
|
||||
detection.
|
||||
redirect_count: how many redirects happened.
|
||||
redirect_url: the final redirected url.
|
||||
http_version: http version used.
|
||||
history: history redirections, only headers are available.
|
||||
"""
|
||||
|
||||
def __init__(self, curl: Optional[Curl] = None, request: Optional[Request] = None):
    """Create a response holding placeholder values.

    Only ``curl`` and ``request`` come from the caller; every other attribute
    starts at the neutral default assigned below.
    """
    self.curl, self.request = curl, request

    # Status line, body and metadata: start out as an empty "200 OK".
    self.url = ""
    self.content = b""
    self.status_code = 200
    self.reason = "OK"
    self.ok = True
    self.headers = Headers()
    self.cookies = Cookies()
    self.elapsed = 0.0
    self.default_encoding: Union[str, Callable[[bytes], str]] = "utf-8"

    # Redirect and protocol information.
    self.redirect_count = 0
    self.redirect_url = ""
    self.http_version = 0

    # Connection endpoints.
    self.primary_ip: str = ""
    self.primary_port: int = 0
    self.local_ip: str = ""
    self.local_port: int = 0

    self.history: list[dict[str, Any]] = []
    self.infos: dict[str, Any] = {}

    # Streaming state; all unset until something assigns them.
    self.queue: Optional[queue.Queue] = None
    self.stream_task: Optional[Future] = None
    self.astream_task: Optional[Awaitable] = None
    self.quit_now = None
|
||||
|
||||
@property
def charset(self) -> str:
    """Same value as :attr:`encoding`; provided as an alias."""
    return self.encoding
|
||||
|
||||
@property
def encoding(self) -> str:
    """
    Encoding used to turn the byte content into text.

    Resolution order:
        1. A value explicitly assigned to ``.encoding`` (or resolved earlier).
        2. The ``charset`` parameter of the ``Content-Type`` header.
        3. ``default_encoding``: used directly when it is a string, or called
           with the content when it is a callable.

    ``"utf-8"`` is used if none of these yields a value. The result is cached
    on first access.
    """
    if hasattr(self, "_encoding"):
        return self._encoding

    resolved = self.charset_encoding
    if resolved is None:
        if isinstance(self.default_encoding, str):
            resolved = self.default_encoding
        elif callable(self.default_encoding):
            resolved = self.default_encoding(self.content)
    self._encoding = resolved or "utf-8"
    return self._encoding

@encoding.setter
def encoding(self, value: str) -> None:
    # Once ``text`` has been decoded and cached, a new encoding could no
    # longer take effect, so the change is refused.
    text_already_decoded = hasattr(self, "_text")
    if text_already_decoded:
        raise ValueError("Cannot set encoding after text has been accessed")
    self._encoding = value
|
||||
|
||||
@property
def charset_encoding(self) -> Optional[str]:
    """Return the charset named in the ``Content-Type`` header, or ``None``."""
    content_type = self.headers.get("Content-Type")
    if not content_type:
        return None

    found = CHARSET_RE.search(content_type)
    if not found:
        return None
    return found.group(1)
|
||||
|
||||
@property
def text(self) -> str:
    """Return the content decoded to ``str``; the result is cached in ``_text``."""
    if not hasattr(self, "_text"):
        # Empty content short-circuits to an empty string without decoding.
        self._text = self._decode(self.content) if self.content else ""
    return self._text
|
||||
|
||||
def markdown(self) -> str:
    """Render the response content as Markdown.

    The content is parsed with ``rd.Document``; its title and partial-HTML
    summary are wrapped in ``<h1>`` / ``<main>`` and passed to ``md``.
    """
    document = rd.Document(self.content)
    heading = document.title()
    main_html = document.summary(html_partial=True)
    return md(f"<h1>{heading}</h1><main>{main_html}</main>")
|
||||
|
||||
def _decode(self, content: bytes) -> str:
    """Decode ``content`` to text using ``self.encoding``.

    Undecodable bytes are replaced rather than raising. If the configured
    encoding cannot be used (for example, an unknown codec name), decoding
    falls back to ``utf-8-sig``.

    Args:
        content: raw bytes to decode.

    Returns:
        The decoded string.
    """
    try:
        return content.decode(self.encoding, errors="replace")
    except (UnicodeDecodeError, LookupError):
        # The fallback must be as lenient as the primary path: without
        # errors="replace", a bogus charset combined with non-UTF-8 bytes
        # would raise UnicodeDecodeError from here.
        return content.decode("utf-8-sig", errors="replace")
|
||||
|
||||
def raise_for_status(self):
    """Raise ``HTTPError`` when ``self.ok`` is false; otherwise do nothing."""
    if self.ok:
        return
    raise HTTPError(f"HTTP Error {self.status_code}: {self.reason}", 0, self)
|
||||
|
||||
def iter_lines(self, chunk_size=None, decode_unicode=False, delimiter=None):
    """
    Yield the streamed content one line at a time.

    Chunks come from ``iter_content``. Each chunk is split on ``delimiter``
    when one is given, otherwise with ``splitlines()``. A trailing piece that
    may be an incomplete line is held back and prepended to the next chunk.

    Copied from: https://requests.readthedocs.io/en/latest/_modules/requests/models/
    which is under the License: Apache 2.0
    """
    carry = None

    for chunk in self.iter_content(
        chunk_size=chunk_size, decode_unicode=decode_unicode
    ):
        if carry is not None:
            chunk = carry + chunk

        if delimiter:
            lines = chunk.split(delimiter)
        else:
            lines = chunk.splitlines()

        # If the last piece ends exactly where the chunk ends, it was not
        # terminated by a line break, so keep it for the next round.
        if lines and lines[-1] and chunk and lines[-1][-1] == chunk[-1]:
            carry = lines.pop()
        else:
            carry = None

        yield from lines

    if carry is not None:
        yield carry
|
||||
|
||||
def iter_content(self, chunk_size=None, decode_unicode=False):
    """
    Yield the streamed content chunk by chunk, as bytes.

    ``chunk_size`` is accepted but ignored (a warning is emitted), and
    ``decode_unicode`` is not implemented. Items are taken from ``self.queue``
    until the ``STREAM_END`` marker arrives. A queued ``RequestException``
    resets the curl handle and is re-raised.
    """
    if chunk_size:
        warnings.warn(
            "chunk_size is ignored, there is no way to tell curl that.",
            CurlCffiWarning,
            stacklevel=2,
        )
    if decode_unicode:
        raise NotImplementedError()

    assert self.queue and self.curl, "stream mode is not enabled."

    while True:
        item = self.queue.get()

        # An exception placed on the queue is surfaced to the consumer.
        if isinstance(item, RequestException):
            self.curl.reset()
            raise item

        # The end-of-stream marker terminates the generator.
        if item is STREAM_END:
            return

        yield item
|
||||
|
||||
def json(self, **kw):
    """Parse the content as JSON; keyword arguments are forwarded to ``loads``."""
    payload = self.content
    return loads(payload, **kw)
|
||||
|
||||
def close(self):
    """Close the streaming connection, only valid in stream mode.

    Sets ``quit_now`` when present, then blocks on ``stream_task.result()``
    when a stream task exists.
    """
    stop_signal = self.quit_now
    if stop_signal:
        stop_signal.set()

    worker = self.stream_task
    if worker:
        worker.result()
|
||||
|
||||
async def aiter_lines(self, chunk_size=None, decode_unicode=False, delimiter=None):
    """
    Asynchronously yield the streamed content one line at a time.

    Chunks come from ``aiter_content``. Each chunk is split on ``delimiter``
    when one is given, otherwise with ``splitlines()``. A trailing piece that
    may be an incomplete line is held back and prepended to the next chunk.

    Copied from: https://requests.readthedocs.io/en/latest/_modules/requests/models/
    which is under the License: Apache 2.0
    """
    carry = None

    async for chunk in self.aiter_content(
        chunk_size=chunk_size, decode_unicode=decode_unicode
    ):
        if carry is not None:
            chunk = carry + chunk

        if delimiter:
            lines = chunk.split(delimiter)
        else:
            lines = chunk.splitlines()

        # If the last piece ends exactly where the chunk ends, it was not
        # terminated by a line break, so keep it for the next round.
        if lines and lines[-1] and chunk and lines[-1][-1] == chunk[-1]:
            carry = lines.pop()
        else:
            carry = None

        for line in lines:
            yield line

    if carry is not None:
        yield carry
|
||||
|
||||
async def aiter_content(self, chunk_size=None, decode_unicode=False):
    """
    Asynchronously yield the streamed content chunk by chunk, as bytes.

    ``chunk_size`` is accepted but ignored (a warning is emitted), and
    ``decode_unicode`` is not implemented. Items are awaited from
    ``self.queue`` until the ``STREAM_END`` marker arrives. ``aclose()`` is
    awaited both at end of stream and before a queued ``RequestException`` is
    re-raised.
    """
    if chunk_size:
        warnings.warn(
            "chunk_size is ignored, there is no way to tell curl that.",
            CurlCffiWarning,
            stacklevel=2,
        )
    if decode_unicode:
        raise NotImplementedError()

    assert self.queue and self.curl, "stream mode is not enabled."

    while True:
        item = await self.queue.get()

        # An exception placed on the queue is surfaced to the consumer.
        if isinstance(item, RequestException):
            await self.aclose()
            raise item

        # The end-of-stream marker terminates the generator.
        if item is STREAM_END:
            await self.aclose()
            return

        yield item
|
||||
|
||||
async def atext(self) -> str:
    """
    Await the whole streamed body and return it decoded via ``_decode``.
    """
    decode = self._decode
    raw = await self.acontent()
    return decode(raw)
|
||||
|
||||
async def acontent(self) -> bytes:
    """Drain ``aiter_content()`` and return all chunks joined into one bytes object."""
    pieces = [piece async for piece in self.aiter_content()]
    return b"".join(pieces)
|
||||
|
||||
async def aclose(self):
    """Close the streaming connection, only valid in stream mode.

    Awaits ``astream_task`` when one is set; otherwise does nothing.
    """
    pending_task = self.astream_task
    if pending_task:
        await pending_task
|
||||
|
||||
# It prints the status code of the response instead of the object's memory location.
|
||||
def __repr__(self) -> str:
    """Return ``<Response [status_code]>``."""
    return "<Response [{}]>".format(self.status_code)
|
||||
+1112
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,698 @@
|
||||
from __future__ import annotations
|
||||
|
||||
__all__ = ["HttpVersionLiteral", "set_curl_options", "not_set"]
|
||||
|
||||
|
||||
import asyncio
|
||||
import math
|
||||
import queue
|
||||
import warnings
|
||||
from collections import Counter
|
||||
from io import BytesIO
|
||||
from json import dumps
|
||||
from typing import TYPE_CHECKING, Any, Callable, Final, Literal, Optional, Union, cast
|
||||
from urllib.parse import ParseResult, parse_qsl, quote, urlencode, urljoin, urlparse
|
||||
|
||||
from ..const import CurlHttpVersion, CurlOpt, CurlSslVersion
|
||||
from ..curl import CURL_WRITEFUNC_ERROR, CurlMime
|
||||
from ..utils import CurlCffiWarning
|
||||
from .cookies import Cookies
|
||||
from .exceptions import ImpersonateError, InvalidURL
|
||||
from .headers import Headers
|
||||
from .impersonate import (
|
||||
TLS_CIPHER_NAME_MAP,
|
||||
TLS_EC_CURVES_MAP,
|
||||
TLS_VERSION_MAP,
|
||||
ExtraFingerprints,
|
||||
normalize_browser_type,
|
||||
toggle_extension,
|
||||
)
|
||||
from .models import Request
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from ..curl import Curl
|
||||
from .cookies import CookieTypes
|
||||
from .headers import HeaderTypes
|
||||
from .impersonate import BrowserTypeLiteral, ExtraFpDict
|
||||
from .session import ProxySpec
|
||||
|
||||
|
||||
# HTTP method names accepted as the ``method`` argument of ``set_curl_options``.
HttpMethod = Literal[
    "GET", "POST", "PUT", "DELETE", "OPTIONS", "HEAD", "TRACE", "PATCH", "QUERY"
]

# String aliases for HTTP versions; ``normalize_http_version`` maps each of
# them to a ``CurlHttpVersion`` member.
HttpVersionLiteral = Literal["v1", "v2", "v2tls", "v2_prior_knowledge", "v3", "v3only"]

# Characters that ``quote_path_and_params`` leaves unescaped, minus whatever
# the caller passes in ``quote_str``.
SAFE_CHARS = set("!#$%&'()*+,/:;=?@[]~")

# Unique sentinel object; used as the default of ``timeout`` in
# ``set_curl_options`` so that it can be told apart from an explicit ``None``.
not_set: Final[Any] = object()
|
||||
|
||||
|
||||
# ruff: noqa: SIM116
|
||||
def normalize_http_version(
    version: Union[CurlHttpVersion, HttpVersionLiteral],
) -> CurlHttpVersion:
    """Translate a string alias such as ``"v2"`` into a ``CurlHttpVersion`` member.

    Any value that is not one of the known aliases is returned unchanged.
    """
    member_by_alias = {
        "v1": "V1_1",
        "v3": "V3",
        "v3only": "V3ONLY",
        "v2": "V2_0",
        "v2tls": "V2TLS",
        "v2_prior_knowledge": "V2_PRIOR_KNOWLEDGE",
    }
    if isinstance(version, str) and version in member_by_alias:
        return getattr(CurlHttpVersion, member_by_alias[version])

    return version  # type: ignore
|
||||
|
||||
|
||||
def is_absolute_url(url: str) -> bool:
    """Return ``True`` when ``url`` has both a scheme and a hostname."""
    parts = urlparse(url)
    return bool(parts.scheme) and bool(parts.hostname)
|
||||
|
||||
|
||||
def quote_path_and_params(url: str, quote_str: str = ""):
    """Percent-encode the path and query string of ``url``.

    Characters in ``SAFE_CHARS`` are left as they are, except those listed in
    ``quote_str``, which are escaped as well. Scheme, netloc, params and
    fragment are passed through unchanged.
    """
    safe = "".join(ch for ch in SAFE_CHARS if ch not in set(quote_str))
    parts = urlparse(url)
    query_pairs = parse_qsl(parts.query, keep_blank_values=True)
    return parts._replace(
        path=quote(parts.path, safe=safe),
        query=urlencode(query_pairs, doseq=True, safe=safe),
    ).geturl()
|
||||
|
||||
|
||||
def update_url_params(url: str, params: Union[dict, list, tuple]) -> str:
    """Merge ``params`` into the query string of ``url``.

    A key that occurs exactly once in the existing query and exactly once in
    ``params`` has its value replaced in place; every other pair is appended.
    ``bool`` and ``dict`` values are serialised with ``json.dumps`` first.

    Args:
        url: string of target URL
        params: mapping, or sequence of ``(key, value)`` pairs, to merge in

    Returns:
        string with updated URL

    >>> url = 'http://stackoverflow.com/test?answers=true'
    >>> new_params = {'answers': False, 'data': ['some','values']}
    >>> update_url_params(url, new_params)
    'http://stackoverflow.com/test?answers=false&data=some&data=values'
    """
    # No unquoting here; the URL is re-quoted later by the caller.
    target = urlparse(url)

    # parse_qsl returns a list of pairs, so repeated keys are preserved.
    query_pairs = parse_qsl(target.query, keep_blank_values=True)
    existing_counts = Counter(pair[0] for pair in query_pairs)

    if isinstance(params, dict):
        params = list(params.items())
    incoming_counts = Counter(item[0] for item in params)

    for key, value in params:
        # Bool and dict values are converted to their JSON text form.
        if isinstance(value, (bool, dict)):
            value = dumps(value)

        one_to_one = existing_counts.get(key) == 1 and incoming_counts.get(key) == 1
        if not one_to_one:
            query_pairs.append((key, value))
            continue

        # Exactly one old and one new occurrence: overwrite the old value.
        query_pairs = [
            ((key, value) if pair[0] == key else pair) for pair in query_pairs
        ]

    # Rebuild the URL with the merged query; all other components are kept.
    return target._replace(query=urlencode(query_pairs, doseq=True)).geturl()
|
||||
|
||||
|
||||
# Adapted from: https://github.com/psf/requests/blob/1ae6fc3137a11e11565ed22436aa1e77277ac98c/src%2Frequests%2Futils.py#L633-L682
|
||||
# License: Apache 2.0
|
||||
|
||||
# The unreserved URI characters (RFC 3986)
|
||||
UNRESERVED_SET = frozenset(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~"
)


def unquote_unreserved(uri: str) -> str:
    """Decode percent-escapes that stand for unreserved characters only.

    Escapes of any other character (reserved, illegal, non-ASCII) stay
    encoded. Raises ``InvalidURL`` when an alphanumeric two-character escape
    is not valid hexadecimal.
    """
    head, *escaped = uri.split("%")
    pieces = [head]
    for segment in escaped:
        hex_pair = segment[0:2]
        if len(hex_pair) == 2 and hex_pair.isalnum():
            try:
                char = chr(int(hex_pair, 16))
            except ValueError as e:
                raise InvalidURL(
                    f"Invalid percent-escape sequence: '{hex_pair}'"
                ) from e

            if char in UNRESERVED_SET:
                pieces.append(char + segment[2:])
                continue
        # Not a decodable unreserved escape: put the '%' back untouched.
        pieces.append(f"%{segment}")
    return "".join(pieces)
|
||||
|
||||
|
||||
def requote_uri(uri: str) -> str:
    """Re-quote the given URI.

    The URI goes through an unquote/quote cycle so that it ends up fully and
    consistently quoted.
    """
    safe_with_percent = "!#$%&'()*+,/:;=?@[]~|"
    safe_without_percent = safe_with_percent.replace("%", "")
    try:
        # Un-escape only the unreserved characters first.
        normalized = unquote_unreserved(uri)
    except InvalidURL:
        # The URI could not be unquoted, so it may hold stray '%' signs.
        # Quote those too, so that they cannot cause trouble later on.
        return quote(uri, safe=safe_without_percent)
    # Quote only illegal characters; reserved, unreserved and '%' are kept.
    return quote(normalized, safe=safe_with_percent)
|
||||
|
||||
|
||||
# TODO: should we move this function to headers.py?
|
||||
def update_header_line(
    header_lines: list[str], key: str, value: str, replace: bool = False
):
    """Ensure ``header_lines`` has a ``key: value`` line; the list is edited in place.

    The first line starting with ``key:`` (case-insensitive) counts as a
    match. It is overwritten only when ``replace`` is true. When nothing
    matches, a new line is appended.
    """
    prefix = key.lower() + ":"
    for idx, line in enumerate(header_lines):
        if not line.lower().startswith(prefix):
            continue
        if replace:
            header_lines[idx] = f"{key}: {value}"
        return
    header_lines.append(f"{key}: {value}")
|
||||
|
||||
|
||||
def peek_queue(q: queue.Queue, default=None):
    """Return the first item of ``q`` without removing it, or ``default`` if empty."""
    try:
        head = q.queue[0]
    except IndexError:
        return default
    return head
|
||||
|
||||
|
||||
def peek_aio_queue(q: asyncio.Queue, default=None):
    """Return the first item of the asyncio queue without removing it, or ``default`` if empty."""
    try:
        head = q._queue[0]  # type: ignore
    except IndexError:
        return default
    return head
|
||||
|
||||
|
||||
def toggle_extensions_by_ids(curl: Curl, extension_ids):
    """Turn extensions on or off so the enabled ones match ``extension_ids``.

    Ids requested but not in the default-enabled set are switched on; ids in
    the default-enabled set but not requested are switched off.
    ``extension_ids`` must support set difference with a ``set``.
    """
    # TODO: find a better representation, rather than magic numbers
    default_enabled = {0, 10, 11, 13, 16, 23, 35, 43, 45, 51, 65281}

    for ext_id in extension_ids - default_enabled:
        toggle_extension(curl, ext_id, enable=True)

    for ext_id in default_enabled - extension_ids:
        toggle_extension(curl, ext_id, enable=False)
|
||||
|
||||
|
||||
def set_ja3_options(curl: Curl, ja3: str, permute: bool = False):
    """
    Apply a JA3 fingerprint string to ``curl``.

    The string must hold five comma-separated fields: TLS version, ciphers,
    extensions, curves and curve formats; list fields are ``-`` separated.
    Unknown ciphers raise ``ImpersonateError``. The extension order is only
    pinned when ``permute`` is false.

    Detailed explanation: https://engineering.salesforce.com/tls-fingerprinting-with-ja3-and-ja3s-247362855967/
    """
    tls_version, ciphers, extensions, curves, curve_formats = ja3.split(",")

    ssl_version = TLS_VERSION_MAP[int(tls_version)]
    curl.setopt(CurlOpt.SSLVERSION, ssl_version | CurlSslVersion.MAX_DEFAULT)
    assert ssl_version == CurlSslVersion.TLSv1_2, "Only TLS v1.2 works for now."

    # Translate numeric cipher ids into the names curl expects.
    resolved_ciphers = []
    for raw_id in ciphers.split("-"):
        cipher_id = int(raw_id)
        name = TLS_CIPHER_NAME_MAP.get(cipher_id)
        if not name:
            raise ImpersonateError(f"Cipher {hex(cipher_id)} is not found")
        resolved_ciphers.append(name)

    curl.setopt(CurlOpt.SSL_CIPHER_LIST, ":".join(resolved_ciphers))

    # A trailing padding extension (21) is dropped before further processing.
    if extensions.endswith("-21"):
        extensions = extensions[:-3]
        warnings.warn(
            "Padding(21) extension found in ja3 string, whether to add it should "
            "be managed by the SSL engine. The TLS client hello packet may contain "
            "or not contain this extension, any of which should be correct.",
            CurlCffiWarning,
            stacklevel=1,
        )
    toggle_extensions_by_ids(curl, {int(ext) for ext in extensions.split("-")})

    if not permute:
        curl.setopt(CurlOpt.TLS_EXTENSION_ORDER, extensions)

    # Translate numeric curve ids into names; an unknown id raises KeyError.
    curve_names = [TLS_EC_CURVES_MAP[int(curve)] for curve in curves.split("-")]
    curl.setopt(CurlOpt.SSL_EC_CURVES, ":".join(curve_names))

    assert int(curve_formats) == 0, "Only curve_formats == 0 is supported."
|
||||
|
||||
|
||||
def set_akamai_options(curl: Curl, akamai: str):
    """
    Apply an Akamai HTTP/2 fingerprint string to ``curl``.

    The string must hold four ``|``-separated fields: settings, window update,
    streams and pseudo-header order.

    Detailed explanation: https://www.blackhat.com/docs/eu-17/materials/eu-17-Shuster-Passive-Fingerprinting-Of-HTTP2-Clients-wp.pdf
    """
    settings, window_update, streams, header_order = akamai.split("|")

    # For compatibility with tls.peet.ws, which separates settings with commas.
    settings = ";".join(settings.split(","))

    curl.setopt(CurlOpt.HTTP_VERSION, CurlHttpVersion.V2_0)

    curl.setopt(CurlOpt.HTTP2_SETTINGS, settings)
    curl.setopt(CurlOpt.HTTP2_WINDOW_UPDATE, int(window_update))

    if streams != "0":
        curl.setopt(CurlOpt.HTTP2_STREAMS, streams)

    # m,a,s,p -> masp
    # curl-impersonate only accepts masp format, without commas.
    compact_order = "".join(header_order.split(","))
    curl.setopt(CurlOpt.HTTP2_PSEUDO_HEADERS_ORDER, compact_order)
|
||||
|
||||
|
||||
def set_extra_fp(curl: Curl, fp: ExtraFingerprints):
    """Copy the fields of ``fp`` onto ``curl`` as the matching curl options.

    Signature algorithms, delegated credentials, record size limit and the
    no-priority flag are only set when their field is truthy. The remaining
    options are always set.
    """
    apply = curl.setopt

    if fp.tls_signature_algorithms:
        apply(CurlOpt.SSL_SIG_HASH_ALGS, ",".join(fp.tls_signature_algorithms))

    # Always applied.
    apply(CurlOpt.SSLVERSION, fp.tls_min_version | CurlSslVersion.MAX_DEFAULT)
    apply(CurlOpt.TLS_GREASE, int(fp.tls_grease))
    apply(CurlOpt.SSL_PERMUTE_EXTENSIONS, int(fp.tls_permute_extensions))
    apply(CurlOpt.SSL_CERT_COMPRESSION, fp.tls_cert_compression)
    apply(CurlOpt.STREAM_WEIGHT, fp.http2_stream_weight)
    apply(CurlOpt.STREAM_EXCLUSIVE, fp.http2_stream_exclusive)

    # Applied only when the field is truthy.
    if fp.tls_delegated_credential:
        apply(CurlOpt.TLS_DELEGATED_CREDENTIALS, fp.tls_delegated_credential)
    if fp.tls_record_size_limit:
        apply(CurlOpt.TLS_RECORD_SIZE_LIMIT, fp.tls_record_size_limit)
    if fp.http2_no_priority:
        apply(CurlOpt.HTTP2_NO_PRIORITY, fp.http2_no_priority)
|
||||
|
||||
|
||||
def set_curl_options(
    curl: Curl,
    method: HttpMethod,
    url: str,
    *,
    params_list: list[Union[dict, list, tuple, None]] = [],  # noqa: B006
    base_url: Optional[str] = None,
    data: Optional[Union[dict[str, str], list[tuple], str, BytesIO, bytes]] = None,
    json: Optional[dict | list] = None,
    headers_list: list[Optional[HeaderTypes]] = [],  # noqa: B006
    cookies_list: list[Optional[CookieTypes]] = [],  # noqa: B006
    files: Optional[dict] = None,
    auth: Optional[tuple[str, str]] = None,
    timeout: Optional[Union[float, tuple[float, float], object]] = not_set,
    allow_redirects: Optional[bool] = True,
    max_redirects: Optional[int] = 30,
    proxies_list: list[Optional[ProxySpec]] = [],  # noqa: B006
    proxy: Optional[str] = None,
    proxy_auth: Optional[tuple[str, str]] = None,
    verify_list: list[Union[bool, str, None]] = [],  # noqa: B006
    referer: Optional[str] = None,
    accept_encoding: Optional[str] = "gzip, deflate, br, zstd",
    content_callback: Optional[Callable] = None,
    impersonate: Optional[Union[BrowserTypeLiteral, str]] = None,
    ja3: Optional[str] = None,
    akamai: Optional[str] = None,
    extra_fp: Optional[Union[ExtraFingerprints, ExtraFpDict]] = None,
    default_headers: bool = True,
    quote: Union[str, Literal[False]] = "",
    http_version: Optional[Union[CurlHttpVersion, HttpVersionLiteral]] = None,
    interface: Optional[str] = None,
    cert: Optional[Union[str, tuple[str, str]]] = None,
    stream: Optional[bool] = None,
    max_recv_speed: int = 0,
    multipart: Optional[CurlMime] = None,
    queue_class: Any = None,
    event_class: Any = None,
    curl_options: Optional[dict[CurlOpt, str]] = None,
):
    """Translate request arguments into ``setopt`` calls on ``curl``.

    The ``*_list`` arguments are each unpacked into exactly two values: a
    base value followed by a per-call value (presumably session-level and
    request-level -- confirm against the callers).

    NOTE(review): the ``[]`` defaults of the ``*_list`` arguments cannot be
    unpacked into two names, so omitting any of them raises ``ValueError``.
    Callers must always pass two-element lists.

    Returns:
        A tuple ``(req, buffer, header_buffer, q, header_recved, quit_now)``:
        the ``Request`` built from the final URL, headers and method; the body
        ``BytesIO`` (``None`` in stream mode or when ``content_callback`` is
        given); the header ``BytesIO``; and the queue and two events created
        for stream mode (all ``None`` otherwise).

    Raises:
        TypeError: if ``data`` has an unsupported type, or both ``proxy`` and
            per-call proxies are given.
        NotImplementedError: if ``files`` is given.
        ImpersonateError: if ``curl.impersonate`` returns a non-zero code.
    """
    c = curl

    method = method.upper()  # type: ignore

    # method
    if method == "POST":
        c.setopt(CurlOpt.POST, 1)
    elif method != "GET":
        c.setopt(CurlOpt.CUSTOMREQUEST, method.encode())
    if method == "HEAD":
        c.setopt(CurlOpt.NOBODY, 1)

    # url: merge base params, then per-call params, then resolve against base_url
    base_params, params = params_list
    if base_params:
        url = update_url_params(url, base_params)
    if params:
        url = update_url_params(url, params)
    if base_url:
        url = urljoin(base_url, url)
    # A non-empty ``quote`` string lists extra characters to escape;
    # ``quote=False`` skips re-quoting altogether.
    if quote:
        url = quote_path_and_params(url, quote_str=quote)
    if quote is not False:
        url = requote_uri(url)
    c.setopt(CurlOpt.URL, url.encode())

    # data/body/json
    if isinstance(data, (dict, list, tuple)):
        body = urlencode(data).encode()
    elif isinstance(data, str):
        body = data.encode()
    elif isinstance(data, BytesIO):
        body = data.read()
    elif isinstance(data, bytes):
        body = data
    elif data is None:
        body = b""
    else:
        raise TypeError("data must be dict/list/tuple, str, BytesIO or bytes")
    # ``json`` takes precedence over ``data`` when both are given.
    if json is not None:
        body = dumps(json, separators=(",", ":")).encode()

    # Tell libcurl to be aware of bodies and related headers when,
    # 1. POST/PUT/PATCH, even if the body is empty, it's up to curl to decide what to do
    # 2. GET/DELETE with body, although it's against the RFC, some applications.
    #   e.g. Elasticsearch, use this.
    if body or method in ("POST", "PUT", "PATCH"):
        c.setopt(CurlOpt.POSTFIELDS, body)
        # necessary if body contains '\0'
        c.setopt(CurlOpt.POSTFIELDSIZE, len(body))
        if method == "GET":
            c.setopt(CurlOpt.CUSTOMREQUEST, method)

    # headers
    base_headers, headers = headers_list
    # let headers encoding take precedence over base headers encoding
    encoding = headers.encoding if isinstance(headers, Headers) else None
    h = Headers(base_headers, encoding=encoding)
    h.update(headers)

    # remove Host header if it's unnecessary, otherwise curl may get confused.
    # Host header will be automatically added by curl if it's not present.
    # https://github.com/lexiforest/curl_cffi/issues/119
    host_header = h.get("Host")
    if host_header is not None:
        u = urlparse(url)
        if host_header == u.netloc or host_header == u.hostname:
            h.pop("Host", None)

    # Make curl always include empty headers.
    # See: https://stackoverflow.com/a/32911474/1061155
    header_lines = []
    for k, v in h.multi_items():
        if v is None:
            header_lines.append(f"{k}:")  # Explicitly disable this header
        elif v == "":
            header_lines.append(f"{k};")  # Add an empty valued header
        else:
            header_lines.append(f"{k}: {v}")

    # Add content-type if missing
    if json is not None:
        update_header_line(header_lines, "Content-Type", "application/json")
    if isinstance(data, dict) and method != "POST":
        update_header_line(
            header_lines, "Content-Type", "application/x-www-form-urlencoded"
        )
    if isinstance(data, (str, bytes)):
        update_header_line(header_lines, "Content-Type", "application/octet-stream")

    # Never send `Expect` header.
    update_header_line(header_lines, "Expect", "", replace=True)

    # The comprehension variable ``h`` is scoped to the comprehension and does
    # not clobber the ``Headers`` object ``h`` used right below.
    c.setopt(CurlOpt.HTTPHEADER, [h.encode() for h in header_lines])

    req = Request(url, h, method)

    # cookies
    c.setopt(CurlOpt.COOKIEFILE, b"")  # always enable the curl cookie engine first
    c.setopt(CurlOpt.COOKIELIST, "ALL")  # remove all the old cookies first.

    base_cookies, cookies = cookies_list

    if base_cookies:
        for morsel in base_cookies.get_cookies_for_curl(req):  # type: ignore
            curl.setopt(CurlOpt.COOKIELIST, morsel.to_curl_format())
    if cookies:
        temp_cookies = Cookies(cookies)
        for morsel in temp_cookies.get_cookies_for_curl(req):
            curl.setopt(CurlOpt.COOKIELIST, morsel.to_curl_format())

    # files
    if files:
        raise NotImplementedError(
            "files is not supported, use `multipart`. See examples here: "
            "https://github.com/lexiforest/curl_cffi/blob/main/examples/upload.py"
        )

    # multipart
    if multipart:
        # multipart overrides postfields; ``data`` entries are added as parts
        for k, v in cast(dict, data or {}).items():
            multipart.addpart(name=k, data=v.encode() if isinstance(v, str) else v)
        c.setopt(CurlOpt.MIMEPOST, multipart._form)

    # auth
    if auth:
        username, password = auth
        c.setopt(CurlOpt.USERNAME, username.encode())  # pyright: ignore [reportPossiblyUnboundVariable=none]
        c.setopt(CurlOpt.PASSWORD, password.encode())  # pyright: ignore [reportPossiblyUnboundVariable=none]

    # timeout
    if timeout is None:
        timeout = 0  # indefinitely

    # A value that is neither a tuple nor a number (e.g. the ``not_set``
    # sentinel) leaves the timeout options untouched.
    if isinstance(timeout, tuple):
        connect_timeout, read_timeout = timeout
        all_timeout = connect_timeout + read_timeout
        c.setopt(CurlOpt.CONNECTTIMEOUT_MS, int(connect_timeout * 1000))
        if not stream:
            c.setopt(CurlOpt.TIMEOUT_MS, int(all_timeout * 1000))
        else:
            # trick from: https://github.com/lexiforest/curl_cffi/issues/156
            c.setopt(CurlOpt.LOW_SPEED_LIMIT, 1)
            c.setopt(CurlOpt.LOW_SPEED_TIME, math.ceil(all_timeout))

    elif isinstance(timeout, (int, float)):
        if not stream:
            c.setopt(CurlOpt.TIMEOUT_MS, int(timeout * 1000))
        else:
            c.setopt(CurlOpt.CONNECTTIMEOUT_MS, int(timeout * 1000))
            c.setopt(CurlOpt.LOW_SPEED_LIMIT, 1)
            c.setopt(CurlOpt.LOW_SPEED_TIME, math.ceil(timeout))

    # allow_redirects
    c.setopt(CurlOpt.FOLLOWLOCATION, int(allow_redirects))  # type: ignore

    # max_redirects
    c.setopt(CurlOpt.MAXREDIRS, max_redirects)

    # proxies
    base_proxies, proxies = proxies_list
    if proxy and proxies:
        raise TypeError("Cannot specify both 'proxy' and 'proxies'")
    if proxy:
        proxies = {"all": proxy}
    if proxies is None:
        proxies = base_proxies

    if proxies:
        # Turn on proxy_credential_no_reuse, which has the following benefits:
        # 1. New connection will be made when proxy username changed
        # 2. New TLS session will be created based on proxy address, i.e. when accessing
        #    the same site with different proxies, TLS session won't leak previous IP.
        c.setopt(CurlOpt.PROXY_CREDENTIAL_NO_REUSE, 1)

        # Lookup order: "scheme://host", "all://host", then scheme, then "all".
        parts = urlparse(url)
        proxy = cast(Optional[str], proxies.get(parts.scheme, proxies.get("all")))
        if parts.hostname:
            proxy = (
                proxies.get(  # type: ignore
                    f"{parts.scheme}://{parts.hostname}",
                    proxies.get(f"all://{parts.hostname}"),
                )
                or proxy
            )

        if proxy is not None:
            c.setopt(CurlOpt.PROXY, proxy)

            if parts.scheme == "https":
                if proxy.startswith("https://"):
                    warnings.warn(
                        "Make sure you are using https over https proxy, otherwise, "
                        "the proxy prefix should be 'http://' not 'https://', "
                        "see: https://github.com/lexiforest/curl_cffi/issues/6",
                        CurlCffiWarning,
                        stacklevel=2,
                    )
                # For https site with http tunnel proxy, tell curl to enable tunneling
                if not proxy.startswith("socks"):
                    c.setopt(CurlOpt.HTTPPROXYTUNNEL, 1)

            # proxy_auth
            # NOTE(review): the nesting of this section under
            # ``if proxy is not None`` was reconstructed from a listing whose
            # indentation had been stripped -- confirm against the real file.
            if proxy_auth:
                username, password = proxy_auth
                c.setopt(CurlOpt.PROXYUSERNAME, username.encode())
                c.setopt(CurlOpt.PROXYPASSWORD, password.encode())

    # verify
    base_verify, verify = verify_list
    # Parsed as ``verify is False or ((not base_verify) and verify is None)``.
    if verify is False or not base_verify and verify is None:
        c.setopt(CurlOpt.SSL_VERIFYPEER, 0)
        c.setopt(CurlOpt.SSL_VERIFYHOST, 0)

    # cert for this single request
    if isinstance(verify, str):
        c.setopt(CurlOpt.CAINFO, verify)

    # cert for the session
    if verify in (None, True) and isinstance(base_verify, str):
        c.setopt(CurlOpt.CAINFO, base_verify)

    # referer
    if referer:
        c.setopt(CurlOpt.REFERER, referer.encode())

    # accept_encoding
    if accept_encoding is not None:
        c.setopt(CurlOpt.ACCEPT_ENCODING, accept_encoding.encode())

    # cert: either a single path, or a ``(cert, key)`` pair
    if cert:
        if isinstance(cert, str):
            c.setopt(CurlOpt.SSLCERT, cert)
        else:
            cert, key = cert
            c.setopt(CurlOpt.SSLCERT, cert)
            c.setopt(CurlOpt.SSLKEY, key)

    # impersonate
    if impersonate:
        impersonate = normalize_browser_type(impersonate)
        ret = c.impersonate(impersonate, default_headers=default_headers)  # type: ignore
        if ret != 0:
            raise ImpersonateError(f"Impersonating {impersonate} is not supported")

    # extra_fp options
    if extra_fp:
        if isinstance(extra_fp, dict):
            extra_fp = ExtraFingerprints(**extra_fp)
        if impersonate:
            warnings.warn(
                "Extra fingerprints was altered after impersonated version was set.",
                CurlCffiWarning,
                stacklevel=1,
            )
        set_extra_fp(c, extra_fp)

    # ja3 string
    if ja3:
        if impersonate:
            warnings.warn(
                "JA3 fingerprint was altered after impersonated version was set.",
                CurlCffiWarning,
                stacklevel=1,
            )
        permute = False
        if isinstance(extra_fp, ExtraFingerprints) and extra_fp.tls_permute_extensions:
            permute = True
        # NOTE(review): a non-empty dict was already converted to
        # ``ExtraFingerprints`` above and an empty dict has no such key, so
        # this branch looks unreachable -- confirm before removing.
        if isinstance(extra_fp, dict) and extra_fp.get("tls_permute_extensions"):
            permute = True
        set_ja3_options(c, ja3, permute=permute)

    # akamai string
    if akamai:
        if impersonate:
            warnings.warn(
                "Akamai fingerprint was altered after impersonated version was set.",
                CurlCffiWarning,
                stacklevel=1,
            )
        set_akamai_options(c, akamai)

    # http_version, after impersonate, which will change this to http2
    if http_version:
        http_version = normalize_http_version(http_version)
        c.setopt(CurlOpt.HTTP_VERSION, http_version)

    # Response body sink: a queue in stream mode, else the caller's callback,
    # else an in-memory buffer.
    buffer = None
    q = None
    header_recved = None
    quit_now = None
    if stream:
        q = queue_class()
        header_recved = event_class()
        quit_now = event_class()

        def qput(chunk):
            # The first body chunk also signals that the headers are complete.
            if not header_recved.is_set():
                header_recved.set()
            # Returning the error code once ``quit_now`` is set makes curl
            # stop calling this write function.
            if quit_now.is_set():
                return CURL_WRITEFUNC_ERROR
            q.put_nowait(chunk)
            return len(chunk)

        c.setopt(CurlOpt.WRITEFUNCTION, qput)
    elif content_callback is not None:
        c.setopt(CurlOpt.WRITEFUNCTION, content_callback)
    else:
        buffer = BytesIO()
        c.setopt(CurlOpt.WRITEDATA, buffer)
    # Response headers are always collected in memory.
    header_buffer = BytesIO()
    c.setopt(CurlOpt.HEADERDATA, header_buffer)

    # interface
    if interface:
        c.setopt(CurlOpt.INTERFACE, interface.encode())

    # max_recv_speed
    # do not check, since 0 is a valid value to disable it
    c.setopt(CurlOpt.MAX_RECV_SPEED_LARGE, max_recv_speed)

    # set extra options, after all others, because it will alter some options
    if curl_options:
        for option, setting in curl_options.items():
            c.setopt(option, setting)

    return req, buffer, header_buffer, q, header_recved, quit_now
|
||||
+839
@@ -0,0 +1,839 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import struct
|
||||
from enum import IntEnum
|
||||
from functools import partial
|
||||
from json import dumps, loads
|
||||
from select import select
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Callable,
|
||||
Literal,
|
||||
Optional,
|
||||
TypeVar,
|
||||
Union,
|
||||
)
|
||||
import warnings
|
||||
|
||||
from curl_cffi.utils import CurlCffiWarning
|
||||
|
||||
from ..aio import CURL_SOCKET_BAD, get_selector
|
||||
from ..const import CurlECode, CurlInfo, CurlOpt, CurlWsFlag
|
||||
from ..curl import Curl, CurlError
|
||||
from .exceptions import SessionClosed, Timeout
|
||||
from .utils import not_set, set_curl_options
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from typing_extensions import Self
|
||||
|
||||
from ..const import CurlHttpVersion
|
||||
from ..curl import CurlWsFrame
|
||||
from .cookies import CookieTypes
|
||||
from .headers import HeaderTypes
|
||||
from .impersonate import BrowserTypeLiteral, ExtraFingerprints, ExtraFpDict
|
||||
from .session import AsyncSession, ProxySpec
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
ON_DATA_T = Callable[["WebSocket", bytes, CurlWsFrame], None]
|
||||
ON_MESSAGE_T = Callable[["WebSocket", Union[bytes, str]], None]
|
||||
ON_ERROR_T = Callable[["WebSocket", CurlError], None]
|
||||
ON_OPEN_T = Callable[["WebSocket"], None]
|
||||
ON_CLOSE_T = Callable[["WebSocket", int, str], None]
|
||||
|
||||
|
||||
# We need a partial for dumps() because a custom function may not accept the parameter
|
||||
dumps = partial(dumps, separators=(",", ":"))
|
||||
|
||||
|
||||
class WsCloseCode(IntEnum):
    """Named WebSocket close status codes.

    See: https://www.iana.org/assignments/websocket/websocket.xhtml
    """

    # 1000-1015: protocol-level status codes
    OK = 1000
    GOING_AWAY = 1001
    PROTOCOL_ERROR = 1002
    UNSUPPORTED_DATA = 1003
    UNKNOWN = 1005
    ABNORMAL_CLOSURE = 1006
    INVALID_DATA = 1007
    POLICY_VIOLATION = 1008
    MESSAGE_TOO_BIG = 1009
    MANDATORY_EXTENSION = 1010
    INTERNAL_ERROR = 1011
    SERVICE_RESTART = 1012
    TRY_AGAIN_LATER = 1013
    BAD_GATEWAY = 1014
    TLS_HANDSHAKE = 1015

    # 3000 and above: codes from the registered 3xxx block
    UNAUTHORIZED = 3000
    FORBIDDEN = 3003
    TIMEOUT = 3008
|
||||
|
||||
|
||||
class WebSocketError(CurlError):
    """Error raised for WebSocket-level failures.

    ``code`` is either a WebSocket close code, a curl error code, or ``0``
    when no specific code applies.
    """

    def __init__(
        self,
        message: str,
        code: Union[WsCloseCode, CurlECode, Literal[0]] = 0,
    ):
        super().__init__(message, code)  # type: ignore
|
||||
|
||||
|
||||
class WebSocketClosed(WebSocketError, SessionClosed):
    """Raised when an operation is attempted on an already closed WebSocket."""
|
||||
|
||||
|
||||
class WebSocketTimeout(WebSocketError, Timeout):
    """Raised when a WebSocket operation does not complete in time."""
|
||||
|
||||
|
||||
async def aselect(
    fd,
    mode: Literal["read", "write"] = "read",
    *,
    loop: asyncio.AbstractEventLoop,
    timeout: Optional[float] = None,
) -> bool:
    """Wait on ``loop`` until ``fd`` becomes readable or writable.

    Args:
        fd: file descriptor to watch.
        mode: ``"read"`` to wait for readability, ``"write"`` for writability.
        loop: event loop used to register the watcher.
        timeout: seconds to wait; ``None`` waits indefinitely.

    Returns:
        ``True`` if the descriptor became ready, ``False`` on timeout.

    Raises:
        ValueError: if ``mode`` is neither ``"read"`` nor ``"write"``.
    """
    ready = loop.create_future()

    if mode == "read":
        register, unregister = loop.add_reader, loop.remove_reader
    elif mode == "write":
        register, unregister = loop.add_writer, loop.remove_writer
    else:
        raise ValueError(f"Invalid mode: {mode}. Must be 'read' or 'write'")

    register(fd, ready.set_result, None)
    # Runs on completion *and* on cancellation (timeout), so the watcher
    # never outlives this call.
    ready.add_done_callback(lambda _done: unregister(fd))

    try:
        await asyncio.wait_for(ready, timeout)
    except asyncio.TimeoutError:
        return False
    else:
        return True
|
||||
|
||||
|
||||
class BaseWebSocket:
    """State and helpers shared by the sync and async WebSocket classes."""

    def __init__(self, curl: Curl, *, autoclose: bool = True, debug: bool = False):
        """
        Args:
            curl: curl handle to use, or the ``not_set`` sentinel to create one
                lazily on first access of :attr:`curl`.
            autoclose: whether to close the WebSocket after receiving a close frame.
            debug: print extra curl debug info.
        """
        self._curl: Curl = curl
        self.autoclose: bool = autoclose
        self._close_code: Optional[int] = None
        self._close_reason: Optional[str] = None
        self.debug = debug
        self.closed = False

    @property
    def curl(self):
        """The underlying curl handle, created on first access if not supplied."""
        if self._curl is not_set:
            self._curl = Curl(debug=self.debug)
        return self._curl

    @property
    def close_code(self) -> Optional[int]:
        """The WebSocket close code, if the connection has been closed."""
        return self._close_code

    @property
    def close_reason(self) -> Optional[str]:
        """The WebSocket close reason, if the connection has been closed."""
        return self._close_reason

    @staticmethod
    def _pack_close_frame(code: int, reason: bytes) -> bytes:
        """Build a close frame payload: 2-byte big-endian code, then the reason."""
        return struct.pack("!H", code) + reason

    @staticmethod
    def _unpack_close_frame(frame: bytes) -> tuple[int, str]:
        """Split a close frame payload into ``(code, reason)``.

        A payload shorter than two bytes carries no status code and yields
        ``(WsCloseCode.UNKNOWN, "")``.

        Raises:
            WebSocketError: if the reason is not valid UTF-8, or the code is
                neither a known ``WsCloseCode`` (other than ``UNKNOWN``) nor in
                the 3000-4999 range.
        """
        if len(frame) < 2:
            return WsCloseCode.UNKNOWN, ""

        try:
            code = struct.unpack_from("!H", frame)[0]
            reason = frame[2:].decode()
        except UnicodeDecodeError as e:
            raise WebSocketError(
                "Invalid close message", WsCloseCode.INVALID_DATA
            ) from e
        except Exception as e:
            raise WebSocketError(
                "Invalid close frame", WsCloseCode.PROTOCOL_ERROR
            ) from e

        known = (
            code in WsCloseCode._value2member_map_ and code != WsCloseCode.UNKNOWN
        )
        # RFC 6455 section 7.4.2: 3000-3999 is for registered library and
        # application codes, 4000-4999 is for private use. Servers send these
        # legitimately, so they must not be treated as protocol errors even
        # though the enum does not list them.
        app_defined = 3000 <= code <= 4999
        if not (known or app_defined):
            raise WebSocketError(
                f"Invalid close code: {code}", WsCloseCode.PROTOCOL_ERROR
            )
        return code, reason

    def terminate(self):
        """Terminate the underlying connection."""
        self.closed = True
        self.curl.close()
|
||||
|
||||
|
||||
EventTypeLiteral = Literal["open", "close", "data", "message", "error"]
|
||||
|
||||
|
||||
class WebSocket(BaseWebSocket):
    """A WebSocket implementation using libcurl."""

    def __init__(
        self,
        curl: Union[Curl, Any] = not_set,
        *,
        autoclose: bool = True,
        skip_utf8_validation: bool = False,
        debug: bool = False,
        on_open: Optional[ON_OPEN_T] = None,
        on_close: Optional[ON_CLOSE_T] = None,
        on_data: Optional[ON_DATA_T] = None,
        on_message: Optional[ON_MESSAGE_T] = None,
        on_error: Optional[ON_ERROR_T] = None,
    ):
        """
        Args:
            curl: curl handle to use; one is created lazily when omitted.
            autoclose: whether to close the WebSocket after receiving a close frame.
            skip_utf8_validation: whether to skip UTF-8 validation for text frames in
                run_forever().
            debug: print extra curl debug info.

            on_open: open callback, ``def on_open(ws)``
            on_close: close callback, ``def on_close(ws, code, reason)``
            on_data: raw data receive callback, ``def on_data(ws, data, frame)``
            on_message: message receive callback, ``def on_message(ws, message)``
            on_error: error callback, ``def on_error(ws, exception)``
        """
        super().__init__(curl=curl, autoclose=autoclose, debug=debug)
        self.skip_utf8_validation = skip_utf8_validation
        # Loop flag of run_forever(); send() clears it when a close frame is
        # sent, so it has to exist before run_forever() is ever called.
        self.keep_running = False

        self._emitters: dict[EventTypeLiteral, Callable] = {}
        if on_open:
            self._emitters["open"] = on_open
        if on_close:
            self._emitters["close"] = on_close
        if on_data:
            self._emitters["data"] = on_data
        if on_message:
            self._emitters["message"] = on_message
        if on_error:
            self._emitters["error"] = on_error

    def __iter__(self) -> WebSocket:
        if self.closed:
            raise WebSocketClosed("WebSocket is closed")
        return self

    def __next__(self) -> bytes:
        msg, flags = self.recv()
        if flags & CurlWsFlag.CLOSE:
            raise StopIteration
        return msg

    def _emit(self, event_type: EventTypeLiteral, *args) -> None:
        """Invoke the callback registered for ``event_type``, if any.

        An exception raised by the callback is routed to the ``error``
        callback; without one (or when the ``error`` callback itself is the
        one that failed) a ``CurlCffiWarning`` is issued instead.
        """
        callback = self._emitters.get(event_type)
        if not callback:
            return

        try:
            callback(self, *args)
        except Exception as e:
            error_callback = self._emitters.get("error")
            # Do not re-enter a failing on_error handler with its own exception:
            # a second failure would escape and mask the original error.
            if error_callback and event_type != "error":
                error_callback(self, e)
            else:
                warnings.warn(
                    f"WebSocket callback '{event_type}' failed",
                    CurlCffiWarning,
                    stacklevel=2,
                )

    def connect(
        self,
        url: str,
        params: Optional[Union[dict, list, tuple]] = None,
        headers: Optional[HeaderTypes] = None,
        cookies: Optional[CookieTypes] = None,
        auth: Optional[tuple[str, str]] = None,
        timeout: Optional[Union[float, tuple[float, float], object]] = not_set,
        allow_redirects: bool = True,
        max_redirects: int = 30,
        proxies: Optional[ProxySpec] = None,
        proxy: Optional[str] = None,
        proxy_auth: Optional[tuple[str, str]] = None,
        verify: Optional[bool] = None,
        referer: Optional[str] = None,
        accept_encoding: Optional[str] = "gzip, deflate, br",
        impersonate: Optional[BrowserTypeLiteral] = None,
        ja3: Optional[str] = None,
        akamai: Optional[str] = None,
        extra_fp: Optional[Union[ExtraFingerprints, ExtraFpDict]] = None,
        default_headers: bool = True,
        quote: Union[str, Literal[False]] = "",
        http_version: Optional[CurlHttpVersion] = None,
        interface: Optional[str] = None,
        cert: Optional[Union[str, tuple[str, str]]] = None,
        max_recv_speed: int = 0,
        curl_options: Optional[dict[CurlOpt, str]] = None,
    ):
        """Connect to the WebSocket.

        libcurl automatically handles pings and pongs.
        ref: https://curl.se/libcurl/c/libcurl-ws.html

        Args:
            url: url for the requests.
            params: query string for the requests.
            headers: headers to send.
            cookies: cookies to use.
            auth: HTTP basic auth, a tuple of (username, password), only basic auth is
                supported.
            timeout: how many seconds to wait before giving up.
            allow_redirects: whether to allow redirection.
            max_redirects: max redirect counts, default 30, use -1 for unlimited.
            proxies: dict of proxies to use, prefer to use ``proxy`` if they are the
                same. format: ``{"http": proxy_url, "https": proxy_url}``.
            proxy: proxy to use, format: "http://user:pass@host:port".
                Can't be used with `proxies` parameter.
            proxy_auth: HTTP basic auth for proxy, a tuple of (username, password).
            verify: whether to verify https certs.
            referer: shortcut for setting referer header.
            accept_encoding: shortcut for setting accept-encoding header.
            impersonate: which browser version to impersonate.
            ja3: ja3 string to impersonate.
            akamai: akamai string to impersonate.
            extra_fp: extra fingerprints options, in complement to ja3 and akamai str.
            default_headers: whether to set default browser headers.
            quote: Set characters to be quoted, i.e. percent-encoded. Default safe
                string is ``!#$%&'()*+,/:;=?@[]~``. If set to a string, the character
                will be removed from the safe string, thus quoted. If set to False, the
                url will be kept as is, without any automatic percent-encoding, you must
                encode the URL yourself.
            http_version: limiting http version, defaults to http2.
            interface: which interface to use.
            cert: a tuple of (cert, key) filenames for client cert.
            max_recv_speed: maximum receive speed, bytes per second.
            curl_options: extra curl options to use.

        Returns:
            This WebSocket, so calls can be chained.
        """

        curl = self.curl

        set_curl_options(
            curl=curl,
            method="GET",
            url=url,
            params_list=[None, params],
            headers_list=[None, headers],
            cookies_list=[None, cookies],
            auth=auth,
            timeout=timeout,
            allow_redirects=allow_redirects,
            max_redirects=max_redirects,
            proxies_list=[None, proxies],
            proxy=proxy,
            proxy_auth=proxy_auth,
            verify_list=[None, verify],
            referer=referer,
            accept_encoding=accept_encoding,
            impersonate=impersonate,
            ja3=ja3,
            akamai=akamai,
            extra_fp=extra_fp,
            default_headers=default_headers,
            quote=quote,
            http_version=http_version,
            interface=interface,
            max_recv_speed=max_recv_speed,
            cert=cert,
            curl_options=curl_options,
        )

        # Magic number defined in: https://curl.se/docs/websocket.html
        curl.setopt(CurlOpt.CONNECT_ONLY, 2)
        curl.perform()
        return self

    def recv_fragment(self) -> tuple[bytes, CurlWsFrame]:
        """Receive a single curl websocket fragment as bytes.

        Raises:
            WebSocketClosed: if the WebSocket is already closed.
            WebSocketError: if a received close frame is malformed.
        """

        if self.closed:
            raise WebSocketClosed("WebSocket is already closed")

        chunk, frame = self.curl.ws_recv()

        if frame.flags & CurlWsFlag.CLOSE:
            try:
                self._close_code, self._close_reason = self._unpack_close_frame(chunk)
            except WebSocketError as e:
                # Follow the spec to close the connection
                # Errors do not respect autoclose
                self._close_code = e.code
                self.close(e.code)
                raise
            if self.autoclose:
                self.close()

        return chunk, frame

    def recv(self) -> tuple[bytes, int]:
        """
        Receive a frame as bytes. libcurl splits frames into fragments, so we have to
        collect all the chunks for a frame.

        Returns:
            A tuple of the joined payload and the flags of the last fragment.
        """
        chunks = []
        flags = 0

        sock_fd = self.curl.getinfo(CurlInfo.ACTIVESOCKET)
        if sock_fd == CURL_SOCKET_BAD:
            raise WebSocketError(
                "Invalid active socket", CurlECode.NO_CONNECTION_AVAILABLE
            )

        while True:
            try:
                # Try to receive the first fragment first
                chunk, frame = self.recv_fragment()
                flags = frame.flags
                chunks.append(chunk)
                if frame.bytesleft == 0 and flags & CurlWsFlag.CONT == 0:
                    break
            except CurlError as e:
                if e.code == CurlECode.AGAIN:
                    # According to https://curl.se/libcurl/c/curl_ws_recv.html
                    # > in real application: wait for socket here, e.g. using select()
                    _, _, _ = select([sock_fd], [], [], 0.5)
                else:
                    raise

        return b"".join(chunks), flags

    def recv_str(self) -> str:
        """Receive a text frame."""
        data, flags = self.recv()
        if not (flags & CurlWsFlag.TEXT):
            raise WebSocketError("Not valid text frame", WsCloseCode.INVALID_DATA)
        return data.decode()

    def recv_json(self, *, loads: Callable[[str], T] = loads) -> T:
        """Receive a JSON frame.

        Args:
            loads: JSON decoder, default is json.loads.
        """
        data = self.recv_str()
        return loads(data)

    def send(self, payload: Union[str, bytes], flags: CurlWsFlag = CurlWsFlag.BINARY):
        """Send a data frame.

        Args:
            payload: data to send.
            flags: flags for the frame.

        Returns:
            The number of payload bytes sent.

        Raises:
            WebSocketClosed: if the WebSocket is already closed.
            WebSocketError: if there is no active socket or the socket does not
                become writable in time.
        """
        if flags & CurlWsFlag.CLOSE:
            self.keep_running = False

        if self.closed:
            raise WebSocketClosed("WebSocket is already closed")

        # curl expects bytes
        if isinstance(payload, str):
            payload = payload.encode()

        sock_fd = self.curl.getinfo(CurlInfo.ACTIVESOCKET)
        if sock_fd == CURL_SOCKET_BAD:
            raise WebSocketError(
                "Invalid active socket", CurlECode.NO_CONNECTION_AVAILABLE
            )

        # Loop checks for CurlECode.Again
        # https://curl.se/libcurl/c/curl_ws_send.html
        # The body runs at least once so that an empty payload (e.g. an empty
        # ping or text message) still produces a frame on the wire.
        offset = 0
        while True:
            try:
                n_sent = self.curl.ws_send(payload[offset:], flags)
            except CurlError as e:
                if e.code == CurlECode.AGAIN:
                    _, writeable, _ = select([], [sock_fd], [], 0.5)
                    if not writeable:
                        raise WebSocketError("Socket write timeout") from e
                    continue
                raise

            offset += n_sent
            if offset >= len(payload):
                break

        return offset

    def send_binary(self, payload: bytes):
        """Send a binary frame.

        Args:
            payload: binary data to send.
        """
        return self.send(payload, CurlWsFlag.BINARY)

    def send_bytes(self, payload: bytes):
        """Send a binary frame, alias of :meth:`send_binary`.

        Args:
            payload: binary data to send.
        """
        return self.send(payload, CurlWsFlag.BINARY)

    def send_str(self, payload: str):
        """Send a text frame.

        Args:
            payload: text data to send.
        """
        return self.send(payload, CurlWsFlag.TEXT)

    def send_json(self, payload: Any, *, dumps: Callable[[Any], str] = dumps):
        """Send a JSON frame.

        Args:
            payload: data to send.
            dumps: JSON encoder, default is json.dumps.
        """
        return self.send_str(dumps(payload))

    def ping(self, payload: Union[str, bytes]):
        """Send a ping frame.

        Args:
            payload: data to send.
        """
        return self.send(payload, CurlWsFlag.PING)

    def run_forever(self, url: str = "", **kwargs):
        """Run the WebSocket forever. See :meth:`connect` for details on parameters.

        libcurl automatically handles pings and pongs.
        ref: https://curl.se/libcurl/c/libcurl-ws.html
        """

        if url:
            self.connect(url, **kwargs)

        sock_fd = self.curl.getinfo(CurlInfo.ACTIVESOCKET)
        if sock_fd == CURL_SOCKET_BAD:
            raise WebSocketError(
                "Invalid active socket", CurlECode.NO_CONNECTION_AVAILABLE
            )

        self._emit("open")

        # Keep reading the messages and invoke callbacks
        # TODO: Reconnect logic
        chunks = []
        self.keep_running = True
        while self.keep_running:
            try:
                chunk, frame = self.recv_fragment()
                flags = frame.flags
                self._emit("data", chunk, frame)

                chunks.append(chunk)
                if not (frame.bytesleft == 0 and flags & CurlWsFlag.CONT == 0):
                    continue

                # Avoid unnecessary computation
                if "message" in self._emitters:
                    # Concatenate collected chunks with the final message
                    msg = b"".join(chunks)

                    if (flags & CurlWsFlag.TEXT) and not self.skip_utf8_validation:
                        try:
                            msg = msg.decode()  # type: ignore
                        except UnicodeDecodeError as e:
                            self._close_code = WsCloseCode.INVALID_DATA
                            self.close(WsCloseCode.INVALID_DATA)
                            raise WebSocketError(
                                "Invalid UTF-8", WsCloseCode.INVALID_DATA
                            ) from e

                    if (flags & CurlWsFlag.BINARY) or (flags & CurlWsFlag.TEXT):
                        self._emit("message", msg)

                chunks = []  # Reset chunks for next message

                if flags & CurlWsFlag.CLOSE:
                    self.keep_running = False
                    self._emit("close", self._close_code or 0, self._close_reason or "")

            except CurlError as e:
                if e.code == CurlECode.AGAIN:
                    _, _, _ = select([sock_fd], [], [], 0.5)
                else:
                    self._emit("error", e)
                    if not self.closed:
                        code = WsCloseCode.UNKNOWN
                        if isinstance(e, WebSocketError):
                            code = e.code
                        self.close(code)
                    raise

    def close(self, code: int = WsCloseCode.OK, message: bytes = b""):
        """Close the connection.

        Does nothing when no curl handle was ever attached or created.

        Args:
            code: close code.
            message: close reason.
        """
        # Check the raw attribute: the ``curl`` property replaces the sentinel
        # with a fresh handle, so it can never be ``not_set`` itself.
        if self._curl is not_set:
            return

        # TODO: As per spec, we should wait for the server to close the connection
        # But this is not a requirement
        msg = self._pack_close_frame(code, message)
        self.send(msg, CurlWsFlag.CLOSE)
        # The only way to close the connection appears to be curl_easy_cleanup
        self.terminate()
|
||||
|
||||
|
||||
class AsyncWebSocket(BaseWebSocket):
    """An async WebSocket implementation using libcurl."""

    def __init__(
        self,
        session: AsyncSession,
        curl: Curl,
        *,
        autoclose: bool = True,
        debug: bool = False,
    ):
        """
        Args:
            session: the session this WebSocket belongs to.
            curl: curl handle to use.
            autoclose: whether to close the WebSocket after receiving a close frame.
            debug: print extra curl debug info.
        """
        super().__init__(curl=curl, autoclose=autoclose, debug=debug)
        self.session = session
        self._loop: Optional[asyncio.AbstractEventLoop] = None
        self._recv_lock = asyncio.Lock()
        self._send_lock = asyncio.Lock()

    @property
    def loop(self):
        """The selector loop for the running event loop, resolved on first use."""
        if self._loop is None:
            self._loop = get_selector(asyncio.get_running_loop())
        return self._loop

    def __aiter__(self) -> Self:
        if self.closed:
            raise WebSocketClosed("WebSocket has been closed")
        return self

    async def __anext__(self) -> bytes:
        msg, flags = await self.recv()
        if flags & CurlWsFlag.CLOSE:
            raise StopAsyncIteration
        return msg

    async def recv_fragment(
        self, *, timeout: Optional[float] = None
    ) -> tuple[bytes, CurlWsFrame]:
        """Receive a single frame as bytes.

        Args:
            timeout: how many seconds to wait before giving up.

        Raises:
            WebSocketClosed: if the WebSocket is already closed.
            TypeError: if another ``recv_fragment()`` call is in progress.
            WebSocketTimeout: if the receive call does not finish in time.
            WebSocketError: if a received close frame is malformed.
        """
        if self.closed:
            raise WebSocketClosed("WebSocket is closed")
        if self._recv_lock.locked():
            raise TypeError("Concurrent call to recv_fragment() is not allowed")

        async with self._recv_lock:
            try:
                chunk, frame = await asyncio.wait_for(
                    self.loop.run_in_executor(None, self.curl.ws_recv), timeout
                )
            except asyncio.TimeoutError as e:
                raise WebSocketTimeout("WebSocket recv_fragment() timed out") from e
            if frame.flags & CurlWsFlag.CLOSE:
                try:
                    code, message = self._close_code, self._close_reason = (
                        self._unpack_close_frame(chunk)
                    )
                except WebSocketError as e:
                    # Follow the spec to close the connection
                    # Errors do not respect autoclose
                    self._close_code = e.code
                    await self.close(e.code)
                    raise
                if self.autoclose:
                    await self.close(code, message.encode())

        return chunk, frame

    async def recv(self, *, timeout: Optional[float] = None) -> tuple[bytes, int]:
        """
        Receive a frame as bytes. libcurl splits frames into fragments, so we have to
        collect all the chunks for a frame.

        Args:
            timeout: how many seconds to wait before giving up.

        Raises:
            WebSocketTimeout: if no data arrives within ``timeout`` seconds.
            WebSocketError: if there is no active socket.
        """
        loop = self.loop
        chunks = []
        flags = 0

        sock_fd = await loop.run_in_executor(
            None, self.curl.getinfo, CurlInfo.ACTIVESOCKET
        )
        if sock_fd == CURL_SOCKET_BAD:
            raise WebSocketError(
                "Invalid active socket", CurlECode.NO_CONNECTION_AVAILABLE
            )
        while True:
            try:
                chunk, frame = await self.recv_fragment(timeout=timeout)
                flags = frame.flags
                chunks.append(chunk)
                if frame.bytesleft == 0 and flags & CurlWsFlag.CONT == 0:
                    break
            except CurlError as e:
                if e.code != CurlECode.AGAIN:
                    raise
                # No data yet: wait for the socket. Honour the timeout here,
                # otherwise an idle connection would loop forever and the
                # caller's timeout would never fire.
                readable = await aselect(sock_fd, loop=loop, timeout=timeout)
                if not readable:
                    raise WebSocketTimeout("WebSocket recv() timed out") from e

        return b"".join(chunks), flags

    async def recv_str(self, *, timeout: Optional[float] = None) -> str:
        """Receive a text frame.

        Args:
            timeout: how many seconds to wait before giving up.
        """
        data, flags = await self.recv(timeout=timeout)
        if not (flags & CurlWsFlag.TEXT):
            # The frame type is wrong, not the encoding; same message as the
            # synchronous WebSocket.recv_str().
            raise WebSocketError("Not valid text frame", WsCloseCode.INVALID_DATA)
        return data.decode()

    async def recv_json(
        self, *, loads: Callable[[str], T] = loads, timeout: Optional[float] = None
    ) -> T:
        """Receive a JSON frame.

        Args:
            loads: JSON decoder, default is json.loads.
            timeout: how many seconds to wait before giving up.
        """
        data = await self.recv_str(timeout=timeout)
        return loads(data)

    async def send(
        self, payload: Union[str, bytes], flags: CurlWsFlag = CurlWsFlag.BINARY
    ):
        """Send a data frame.

        Args:
            payload: data to send.
            flags: flags for the frame.

        Returns:
            The number of payload bytes sent.

        Raises:
            WebSocketClosed: if the WebSocket is already closed.
            WebSocketError: if there is no active socket or the socket does not
                become writable in time.
        """
        if self.closed:
            raise WebSocketClosed("WebSocket is closed")

        # curl expects bytes
        if isinstance(payload, str):
            payload = payload.encode()

        sock_fd = await self.loop.run_in_executor(
            None, self.curl.getinfo, CurlInfo.ACTIVESOCKET
        )
        if sock_fd == CURL_SOCKET_BAD:
            raise WebSocketError(
                "Invalid active socket", CurlECode.NO_CONNECTION_AVAILABLE
            )

        # TODO: Why does concurrently sending fail
        async with self._send_lock:
            offset = 0

            # Loop checks for CurlECode.Again
            # https://curl.se/libcurl/c/curl_ws_send.html
            # The body runs at least once so that an empty payload still
            # produces a frame on the wire.
            while True:
                try:
                    n_sent = await self.loop.run_in_executor(
                        None, self.curl.ws_send, payload[offset:], flags
                    )
                except CurlError as e:
                    if e.code == CurlECode.AGAIN:
                        writeable = await aselect(
                            sock_fd, mode="write", loop=self.loop, timeout=0.5
                        )
                        if not writeable:
                            raise WebSocketError("Socket write timeout") from e
                        continue
                    raise

                offset += n_sent
                if offset >= len(payload):
                    break

        return offset

    async def send_binary(self, payload: bytes):
        """Send a binary frame.

        Args:
            payload: binary data to send.
        """
        return await self.send(payload, CurlWsFlag.BINARY)

    async def send_bytes(self, payload: bytes):
        """Send a binary frame, alias of :meth:`send_binary`.

        Args:
            payload: binary data to send.
        """
        return await self.send(payload, CurlWsFlag.BINARY)

    async def send_str(self, payload: str):
        """Send a text frame.

        Args:
            payload: text data to send.
        """
        return await self.send(payload, CurlWsFlag.TEXT)

    async def send_json(self, payload: Any, *, dumps: Callable[[Any], str] = dumps):
        """Send a JSON frame.

        Args:
            payload: data to send.
            dumps: JSON encoder, default is json.dumps.
        """
        return await self.send_str(dumps(payload))

    async def ping(self, payload: Union[str, bytes]):
        """Send a ping frame.

        Args:
            payload: data to send.
        """
        return await self.send(payload, CurlWsFlag.PING)

    async def close(self, code: int = WsCloseCode.OK, message: bytes = b""):
        """Close the connection.

        Args:
            code: close code.
            message: close reason.
        """
        # TODO: As per spec, we should wait for the server to close the connection
        # But this is not a requirement
        msg = self._pack_close_frame(code, message)
        await self.send(msg, CurlWsFlag.CLOSE)
        # The only way to close the connection appears to be curl_easy_cleanup
        self.terminate()

    def terminate(self):
        """Terminate the underlying connection."""
        super().terminate()
        if not self.session._closed:
            # WebSocket curls CANNOT be reused
            self.session.push_curl(None)
|
||||
@@ -0,0 +1,16 @@
|
||||
import warnings
|
||||
|
||||
|
||||
class CurlCffiWarning(UserWarning, RuntimeWarning):
    """Warning category used for all warnings issued by this package."""
|
||||
|
||||
|
||||
def config_warnings(on: bool = False):
    """Turn ``CurlCffiWarning`` reporting on or off.

    Args:
        on: when true, install the ``"default"`` filter action for
            ``CurlCffiWarning``; otherwise install ``"ignore"``.
    """
    action = "default" if on else "ignore"
    warnings.simplefilter(action, category=CurlCffiWarning)
|
||||
|
||||
|
||||
def is_pro():
    """Report whether this is a "pro" build; this implementation always returns False."""
    return False
|
||||
Reference in New Issue
Block a user