std/sys/thread/
unix.rs

1#[cfg(not(any(
2    target_env = "newlib",
3    target_os = "l4re",
4    target_os = "emscripten",
5    target_os = "redox",
6    target_os = "hurd",
7    target_os = "aix",
8    target_os = "wasi",
9)))]
10use crate::ffi::CStr;
11use crate::mem::{self, DropGuard, ManuallyDrop};
12use crate::num::NonZero;
13#[cfg(all(target_os = "linux", target_env = "gnu"))]
14use crate::sys::weak::dlsym;
15#[cfg(any(target_os = "solaris", target_os = "illumos", target_os = "nto",))]
16use crate::sys::weak::weak;
17use crate::sys::{os, stack_overflow};
18use crate::thread::ThreadInit;
19use crate::time::Duration;
20use crate::{cmp, io, ptr};
/// Default minimum stack size in bytes: 2 MiB on every target that is not
/// special-cased below.
#[cfg(not(any(
    target_os = "l4re",
    target_os = "vxworks",
    target_os = "espidf",
    target_os = "nuttx"
)))]
pub const DEFAULT_MIN_STACK_SIZE: usize = 2 * 1024 * 1024;
/// Default minimum stack size in bytes on L4Re: 1 MiB.
#[cfg(target_os = "l4re")]
pub const DEFAULT_MIN_STACK_SIZE: usize = 1024 * 1024;
/// Default minimum stack size in bytes on VxWorks: 256 KiB.
#[cfg(target_os = "vxworks")]
pub const DEFAULT_MIN_STACK_SIZE: usize = 256 * 1024;
/// On ESP-IDF and NuttX the value 0 is a sentinel rather than a size; `Thread::new`
/// leaves the pthread attribute's stack size untouched when it receives 0.
#[cfg(any(target_os = "espidf", target_os = "nuttx"))]
pub const DEFAULT_MIN_STACK_SIZE: usize = 0; // 0 indicates that the stack size configured in the ESP-IDF/NuttX menuconfig system should be used
34
/// Owning handle to a native pthread.
///
/// Dropping the handle detaches the thread (see the `Drop` impl), while
/// `Thread::join` waits for it to finish instead.
pub struct Thread {
    /// Native thread identifier, as filled in by `pthread_create`.
    id: libc::pthread_t,
}

// Some platforms may have pthread_t as a pointer in which case we still want
// a thread to be Send/Sync
unsafe impl Send for Thread {}
unsafe impl Sync for Thread {}
43
44impl Thread {
45    // unsafe: see thread::Builder::spawn_unchecked for safety requirements
46    #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
47    pub unsafe fn new(stack: usize, init: Box<ThreadInit>) -> io::Result<Thread> {
48        let data = init;
49        let mut attr: mem::MaybeUninit<libc::pthread_attr_t> = mem::MaybeUninit::uninit();
50        assert_eq!(libc::pthread_attr_init(attr.as_mut_ptr()), 0);
51        let mut attr = DropGuard::new(&mut attr, |attr| {
52            assert_eq!(libc::pthread_attr_destroy(attr.as_mut_ptr()), 0)
53        });
54
55        #[cfg(any(target_os = "espidf", target_os = "nuttx"))]
56        if stack > 0 {
57            // Only set the stack if a non-zero value is passed
58            // 0 is used as an indication that the default stack size configured in the ESP-IDF/NuttX menuconfig system should be used
59            assert_eq!(
60                libc::pthread_attr_setstacksize(
61                    attr.as_mut_ptr(),
62                    cmp::max(stack, min_stack_size(attr.as_ptr()))
63                ),
64                0
65            );
66        }
67
68        #[cfg(not(any(target_os = "espidf", target_os = "nuttx")))]
69        {
70            let stack_size = cmp::max(stack, min_stack_size(attr.as_ptr()));
71
72            match libc::pthread_attr_setstacksize(attr.as_mut_ptr(), stack_size) {
73                0 => {}
74                n => {
75                    assert_eq!(n, libc::EINVAL);
76                    // EINVAL means |stack_size| is either too small or not a
77                    // multiple of the system page size. Because it's definitely
78                    // >= PTHREAD_STACK_MIN, it must be an alignment issue.
79                    // Round up to the nearest page and try again.
80                    let page_size = os::page_size();
81                    let stack_size =
82                        (stack_size + page_size - 1) & (-(page_size as isize - 1) as usize - 1);
83
84                    // Some libc implementations, e.g. musl, place an upper bound
85                    // on the stack size, in which case we can only gracefully return
86                    // an error here.
87                    if libc::pthread_attr_setstacksize(attr.as_mut_ptr(), stack_size) != 0 {
88                        return Err(io::const_error!(
89                            io::ErrorKind::InvalidInput,
90                            "invalid stack size"
91                        ));
92                    }
93                }
94            };
95        }
96
97        let data = Box::into_raw(data);
98        let mut native: libc::pthread_t = mem::zeroed();
99        let ret = libc::pthread_create(&mut native, attr.as_ptr(), thread_start, data as *mut _);
100        return if ret == 0 {
101            Ok(Thread { id: native })
102        } else {
103            // The thread failed to start and as a result `data` was not consumed.
104            // Therefore, it is safe to reconstruct the box so that it gets deallocated.
105            drop(Box::from_raw(data));
106            Err(io::Error::from_raw_os_error(ret))
107        };
108
109        extern "C" fn thread_start(data: *mut libc::c_void) -> *mut libc::c_void {
110            unsafe {
111                // SAFETY: we are simply recreating the box that was leaked earlier.
112                let init = Box::from_raw(data as *mut ThreadInit);
113                let rust_start = init.init();
114
115                // Now that the thread information is set, set up our stack
116                // overflow handler.
117                let _handler = stack_overflow::Handler::new();
118
119                rust_start();
120            }
121            ptr::null_mut()
122        }
123    }
124
125    pub fn join(self) {
126        let id = self.into_id();
127        let ret = unsafe { libc::pthread_join(id, ptr::null_mut()) };
128        assert!(ret == 0, "failed to join thread: {}", io::Error::from_raw_os_error(ret));
129    }
130
131    #[cfg(not(target_os = "wasi"))]
132    pub fn id(&self) -> libc::pthread_t {
133        self.id
134    }
135
136    pub fn into_id(self) -> libc::pthread_t {
137        ManuallyDrop::new(self).id
138    }
139}
140
141impl Drop for Thread {
142    fn drop(&mut self) {
143        let ret = unsafe { libc::pthread_detach(self.id) };
144        debug_assert_eq!(ret, 0);
145    }
146}
147
148pub fn available_parallelism() -> io::Result<NonZero<usize>> {
149    cfg_select! {
150        any(
151            target_os = "android",
152            target_os = "emscripten",
153            target_os = "fuchsia",
154            target_os = "hurd",
155            target_os = "linux",
156            target_os = "aix",
157            target_vendor = "apple",
158            target_os = "cygwin",
159        ) => {
160            #[allow(unused_assignments)]
161            #[allow(unused_mut)]
162            let mut quota = usize::MAX;
163
164            #[cfg(any(target_os = "android", target_os = "linux"))]
165            {
166                quota = cgroups::quota().max(1);
167                let mut set: libc::cpu_set_t = unsafe { mem::zeroed() };
168                unsafe {
169                    if libc::sched_getaffinity(0, size_of::<libc::cpu_set_t>(), &mut set) == 0 {
170                        let count = libc::CPU_COUNT(&set) as usize;
171                        let count = count.min(quota);
172
173                        // According to sched_getaffinity's API it should always be non-zero, but
174                        // some old MIPS kernels were buggy and zero-initialized the mask if
175                        // none was explicitly set.
176                        // In that case we use the sysconf fallback.
177                        if let Some(count) = NonZero::new(count) {
178                            return Ok(count)
179                        }
180                    }
181                }
182            }
183            match unsafe { libc::sysconf(libc::_SC_NPROCESSORS_ONLN) } {
184                -1 => Err(io::Error::last_os_error()),
185                0 => Err(io::Error::UNKNOWN_THREAD_COUNT),
186                cpus => {
187                    let count = cpus as usize;
188                    // Cover the unusual situation where we were able to get the quota but not the affinity mask
189                    let count = count.min(quota);
190                    Ok(unsafe { NonZero::new_unchecked(count) })
191                }
192            }
193        }
194        any(
195           target_os = "freebsd",
196           target_os = "dragonfly",
197           target_os = "openbsd",
198           target_os = "netbsd",
199        ) => {
200            use crate::ptr;
201
202            #[cfg(target_os = "freebsd")]
203            {
204                let mut set: libc::cpuset_t = unsafe { mem::zeroed() };
205                unsafe {
206                    if libc::cpuset_getaffinity(
207                        libc::CPU_LEVEL_WHICH,
208                        libc::CPU_WHICH_PID,
209                        -1,
210                        size_of::<libc::cpuset_t>(),
211                        &mut set,
212                    ) == 0 {
213                        let count = libc::CPU_COUNT(&set) as usize;
214                        if count > 0 {
215                            return Ok(NonZero::new_unchecked(count));
216                        }
217                    }
218                }
219            }
220
221            #[cfg(target_os = "netbsd")]
222            {
223                unsafe {
224                    let set = libc::_cpuset_create();
225                    if !set.is_null() {
226                        let mut count: usize = 0;
227                        if libc::pthread_getaffinity_np(libc::pthread_self(), libc::_cpuset_size(set), set) == 0 {
228                            for i in 0..libc::cpuid_t::MAX {
229                                match libc::_cpuset_isset(i, set) {
230                                    -1 => break,
231                                    0 => continue,
232                                    _ => count = count + 1,
233                                }
234                            }
235                        }
236                        libc::_cpuset_destroy(set);
237                        if let Some(count) = NonZero::new(count) {
238                            return Ok(count);
239                        }
240                    }
241                }
242            }
243
244            let mut cpus: libc::c_uint = 0;
245            let mut cpus_size = size_of_val(&cpus);
246
247            unsafe {
248                cpus = libc::sysconf(libc::_SC_NPROCESSORS_ONLN) as libc::c_uint;
249            }
250
251            // Fallback approach in case of errors or no hardware threads.
252            if cpus < 1 {
253                let mut mib = [libc::CTL_HW, libc::HW_NCPU, 0, 0];
254                let res = unsafe {
255                    libc::sysctl(
256                        mib.as_mut_ptr(),
257                        2,
258                        (&raw mut cpus) as *mut _,
259                        (&raw mut cpus_size) as *mut _,
260                        ptr::null_mut(),
261                        0,
262                    )
263                };
264
265                // Handle errors if any.
266                if res == -1 {
267                    return Err(io::Error::last_os_error());
268                } else if cpus == 0 {
269                    return Err(io::Error::UNKNOWN_THREAD_COUNT);
270                }
271            }
272
273            Ok(unsafe { NonZero::new_unchecked(cpus as usize) })
274        }
275        target_os = "nto" => {
276            unsafe {
277                use libc::_syspage_ptr;
278                if _syspage_ptr.is_null() {
279                    Err(io::const_error!(io::ErrorKind::NotFound, "no syspage available"))
280                } else {
281                    let cpus = (*_syspage_ptr).num_cpu;
282                    NonZero::new(cpus as usize)
283                        .ok_or(io::Error::UNKNOWN_THREAD_COUNT)
284                }
285            }
286        }
287        any(target_os = "solaris", target_os = "illumos") => {
288            let mut cpus = 0u32;
289            if unsafe { libc::pset_info(libc::PS_MYID, core::ptr::null_mut(), &mut cpus, core::ptr::null_mut()) } != 0 {
290                return Err(io::Error::UNKNOWN_THREAD_COUNT);
291            }
292            Ok(unsafe { NonZero::new_unchecked(cpus as usize) })
293        }
294        target_os = "haiku" => {
295            // system_info cpu_count field gets the static data set at boot time with `smp_set_num_cpus`
296            // `get_system_info` calls then `smp_get_num_cpus`
297            unsafe {
298                let mut sinfo: libc::system_info = crate::mem::zeroed();
299                let res = libc::get_system_info(&mut sinfo);
300
301                if res != libc::B_OK {
302                    return Err(io::Error::UNKNOWN_THREAD_COUNT);
303                }
304
305                Ok(NonZero::new_unchecked(sinfo.cpu_count as usize))
306            }
307        }
308        target_os = "vxworks" => {
309            // Note: there is also `vxCpuConfiguredGet`, closer to _SC_NPROCESSORS_CONF
310            // expectations than the actual cores availability.
311
312            // SAFETY: `vxCpuEnabledGet` always fetches a mask with at least one bit set
313            unsafe{
314                let set = libc::vxCpuEnabledGet();
315                Ok(NonZero::new_unchecked(set.count_ones() as usize))
316            }
317        }
318        _ => {
319            // FIXME: implement on Redox, l4re
320            Err(io::const_error!(io::ErrorKind::Unsupported, "getting the number of hardware threads is not supported on the target platform"))
321        }
322    }
323}
324
325pub fn current_os_id() -> Option<u64> {
326    // Most Unix platforms have a way to query an integer ID of the current thread, all with
327    // slightly different spellings.
328    //
329    // The OS thread ID is used rather than `pthread_self` so as to match what will be displayed
330    // for process inspection (debuggers, trace, `top`, etc.).
331    cfg_select! {
332        // Most platforms have a function returning a `pid_t` or int, which is an `i32`.
333        any(target_os = "android", target_os = "linux") => {
334            use crate::sys::pal::weak::syscall;
335
336            // `libc::gettid` is only available on glibc 2.30+, but the syscall is available
337            // since Linux 2.4.11.
338            syscall!(fn gettid() -> libc::pid_t;);
339
340            // SAFETY: FFI call with no preconditions.
341            let id: libc::pid_t = unsafe { gettid() };
342            Some(id as u64)
343        }
344        target_os = "nto" => {
345            // SAFETY: FFI call with no preconditions.
346            let id: libc::pid_t = unsafe { libc::gettid() };
347            Some(id as u64)
348        }
349        target_os = "openbsd" => {
350            // SAFETY: FFI call with no preconditions.
351            let id: libc::pid_t = unsafe { libc::getthrid() };
352            Some(id as u64)
353        }
354        target_os = "freebsd" => {
355            // SAFETY: FFI call with no preconditions.
356            let id: libc::c_int = unsafe { libc::pthread_getthreadid_np() };
357            Some(id as u64)
358        }
359        target_os = "netbsd" => {
360            // SAFETY: FFI call with no preconditions.
361            let id: libc::lwpid_t = unsafe { libc::_lwp_self() };
362            Some(id as u64)
363        }
364        any(target_os = "illumos", target_os = "solaris") => {
365            // On Illumos and Solaris, the `pthread_t` is the same as the OS thread ID.
366            // SAFETY: FFI call with no preconditions.
367            let id: libc::pthread_t = unsafe { libc::pthread_self() };
368            Some(id as u64)
369        }
370        target_vendor = "apple" => {
371            // Apple allows querying arbitrary thread IDs, `thread=NULL` queries the current thread.
372            let mut id = 0u64;
373            // SAFETY: `thread_id` is a valid pointer, no other preconditions.
374            let status: libc::c_int = unsafe { libc::pthread_threadid_np(0, &mut id) };
375            if status == 0 {
376                Some(id)
377            } else {
378                None
379            }
380        }
381        // Other platforms don't have an OS thread ID or don't have a way to access it.
382        _ => None,
383    }
384}
385
/// Copies `cstr` into a zero-filled array of `MAX_WITH_NUL` chars.
///
/// At most `MAX_WITH_NUL - 1` bytes are copied, so the last element always stays 0 and
/// the result is NUL-terminated even when the input had to be cut short.
#[cfg(any(
    target_os = "linux",
    target_os = "nto",
    target_os = "solaris",
    target_os = "illumos",
    target_os = "vxworks",
    target_os = "cygwin",
    target_vendor = "apple",
))]
fn truncate_cstr<const MAX_WITH_NUL: usize>(cstr: &CStr) -> [libc::c_char; MAX_WITH_NUL] {
    let mut buf: [libc::c_char; MAX_WITH_NUL] = [0; MAX_WITH_NUL];
    // Reserve the final slot for the terminating NUL; only the part before it is written.
    let (payload, _terminator) = buf.split_at_mut(MAX_WITH_NUL - 1);
    payload
        .iter_mut()
        .zip(cstr.to_bytes())
        .for_each(|(dst, &src)| *dst = src as libc::c_char);
    buf
}
402
/// Sets the name of the calling thread through `prctl(PR_SET_NAME)`.
///
/// Errors cannot be propagated from here; they are only checked in debug builds.
#[cfg(target_os = "android")]
pub fn set_name(name: &CStr) {
    const PR_SET_NAME: libc::c_int = 15;
    // The three trailing `prctl` arguments are passed as zero.
    const ZERO: libc::c_ulong = 0;

    let status = unsafe { libc::prctl(PR_SET_NAME, name.as_ptr(), ZERO, ZERO, ZERO) };
    // We have no good way of propagating errors here, but in debug-builds let's check that this actually worked.
    debug_assert_eq!(status, 0);
}
418
419#[cfg(any(
420    target_os = "linux",
421    target_os = "freebsd",
422    target_os = "dragonfly",
423    target_os = "nuttx",
424    target_os = "cygwin"
425))]
426pub fn set_name(name: &CStr) {
427    unsafe {
428        cfg_select! {
429            any(target_os = "linux", target_os = "cygwin") => {
430                // Linux and Cygwin limits the allowed length of the name.
431                const TASK_COMM_LEN: usize = 16;
432                let name = truncate_cstr::<{ TASK_COMM_LEN }>(name);
433            }
434            _ => {
435                // FreeBSD, DragonFly BSD and NuttX do not enforce length limits.
436            }
437        };
438        // Available since glibc 2.12, musl 1.1.16, and uClibc 1.0.20 for Linux,
439        // FreeBSD 12.2 and 13.0, and DragonFly BSD 6.0.
440        let res = libc::pthread_setname_np(libc::pthread_self(), name.as_ptr());
441        // We have no good way of propagating errors here, but in debug-builds let's check that this actually worked.
442        debug_assert_eq!(res, 0);
443    }
444}
445
/// Sets the name of the calling thread through OpenBSD's `pthread_set_name_np`.
#[cfg(target_os = "openbsd")]
pub fn set_name(name: &CStr) {
    let name_ptr = name.as_ptr();
    unsafe {
        libc::pthread_set_name_np(libc::pthread_self(), name_ptr);
    }
}
452
/// Sets the name of the calling thread on Apple platforms.
///
/// The name is truncated to `MAXTHREADNAMESIZE` (including the NUL) first. Errors are
/// only checked in debug builds.
#[cfg(target_vendor = "apple")]
pub fn set_name(name: &CStr) {
    let truncated = truncate_cstr::<{ libc::MAXTHREADNAMESIZE }>(name);
    let status = unsafe { libc::pthread_setname_np(truncated.as_ptr()) };
    // We have no good way of propagating errors here, but in debug-builds let's check that this actually worked.
    debug_assert_eq!(status, 0);
}
462
/// Sets the name of the calling thread on NetBSD.
///
/// NetBSD's `pthread_setname_np` takes a format string plus one argument, so the name
/// is passed as the argument of a `"%s"` format. Errors are only checked in debug builds.
#[cfg(target_os = "netbsd")]
pub fn set_name(name: &CStr) {
    let format = c"%s";
    let format_arg = name.as_ptr() as *mut libc::c_void;
    let status =
        unsafe { libc::pthread_setname_np(libc::pthread_self(), format.as_ptr(), format_arg) };
    debug_assert_eq!(status, 0);
}
474
/// Sets the name of the calling thread on Solaris, illumos and QNX Neutrino.
///
/// `pthread_setname_np` is looked up as a weak symbol; when it is missing this is a
/// no-op. The name is truncated to the platform limit first. Errors are only checked in
/// debug builds.
#[cfg(any(target_os = "solaris", target_os = "illumos", target_os = "nto"))]
pub fn set_name(name: &CStr) {
    weak!(
        fn pthread_setname_np(thread: libc::pthread_t, name: *const libc::c_char) -> libc::c_int;
    );

    #[cfg(target_os = "nto")]
    const THREAD_NAME_MAX: usize = libc::_NTO_THREAD_NAME_MAX as usize;
    #[cfg(any(target_os = "solaris", target_os = "illumos"))]
    const THREAD_NAME_MAX: usize = 32;

    // Nothing to do when the symbol is not available.
    let Some(setname) = pthread_setname_np.get() else {
        return;
    };

    let truncated = truncate_cstr::<{ THREAD_NAME_MAX }>(name);
    let status = unsafe { setname(libc::pthread_self(), truncated.as_ptr()) };
    debug_assert_eq!(status, 0);
}
492
/// Sets the name of the calling thread on Fuchsia via the `ZX_PROP_NAME` property.
///
/// The result of `zx_object_set_property` is not inspected.
#[cfg(target_os = "fuchsia")]
pub fn set_name(name: &CStr) {
    use crate::sys::pal::fuchsia::*;

    let value = name.as_ptr() as *const libc::c_void;
    // Length of the name without the terminating NUL.
    let value_len = name.to_bytes().len();
    unsafe {
        zx_object_set_property(zx_thread_self(), ZX_PROP_NAME, value, value_len);
    }
}
505
/// Sets the name of the calling thread on Haiku via `rename_thread`.
///
/// Errors are only checked in debug builds.
#[cfg(target_os = "haiku")]
pub fn set_name(name: &CStr) {
    let status = unsafe {
        let this_thread = libc::find_thread(ptr::null_mut());
        libc::rename_thread(this_thread, name.as_ptr())
    };
    // We have no good way of propagating errors here, but in debug-builds let's check that this actually worked.
    debug_assert_eq!(status, libc::B_OK);
}
515
/// Sets the name of the calling task on VxWorks via `taskNameSet`.
///
/// The name is truncated to `VX_TASK_RENAME_LENGTH - 1` chars (including the NUL) first.
/// Errors are only checked in debug builds.
#[cfg(target_os = "vxworks")]
pub fn set_name(name: &CStr) {
    const MAX_WITH_NUL: usize = (libc::VX_TASK_RENAME_LENGTH - 1) as usize;

    let mut truncated = truncate_cstr::<{ MAX_WITH_NUL }>(name);
    let status = unsafe { libc::taskNameSet(libc::taskIdSelf(), truncated.as_mut_ptr()) };
    debug_assert_eq!(status, libc::OK);
}
522
523#[cfg(not(any(target_os = "espidf", target_os = "wasi")))]
524pub fn sleep(dur: Duration) {
525    let mut secs = dur.as_secs();
526    let mut nsecs = dur.subsec_nanos() as _;
527
528    // If we're awoken with a signal then the return value will be -1 and
529    // nanosleep will fill in `ts` with the remaining time.
530    unsafe {
531        while secs > 0 || nsecs > 0 {
532            let mut ts = libc::timespec {
533                tv_sec: cmp::min(libc::time_t::MAX as u64, secs) as libc::time_t,
534                tv_nsec: nsecs,
535            };
536            secs -= ts.tv_sec as u64;
537            let ts_ptr = &raw mut ts;
538            if libc::nanosleep(ts_ptr, ts_ptr) == -1 {
539                assert_eq!(os::errno(), libc::EINTR);
540                secs += ts.tv_sec as u64;
541                nsecs = ts.tv_nsec;
542            } else {
543                nsecs = 0;
544            }
545        }
546    }
547}
548
/// Puts the calling thread to sleep for at least `dur`, using `usleep`.
///
/// The duration is rounded up to whole microseconds and slept in chunks of at most
/// `MAX_MICROS` microseconds each.
#[cfg(any(
    target_os = "espidf",
    // wasi-libc prior to WebAssembly/wasi-libc#696 has a broken implementation
    // of `nanosleep`, used above by most platforms, so use `usleep` until
    // that fix propagates throughout the ecosystem.
    target_os = "wasi",
))]
pub fn sleep(dur: Duration) {
    // ESP-IDF does not have `nanosleep`, so we use `usleep` instead.
    // As per the documentation of `usleep`, it is expected to support
    // sleep times as big as at least up to 1 second.
    //
    // ESP-IDF does support almost up to `u32::MAX`, but due to a potential integer overflow in its
    // `usleep` implementation
    // (https://github.com/espressif/esp-idf/blob/d7ca8b94c852052e3bc33292287ef4dd62c9eeb1/components/newlib/time.c#L210),
    // we limit the sleep time to the maximum one that would not cause the underlying `usleep` implementation to overflow
    // (`portTICK_PERIOD_MS` can be anything between 1 to 1000, and is 10 by default).
    const MAX_MICROS: u32 = u32::MAX - 1_000_000 - 1;

    // A fractional microsecond counts as one full extra microsecond, so that the
    // `std::thread::sleep` contract of sleeping for _at least_ `dur` is honoured.
    // The sum cannot overflow: it is a `u128`, while `Duration` is a pair of
    // (`u64` secs, `u32` nanos) with the nanos strictly smaller than 1 second
    // (i.e. < 1_000_000_000).
    let round_up = u128::from(dur.subsec_nanos() % 1_000 != 0);
    let mut remaining = dur.as_micros() + round_up;

    while remaining > 0 {
        let step = remaining.min(u128::from(MAX_MICROS)) as u32;
        unsafe {
            libc::usleep(step);
        }

        remaining -= u128::from(step);
    }
}
585
586// Any unix that has clock_nanosleep
// If this list changes, update the Miri `clock_nanosleep` shim
588#[cfg(any(
589    target_os = "freebsd",
590    target_os = "netbsd",
591    target_os = "linux",
592    target_os = "android",
593    target_os = "solaris",
594    target_os = "illumos",
595    target_os = "dragonfly",
596    target_os = "hurd",
597    target_os = "fuchsia",
598    target_os = "vxworks",
599    target_os = "wasi",
600))]
601pub fn sleep_until(deadline: crate::time::Instant) {
602    use crate::time::Instant;
603
604    let Some(ts) = deadline.into_inner().into_timespec().to_timespec() else {
605        // The deadline is further in the future then can be passed to
606        // clock_nanosleep. We have to use Self::sleep instead. This might
607        // happen on 32 bit platforms, especially closer to 2038.
608        let now = Instant::now();
609        if let Some(delay) = deadline.checked_duration_since(now) {
610            sleep(delay);
611        }
612        return;
613    };
614
615    unsafe {
616        // When we get interrupted (res = EINTR) call clock_nanosleep again
617        loop {
618            let res = libc::clock_nanosleep(
619                crate::sys::time::Instant::CLOCK_ID,
620                libc::TIMER_ABSTIME,
621                &ts,
622                core::ptr::null_mut(), // not required with TIMER_ABSTIME
623            );
624
625            if res == 0 {
626                break;
627            } else {
628                assert_eq!(
629                    res,
630                    libc::EINTR,
631                    "timespec is in range,
632                         clockid is valid and kernel should support it"
633                );
634            }
635        }
636    }
637}
638
639pub fn yield_now() {
640    let ret = unsafe { libc::sched_yield() };
641    debug_assert_eq!(ret, 0);
642}
643
644#[cfg(any(target_os = "android", target_os = "linux"))]
645mod cgroups {
646    //! Currently not covered
647    //! * cgroup v2 in non-standard mountpoints
648    //! * paths containing control characters or spaces, since those would be escaped in procfs
649    //!   output and we don't unescape
650
651    use crate::borrow::Cow;
652    use crate::ffi::OsString;
653    use crate::fs::{File, exists};
654    use crate::io::{BufRead, Read};
655    use crate::os::unix::ffi::OsStringExt;
656    use crate::path::{Path, PathBuf};
657    use crate::str::from_utf8;
658
    /// Which cgroup hierarchy version a `/proc/self/cgroup` entry belongs to.
    #[derive(PartialEq)]
    enum Cgroup {
        /// Entry whose controller list contains `cpu`.
        V1,
        /// Entry with an empty controller list.
        V2,
    }
664
665    /// Returns cgroup CPU quota in core-equivalents, rounded down or usize::MAX if the quota cannot
666    /// be determined or is not set.
667    pub(super) fn quota() -> usize {
668        let mut quota = usize::MAX;
669        if cfg!(miri) {
670            // Attempting to open a file fails under default flags due to isolation.
671            // And Miri does not have parallelism anyway.
672            return quota;
673        }
674
675        let _: Option<()> = try {
676            let mut buf = Vec::with_capacity(128);
677            // find our place in the cgroup hierarchy
678            File::open("/proc/self/cgroup").ok()?.read_to_end(&mut buf).ok()?;
679            let (cgroup_path, version) =
680                buf.split(|&c| c == b'\n').fold(None, |previous, line| {
681                    let mut fields = line.splitn(3, |&c| c == b':');
682                    // 2nd field is a list of controllers for v1 or empty for v2
683                    let version = match fields.nth(1) {
684                        Some(b"") => Cgroup::V2,
685                        Some(controllers)
686                            if from_utf8(controllers)
687                                .is_ok_and(|c| c.split(',').any(|c| c == "cpu")) =>
688                        {
689                            Cgroup::V1
690                        }
691                        _ => return previous,
692                    };
693
694                    // already-found v1 trumps v2 since it explicitly specifies its controllers
695                    if previous.is_some() && version == Cgroup::V2 {
696                        return previous;
697                    }
698
699                    let path = fields.last()?;
700                    // skip leading slash
701                    Some((path[1..].to_owned(), version))
702                })?;
703            let cgroup_path = PathBuf::from(OsString::from_vec(cgroup_path));
704
705            quota = match version {
706                Cgroup::V1 => quota_v1(cgroup_path),
707                Cgroup::V2 => quota_v2(cgroup_path),
708            };
709        };
710
711        quota
712    }
713
714    fn quota_v2(group_path: PathBuf) -> usize {
715        let mut quota = usize::MAX;
716
717        let mut path = PathBuf::with_capacity(128);
718        let mut read_buf = String::with_capacity(20);
719
720        // standard mount location defined in file-hierarchy(7) manpage
721        let cgroup_mount = "/sys/fs/cgroup";
722
723        path.push(cgroup_mount);
724        path.push(&group_path);
725
726        path.push("cgroup.controllers");
727
728        // skip if we're not looking at cgroup2
729        if matches!(exists(&path), Err(_) | Ok(false)) {
730            return usize::MAX;
731        };
732
733        path.pop();
734
735        let _: Option<()> = try {
736            while path.starts_with(cgroup_mount) {
737                path.push("cpu.max");
738
739                read_buf.clear();
740
741                if File::open(&path).and_then(|mut f| f.read_to_string(&mut read_buf)).is_ok() {
742                    let raw_quota = read_buf.lines().next()?;
743                    let mut raw_quota = raw_quota.split(' ');
744                    let limit = raw_quota.next()?;
745                    let period = raw_quota.next()?;
746                    match (limit.parse::<usize>(), period.parse::<usize>()) {
747                        (Ok(limit), Ok(period)) if period > 0 => {
748                            quota = quota.min(limit / period);
749                        }
750                        _ => {}
751                    }
752                }
753
754                path.pop(); // pop filename
755                path.pop(); // pop dir
756            }
757        };
758
759        quota
760    }
761
    /// Returns the smallest cgroup v1 CPU quota (`cpu.cfs_quota_us / cpu.cfs_period_us`,
    /// rounded down) found while walking from `group_path` up to the mount point of the
    /// cpu controller, or `usize::MAX` if none applies.
    ///
    /// Candidate mount points are tried in order; the first one under which the group
    /// directory exists is used.
    fn quota_v1(group_path: PathBuf) -> usize {
        let mut quota = usize::MAX;
        let mut path = PathBuf::with_capacity(128);
        let mut read_buf = String::with_capacity(20);

        // Hardcode commonly used locations mentioned in the cgroups(7) manpage
        // if that doesn't work scan mountinfo and adjust `group_path` for bind-mounts
        let mounts: &[fn(&Path) -> Option<(_, &Path)>] = &[
            |p| Some((Cow::Borrowed("/sys/fs/cgroup/cpu"), p)),
            |p| Some((Cow::Borrowed("/sys/fs/cgroup/cpu,cpuacct"), p)),
            // this can be expensive on systems with tons of mountpoints
            // but we only get to this point when /proc/self/cgroups explicitly indicated
            // this process belongs to a cpu-controller cgroup v1 and the defaults didn't work
            find_mountpoint,
        ];

        for mount in mounts {
            // Each candidate yields a mount point and the (possibly trimmed) group path.
            let Some((mount, group_path)) = mount(&group_path) else { continue };

            path.clear();
            path.push(mount.as_ref());
            path.push(&group_path);

            // skip if we guessed the mount incorrectly
            if matches!(exists(&path), Err(_) | Ok(false)) {
                continue;
            }

            // Walk from the group's directory up to the mount point, one level per iteration.
            while path.starts_with(mount.as_ref()) {
                // Reads `<path>/<name>` and parses its trimmed contents as a `usize`;
                // `path` is left unchanged once the closure returns.
                let mut parse_file = |name| {
                    path.push(name);
                    read_buf.clear();

                    let f = File::open(&path);
                    path.pop(); // restore buffer before any early returns
                    f.ok()?.read_to_string(&mut read_buf).ok()?;
                    let parsed = read_buf.trim().parse::<usize>().ok()?;

                    Some(parsed)
                };

                let limit = parse_file("cpu.cfs_quota_us");
                let period = parse_file("cpu.cfs_period_us");

                // A value that does not parse as `usize` leaves the quota untouched.
                match (limit, period) {
                    (Some(limit), Some(period)) if period > 0 => quota = quota.min(limit / period),
                    _ => {}
                }

                // move up to the parent directory
                path.pop();
            }

            // we passed the try_exists above so we should have traversed the correct hierarchy
            // when reaching this line
            break;
        }

        quota
    }
821
822    /// Scan mountinfo for cgroup v1 mountpoint with a cpu controller
823    ///
824    /// If the cgroupfs is a bind mount then `group_path` is adjusted to skip
825    /// over the already-included prefix
826    fn find_mountpoint(group_path: &Path) -> Option<(Cow<'static, str>, &Path)> {
827        let mut reader = File::open_buffered("/proc/self/mountinfo").ok()?;
828        let mut line = String::with_capacity(256);
829        loop {
830            line.clear();
831            if reader.read_line(&mut line).ok()? == 0 {
832                break;
833            }
834
835            let line = line.trim();
836            let mut items = line.split(' ');
837
838            let sub_path = items.nth(3)?;
839            let mount_point = items.next()?;
840            let mount_opts = items.next_back()?;
841            let filesystem_type = items.nth_back(1)?;
842
843            if filesystem_type != "cgroup" || !mount_opts.split(',').any(|opt| opt == "cpu") {
844                // not a cgroup / not a cpu-controller
845                continue;
846            }
847
848            let sub_path = Path::new(sub_path).strip_prefix("/").ok()?;
849
850            if !group_path.starts_with(sub_path) {
851                // this is a bind-mount and the bound subdirectory
852                // does not contain the cgroup this process belongs to
853                continue;
854            }
855
856            let trimmed_group_path = group_path.strip_prefix(sub_path).ok()?;
857
858            return Some((Cow::Owned(mount_point.to_owned()), trimmed_group_path));
859        }
860
861        None
862    }
863}
864
865// glibc >= 2.15 has a __pthread_get_minstack() function that returns
866// PTHREAD_STACK_MIN plus bytes needed for thread-local storage.
867// We need that information to avoid blowing up when a small stack
868// is created in an application with big thread-local storage requirements.
869// See #6233 for rationale and details.
870#[cfg(all(target_os = "linux", target_env = "gnu"))]
871unsafe fn min_stack_size(attr: *const libc::pthread_attr_t) -> usize {
872    // We use dlsym to avoid an ELF version dependency on GLIBC_PRIVATE. (#23628)
873    // We shouldn't really be using such an internal symbol, but there's currently
874    // no other way to account for the TLS size.
875    dlsym!(
876        fn __pthread_get_minstack(attr: *const libc::pthread_attr_t) -> libc::size_t;
877    );
878
879    match __pthread_get_minstack.get() {
880        None => libc::PTHREAD_STACK_MIN,
881        Some(f) => unsafe { f(attr) },
882    }
883}
884
// Non-glibc platforms have no __pthread_get_minstack() worth looking up, so the
// attribute is ignored and the plain constant is returned.
#[cfg(all(
    not(all(target_os = "linux", target_env = "gnu")),
    not(any(target_os = "netbsd", target_os = "nuttx"))
))]
unsafe fn min_stack_size(_attr: *const libc::pthread_attr_t) -> usize {
    libc::PTHREAD_STACK_MIN
}
893
// On NetBSD and NuttX the minimum is queried once through
// `sysconf(_SC_THREAD_STACK_MIN)` and cached for later calls.
#[cfg(any(target_os = "netbsd", target_os = "nuttx"))]
unsafe fn min_stack_size(_: *const libc::pthread_attr_t) -> usize {
    static STACK: crate::sync::OnceLock<usize> = crate::sync::OnceLock::new();

    *STACK.get_or_init(|| {
        let reported = unsafe { libc::sysconf(libc::_SC_THREAD_STACK_MIN) };
        if reported < 0 {
            2048 // just a guess
        } else {
            reported as usize
        }
    })
}
906}