[systemd-devel] [RFC PATCH] proc, pidns: Add highpid

Andy Lutomirski luto at amacapital.net
Fri Nov 28 15:06:10 PST 2014


[Adding CRIU people.  Whoops.]

On Fri, Nov 28, 2014 at 3:05 PM, Andy Lutomirski <luto at amacapital.net> wrote:
> Pid reuse is common, which means that it's difficult or impossible
> to read information about a pid from /proc without races.
>
> This introduces a second number associated with each (task, pidns)
> pair called highpid.  Highpid is a 64-bit number, and, barring
> extremely unlikely circumstances or outright error, a (highpid, pid)
> will never be reused.
>
> With just this change, a program can open /proc/PID/status, read the
> "Highpid" field, and confirm that it has the expected value.  If the
> pid has been reused, then highpid will be different.
>
> The initial implementation is straightforward: highpid is simply a
> 64-bit counter. If a high-end system can fork every 3 ns (which
> would be amazing, given that just allocating a pid requires at
> atomic operation), it would take well over 1000 years for highpid to
> wrap.
>
> For CRIU's benefit, the next highpid can be set by a privileged
> user.
>
> NB: The sysctl stuff only works on 64-bit systems.  If the approach
> looks good, I'll fix that somehow.
>
> Signed-off-by: Andy Lutomirski <luto at amacapital.net>
> ---
>
> If this goes in, there's plenty of room to add new interfaces to
> make this more useful.  For example, we could add a fancier tgkill
> that adds and validates hightgid and highpid, and we might want to
> add a syscall to read one's own hightgid and highpid.  These would
> be quite useful for pidfiles.
>
> David, would this be useful for kdbus?
>
> CRIU people: will this be unduly difficult to support in CRIU?
>
> If you all think this is a good idea, I'll fix the sysctl stuff and
> document it.  It might also be worth adding "Hightgid" to status.
>
>  fs/proc/array.c               |  5 ++++-
>  include/linux/pid.h           |  2 ++
>  include/linux/pid_namespace.h |  1 +
>  kernel/pid.c                  | 19 +++++++++++++++----
>  kernel/pid_namespace.c        | 22 ++++++++++++++++++++++
>  5 files changed, 44 insertions(+), 5 deletions(-)
>
> diff --git a/fs/proc/array.c b/fs/proc/array.c
> index cd3653e4f35c..f1e0e69d19f9 100644
> --- a/fs/proc/array.c
> +++ b/fs/proc/array.c
> @@ -159,6 +159,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
>         int g;
>         struct fdtable *fdt = NULL;
>         const struct cred *cred;
> +       const struct upid *upid;
>         pid_t ppid, tpid;
>
>         rcu_read_lock();
> @@ -170,12 +171,14 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
>                 if (tracer)
>                         tpid = task_pid_nr_ns(tracer, ns);
>         }
> +       upid = pid_upid_ns(pid, ns);
>         cred = get_task_cred(p);
>         seq_printf(m,
>                 "State:\t%s\n"
>                 "Tgid:\t%d\n"
>                 "Ngid:\t%d\n"
>                 "Pid:\t%d\n"
> +               "Highpid:\t%llu\n"
>                 "PPid:\t%d\n"
>                 "TracerPid:\t%d\n"
>                 "Uid:\t%d\t%d\t%d\t%d\n"
> @@ -183,7 +186,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
>                 get_task_state(p),
>                 task_tgid_nr_ns(p, ns),
>                 task_numa_group_id(p),
> -               pid_nr_ns(pid, ns),
> +               upid ? upid->nr : 0, upid ? upid->highnr : 0,
>                 ppid, tpid,
>                 from_kuid_munged(user_ns, cred->uid),
>                 from_kuid_munged(user_ns, cred->euid),
> diff --git a/include/linux/pid.h b/include/linux/pid.h
> index 23705a53abba..ece70b64d04c 100644
> --- a/include/linux/pid.h
> +++ b/include/linux/pid.h
> @@ -51,6 +51,7 @@ struct upid {
>         /* Try to keep pid_chain in the same cacheline as nr for find_vpid */
>         int nr;
>         struct pid_namespace *ns;
> +       u64 highnr;
>         struct hlist_node pid_chain;
>  };
>
> @@ -170,6 +171,7 @@ static inline pid_t pid_nr(struct pid *pid)
>  }
>
>  pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns);
> +const struct upid *pid_upid_ns(struct pid *pid, struct pid_namespace *ns);
>  pid_t pid_vnr(struct pid *pid);
>
>  #define do_each_pid_task(pid, type, task)                              \
> diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
> index 1997ffc295a7..1f9f0455d7ef 100644
> --- a/include/linux/pid_namespace.h
> +++ b/include/linux/pid_namespace.h
> @@ -26,6 +26,7 @@ struct pid_namespace {
>         struct rcu_head rcu;
>         int last_pid;
>         unsigned int nr_hashed;
> +       atomic64_t next_highpid;
>         struct task_struct *child_reaper;
>         struct kmem_cache *pid_cachep;
>         unsigned int level;
> diff --git a/kernel/pid.c b/kernel/pid.c
> index 9b9a26698144..863d11a9bfbf 100644
> --- a/kernel/pid.c
> +++ b/kernel/pid.c
> @@ -312,6 +312,8 @@ struct pid *alloc_pid(struct pid_namespace *ns)
>
>                 pid->numbers[i].nr = nr;
>                 pid->numbers[i].ns = tmp;
> +               pid->numbers[i].highnr =
> +                       atomic64_inc_return(&tmp->next_highpid) - 1;
>                 tmp = tmp->parent;
>         }
>
> @@ -492,17 +494,26 @@ struct pid *find_get_pid(pid_t nr)
>  }
>  EXPORT_SYMBOL_GPL(find_get_pid);
>
> -pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
> +const struct upid *pid_upid_ns(struct pid *pid, struct pid_namespace *ns)
>  {
>         struct upid *upid;
> -       pid_t nr = 0;
>
>         if (pid && ns->level <= pid->level) {
>                 upid = &pid->numbers[ns->level];
>                 if (upid->ns == ns)
> -                       nr = upid->nr;
> +                       return upid;
>         }
> -       return nr;
> +       return NULL;
> +}
> +EXPORT_SYMBOL_GPL(pid_upid_ns);
> +
> +pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
> +{
> +       const struct upid *upid = pid_upid_ns(pid, ns);
> +
> +       if (!upid)
> +               return 0;
> +       return upid->nr;
>  }
>  EXPORT_SYMBOL_GPL(pid_nr_ns);
>
> diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
> index db95d8eb761b..e246802b4361 100644
> --- a/kernel/pid_namespace.c
> +++ b/kernel/pid_namespace.c
> @@ -268,6 +268,22 @@ static int pid_ns_ctl_handler(struct ctl_table *table, int write,
>         return proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
>  }
>
> +static int pid_ns_next_highpid_handler(struct ctl_table *table, int write,
> +               void __user *buffer, size_t *lenp, loff_t *ppos)
> +{
> +       struct pid_namespace *pid_ns = task_active_pid_ns(current);
> +       struct ctl_table tmp = *table;
> +
> +       if (write && !ns_capable(pid_ns->user_ns, CAP_SYS_ADMIN))
> +               return -EPERM;
> +
> +       /* This needs to be fixed. */
> +       BUILD_BUG_ON(sizeof(u64) != sizeof(unsigned long));
> +
> +       tmp.data = &pid_ns->next_highpid;
> +       return proc_dointvec(&tmp, write, buffer, lenp, ppos);
> +}
> +
>  extern int pid_max;
>  static int zero = 0;
>  static struct ctl_table pid_ns_ctl_table[] = {
> @@ -279,6 +295,12 @@ static struct ctl_table pid_ns_ctl_table[] = {
>                 .extra1 = &zero,
>                 .extra2 = &pid_max,
>         },
> +       {
> +               .procname = "ns_next_highpid",
> +               .maxlen = sizeof(u64),
> +               .mode = 0666, /* permissions are checked in the handler */
> +               .proc_handler = pid_ns_next_highpid_handler,
> +       },
>         { }
>  };
>  static struct ctl_path kern_path[] = { { .procname = "kernel", }, { } };
> --
> 1.9.3
>



-- 
Andy Lutomirski
AMA Capital Management, LLC


More information about the systemd-devel mailing list