/*
 * ntp_monitor - monitor ntpd statistics
 * (revision 289997)
 */
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

#include "ntpd.h"
#include "ntp_io.h"
#include "ntp_if.h"
#include "ntp_lists.h"
#include "ntp_stdlib.h"
#include <ntp_random.h>

#include <stdio.h>
#include <signal.h>
#ifdef HAVE_SYS_IOCTL_H
# include <sys/ioctl.h>
#endif

/*
 * Record statistics based on source address, mode and version. The
 * receive procedure calls us with the incoming rbufp before it does
 * anything else. While at it, implement rate controls for inbound
 * traffic.
 *
 * Each entry is doubly linked into two lists, a hash table and a most-
 * recently-used (MRU) list. When a packet arrives it is looked up in
 * the hash table. If found, the statistics are updated and the entry
 * relinked at the head of the MRU list. If not found, a new entry is
 * allocated, initialized and linked into both the hash table and at the
 * head of the MRU list.
 *
 * Memory is usually allocated by grabbing a big chunk of new memory and
 * cutting it up into littler pieces. The exception to this is when we
 * hit the memory limit. Then we free memory by grabbing entries off the
 * tail of the MRU list, unlinking from the hash table, and
 * reinitializing.
 *
 * INC_MONLIST is the default allocation granularity in entries.
 * INIT_MONLIST is the default initial allocation in entries.
 */
#ifdef MONMEMINC		/* old name */
# define	INC_MONLIST	MONMEMINC
#elif !defined(INC_MONLIST)
# define	INC_MONLIST	(4 * 1024 / sizeof(mon_entry))
#endif
#ifndef INIT_MONLIST
# define	INIT_MONLIST	(4 * 1024 / sizeof(mon_entry))
#endif
#ifndef MRU_MAXDEPTH_DEF
# define MRU_MAXDEPTH_DEF	(1024 * 1024 / sizeof(mon_entry))
#endif
5438653Sgpalmer
/*
 * Hashing stuff
 */
u_char	mon_hash_bits;

/*
 * Pointers to the hash table and the MRU list.  Memory for the hash
 * table is allocated only if monitoring is enabled.
 */
mon_entry **	mon_hash;	/* MRU hash table */
mon_entry	mon_mru_list;	/* mru listhead */

/*
 * List of free structures, and counters of in-use and total
 * structures. The free structures are linked with the hash_next field.
 */
static  mon_entry *mon_free;		/* free list or null if none */
	u_int mru_alloc;		/* mru list + free list count */
	u_int mru_entries;		/* mru list count */
	u_int mru_peakentries;		/* highest mru_entries seen */
	u_int mru_initalloc = INIT_MONLIST;/* entries to preallocate */
	u_int mru_incalloc = INC_MONLIST;/* allocation batch factor */
static	u_int mon_mem_increments;	/* times called malloc() */
7838653Sgpalmer
/*
 * Parameters of the RES_LIMITED restriction option. We define headway
 * as the idle time between packets. A packet is discarded if the
 * headway is less than the minimum, as well as if the average headway
 * is less than eight times the increment.
 */
int	ntp_minpkt = NTP_MINPKT;	/* minimum (log 2 s) */
u_char	ntp_minpoll = NTP_MINPOLL;	/* increment (log 2 s) */

/*
 * Initialization state.  We may be monitoring, we may not.  If
 * we aren't, we may not even have allocated any memory yet.
 */
	u_int	mon_enabled;		/* enable switch */
	u_int	mru_mindepth = 600;	/* preempt above this */
	int	mru_maxage = 64;	/* for entries older than */
	u_int	mru_maxdepth = 		/* MRU count hard limit */
			MRU_MAXDEPTH_DEF;
	int	mon_age = 3000;		/* preemption limit */

static	void		mon_getmoremem(void);
static	void		remove_from_hash(mon_entry *);
static	inline void	mon_free_entry(mon_entry *);
static	inline void	mon_reclaim_entry(mon_entry *);
10338653Sgpalmer
10438653Sgpalmer
/*
 * init_mon - initialize monitoring global data
 */
void
init_mon(void)
{
	/*
	 * Don't do much of anything here.  We don't allocate memory
	 * until mon_start().
	 */
	mon_enabled = MON_OFF;
	/* make the MRU listhead a valid, empty doubly-linked list */
	INIT_DLIST(mon_mru_list, mru);
}
11838653Sgpalmer
11938653Sgpalmer
/*
 * remove_from_hash - removes an entry from the address hash table and
 *		      decrements mru_entries.
 *
 * The entry stays on the MRU list (or free list); only the hash-bucket
 * chain linkage (hash_next) is undone here.
 */
static void
remove_from_hash(
	mon_entry *mon
	)
{
	u_int hash;
	mon_entry *punlinked;

	mru_entries--;
	hash = MON_HASH(&mon->rmtadr);
	UNLINK_SLIST(punlinked, mon_hash[hash], mon, hash_next,
		     mon_entry);
	/* the entry must have been present in its hash bucket */
	ENSURE(punlinked == mon);
}
13838653Sgpalmer
13995332Sobrien
/*
 * mon_free_entry - zero an entry and push it onto the free list.
 *
 * The free list is singly linked through the hash_next field, so an
 * entry must already be off both the hash table and the MRU list.
 */
static inline void
mon_free_entry(
	mon_entry *m
	)
{
	ZERO(*m);
	LINK_SLIST(mon_free, m, hash_next);
}
14838653Sgpalmer
14938653Sgpalmer
/*
 * mon_reclaim_entry - Remove an entry from the MRU list and from the
 *		       hash array, then zero-initialize it.  Indirectly
 *		       decrements mru_entries.
 *
 * The entry is prepared to be reused.  Before return, in
 * remove_from_hash(), mru_entries is decremented.  It is the caller's
 * responsibility to increment it again.
 */
static inline void
mon_reclaim_entry(
	mon_entry *m
	)
{
	DEBUG_INSIST(NULL != m);

	UNLINK_DLIST(m, mru);
	remove_from_hash(m);
	ZERO(*m);
}
17038653Sgpalmer
17138653Sgpalmer
17263437Ssheldonh/*
173106717Smarcel * mon_getmoremem - get more memory and put it on the free list
17438653Sgpalmer */
17538653Sgpalmerstatic void
17638653Sgpalmermon_getmoremem(void)
17738653Sgpalmer{
17838653Sgpalmer	mon_entry *chunk;
17938653Sgpalmer	u_int entries;
18038653Sgpalmer
18138653Sgpalmer	entries = (0 == mon_mem_increments)
18238653Sgpalmer		      ? mru_initalloc
18338653Sgpalmer		      : mru_incalloc;
18462642Sn_hibma
18538653Sgpalmer	if (entries) {
18638653Sgpalmer		chunk = eallocarray(entries, sizeof(*chunk));
18738653Sgpalmer		mru_alloc += entries;
18897372Smarcel		for (chunk += entries; entries; entries--)
18938653Sgpalmer			mon_free_entry(--chunk);
19038653Sgpalmer
19138653Sgpalmer		mon_mem_increments++;
19241062Sbde	}
19338653Sgpalmer}
19438653Sgpalmer
19538653Sgpalmer
19638653Sgpalmer/*
19799379Sjohan * mon_start - start up the monitoring software
19838653Sgpalmer */
19938653Sgpalmervoid
20038653Sgpalmermon_start(
20138653Sgpalmer	int mode
20238653Sgpalmer	)
20338653Sgpalmer{
20438653Sgpalmer	size_t octets;
205106717Smarcel	u_int min_hash_slots;
20638653Sgpalmer
20738653Sgpalmer	if (MON_OFF == mode)		/* MON_OFF is 0 */
20838653Sgpalmer		return;
20938653Sgpalmer	if (mon_enabled) {
21038653Sgpalmer		mon_enabled |= mode;
21138653Sgpalmer		return;
2121590Srgrimes	}
21374813Sru	if (0 == mon_mem_increments)
21474813Sru		mon_getmoremem();
21574813Sru	/*
21639614Sbde	 * Select the MRU hash table size to limit the average count
21734554Sjb	 * per bucket at capacity (mru_maxdepth) to 8, if possible
21834554Sjb	 * given our hash is limited to 16 bits.
21934554Sjb	 */
22053935Speter	min_hash_slots = (mru_maxdepth / 8) + 1;
22153909Speter	mon_hash_bits = 0;
22253935Speter	while (min_hash_slots >>= 1)
22353909Speter		mon_hash_bits++;
22453909Speter	mon_hash_bits = max(4, mon_hash_bits);
22553909Speter	mon_hash_bits = min(16, mon_hash_bits);
22653909Speter	octets = sizeof(*mon_hash) * MON_HASH_SIZE;
22798064Sdougb	mon_hash = erealloc_zero(mon_hash, octets, 0);
22897821Sdes
22997821Sdes	mon_enabled = mode;
23097821Sdes}
23186252Sgshapiro
23297955Sdougb
/*
 * mon_stop - stop the monitoring software
 *
 * Clears the given mode bit(s) from mon_enabled.  Only when the last
 * enabled mode is stopped are the MRU list and hash table torn down;
 * the entries are returned to the free list rather than freed.
 */
void
mon_stop(
	int mode
	)
{
	mon_entry *mon;

	if (MON_OFF == mon_enabled)
		return;
	if ((mon_enabled & mode) == 0 || mode == MON_OFF)
		return;

	mon_enabled &= ~mode;
	if (mon_enabled != MON_OFF)
		return;

	/*
	 * Move everything on the MRU list to the free list quickly,
	 * without bothering to remove each from either the MRU list or
	 * the hash table.
	 */
	ITER_DLIST_BEGIN(mon_mru_list, mon, mru, mon_entry)
		mon_free_entry(mon);
	ITER_DLIST_END()

	/* empty the MRU list and hash table. */
	mru_entries = 0;
	INIT_DLIST(mon_mru_list, mru);
	zero_mem(mon_hash, sizeof(*mon_hash) * MON_HASH_SIZE);
}
266
267
/*
 * mon_clearinterface -- remove mru entries referring to a local address
 *			 which is going away.
 *
 * Entries whose lcladr matches are fully unlinked (MRU list and hash
 * table) and recycled onto the free list, so no stale endpt pointer
 * survives the interface teardown.
 */
void
mon_clearinterface(
	endpt *lcladr
	)
{
	mon_entry *mon;

	/* iterate mon over mon_mru_list */
	ITER_DLIST_BEGIN(mon_mru_list, mon, mru, mon_entry)
		if (mon->lcladr == lcladr) {
			/* remove from mru list */
			UNLINK_DLIST(mon, mru);
			/* remove from hash list, adjust mru_entries */
			remove_from_hash(mon);
			/* put on free list */
			mon_free_entry(mon);
		}
	ITER_DLIST_END()
}
291
292
/*
 * ntp_monitor - record stats about this packet
 *
 * Returns supplied restriction flags, with RES_LIMITED and RES_KOD
 * cleared unless the packet should not be responded to normally
 * (RES_LIMITED) and possibly should trigger a KoD response (RES_KOD).
 * The returned flags are saved in the MRU entry, so that it reflects
 * whether the last packet from that source triggered rate limiting,
 * and if so, possible KoD response.  This implies you can not tell
 * whether a given address is eligible for rate limiting/KoD from the
 * monlist restrict bits, only whether or not the last packet triggered
 * such responses.  ntpdc -c reslist lets you see whether RES_LIMITED
 * or RES_KOD is lit for a particular address before ntp_monitor()'s
 * typical dousing.
 */
u_short
ntp_monitor(
	struct recvbuf *rbufp,
	u_short	flags
	)
{
	l_fp		interval_fp;
	struct pkt *	pkt;
	mon_entry *	mon;
	mon_entry *	oldest;
	int		oldest_age;
	u_int		hash;
	u_short		restrict_mask;
	u_char		mode;
	u_char		version;
	int		interval;
	int		head;		/* headway increment */
	int		leak;		/* new headway */
	int		limit;		/* average threshold */

	REQUIRE(rbufp != NULL);

	if (mon_enabled == MON_OFF)
		return ~(RES_LIMITED | RES_KOD) & flags;

	pkt = &rbufp->recv_pkt;
	hash = MON_HASH(&rbufp->recv_srcadr);
	mode = PKT_MODE(pkt->li_vn_mode);
	version = PKT_VERSION(pkt->li_vn_mode);
	mon = mon_hash[hash];

	/*
	 * We keep track of all traffic for a given IP in one entry,
	 * otherwise cron'ed ntpdate or similar evades RES_LIMITED.
	 * (SOCK_EQ presumably matches the address without the port,
	 * since the port is refreshed separately below -- confirm.)
	 */

	for (; mon != NULL; mon = mon->hash_next)
		if (SOCK_EQ(&mon->rmtadr, &rbufp->recv_srcadr))
			break;

	if (mon != NULL) {
		/* Known source: update stats and apply rate limiting. */
		interval_fp = rbufp->recv_time;
		L_SUB(&interval_fp, &mon->last);
		/* add one-half second to round up */
		L_ADDUF(&interval_fp, 0x80000000);
		interval = interval_fp.l_i;
		mon->last = rbufp->recv_time;
		/* remember the most recent source port for this address */
		NSRCPORT(&mon->rmtadr) = NSRCPORT(&rbufp->recv_srcadr);
		mon->count++;
		restrict_mask = flags;
		mon->vn_mode = VN_MODE(version, mode);

		/* Shuffle to the head of the MRU list. */
		UNLINK_DLIST(mon, mru);
		LINK_DLIST(mon_mru_list, mon, mru);

		/*
		 * At this point the most recent arrival is first in the
		 * MRU list.  Decrease the counter by the headway, but
		 * not less than zero.
		 */
		mon->leak -= interval;
		mon->leak = max(0, mon->leak);
		head = 1 << ntp_minpoll;
		leak = mon->leak + head;
		limit = NTP_SHIFT * head;

		DPRINTF(2, ("MRU: interval %d headway %d limit %d\n",
			    interval, leak, limit));

		/*
		 * If the minimum and average thresholds are not
		 * exceeded, douse the RES_LIMITED and RES_KOD bits and
		 * increase the counter by the headway increment.  Note
		 * that we give a 1-s grace for the minimum threshold
		 * and a 2-s grace for the headway increment.  If one or
		 * both thresholds are exceeded and the old counter is
		 * less than the average threshold, set the counter to
		 * the average threshold plus the increment and leave
		 * the RES_LIMITED and RES_KOD bits lit. Otherwise,
		 * leave the counter alone and douse the RES_KOD bit.
		 * This rate-limits the KoDs to no less than the average
		 * headway.
		 */
		if (interval + 1 >= ntp_minpkt && leak < limit) {
			mon->leak = leak - 2;
			restrict_mask &= ~(RES_LIMITED | RES_KOD);
		} else if (mon->leak < limit)
			mon->leak = limit + head;
		else
			restrict_mask &= ~RES_KOD;

		mon->flags = restrict_mask;

		return mon->flags;
	}

	/*
	 * If we got here, this is the first we've heard of this
	 * guy.  Get him some memory, either from the free list
	 * or from the tail of the MRU list.
	 *
	 * The following ntp.conf "mru" knobs come into play determining
	 * the depth (or count) of the MRU list:
	 * - mru_mindepth ("mru mindepth") is a floor beneath which
	 *   entries are kept without regard to their age.  The
	 *   default is 600 which matches the longtime implementation
	 *   limit on the total number of entries.
	 * - mru_maxage ("mru maxage") is a ceiling on the age in
	 *   seconds of entries.  Entries older than this are
	 *   reclaimed once mon_mindepth is exceeded.  64s default.
	 *   Note that entries older than this can easily survive
	 *   as they are reclaimed only as needed.
	 * - mru_maxdepth ("mru maxdepth") is a hard limit on the
	 *   number of entries.
	 * - "mru maxmem" sets mru_maxdepth to the number of entries
	 *   which fit in the given number of kilobytes.  The default is
	 *   1024, or 1 megabyte.
	 * - mru_initalloc ("mru initalloc" sets the count of the
	 *   initial allocation of MRU entries.
	 * - "mru initmem" sets mru_initalloc in units of kilobytes.
	 *   The default is 4.
	 * - mru_incalloc ("mru incalloc" sets the number of entries to
	 *   allocate on-demand each time the free list is empty.
	 * - "mru incmem" sets mru_incalloc in units of kilobytes.
	 *   The default is 4.
	 * Whichever of "mru maxmem" or "mru maxdepth" occurs last in
	 * ntp.conf controls.  Similarly for "mru initalloc" and "mru
	 * initmem", and for "mru incalloc" and "mru incmem".
	 */
	if (mru_entries < mru_mindepth) {
		/* below the floor: always take (or allocate) a fresh entry */
		if (NULL == mon_free)
			mon_getmoremem();
		UNLINK_HEAD_SLIST(mon, mon_free, hash_next);
	} else {
		oldest = TAIL_DLIST(mon_mru_list, mru);
		oldest_age = 0;		/* silence uninit warning */
		if (oldest != NULL) {
			interval_fp = rbufp->recv_time;
			L_SUB(&interval_fp, &oldest->last);
			/* add one-half second to round up */
			L_ADDUF(&interval_fp, 0x80000000);
			oldest_age = interval_fp.l_i;
		}
		/* note -1 is legal for mru_maxage (disables) */
		if (oldest != NULL && mru_maxage < oldest_age) {
			mon_reclaim_entry(oldest);
			mon = oldest;
		} else if (mon_free != NULL || mru_alloc <
			   mru_maxdepth) {
			if (NULL == mon_free)
				mon_getmoremem();
			UNLINK_HEAD_SLIST(mon, mon_free, hash_next);
		/* Preempt from the MRU list if old enough. */
		} else if (ntp_random() / (2. * FRAC) >
			   (double)oldest_age / mon_age) {
			/*
			 * Probabilistic preemption: the older the tail
			 * entry relative to mon_age, the more likely it
			 * is reclaimed; otherwise drop this arrival.
			 */
			return ~(RES_LIMITED | RES_KOD) & flags;
		} else {
			mon_reclaim_entry(oldest);
			mon = oldest;
		}
	}

	INSIST(mon != NULL);

	/*
	 * Got one, initialize it
	 */
	mru_entries++;	/* compensates for remove_from_hash() decrement */
	mru_peakentries = max(mru_peakentries, mru_entries);
	mon->last = rbufp->recv_time;
	mon->first = mon->last;
	mon->count = 1;
	mon->flags = ~(RES_LIMITED | RES_KOD) & flags;
	mon->leak = 0;
	memcpy(&mon->rmtadr, &rbufp->recv_srcadr, sizeof(mon->rmtadr));
	mon->vn_mode = VN_MODE(version, mode);
	mon->lcladr = rbufp->dstadr;
	/* classify arrival as multicast, broadcast or unicast */
	mon->cast_flags = (u_char)(((rbufp->dstadr->flags &
	    INT_MCASTOPEN) && rbufp->fd == mon->lcladr->fd) ? MDF_MCAST
	    : rbufp->fd == mon->lcladr->bfd ? MDF_BCAST : MDF_UCAST);

	/*
	 * Drop him into front of the hash table. Also put him on top of
	 * the MRU list.
	 */
	LINK_SLIST(mon_hash[hash], mon, hash_next);
	LINK_DLIST(mon_mru_list, mon, mru);

	return mon->flags;
}
499
500
501