1/****************************************************************************** 2 * balloon.c 3 * 4 * Xen balloon driver - enables returning/claiming memory to/from Xen. 5 * 6 * Copyright (c) 2003, B Dragovic 7 * Copyright (c) 2003-2004, M Williamson, K Fraser 8 * Copyright (c) 2005 Dan M. Smith, IBM Corporation 9 * 10 * This file may be distributed separately from the Linux kernel, or 11 * incorporated into other software packages, subject to the following license: 12 * 13 * Permission is hereby granted, free of charge, to any person obtaining a copy 14 * of this source file (the "Software"), to deal in the Software without 15 * restriction, including without limitation the rights to use, copy, modify, 16 * merge, publish, distribute, sublicense, and/or sell copies of the Software, 17 * and to permit persons to whom the Software is furnished to do so, subject to 18 * the following conditions: 19 * 20 * The above copyright notice and this permission notice shall be included in 21 * all copies or substantial portions of the Software. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 28 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 29 * IN THE SOFTWARE. 30 */ 31 32#include <sys/param.h> 33#include <sys/lock.h> 34#include <sys/kernel.h> 35#include <sys/kthread.h> 36#include <sys/malloc.h> 37#include <sys/mutex.h> 38#include <sys/sysctl.h> 39#include <sys/module.h> 40 41#include <vm/vm.h> 42#include <vm/vm_page.h> 43 44#include <xen/xen-os.h> 45#include <xen/hypervisor.h> 46#include <xen/features.h> 47#include <xen/xenstore/xenstorevar.h> 48 49static MALLOC_DEFINE(M_BALLOON, "Balloon", "Xen Balloon Driver"); 50 51/* Convert from KB (as fetched from xenstore) to number of PAGES */ 52#define KB_TO_PAGE_SHIFT (PAGE_SHIFT - 10) 53 54struct mtx balloon_mutex; 55 56/* We increase/decrease in batches which fit in a page */ 57static xen_pfn_t frame_list[PAGE_SIZE / sizeof(xen_pfn_t)]; 58 59struct balloon_stats { 60 /* We aim for 'current allocation' == 'target allocation'. */ 61 unsigned long current_pages; 62 unsigned long target_pages; 63 /* We may hit the hard limit in Xen. If we do then we remember it. */ 64 unsigned long hard_limit; 65 /* 66 * Drivers may alter the memory reservation independently, but they 67 * must inform the balloon driver so we avoid hitting the hard limit. 68 */ 69 unsigned long driver_pages; 70 /* Number of pages in high- and low-memory balloons. */ 71 unsigned long balloon_low; 72 unsigned long balloon_high; 73}; 74 75static struct balloon_stats balloon_stats; 76#define bs balloon_stats 77 78SYSCTL_DECL(_dev_xen); 79static SYSCTL_NODE(_dev_xen, OID_AUTO, balloon, 80 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 81 "Balloon"); 82SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, current, CTLFLAG_RD, 83 &bs.current_pages, 0, "Current allocation"); 84SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, target, CTLFLAG_RD, 85 &bs.target_pages, 0, "Target allocation"); 86SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, driver_pages, CTLFLAG_RD, 87 &bs.driver_pages, 0, "Driver pages"); 88SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, hard_limit, CTLFLAG_RD, 89 &bs.hard_limit, 0, "Xen hard limit"); 90SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, low_mem, CTLFLAG_RD, 91 &bs.balloon_low, 0, "Low-mem balloon"); 92SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, high_mem, CTLFLAG_RD, 93 &bs.balloon_high, 0, "High-mem balloon"); 94 95/* List of ballooned pages, threaded through the mem_map array. */ 96static TAILQ_HEAD(,vm_page) ballooned_pages; 97 98/* Main work function, always executed in process context. */ 99static void balloon_process(void *unused); 100 101#define IPRINTK(fmt, args...) \ 102 printk(KERN_INFO "xen_mem: " fmt, ##args) 103#define WPRINTK(fmt, args...) \ 104 printk(KERN_WARNING "xen_mem: " fmt, ##args) 105 106static unsigned long 107current_target(void) 108{ 109 unsigned long target = min(bs.target_pages, bs.hard_limit); 110 if (target > (bs.current_pages + bs.balloon_low + bs.balloon_high)) 111 target = bs.current_pages + bs.balloon_low + bs.balloon_high; 112 return (target); 113} 114 115static unsigned long 116minimum_target(void) 117{ 118 unsigned long min_pages, curr_pages = current_target(); 119 120#define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT)) 121 /* 122 * Simple continuous piecewiese linear function: 123 * max MiB -> min MiB gradient 124 * 0 0 125 * 16 16 126 * 32 24 127 * 128 72 (1/2) 128 * 512 168 (1/4) 129 * 2048 360 (1/8) 130 * 8192 552 (1/32) 131 * 32768 1320 132 * 131072 4392 133 */ 134 if (realmem < MB2PAGES(128)) 135 min_pages = MB2PAGES(8) + (realmem >> 1); 136 else if (realmem < MB2PAGES(512)) 137 min_pages = MB2PAGES(40) + (realmem >> 2); 138 else if (realmem < MB2PAGES(2048)) 139 min_pages = MB2PAGES(104) + (realmem >> 3); 140 else 141 min_pages = MB2PAGES(296) + (realmem >> 5); 142#undef MB2PAGES 143 144 /* Don't enforce growth */ 145 return (min(min_pages, curr_pages)); 146} 147 148static int 149increase_reservation(unsigned long nr_pages) 150{ 151 unsigned long i; 152 vm_page_t page; 153 long rc; 154 struct xen_memory_reservation reservation = { 155 .extent_order = 0, 156 .domid = DOMID_SELF 157 }; 158 159 mtx_assert(&balloon_mutex, MA_OWNED); 160 161 if (nr_pages > nitems(frame_list)) 162 nr_pages = nitems(frame_list); 163 164 for (page = TAILQ_FIRST(&ballooned_pages), i = 0; 165 i < nr_pages; i++, page = TAILQ_NEXT(page, plinks.q)) { 166 KASSERT(page != NULL, ("ballooned_pages list corrupt")); 167 frame_list[i] = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); 168 } 169 170 set_xen_guest_handle(reservation.extent_start, frame_list); 171 reservation.nr_extents = nr_pages; 172 rc = HYPERVISOR_memory_op( 173 XENMEM_populate_physmap, &reservation); 174 if (rc < nr_pages) { 175 if (rc > 0) { 176 int ret __diagused; 177 178 /* We hit the Xen hard limit: reprobe. */ 179 reservation.nr_extents = rc; 180 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, 181 &reservation); 182 KASSERT(ret == rc, ("HYPERVISOR_memory_op failed")); 183 } 184 if (rc >= 0) 185 bs.hard_limit = (bs.current_pages + rc - 186 bs.driver_pages); 187 goto out; 188 } 189 190 for (i = 0; i < nr_pages; i++) { 191 page = TAILQ_FIRST(&ballooned_pages); 192 KASSERT(page != NULL, ("Unable to get ballooned page")); 193 TAILQ_REMOVE(&ballooned_pages, page, plinks.q); 194 bs.balloon_low--; 195 196 KASSERT(xen_feature(XENFEAT_auto_translated_physmap), 197 ("auto translated physmap but mapping is valid")); 198 199 vm_page_free(page); 200 } 201 202 bs.current_pages += nr_pages; 203 204 out: 205 return (0); 206} 207 208static int 209decrease_reservation(unsigned long nr_pages) 210{ 211 unsigned long i; 212 vm_page_t page; 213 int need_sleep = 0; 214 int ret __diagused; 215 struct xen_memory_reservation reservation = { 216 .extent_order = 0, 217 .domid = DOMID_SELF 218 }; 219 220 mtx_assert(&balloon_mutex, MA_OWNED); 221 222 if (nr_pages > nitems(frame_list)) 223 nr_pages = nitems(frame_list); 224 225 for (i = 0; i < nr_pages; i++) { 226 /* 227 * Zero the page, or else we might be leaking important data to 228 * other domains on the same host. Xen doesn't scrub ballooned 229 * out memory pages, the guest is in charge of making sure that 230 * no information is leaked. 231 */ 232 if ((page = vm_page_alloc_noobj(VM_ALLOC_ZERO)) == NULL) { 233 nr_pages = i; 234 need_sleep = 1; 235 break; 236 } 237 238 frame_list[i] = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); 239 240 TAILQ_INSERT_HEAD(&ballooned_pages, page, plinks.q); 241 bs.balloon_low++; 242 } 243 244 set_xen_guest_handle(reservation.extent_start, frame_list); 245 reservation.nr_extents = nr_pages; 246 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); 247 KASSERT(ret == nr_pages, ("HYPERVISOR_memory_op failed")); 248 249 bs.current_pages -= nr_pages; 250 251 return (need_sleep); 252} 253 254/* 255 * We avoid multiple worker processes conflicting via the balloon mutex. 256 * We may of course race updates of the target counts (which are protected 257 * by the balloon lock), or with changes to the Xen hard limit, but we will 258 * recover from these in time. 259 */ 260static void 261balloon_process(void *unused) 262{ 263 int need_sleep = 0; 264 long credit; 265 266 mtx_lock(&balloon_mutex); 267 for (;;) { 268 int sleep_time; 269 270 do { 271 credit = current_target() - bs.current_pages; 272 if (credit > 0) 273 need_sleep = (increase_reservation(credit) != 0); 274 if (credit < 0) 275 need_sleep = (decrease_reservation(-credit) != 0); 276 277 } while ((credit != 0) && !need_sleep); 278 279 /* Schedule more work if there is some still to be done. */ 280 if (current_target() != bs.current_pages) 281 sleep_time = hz; 282 else 283 sleep_time = 0; 284 285 msleep(balloon_process, &balloon_mutex, 0, "balloon", 286 sleep_time); 287 } 288 mtx_unlock(&balloon_mutex); 289} 290 291/* Resets the Xen limit, sets new target, and kicks off processing. */ 292static void 293set_new_target(unsigned long target) 294{ 295 /* No need for lock. Not read-modify-write updates. */ 296 bs.hard_limit = ~0UL; 297 bs.target_pages = max(target, minimum_target()); 298 wakeup(balloon_process); 299} 300 301static struct xs_watch target_watch = 302{ 303 .node = "memory/target", 304 .max_pending = 1, 305}; 306 307/* React to a change in the target key */ 308static void 309watch_target(struct xs_watch *watch, 310 const char **vec, unsigned int len) 311{ 312 unsigned long long new_target; 313 int err; 314 315 err = xs_scanf(XST_NIL, "memory", "target", NULL, 316 "%llu", &new_target); 317 if (err) { 318 /* This is ok (for domain0 at least) - so just return */ 319 return; 320 } 321 322 /* 323 * The given memory/target value is in KiB, so it needs converting to 324 * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. 325 */ 326 set_new_target(new_target >> KB_TO_PAGE_SHIFT); 327} 328 329/*------------------ Private Device Attachment Functions --------------------*/ 330/** 331 * \brief Identify instances of this device type in the system. 332 * 333 * \param driver The driver performing this identify action. 334 * \param parent The NewBus parent device for any devices this method adds. 335 */ 336static void 337xenballoon_identify(driver_t *driver, device_t parent) 338{ 339 /* 340 * A single device instance for our driver is always present 341 * in a system operating under Xen. 342 */ 343 BUS_ADD_CHILD(parent, 0, driver->name, 0); 344} 345 346/** 347 * \brief Probe for the existence of the Xen Balloon device 348 * 349 * \param dev NewBus device_t for this Xen control instance. 350 * 351 * \return Always returns 0 indicating success. 352 */ 353static int 354xenballoon_probe(device_t dev) 355{ 356 357 device_set_desc(dev, "Xen Balloon Device"); 358 return (0); 359} 360 361/** 362 * \brief Attach the Xen Balloon device. 363 * 364 * \param dev NewBus device_t for this Xen control instance. 365 * 366 * \return On success, 0. Otherwise an errno value indicating the 367 * type of failure. 368 */ 369static int 370xenballoon_attach(device_t dev) 371{ 372 int err; 373 374 mtx_init(&balloon_mutex, "balloon_mutex", NULL, MTX_DEF); 375 376 bs.current_pages = realmem; 377 bs.target_pages = bs.current_pages; 378 bs.balloon_low = 0; 379 bs.balloon_high = 0; 380 bs.driver_pages = 0UL; 381 bs.hard_limit = ~0UL; 382 383 kproc_create(balloon_process, NULL, NULL, 0, 0, "balloon"); 384 385 target_watch.callback = watch_target; 386 387 err = xs_register_watch(&target_watch); 388 if (err) 389 device_printf(dev, 390 "xenballon: failed to set balloon watcher\n"); 391 392 return (err); 393} 394 395/*-------------------- Private Device Attachment Data -----------------------*/ 396static device_method_t xenballoon_methods[] = { 397 /* Device interface */ 398 DEVMETHOD(device_identify, xenballoon_identify), 399 DEVMETHOD(device_probe, xenballoon_probe), 400 DEVMETHOD(device_attach, xenballoon_attach), 401 402 DEVMETHOD_END 403}; 404 405DEFINE_CLASS_0(xenballoon, xenballoon_driver, xenballoon_methods, 0); 406 407DRIVER_MODULE(xenballoon, xenstore, xenballoon_driver, NULL, NULL); 408