/* Upper bound on the number of OSDs in a single placement group (pg). */
#define CEPH_PG_MAX_SIZE 32
/*
 * Placement group identifier (v1 encoding).
 *
 * The three fields below are packed so that the whole id is encoded
 * into one __le64.
 */
struct ceph_pg_v1 {
	/* preferred primary osd */
	__le16 preferred;
	/* placement seed */
	__le16 ps;
	/* object pool */
	__le32 pool;
} __attribute__ ((packed));
/*
 * pg_pool is a set of pgs storing a pool of objects
 *
 *  pg_num -- base number of pseudorandomly placed pgs
 *
 *  pgp_num -- effective number when calculating pg placement.  this
 *  is used for pg_num increases.  new pgs result in data being "split"
 *  into new pgs.  for this to proceed smoothly, new pgs are initially
 *  colocated with their parents; that is, pgp_num doesn't increase
 *  until the new pgs have successfully split.  only _then_ are the new
 *  pgs placed independently.
 *
 *  lpg_num -- localized pg count (per device).  replicas are randomly
 *  selected.
 *
 *  lpgp_num -- as above.
 */
#define CEPH_NOPOOL ((__u64) (-1)) /* pool id not defined */
/*
 * stable_mod func is used to control number of placement groups.
 * similar to straight-up modulo, but produces a stable mapping as b
 * increases over time.  b is the number of bins, and bmask is the
 * containing power of 2 minus 1.
 *
 * b <= bmask and bmask=(2**n)-1
 * e.g., b=12 -> bmask=15, b=123 -> bmask=127
 *
 * @x:     value to map onto a bin
 * @b:     number of bins
 * @bmask: containing power of 2 minus 1 (see above)
 *
 * Returns x & bmask when that value is below b; otherwise returns x
 * masked with the next smaller mask (bmask >> 1).
 */
static inline int ceph_stable_mod(int x, int b, int bmask)
{
	int masked = x & bmask;

	if (masked < b)
		return masked;

	/* masked value is not a valid bin: fold into the lower half */
	return x & (bmask >> 1);
}
/*
 * Object layout: describes how a given object should be stored.
 */
struct ceph_object_layout {
	/* raw pg, with _full_ ps precision. */
	struct ceph_pg_v1 ol_pgid;
	/* for per-object parity, if any */
	__le32 ol_stripe_unit;
} __attribute__ ((packed));
/*
 * Compound epoch+version value; the storage layer uses it to
 * serialize mutations.
 */
struct ceph_eversion {
	__le64 version;		/* version component */
	__le32 epoch;		/* epoch component */
} __attribute__ ((packed));
/*
 * osd map bits
 */

/*
 * status bits
 *
 * Each directive must sit on its own line: a '#define' that follows
 * another on the same line is swallowed into the first macro's
 * replacement list and never becomes a macro of its own.
 */
#define CEPH_OSD_EXISTS  (1<<0)
#define CEPH_OSD_UP      (1<<1)
#define CEPH_OSD_AUTOOUT (1<<2)  /* osd was automatically marked out */
#define CEPH_OSD_NEW     (1<<3)  /* osd is new, never marked in */
/*
 * osd map flag bits
 *
 * One directive per line; each flag is a distinct single bit.
 */
#define CEPH_OSDMAP_NEARFULL         (1<<0)  /* sync writes (near ENOSPC),
						not set since ~luminous */
#define CEPH_OSDMAP_FULL             (1<<1)  /* no data writes (ENOSPC),
						not set since ~luminous */
#define CEPH_OSDMAP_PAUSERD          (1<<2)  /* pause all reads */
#define CEPH_OSDMAP_PAUSEWR          (1<<3)  /* pause all writes */
#define CEPH_OSDMAP_PAUSEREC         (1<<4)  /* pause recovery */
#define CEPH_OSDMAP_NOUP             (1<<5)  /* block osd boot */
#define CEPH_OSDMAP_NODOWN           (1<<6)  /* block osd mark-down/failure */
#define CEPH_OSDMAP_NOOUT            (1<<7)  /* block osd auto mark-out */
#define CEPH_OSDMAP_NOIN             (1<<8)  /* block osd auto mark-in */
#define CEPH_OSDMAP_NOBACKFILL       (1<<9)  /* block osd backfill */
#define CEPH_OSDMAP_NORECOVER        (1<<10) /* block osd recovery and backfill */
#define CEPH_OSDMAP_NOSCRUB          (1<<11) /* block periodic scrub */
#define CEPH_OSDMAP_NODEEP_SCRUB     (1<<12) /* block periodic deep-scrub */
#define CEPH_OSDMAP_NOTIERAGENT      (1<<13) /* disable tiering agent */
#define CEPH_OSDMAP_NOREBALANCE      (1<<14) /* block osd backfill unless pg is degraded */
#define CEPH_OSDMAP_SORTBITWISE      (1<<15) /* use bitwise hobject_t sort */
#define CEPH_OSDMAP_REQUIRE_JEWEL    (1<<16) /* require jewel for booting osds */
#define CEPH_OSDMAP_REQUIRE_KRAKEN   (1<<17) /* require kraken for booting osds */
#define CEPH_OSDMAP_REQUIRE_LUMINOUS (1<<18) /* require l for booting osds */
#define CEPH_OSDMAP_RECOVERY_DELETES (1<<19) /* deletes performed during recovery instead of peering */
/*
 * The error code to return when an OSD can't handle a write
 * because it is too large.
 */
#define OSD_WRITETOOBIG EMSGSIZE
/*
 * osd ops
 *
 * WARNING: do not use these op codes directly.  Use the helpers
 * defined below instead.  In certain cases, op code behavior was
 * redefined, resulting in special-cases in the helpers.
 *
 * CEPH_OSD_OP_MODE is the mask covering the mode nibble; the
 * CEPH_OSD_OP_MODE_* values are the individual modes within it.
 */
#define CEPH_OSD_OP_MODE       0xf000
#define CEPH_OSD_OP_MODE_RD    0x1000
#define CEPH_OSD_OP_MODE_WR    0x2000
#define CEPH_OSD_OP_MODE_RMW   0x3000
#define CEPH_OSD_OP_MODE_SUB   0x4000
#define CEPH_OSD_OP_MODE_CACHE 0x8000
/*
 * note that the following tmap stuff is also defined in the ceph librados.h
 * any modification here needs to be updated there
 *
 * Each value is a single character constant.
 */
#define CEPH_OSD_TMAP_HDR      'h'
#define CEPH_OSD_TMAP_SET      's'
#define CEPH_OSD_TMAP_CREATE   'c' /* create key */
#define CEPH_OSD_TMAP_RM       'r'
#define CEPH_OSD_TMAP_RMSLOPPY 'R'
/*
 * Declaration only; the definition lives outside this header.
 * Takes an osd op code and returns a pointer to a constant string
 * (presumably the op's printable name -- confirm against the definition).
 */
extern const char *ceph_osd_op_name(int op);
/*
 * osd op flags
 *
 * An op may be READ, WRITE, or READ|WRITE.
 *
 * Every flag occupies exactly one bit, so values are spelled as shifts.
 */
enum {
	/* want (or is) "ack" ack */
	CEPH_OSD_FLAG_ACK            = 1 << 0,
	/* want (or is) "onnvram" ack */
	CEPH_OSD_FLAG_ONNVRAM        = 1 << 1,
	/* want (or is) "ondisk" ack */
	CEPH_OSD_FLAG_ONDISK         = 1 << 2,
	/* resend attempt */
	CEPH_OSD_FLAG_RETRY          = 1 << 3,
	/* op may read */
	CEPH_OSD_FLAG_READ           = 1 << 4,
	/* op may write */
	CEPH_OSD_FLAG_WRITE          = 1 << 5,
	/* EOLDSNAP if snapc is out of order */
	CEPH_OSD_FLAG_ORDERSNAP      = 1 << 6,
	/* DEPRECATED msg includes osd_peer_stat */
	CEPH_OSD_FLAG_PEERSTAT_OLD   = 1 << 7,
	CEPH_OSD_FLAG_BALANCE_READS  = 1 << 8,
	/* execute op in parallel */
	CEPH_OSD_FLAG_PARALLELEXEC   = 1 << 9,
	/* pg op, no object */
	CEPH_OSD_FLAG_PGOP           = 1 << 10,
	/* op may exec */
	CEPH_OSD_FLAG_EXEC           = 1 << 11,
	/* DEPRECATED op may exec (public) */
	CEPH_OSD_FLAG_EXEC_PUBLIC    = 1 << 12,
	/* read from nearby replica, if any */
	CEPH_OSD_FLAG_LOCALIZE_READS = 1 << 13,
	/* order wrt concurrent reads */
	CEPH_OSD_FLAG_RWORDERED      = 1 << 14,
	/* ignore cache logic */
	CEPH_OSD_FLAG_IGNORE_CACHE   = 1 << 15,
	/* skip rw locks */
	CEPH_OSD_FLAG_SKIPRWLOCKS    = 1 << 16,
	/* ignore pool overlay */
	CEPH_OSD_FLAG_IGNORE_OVERLAY = 1 << 17,
	/* this is part of flush */
	CEPH_OSD_FLAG_FLUSH          = 1 << 18,
	/* map snap direct to clone id */
	CEPH_OSD_FLAG_MAP_SNAP_CLONE = 1 << 19,
	/* use snapc provided even if pool uses pool snaps */
	CEPH_OSD_FLAG_ENFORCE_SNAPC  = 1 << 20,
	/* op has been redirected */
	CEPH_OSD_FLAG_REDIRECTED     = 1 << 21,
	/* redirect bit is authoritative */
	CEPH_OSD_FLAG_KNOWN_REDIR    = 1 << 22,
	/* try op despite full flag */
	CEPH_OSD_FLAG_FULL_TRY       = 1 << 23,
	/* force op despite full flag */
	CEPH_OSD_FLAG_FULL_FORCE     = 1 << 24,
};
/*
 * CEPH_OSD_OP_FLAG_* bits; each value is a single distinct bit.
 */
enum {
	/* EXCL object create */
	CEPH_OSD_OP_FLAG_EXCL               = 1 << 0,
	/* continue despite failure */
	CEPH_OSD_OP_FLAG_FAILOK             = 1 << 1,
	/* the op is random */
	CEPH_OSD_OP_FLAG_FADVISE_RANDOM     = 1 << 2,
	/* the op is sequential */
	CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL = 1 << 3,
	/* data will be accessed in the near future */
	CEPH_OSD_OP_FLAG_FADVISE_WILLNEED   = 1 << 4,
	/* data will not be accessed in the near future */
	CEPH_OSD_OP_FLAG_FADVISE_DONTNEED   = 1 << 5,
	/* data will be accessed only once by this client */
	CEPH_OSD_OP_FLAG_FADVISE_NOCACHE    = 1 << 6,
};
/*
 * Aliases onto existing errno values.  One directive per line so that
 * both names are actually defined.
 */
#define EOLDSNAPC    ERESTART  /* ORDERSNAP flag set; writer has old snapc */
#define EBLOCKLISTED ESHUTDOWN /* blocklisted */
/*
 * CEPH_OSD_COPY_FROM_FLAG_* bits; each value is a single distinct bit.
 */
enum {
	/* part of a flush operation */
	CEPH_OSD_COPY_FROM_FLAG_FLUSH          = 1 << 0,
	/* ignore pool overlay */
	CEPH_OSD_COPY_FROM_FLAG_IGNORE_OVERLAY = 1 << 1,
	/* ignore osd cache logic */
	CEPH_OSD_COPY_FROM_FLAG_IGNORE_CACHE   = 1 << 2,
	/* map snap direct to cloneid */
	CEPH_OSD_COPY_FROM_FLAG_MAP_SNAP_CLONE = 1 << 3,
	/* order with write */
	CEPH_OSD_COPY_FROM_FLAG_RWORDERED      = 1 << 4,
	/* send truncate_{seq,size} */
	CEPH_OSD_COPY_FROM_FLAG_TRUNCATE_SEQ   = 1 << 5,
};
/*
 * CEPH_OSD_WATCH_OP_* codes.
 */
enum {
	CEPH_OSD_WATCH_OP_UNWATCH      = 0,
	CEPH_OSD_WATCH_OP_LEGACY_WATCH = 1,
	/*
	 * note: use only ODD ids to prevent pre-giant code from
	 * interpreting the op as UNWATCH
	 */
	CEPH_OSD_WATCH_OP_WATCH        = 3,
	CEPH_OSD_WATCH_OP_RECONNECT    = 5,
	CEPH_OSD_WATCH_OP_PING         = 7,
};
/*
 * Die Informationen auf dieser Webseite wurden nach bestem Wissen
 * sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit,
 * noch Richtigkeit, noch Qualität der bereitgestellten Informationen
 * zugesichert.
 *
 * Bemerkung:
 * Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.
 */