/* * Copyright (c) 2016 Hisilicon Limited. * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/
/* Allocate a key for mr from mr_table */
id = ida_alloc_range(&mtpt_ida->ida, mtpt_ida->min, mtpt_ida->max,
GFP_KERNEL); if (id < 0) {
ibdev_err(ibdev, "failed to alloc id for MR key, id(%d)\n", id); return -ENOMEM;
}
mr->key = hw_index_to_key(id); /* MR key */
err = hns_roce_table_get(hr_dev, &hr_dev->mr_table.mtpt_table,
(unsignedlong)id); if (err) {
ibdev_err(ibdev, "failed to alloc mtpt, ret = %d.\n", err); goto err_free_bitmap;
}
if (mr->enabled) {
ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_MPT,
key_to_hw_index(mr->key) &
(hr_dev->caps.num_mtpts - 1)); if (ret)
ibdev_warn_ratelimited(ibdev, "failed to destroy mpt, ret = %d.\n",
ret);
}
trace_hns_mr(mr); if (mr->type != MR_TYPE_FRMR)
ret = hr_dev->hw->write_mtpt(hr_dev, mailbox->buf, mr); else
ret = hr_dev->hw->frmr_write_mtpt(mailbox->buf, mr); if (ret) {
dev_err(dev, "failed to write mtpt, ret = %d.\n", ret); goto err_page;
}
ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_MPT,
mtpt_idx & (hr_dev->caps.num_mtpts - 1)); if (ret) {
dev_err(dev, "failed to create mpt, ret = %d.\n", ret); goto err_page;
}
/*
 * Report whether any region described by @attr uses multi-hop addressing.
 *
 * Returns true as soon as a region is found whose hopnum is neither
 * HNS_ROCE_HOP_NUM_0 nor non-positive; returns false when no region
 * qualifies (including when region_count is 0).
 */
static inline bool mtr_has_mtt(struct hns_roce_buf_attr *attr)
{
	int i;

	for (i = 0; i < attr->region_count; i++) {
		if (attr->region[i].hopnum != HNS_ROCE_HOP_NUM_0 &&
		    attr->region[i].hopnum > 0)
			return true;
	}

	/*
	 * The mtr has only one root base address. A hopnum of 0 means the
	 * root base address equals the first buffer address, so all
	 * allocated memory must lie in one continuous space that is
	 * accessed in direct mode.
	 */
	return false;
}
for (i = 0; i < attr->region_count; i++)
size += attr->region[i].size;
return size;
}
/* * check the given pages in continuous address space * Returns 0 on success, or the error page num.
*/ staticinlineint mtr_check_direct_pages(dma_addr_t *pages, int page_count, unsignedint page_shift)
{
size_t page_size = 1 << page_shift; int i;
for (i = 1; i < page_count; i++) if (pages[i] - pages[i - 1] != page_size) return i;
if (udata) {
mtr->kmem = NULL;
mtr->umem = ib_umem_get(ibdev, user_addr, total_size,
buf_attr->user_access); if (IS_ERR(mtr->umem)) {
ibdev_err(ibdev, "failed to get umem, ret = %ld.\n",
PTR_ERR(mtr->umem)); return -ENOMEM;
}
} else {
mtr->umem = NULL;
mtr->kmem = hns_roce_buf_alloc(hr_dev, total_size,
buf_attr->page_shift,
!mtr_has_mtt(buf_attr) ?
HNS_ROCE_BUF_DIRECT : 0); if (IS_ERR(mtr->kmem)) {
ibdev_err(ibdev, "failed to alloc kmem, ret = %ld.\n",
PTR_ERR(mtr->kmem)); return PTR_ERR(mtr->kmem);
}
}
return 0;
}
/*
 * Sum the page counts of every region in the MTR's HEM configuration.
 *
 * Returns the accumulated region->count over hem_cfg.region_count regions,
 * or 0 when there are no regions.
 */
static int cal_mtr_pg_cnt(struct hns_roce_mtr *mtr)
{
	struct hns_roce_buf_region *region;
	int page_cnt = 0;
	int i;

	for (i = 0; i < mtr->hem_cfg.region_count; i++) {
		region = &mtr->hem_cfg.region[i];
		page_cnt += region->count;
	}

	return page_cnt;
}
/*
 * Decide whether the MTR buffer has to be split into small pages.
 *
 * Returns true only when the HEM configuration is in direct mode and
 * describes more than one region.
 */
static bool need_split_huge_page(struct hns_roce_mtr *mtr)
{
	/*
	 * When the HEM buffer uses 0-level addressing, the page size is
	 * equal to the whole buffer size. If the current MTR has multiple
	 * regions, we split the buffer into small pages (4k, required by hns
	 * ROCEE). These pages will be used in multiple regions.
	 */
	return mtr->hem_cfg.is_direct && mtr->hem_cfg.region_count > 1;
}
/*
 * mtr_map_bufs - validate an MTR's buffer page addresses and map them.
 *
 * Allocates a temporary array sized by cal_mtr_pg_cnt(), checks the page
 * count (and, for split buffers, page contiguity), then passes the array to
 * hns_roce_mtr_map(). The temporary array is freed on every path that
 * reaches the err_alloc_list label, including success.
 *
 * Returns 0 on success, -ENOMEM when the temporary array cannot be
 * allocated, -ENOBUFS when the page count or the contiguity check fails,
 * or the error returned by hns_roce_mtr_map().
 */
staticint mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
{ struct ib_device *ibdev = &hr_dev->ib_dev; int page_count = cal_mtr_pg_cnt(mtr); unsignedint page_shift;
dma_addr_t *pages; int npage; int ret;
/* Split buffers use HNS_HW_PAGE_SHIFT; otherwise the configured buffer page shift is used. */
page_shift = need_split_huge_page(mtr) ? HNS_HW_PAGE_SHIFT :
mtr->hem_cfg.buf_pg_shift; /* alloc a tmp array to store buffer's dma address */
pages = kvcalloc(page_count, sizeof(dma_addr_t), GFP_KERNEL); if (!pages) return -ENOMEM;
/*
 * NOTE(review): npage is never assigned in the code visible here before it
 * is compared below, and nothing fills pages[]. The statements that do so
 * appear to be missing from this copy - confirm against the upstream file.
 */
if (npage != page_count) {
ibdev_err(ibdev, "failed to get mtr page %d != %d.\n", npage,
page_count);
ret = -ENOBUFS; goto err_alloc_list;
}
/* For a split buffer with more than one page, mtr_check_direct_pages() must return 0. */
if (need_split_huge_page(mtr) && npage > 1) {
ret = mtr_check_direct_pages(pages, npage, page_shift); if (ret) {
/* On failure ret is logged as the offending page number before being replaced by -ENOBUFS. */
ibdev_err(ibdev, "failed to check %s page: %d / %d.\n",
mtr->umem ? "umtr" : "kmtr", ret, npage);
ret = -ENOBUFS; goto err_alloc_list;
}
}
ret = hns_roce_mtr_map(hr_dev, mtr, pages, page_count); if (ret)
ibdev_err(ibdev, "failed to map mtr pages, ret = %d.\n", ret);
/* Success falls through to the label as well, so the temporary array is always released. */
err_alloc_list:
kvfree(pages);
return ret;
}
/*
 * hns_roce_mtr_map - record the given page addresses in an MTR.
 *
 * @hr_dev: RoCE device struct pointer
 * @mtr: memory translate region
 * @pages: array of page DMA addresses
 * @page_cnt: number of entries in @pages
 *
 * In direct mode only pages[0] is stored as the root base address. Otherwise
 * each region of the HEM configuration is mapped through mtr_map_region()
 * until all regions are visited or @page_cnt pages have been mapped.
 *
 * Returns 0 on success, -EINVAL when a region's offset + count exceeds
 * @page_cnt, the negative value returned by mtr_map_region() on failure, or
 * -ENOBUFS when fewer than @page_cnt pages ended up mapped.
 */
int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
		     dma_addr_t *pages, unsigned int page_cnt)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	struct hns_roce_buf_region *r;
	unsigned int i, mapped_cnt;
	int ret = 0;

	/*
	 * Only use the first page address as root ba when hopnum is 0, this
	 * is because the addresses of all pages are consecutive in this case.
	 */
	if (mtr->hem_cfg.is_direct) {
		mtr->hem_cfg.root_ba = pages[0];
		return 0;
	}

	for (i = 0, mapped_cnt = 0; i < mtr->hem_cfg.region_count &&
	     mapped_cnt < page_cnt; i++) {
		r = &mtr->hem_cfg.region[i];

		/* The region must lie entirely inside the supplied array. */
		if (r->offset + r->count > page_cnt) {
			ret = -EINVAL;
			ibdev_err(ibdev,
				  "failed to check mtr%u count %u + %u > %u.\n",
				  i, r->offset, r->count, page_cnt);
			return ret;
		}

		ret = mtr_map_region(hr_dev, mtr, r, &pages[r->offset],
				     page_cnt - mapped_cnt);
		if (ret < 0) {
			ibdev_err(ibdev,
				  "failed to map mtr%u offset %u, ret = %d.\n",
				  i, r->offset, ret);
			return ret;
		}

		/* A non-negative return is added to the mapped page total. */
		mapped_cnt += ret;
		ret = 0;
	}

	if (mapped_cnt < page_cnt) {
		ret = -ENOBUFS;
		ibdev_err(ibdev, "failed to map mtr pages count: %u < %u.\n",
			  mapped_cnt, page_cnt);
	}

	return ret;
}
staticint hns_roce_get_direct_addr_mtt(struct hns_roce_hem_cfg *cfg,
u32 start_index, u64 *mtt_buf, int mtt_cnt)
{ int mtt_count; int total = 0;
u32 npage;
u64 addr;
if (mtt_cnt > cfg->region_count) return -EINVAL;
for (mtt_count = 0; mtt_count < cfg->region_count && total < mtt_cnt;
mtt_count++) {
npage = cfg->region[mtt_count].offset; if (npage < start_index) continue;
staticint hns_roce_get_mhop_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, u32 start_index,
u64 *mtt_buf, int mtt_cnt)
{ int left = mtt_cnt; int total = 0; int mtt_count;
__le64 *mtts;
u32 npage;
if (!buf_attr->adaptive || buf_attr->type != MTR_PBL) return 0;
/* Calculating the number of buf pages, each buf page needs a BA */ if (mtr->umem)
ba_cnt = ib_umem_num_dma_blocks(mtr->umem, buf_pg_sz); else
ba_cnt = DIV_ROUND_UP(buf_attr->region[0].size, buf_pg_sz);
for (j = 0; j <= HNS_ROCE_MAX_HOP_NUM; j++) { if (ba_cnt <= unit) {
hop_num = j; break;
} /* Number of BAs can be represented at per hop */
unit *= ba_pg_sz / BA_BYTE_LEN;
}
if (hop_num < 0) {
ibdev_err(ibdev, "failed to calculate a valid hopnum.\n"); return -EINVAL;
}
if (!is_buf_attr_valid(hr_dev, attr)) return -EINVAL;
/* If mtt is disabled, all pages must be within a continuous range */
cfg->is_direct = !mtr_has_mtt(attr);
cfg->region_count = attr->region_count;
buf_size = mtr_bufs_size(attr); if (need_split_huge_page(mtr)) {
buf_pg_sz = HNS_HW_PAGE_SIZE;
cfg->buf_pg_count = 1; /* The ROCEE requires the page size to be 4K * 2 ^ N. */
cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT +
order_base_2(DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE));
} else {
buf_pg_sz = 1 << attr->page_shift;
cfg->buf_pg_count = mtr->umem ?
ib_umem_num_dma_blocks(mtr->umem, buf_pg_sz) :
DIV_ROUND_UP(buf_size, buf_pg_sz);
cfg->buf_pg_shift = attr->page_shift;
pgoff = mtr->umem ? mtr->umem->address & ~PAGE_MASK : 0;
}
/* Convert buffer size to page index and page count for each region and * the buffer's offset needs to be appended to the first region.
*/ for (page_cnt = 0, i = 0; i < attr->region_count; i++) {
r = &cfg->region[i];
r->offset = page_cnt;
buf_size = hr_hw_page_align(attr->region[i].size + pgoff); if (attr->type == MTR_PBL && mtr->umem)
r->count = ib_umem_num_dma_blocks(mtr->umem, buf_pg_sz); else
r->count = DIV_ROUND_UP(buf_size, buf_pg_sz);
ba_per_bt = BIT(pg_shift) / BA_BYTE_LEN;
ba_num = 0; for (i = 0; i < mtr->hem_cfg.region_count; i++) {
re = &mtr->hem_cfg.region[i]; if (re->hopnum == 0) continue;
/** * hns_roce_mtr_create - Create hns memory translate region. * * @hr_dev: RoCE device struct pointer * @mtr: memory translate region * @buf_attr: buffer attribute for creating mtr * @ba_page_shift: page shift for multi-hop base address table * @udata: user space context, if it's NULL, means kernel space * @user_addr: userspace virtual address to start at
*/ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, struct hns_roce_buf_attr *buf_attr, unsignedint ba_page_shift, struct ib_udata *udata, unsignedlong user_addr)
{ struct ib_device *ibdev = &hr_dev->ib_dev; int ret;
trace_hns_buf_attr(buf_attr); /* The caller has its own buffer list and invokes the hns_roce_mtr_map() * to finish the MTT configuration.
*/ if (buf_attr->mtt_only) {
mtr->umem = NULL;
mtr->kmem = NULL;
} else {
ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, udata, user_addr); if (ret) {
ibdev_err(ibdev, "failed to alloc mtr bufs, ret = %d.\n", ret); return ret;
}
ret = get_best_page_shift(hr_dev, mtr, buf_attr); if (ret) goto err_init_buf;
ret = get_best_hop_num(hr_dev, mtr, buf_attr, ba_page_shift); if (ret) goto err_init_buf;
}
ret = mtr_init_buf_cfg(hr_dev, mtr, buf_attr); if (ret) goto err_init_buf;
ret = mtr_alloc_mtt(hr_dev, mtr, ba_page_shift); if (ret) {
ibdev_err(ibdev, "failed to alloc mtr mtt, ret = %d.\n", ret); goto err_init_buf;
}
if (buf_attr->mtt_only) return 0;
/* Write buffer's dma address to MTT */
ret = mtr_map_bufs(hr_dev, mtr); if (ret) {
ibdev_err(ibdev, "failed to map mtr bufs, ret = %d.\n", ret); goto err_alloc_mtt;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.