主要介绍怎样将crypto hardware accelerate 移植到 linux crypto api中 参考项主要是 stm32的实现
crypto中移植aes 引擎 linux中 hardware ip 是以驱动的方式加载到kernel中的 先看一下驱动的定义
static const struct of_device_id stm32_dt_ids[] = {
	{ .compatible = "st,stm32f756-cryp", .data = &f7_data },
	{ .compatible = "st,stm32mp1-cryp", .data = &mp1_data },
	{},
};

static struct platform_driver stm32_cryp_driver = {
	.probe  = stm32_cryp_probe,
	.remove = stm32_cryp_remove,
	.driver = {
		.name           = DRIVER_NAME,
		.pm             = &stm32_cryp_pm_ops,
		.of_match_table = stm32_dt_ids,
	},
};
stm32的加密引擎是作为platform driver.
crypto@50060000 {
	compatible = "st,stm32f756-cryp";
	reg = <0x50060000 0x400>;
	interrupts = <79>;
	clocks = <&rcc 0 STM32F7_AHB2_CLOCK(CRYP)>;
	resets = <&rcc STM32F7_AHB2_RESET(CRYP)>;
};
通过platform_driver_register
或者 module_init
注册驱动时, 会将driver 和设备绑定, 自动回调probe接口 一个driver可以同多个设备绑定, 每次绑定都会回调probe接口.
寻找设备的过程, 一般就是在device tree 中通过 of_match_table 寻找匹配的设备的过程. 匹配到后, 根据device tree中的设备信息初始化 driver中dev相关的结构体信息
probe 注册过程 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 static int stm32_cryp_probe (struct platform_device *pdev) { struct device *dev = &pdev->dev; struct stm32_cryp *cryp ; struct reset_control *rst ; int irq, ret; cryp = devm_kzalloc(dev, sizeof (*cryp), GFP_KERNEL); if (!cryp) return -ENOMEM; cryp->caps = of_device_get_match_data(dev); if (!cryp->caps) return -ENODEV; cryp->dev = dev; cryp->regs = devm_platform_ioremap_resource(pdev, 0 ); if (IS_ERR(cryp->regs)) return PTR_ERR(cryp->regs); irq = platform_get_irq(pdev, 0 ); if (irq < 0 ) return irq; ret = devm_request_threaded_irq(dev, irq, stm32_cryp_irq, stm32_cryp_irq_thread, IRQF_ONESHOT, dev_name(dev), cryp); if (ret) { dev_err(dev, "Cannot grab IRQ\n" ); return ret; } cryp->clk = devm_clk_get(dev, NULL ); if (IS_ERR(cryp->clk)) { dev_err(dev, "Could not get clock\n" ); return PTR_ERR(cryp->clk); } ret = clk_prepare_enable(cryp->clk); if (ret) { dev_err(cryp->dev, "Failed to enable clock\n" ); return ret; } pm_runtime... 
rst = devm_reset_control_get(dev, NULL ); if (!IS_ERR(rst)) { reset_control_assert(rst); udelay(2 ); reset_control_deassert(rst); } platform_set_drvdata(pdev, cryp); spin_lock(&cryp_list.lock); list_add(&cryp->list , &cryp_list.dev_list); spin_unlock(&cryp_list.lock); cryp->engine = crypto_engine_alloc_init(dev, 1 ); if (!cryp->engine) { dev_err(dev, "Could not init crypto engine\n" ); ret = -ENOMEM; goto err_engine1; } ret = crypto_engine_start(cryp->engine); if (ret) { dev_err(dev, "Could not start crypto engine\n" ); goto err_engine2; } ret = crypto_register_skciphers(crypto_algs, ARRAY_SIZE(crypto_algs)); if (ret) { dev_err(dev, "Could not register algs\n" ); goto err_algs; } ret = crypto_register_aeads(aead_algs, ARRAY_SIZE(aead_algs)); if (ret) goto err_aead_algs; dev_info(dev, "Initialized\n" ); return 0 ; ... return ret; }
1.1 crypto_engine_alloc_init
struct crypto_engine *crypto_engine_alloc_init(struct device *dev, bool rt)
{
	struct sched_param param = { .sched_priority = MAX_RT_PRIO / 2 };
	struct crypto_engine *engine;

	if (!dev)
		return NULL;

	engine = devm_kzalloc(dev, sizeof(*engine), GFP_KERNEL);
	if (!engine)
		return NULL;

	engine->dev = dev;
	engine->rt = rt;
	engine->running = false;
	engine->busy = false;
	engine->idling = false;
	engine->cur_req_prepared = false;
	engine->priv_data = dev;
	snprintf(engine->name, sizeof(engine->name),
		 "%s-engine", dev_name(dev));

	crypto_init_queue(&engine->queue, CRYPTO_ENGINE_MAX_QLEN);
	spin_lock_init(&engine->queue_lock);

	engine->kworker = kthread_create_worker(0, "%s", engine->name);
	kthread_init_work(&engine->pump_requests, crypto_pump_work);

	if (engine->rt) {
		dev_info(dev, "will run requests pump with realtime priority\n");
		sched_setscheduler(engine->kworker->task, SCHED_FIFO, &param);
	}

	return engine;
}
kthread_worker 与 kthread_work 内核线程创建函数创建一个内核线程,它判断属于这个线程的kthread_worker中是否有要处理的kthread_work,如果有,就取出这个kthread_work,然后调用kthread_work上面指定的处理函数,如果没有这个线程就进行休眠,当有新的kthread_work添加到kthread_worker上时,会再次唤醒kthread_worker的处理线程重复上述工作。
engine->pump_requests 是 kthread_work, crypto_pump_work 则是该work上的指定的处理函数 engine->kworker 是kthread_worker
上面crypto_engine_alloc_init的过程只是初始化了crypto engine相关的数据, 分配了内存, 创建了worker与work, 但worker上还没有work, worker与work并没有关联起来
1.2 crypto_engine_start(cryp->engine)
int crypto_engine_start(struct crypto_engine *engine)
{
	unsigned long flags;

	spin_lock_irqsave(&engine->queue_lock, flags);

	if (engine->running || engine->busy) {
		spin_unlock_irqrestore(&engine->queue_lock, flags);
		return -EBUSY;
	}

	engine->running = true;
	spin_unlock_irqrestore(&engine->queue_lock, flags);

	kthread_queue_work(engine->kworker, &engine->pump_requests);

	return 0;
}
crypto_pump_work

static void crypto_pump_work(struct kthread_work *work)
{
	struct crypto_engine *engine =
		container_of(work, struct crypto_engine, pump_requests);

	crypto_pump_requests(engine, true);
}

static void crypto_pump_requests(struct crypto_engine *engine,
				 bool in_kthread)
{
	struct crypto_async_request *async_req, *backlog;
	struct crypto_engine_ctx *enginectx;

	spin_lock_irqsave(&engine->queue_lock, flags);

	/* engine 空转中: 重新排队 pump work 后退出 */
	if (engine->idling) {
		kthread_queue_work(engine->kworker, &engine->pump_requests);
		goto out;
	}

	if (!crypto_queue_len(&engine->queue) || !engine->running) {
	}
	...
}
这里先知道当work被推给worker后, worker 会执行work上指定的处理函数, 该处理函数主要的作用就是处理engine->queue上的消息队列即可, 至于消息是怎样推到queue上的以及queue的结构是什么样的, 后面等到有消息处理时再分解
1.3 crypto_register_skciphers
static struct skcipher_alg crypto_algs[] = {{
	.base.cra_name		= "ecb(aes)",
	.base.cra_driver_name	= "stm32-ecb-aes",
	.base.cra_priority	= 200,
	.base.cra_flags		= CRYPTO_ALG_ASYNC,
	.base.cra_blocksize	= AES_BLOCK_SIZE,
	.base.cra_ctxsize	= sizeof(struct stm32_cryp_ctx),
	.base.cra_alignmask	= 0xf,
	.base.cra_module	= THIS_MODULE,
	.init			= stm32_cryp_init_tfm,
	.min_keysize		= AES_MIN_KEY_SIZE,
	.max_keysize		= AES_MAX_KEY_SIZE,
	.setkey			= stm32_cryp_aes_setkey,
	.encrypt		= stm32_cryp_aes_ecb_encrypt,
	.decrypt		= stm32_cryp_aes_ecb_decrypt,
},

注册调用链:
crypto_register_skcipher(&algs[i]);
	skcipher_prepare_alg(alg);
	crypto_register_alg(&alg->base);
...
遍历 crypto_algs数组 注册crypto 算法, 这里用了linux crypto api的通用结构, 目的估计是向系统中注册对应的算法, kernel中的其他模块可以通过名字找到该算法, 调用其对应的init setkey encrypt等函数指针crypto_skcipher_init_tfm
crypto_alloc_skcipher
crypto_skcipher_setkey
crypto_skcipher_encrypt
crypto_skcipher_decrypt
crypto_alloc_skcipher 通过 alg_name 查找到注册的算法, 最后会调用其init的函数指针.
struct crypto_skcipher *crypto_alloc_skcipher(const char *alg_name,
					      u32 type, u32 mask)
{
	return crypto_alloc_tfm(alg_name, &crypto_skcipher_type, type, mask);
}

+--+ crypto_alloc_tfm
   \--+ crypto_find_alg(alg_name, frontend, type, mask)
   |  \-- crypto_alg_mod_lookup(alg_name, type, mask)
   |--+ crypto_create_tfm(alg, frontend)
      \--+ frontend->init_tfm(tfm) === crypto_skcipher_type->init_tfm === crypto_skcipher_init_tfm
         \-- alg->init(skcipher) === stm32_cryp_init_tfm
         |-- alg->cra_init(tfm)
这里需要顺带看下 init 做了什么
stm32_cryp_init_tfm

static int stm32_cryp_init_tfm(struct crypto_skcipher *tfm)
{
	struct stm32_cryp_ctx *ctx = crypto_skcipher_ctx(tfm);

	crypto_skcipher_set_reqsize(tfm, sizeof(struct stm32_cryp_reqctx));

	ctx->enginectx.op.do_one_request = stm32_cryp_cipher_one_req;
	ctx->enginectx.op.prepare_request = stm32_cryp_prepare_cipher_req;
	ctx->enginectx.op.unprepare_request = NULL;

	return 0;
}
1.4 crypto_register_aeads(aead_algs, ARRAY_SIZE(aead_algs)); 注册aead_algs(带关联认证的加密算法), 这里不展开了, 同 crypto_register_skciphers 过程差不多, 只是要注意为什么区分成了 aeads, type由 crypto_skcipher_type
变成了crypto_aead_type
有需要再看, 这里CCM GCM 需要通过这个接口实现
request 处理 crypto_skcipher_encrypt 跟一下这个过程, 看下request 路由
struct skcipher_request {
	unsigned int cryptlen;
	u8 *iv;
	struct scatterlist *src;
	struct scatterlist *dst;
	struct crypto_async_request base;
	void *__ctx[] CRYPTO_MINALIGN_ATTR;
};
skcipher_request_set_crypt(req, sgt.sgl, sgt.sgl, crypt_len, iv); request 封装包含 len ,src dst,iv 等. key并不在里面
--+ skcipher_request_set_crypt
  \--+ crypto_skcipher_alg(tfm)->encrypt(req);
     \--+ stm32_cryp_aes_ecb_encrypt(req)
        \--+ stm32_cryp_crypt(req, FLG_AES | FLG_ECB | FLG_ENCRYPT);
           \-- 找到 之前注册的 stm32_cryp_ctx, 并从 cryp_list 中找到probe时添加的cryp结构体指针, 最终赋值给 stm32_cryp_ctx 的 ctx->cryp
           |--+ crypto_transfer_skcipher_request_to_engine(cryp->engine, req)
              \--+ crypto_transfer_request_to_engine(engine, &req->base)
                 \--+ crypto_transfer_request(engine, req, true)
                    \-- crypto_enqueue_request(&engine->queue, req)   # "入队" 见 --> 2.1 入队分析
                    |-- kthread_queue_work(engine->kworker, &engine->pump_requests);   # "work 推给worker"
crypto_enqueue_request –> 2.1 入队分析
int crypto_enqueue_request(struct crypto_queue *queue,
			   struct crypto_async_request *request)
{
	int err = -EINPROGRESS;

	if (unlikely(queue->qlen >= queue->max_qlen)) {
		if (!(request->flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) {
			err = -ENOSPC;
			goto out;
		}
		err = -EBUSY;
		if (queue->backlog == &queue->list)
			queue->backlog = &request->list;
	}
	queue->qlen++;
	list_add_tail(&request->list, &queue->list);
out:
	return err;
}
此时因为work推给了worker, worker会运行 work的处理函数 crypto_pump_work 接着分析crypto_pump_work的后半部分
crypto_pump_work 后半部分

	backlog = crypto_get_backlog(&engine->queue);
	async_req = crypto_dequeue_request(&engine->queue);
	if (!async_req)
		goto out;

	engine->cur_req = async_req;
	if (backlog)
		backlog->complete(backlog, -EINPROGRESS);

	if (engine->busy)
		was_busy = true;
	else
		engine->busy = true;

	spin_unlock_irqrestore(&engine->queue_lock, flags);

	if (!was_busy && engine->prepare_crypt_hardware) {
		ret = engine->prepare_crypt_hardware(engine);
	}

	enginectx = crypto_tfm_ctx(async_req->tfm);
	if (enginectx->op.prepare_request) {
		ret = enginectx->op.prepare_request(engine, async_req);
		...
		engine->cur_req_prepared = true;
	}
	ret = enginectx->op.do_one_request(engine, async_req);
	...
	return;
req_err:
	crypto_finalize_request(engine, async_req, ret);
	return;
out:
	spin_unlock_irqrestore(&engine->queue_lock, flags);
stm32_cryp_prepare_cipher_req

static int stm32_cryp_prepare_req(struct skcipher_request *req,
				  struct aead_request *areq)
{
	rctx->mode &= FLG_MODE_MASK;
	ctx->cryp = cryp;
	cryp->flags = (cryp->flags & ~FLG_MODE_MASK) | rctx->mode;
	cryp->hw_blocksize = is_aes(cryp) ? AES_BLOCK_SIZE : DES_BLOCK_SIZE;
	cryp->ctx = ctx;

	if (req) {
		cryp->req = req;
		cryp->areq = NULL;
		cryp->total_in = req->cryptlen;
		cryp->total_out = cryp->total_in;
	}

	cryp->in_sg = req ? req->src : areq->src;
	cryp->out_sg = req ? req->dst : areq->dst;
	cryp->out_sg_save = cryp->out_sg;

	ret = stm32_cryp_hw_init(cryp);
}
stm32_cryp_cipher_one_req

static int stm32_cryp_cipher_one_req(struct crypto_engine *engine, void *areq)
{
	struct skcipher_request *req = container_of(areq,
						    struct skcipher_request,
						    base);
	struct stm32_cryp_ctx *ctx = crypto_skcipher_ctx(
			crypto_skcipher_reqtfm(req));
	struct stm32_cryp *cryp = ctx->cryp;

	return stm32_cryp_cpu_start(cryp);
}
中断处理 接下来需要看下中断怎么处理 cryp 硬件的执行流的 在probe 时设置了中断处理函数为 stm32_cryp_irq
ret = devm_request_threaded_irq(dev, irq, stm32_cryp_irq,
				stm32_cryp_irq_thread, IRQF_ONESHOT,
				dev_name(dev), cryp);
irq_handler 为 stm32_cryp_irq
, thread_fn为stm32_cryp_irq_thread
这里分中断上半部, 下半部, 中断到来后由irq_handler -> stm32_cryp_irq
处理中断, 设置cryp->irq_status = stm32_cryp_read(cryp, CRYP_MISR)
返回 IRQ_WAKE_THREAD
, 唤醒中断处理线程, 到下半部, 由thread_fn -> stm32_cryp_irq_thread
处理后续的工作
stm32_cryp_irq_thread

static irqreturn_t stm32_cryp_irq_thread(int irq, void *arg)
{
	struct stm32_cryp *cryp = arg;
	u32 ph;

	if (cryp->irq_status & MISR_OUT) {
		if (unlikely(stm32_cryp_irq_read_data(cryp))) {
			stm32_cryp_write(cryp, CRYP_IMSCR, 0);
			stm32_cryp_finish_req(cryp, 0);
			return IRQ_HANDLED;
		}
	}

	if (cryp->irq_status & MISR_IN) {
		...
	} else {
		stm32_cryp_irq_write_data(cryp);
	}

	return IRQ_HANDLED;
}
可见这里并没有用dma的方式
使用dma的例子 ux500 这里只提一点, dma的callback 并不是用的当前硬件ip的中断, dma每个通道都绑定了一个中断处理函数, 应该是这个处理函数 处理的callback
desc = dmaengine_prep_slave_sg(channel,
		ctx->device->dma.sg_dst,
		ctx->device->dma.sg_dst_len,
		DMA_DEV_TO_MEM,
		DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
desc->callback = cryp_dma_out_callback;
callback调用 complete 函数, 唤醒之前因调用wait_for_completion
而阻塞的线程
static void cryp_dma_out_callback(void *data)
{
	struct cryp_ctx *ctx = (struct cryp_ctx *) data;

	dev_dbg(ctx->device->dev, "[%s]: ", __func__);
	complete(&ctx->device->dma.cryp_dma_complete);
}
在 ablk_dma_crypt
函数中, 设置完dma 传输后, 最终设置了wait_for_completion(&ctx->device->dma.cryp_dma_complete);
阻塞了当前线程, 在dma完成传输后会调用对应channel的中断处理函数, 调用callback 中的 complete 唤醒当前线程. 当然callback需要自己设置.
君正x2000的aes 驱动 使用dma 时要注意物理地址与虚拟地址之间的转换, 因为DMA只认识物理地址.
看下x2000 的aes驱动的实现全是dma方案.
ecb 加密

static int ingenic_aes_start(struct ingenic_aes_dev *aes,
			     struct ablkcipher_request *req)
{
	spin_lock_irqsave(&aes->lock, flags);
	ret = ablkcipher_enqueue_request(&aes->queue, req);
	if (aes->flags & FLAGS_BUSY) {
		spin_unlock_irqrestore(&aes->lock, flags);
		return ret;
	}
	async_req = crypto_dequeue_request(&aes->queue);
	if (async_req)
		aes->flags |= FLAGS_BUSY;
	spin_unlock_irqrestore(&aes->lock, flags);	/* 原文为 sin_unlock_irqrestore, 应为笔误 */

	req = ablkcipher_request_cast(async_req);

	aes->req = req;
	aes->total = req->nbytes;
	aes->in_offset = 0;
	aes->in_sg = req->src;
	aes->out_offset = 0;
	aes->out_sg = req->dst;

	ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req));
	aes->ctx = ctx;
	ctx->aes = aes;

	err = ingenic_aes_write_ctrl(aes);
	err = ingenic_aes_crypt_dma_start(aes);

	ingenic_aes_stop(aes);
	ingenic_aes_finish_req(aes, err);
}

1.1 --> ingenic_aes_crypt_dma_start(aes);

static int ingenic_aes_crypt_dma_start(struct ingenic_aes_dev *aes)
{
	struct scatterlist *in_sg, *out_sg;
	int len32;

	if (sg_is_last(aes->in_sg) && sg_is_last(aes->out_sg)) {
		in = IS_ALIGNED((unsigned long)aes->in_sg->offset,
				sizeof(unsigned long));
		out = IS_ALIGNED((unsigned long)aes->out_sg->offset,
				 sizeof(unsigned long));
		fast = in && out;
	}

	if (fast) {
		count = min(aes->total, sg_dma_len(aes->in_sg));
		count = min(count, sg_dma_len(aes->out_sg));

		prep_sgdata(aes->in_sg);
		err = dma_map_sg(aes->dev, aes->in_sg, 1, DMA_TO_DEVICE);
		err = dma_map_sg(aes->dev, aes->out_sg, 1, DMA_FROM_DEVICE);

		addr_in = sg_dma_address(aes->in_sg);
		addr_out = sg_dma_address(aes->out_sg);
		in_sg = aes->in_sg;
		out_sg = aes->out_sg;
		aes->flags |= FLAGS_FAST;
	} else {
		count = sg_copy(&aes->in_sg, &aes->in_offset,
				aes->buf_in, aes->buflen,
				aes->total, 0);
		len32 = DIV_ROUND_UP(count, DMA_MIN) * DMA_MIN;

		sg_init_one(&aes->in_sgl, aes->buf_in, len32);
		sg_dma_len(&aes->in_sgl) = len32;
		sg_dma_address(&aes->in_sgl) = aes->dma_addr_in;

		sg_init_one(&aes->out_sgl, aes->buf_out, len32);
		sg_dma_len(&aes->out_sgl) = len32;
		sg_dma_address(&aes->out_sgl) = aes->dma_addr_out;

		in_sg = &aes->in_sgl;
		out_sg = &aes->out_sgl;
		addr_in = aes->dma_addr_in;
		addr_out = aes->dma_addr_out;

		prep_sgdata(in_sg);
		aes->flags &= ~FLAGS_FAST;

		dma_cache_sync(aes->dev, sg_virt(in_sg), len32, DMA_TO_DEVICE);
		dma_cache_sync(aes->dev, sg_virt(out_sg), len32, DMA_FROM_DEVICE);
	}

	aes->total -= count;

	err = ingenic_aes_crypt_dma(tfm, in_sg, out_sg);
	if (err) {
		dma_unmap_sg(aes->dev, aes->in_sg, 1, DMA_TO_DEVICE);
		/* NOTE(review): 原文 out_sg 也用 DMA_TO_DEVICE 解映射, 疑似应为 DMA_FROM_DEVICE */
		dma_unmap_sg(aes->dev, aes->out_sg, 1, DMA_TO_DEVICE);
	}

	return err;
}
这个地方有疑问的一点, aes->in_sgl aes->buf_in aes->out_sgl aes->buf_out, 这几个实际上只映射了一个page, 那传的数据量很大怎么办呢? 其实这个处理是在中断中做的, 在aes 处理完毕后, 会发送中断, 在中断服务程序中, 就必须处理接下来的数据请求 这里也很好的体现了流式数据的处理策略
接着看一下中断的处理:
处理dma 中断

static irqreturn_t ingenic_aec_irqthread(int irq, void *data)
{
	val = aes_read(aes, AES_ASSR);
	mask = aes_read(aes, AES_ASINTM);
	val = val & mask;
	if (val & 4) {
		err = ingenic_aes_crypt_dma_stop(aes);
		aes_write(aes, AES_ASSR, 4);
		if (aes->total && !err) {
			/* 数据还没传完: 继续发起下一段 dma 传输 (流式处理) */
			err = ingenic_aes_crypt_dma_start(aes);
			if (!err)
				return IRQ_HANDLED;
		}
		ingenic_aes_finish_req(aes, err);
		ingenic_aes_start(aes, NULL);
	}
	return IRQ_HANDLED;
}

2.1 --> ingenic_aes_crypt_dma_stop

static int ingenic_aes_crypt_dma_stop(struct ingenic_aes_dev *aes)
{
	if (aes->flags & FLAGS_FAST) {
		dma_unmap_sg(aes->dev, aes->out_sg, 1, DMA_FROM_DEVICE);
		prep_sgdata(aes->out_sg);
		dma_unmap_sg(aes->dev, aes->in_sg, 1, DMA_TO_DEVICE);
	} else {
		dma_sync_single_for_device(aes->dev, aes->dma_addr_out,
					   aes->dma_size, DMA_FROM_DEVICE);
		prep_sgdata(&aes->out_sgl);
		count = sg_copy(&aes->out_sg, &aes->out_offset,
				aes->buf_out, aes->buflen,
				aes->dma_size, 1);
	}
}
用户空间使用 AF_ALG socket https://www.kernel.org/doc/html/v4.19/crypto/userspace-if.html
The kernel crypto API is accessible from user space. Currently, the following ciphers are accessible:
Message digest including keyed message digest (HMAC, CMAC)
Symmetric ciphers
AEAD ciphers
Random Number Generators
使用对称加密算法的例子:
struct sockaddr_alg sa = {
	.salg_family = AF_ALG,
	.salg_type   = "skcipher",
	.salg_name   = "cbc(aes)"
};
cryptodev-linux cryptodev-linux 是kernel 原生CryptoApi 提供给用户空间使用的可扩展基座. openssl 编译时通过 加入 -DHAVE_CRYPTODEV
和 -DUSE_CRYPTODEV_DIGESTS
编译选项支持通过cryptodev访问kernel cryptoapi 来进行 hash 加密等, 注意这种方式也不支持非对称加密
openssl 参考 openssl 使用afalg引擎 openssl speed -evp aes-128-cbc -engine afalg -elapsed
代码分析 afalg 作为插件使用, 其实现是在engines/e_afalg.c 中, 编译成 afalg.so 放在/usr/lib/engines-1.1/下
调用setup_engine, 对engine 进行初始化, load 该engine的库
使用时, 需要调用 EVP_CipherInit_ex
获得engine 对应的cipher
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 impl = ENGINE_get_cipher_engine(cipher->nid); const EVP_CIPHER *c = ENGINE_get_cipher(impl, cipher->nid);ctx->cipher = cipher = c; >>> p *cipher $7 = { nid = 419 , block_size = 16 , key_len = 16 , iv_len = 16 , flags = 4098 , init = 0xffffb53bd804 <afalg_cipher_init>, do_cipher = 0xffffb53bd9a4 <afalg_do_cipher>, cleanup = 0xffffb53bdb2c <afalg_cipher_cleanup>, ctx_size = 128 , set_asn1_parameters = 0x0 , get_asn1_parameters = 0x0 , ctrl = 0x0 , app_data = 0x0 } const EVP_CIPHER *ENGINE_get_cipher(ENGINE *e, int nid){ const EVP_CIPHER *ret; ENGINE_CIPHERS_PTR fn = ENGINE_get_ciphers(e); if (!fn || !fn(e, &ret, NULL , nid)) { ENGINEerr(ENGINE_F_ENGINE_GET_CIPHER, ENGINE_R_UNIMPLEMENTED_CIPHER); return NULL ; } return ret; }
afalg_ciphers
是在 setup_engine 阶段bind的
1 2 3 4 5 6 7 8 9 10 11 12 #0 bind_afalg (e=0x19209700 ) at engines/e_afalg.c:715 #1 0x0000ffffa5b72fd4 in bind_helper (e=0x19209700 , id=0x19209860 "afalg" ) at engines/e_afalg.c:755 #2 0x0000ffffa5b73080 in bind_engine (e=0x19209700 , id=0x19209860 "afalg" , fns=0xffffe9357908 ) at engines/e_afalg.c:761 #3 0x0000ffffa5eef3bc in dynamic_load (e=0x19209700 , ctx=0x1920c000 ) at crypto/engine/eng_dyn.c:480 #4 0x0000ffffa5eeee80 in dynamic_ctrl (e=0x19209700 , cmd=206 , i=0 , p=0x0 , f=0x0 ) at crypto/engine/eng_dyn.c:336 #5 0x0000ffffa5eee154 in ENGINE_ctrl (e=0x19209700 , cmd=206 , i=0 , p=0x0 , f=0x0 ) at crypto/engine/eng_ctrl.c:174 #6 0x0000ffffa5eee4ac in ENGINE_ctrl_cmd_string (e=0x19209700 , cmd_name=0xffffa5ff7010 "LOAD" , arg=0x0 , cmd_optional=0 ) at crypto/engine/eng_ctrl.c:289 #7 0x0000ffffa5ef1290 in ENGINE_by_id (id=0xffffe9359f32 "afalg" ) at crypto/engine/eng_list.c:328 #8 0x0000000000470a54 in setup_engine (engine=0xffffe9359f32 "afalg" , debug=0 ) at apps/apps.c:1300 #9 0x000000000045c978 in speed_main (argc=0 , argv=0xffffe9359b40 ) at apps/speed.c:1814 #10 0x00000000004380a0 in do_cmd (prog=0x19207940 , argc=6 , argv=0xffffe9359b10 ) at apps/openssl.c:570 #11 0x0000000000437278 in main (argc=6 , argv=0xffffe9359b10 ) at apps/openssl.c:189
后面所有的运算, 都需要传递ctx, 从ctx 中解出cipher来, cipher 封装了具体的操作, 对应了engine的能力. 同linux crypto api的对接, 是engine的主要工作.
其中cipher中的init
指向 afalg_cipher_init
, 这个里面afalg_create_sk
会建立同kernel crypto api的socket
1 从目前实现的代码来看, 这个afalg只支持aes-cbc-128 aes-cbc-192 aes-cbc-256
1 2 3 4 5 6 7 8 9 10 11 12 ciphertype = EVP_CIPHER_CTX_nid(ctx); switch (ciphertype) {case NID_aes_128_cbc:case NID_aes_192_cbc:case NID_aes_256_cbc: ciphername = "cbc(aes)" ; break ; default : ALG_WARN("%s(%d): Unsupported Cipher type %d\n" , __FILE__, __LINE__, ciphertype); return 0 ; }
支持能力是在load 阶段会调用ENGINE_set_default_ciphers
查询engine 支持的算法
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 int ENGINE_set_default_ciphers (ENGINE *e) { if (e->ciphers) { const int *nids; int num_nids = e->ciphers(e, NULL , &nids, 0 ); if (num_nids > 0 ) return engine_table_register(&cipher_table, engine_unregister_all_ciphers, e, nids, num_nids, 1 ); } return 1 ; } #0 ENGINE_set_default_ciphers (e=0x36e4d700 ) at crypto/engine/tb_cipher.c:50 #1 0x0000ffffb687a4ec in ENGINE_set_default (e=0x36e4d700 , flags=65535 ) at crypto/engine/eng_fat.c:16 #2 0x0000000000470b1c in setup_engine (engine=0xffffe3d3df32 "afalg" , debug=0 ) at apps/apps.c:1310 #3 0x000000000045c978 in speed_main (argc=0 , argv=0xffffe3d3d840 ) at apps/speed.c:1814 #4 0x00000000004380a0 in do_cmd (prog=0x36e4b940 , argc=6 , argv=0xffffe3d3d810 ) at apps/openssl.c:570 #5 0x0000000000437278 in main (argc=6 , argv=0xffffe3d3d810 ) at apps/openssl.c:189
如果不支持某个算法, 则通过impl = ENGINE_get_cipher_engine(cipher->nid);
查询会返回空 所有支持的算法会注册到全局的链表中, ENGINE_get_cipher_engine 就是通过查询这个全局的链表得到的哪个engine支持这个nid标识的算法.