
/*
* Rockchip VPU codec driver
*
* Copyright (C) 2014 Google, Inc.
* Tomasz Figa <tfiga@chromium.org>
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
* may be copied, distributed, and modified under those terms.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include "rockchip_vpu_common.h"
#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/pm.h>
#include <linux/pm_runtime.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/of.h>
#include <linux/of_platform.h>
#include <linux/dma-iommu.h>
/* Various parameters specific to VP8 encoder. */
#define VP8_KEY_FRAME_HDR_SIZE 10
#define VP8_INTER_FRAME_HDR_SIZE 3
#define VP8_FRAME_TAG_KEY_FRAME_BIT BIT(0)
#define VP8_FRAME_TAG_LENGTH_SHIFT 5
#define VP8_FRAME_TAG_LENGTH_MASK (0x7ffff << 5)
/**
 * struct rockchip_vpu_codec_ops - codec mode specific operations
 *
 * @codec_mode:	Codec mode related to this format. See
 *		enum rockchip_vpu_codec_mode.
 * @init:	Prepare for streaming. Called from VB2 .start_streaming()
 *		when streaming from both queues is being enabled.
 * @exit:	Clean-up after streaming. Called from VB2 .stop_streaming()
 *		when streaming from first of both enabled queues is being
 *		disabled.
 * @irq:	Handle {en,de}code irq. Check and clear interrupt.
 * @run:	Start single {en,de}coding run. Called from non-atomic context
 *		to indicate that a pair of buffers is ready and the hardware
 *		should be programmed and started.
 * @done:	Read back processing results and additional data from hardware.
 * @reset:	Reset the hardware in case of a timeout.
 */
struct rockchip_vpu_codec_ops {
	enum rockchip_vpu_codec_mode codec_mode;
	int (*init)(struct rockchip_vpu_ctx *);
	void (*exit)(struct rockchip_vpu_ctx *);
	int (*irq)(int, struct rockchip_vpu_dev *);
	void (*run)(struct rockchip_vpu_ctx *);
	void (*done)(struct rockchip_vpu_ctx *, enum vb2_buffer_state);
	void (*reset)(struct rockchip_vpu_ctx *);
};
/*
* Hardware control routines.
*/
/*
 * Power the VPU up for a coding run by taking a runtime PM reference.
 * Balanced by rockchip_vpu_power_off() when the run completes.
 */
void rockchip_vpu_power_on(struct rockchip_vpu_dev *vpu)
{
	struct device *dev = vpu->dev;

	vpu_debug_enter();

	/* TODO: Clock gating. */

	pm_runtime_get_sync(dev);

	vpu_debug_leave();
}
/*
 * Drop the runtime PM reference taken by rockchip_vpu_power_on(),
 * letting the device autosuspend after the configured delay.
 */
static void rockchip_vpu_power_off(struct rockchip_vpu_dev *vpu)
{
	struct device *dev = vpu->dev;

	vpu_debug_enter();

	pm_runtime_mark_last_busy(dev);
	pm_runtime_put_autosuspend(dev);

	/* TODO: Clock gating. */

	vpu_debug_leave();
}
/*
* Interrupt handlers.
*/
static irqreturn_t vepu_irq(int irq, void *dev_id)
{
struct rockchip_vpu_dev *vpu = dev_id;
struct rockchip_vpu_ctx *ctx = vpu->current_ctx;
if (!ctx->hw.codec_ops->irq(irq, vpu)) {
rockchip_vpu_power_off(vpu);
cancel_delayed_work(&vpu->watchdog_work);
ctx->hw.codec_ops->done(ctx, VB2_BUF_STATE_DONE);
}
return IRQ_HANDLED;
}
static irqreturn_t vdpu_irq(int irq, void *dev_id)
{
struct rockchip_vpu_dev *vpu = dev_id;
struct rockchip_vpu_ctx *ctx = vpu->current_ctx;
if (!ctx->hw.codec_ops->irq(irq, vpu)) {
rockchip_vpu_power_off(vpu);
cancel_delayed_work(&vpu->watchdog_work);
ctx->hw.codec_ops->done(ctx, VB2_BUF_STATE_DONE);
}
return IRQ_HANDLED;
}
/*
 * Watchdog work: fires when a coding run did not raise its completion
 * interrupt in time. Resets the hardware under the irq lock, powers it
 * down and completes the buffers with an error state.
 */
static void rockchip_vpu_watchdog(struct work_struct *work)
{
	struct rockchip_vpu_dev *vpu = container_of(to_delayed_work(work),
					struct rockchip_vpu_dev, watchdog_work);
	struct rockchip_vpu_ctx *ctx = vpu->current_ctx;
	unsigned long flags;

	/*
	 * The run may have completed (and cleared current_ctx) between the
	 * timer expiring and this work actually running; nothing to do then.
	 */
	if (!ctx)
		return;

	/* Reset under irqlock so the ISR cannot race with the reset. */
	spin_lock_irqsave(&vpu->irqlock, flags);
	ctx->hw.codec_ops->reset(ctx);
	spin_unlock_irqrestore(&vpu->irqlock, flags);

	vpu_err("frame processing timed out!\n");

	rockchip_vpu_power_off(vpu);
	ctx->hw.codec_ops->done(ctx, VB2_BUF_STATE_ERROR);
}
/*
* Initialization/clean-up.
*/
#if defined(CONFIG_ROCKCHIP_IOMMU)
/*
 * Set up an IOMMU domain for the VPU and route its DMA ops through it.
 *
 * Allocates per-device DMA parameters (devm-managed, so the error path
 * needs no explicit free), creates an IOMMU domain with a DMA cookie,
 * configures a 32-bit coherent mask and max segment size, attaches the
 * device and finally installs the DMA ops over a 0x10000000..SZ_2G
 * IOVA window.
 *
 * Returns 0 on success or a negative errno; partially acquired
 * resources are released in reverse order via the goto ladder.
 */
static int rockchip_vpu_iommu_init(struct rockchip_vpu_dev *vpu)
{
	int ret;

	vpu->dev->dma_parms = devm_kzalloc(vpu->dev,
					   sizeof(*vpu->dev->dma_parms),
					   GFP_KERNEL);
	if (!vpu->dev->dma_parms)
		return -ENOMEM;

	vpu->domain = iommu_domain_alloc(vpu->dev->bus);
	if (!vpu->domain) {
		ret = -ENOMEM;
		goto err_free_parms;
	}

	ret = iommu_get_dma_cookie(vpu->domain);
	if (ret)
		goto err_free_domain;

	ret = dma_set_coherent_mask(vpu->dev, DMA_BIT_MASK(32));
	if (ret)
		goto err_put_cookie;

	/* NOTE(review): DMA_BIT_MASK(32) used as a segment size here —
	 * presumably "no practical limit"; confirm against dma_parms use. */
	dma_set_max_seg_size(vpu->dev, DMA_BIT_MASK(32));

	ret = iommu_attach_device(vpu->domain, vpu->dev);
	if (ret)
		goto err_put_cookie;

	common_iommu_setup_dma_ops(vpu->dev, 0x10000000, SZ_2G,
				   vpu->domain->ops);
	return 0;

err_put_cookie:
	iommu_put_dma_cookie(vpu->domain);
err_free_domain:
	iommu_domain_free(vpu->domain);
err_free_parms:
	/* dma_parms is devm-allocated; nothing to free manually. */
	return ret;
}
/*
 * Tear down the IOMMU state created by rockchip_vpu_iommu_init(),
 * in reverse order of acquisition: detach, drop cookie, free domain.
 */
static void rockchip_vpu_iommu_cleanup(struct rockchip_vpu_dev *vpu)
{
	iommu_detach_device(vpu->domain, vpu->dev);
	iommu_put_dma_cookie(vpu->domain);
	iommu_domain_free(vpu->domain);
}
#else /* CONFIG_ROCKCHIP_IOMMU */
/* No-op stubs used when the Rockchip IOMMU driver is not configured. */
static inline int rockchip_vpu_iommu_init(struct rockchip_vpu_dev *vpu)
{
	return 0;
}

static inline void rockchip_vpu_iommu_cleanup(struct rockchip_vpu_dev *vpu) { }
#endif /* CONFIG_ROCKCHIP_IOMMU */
/*
 * Probe-time hardware setup: clocks, register mapping, DMA/IOMMU,
 * encoder and decoder interrupts, and runtime PM with autosuspend.
 *
 * Returns 0 on success or a negative errno; everything acquired so far
 * is released on the error paths.
 */
int rockchip_vpu_hw_probe(struct rockchip_vpu_dev *vpu)
{
	struct resource *res;
	int irq_enc, irq_dec;
	int ret;

	pr_info("probe device %s\n", dev_name(vpu->dev));

	INIT_DELAYED_WORK(&vpu->watchdog_work, rockchip_vpu_watchdog);

	vpu->aclk = devm_clk_get(vpu->dev, "aclk");
	if (IS_ERR(vpu->aclk)) {
		dev_err(vpu->dev, "failed to get aclk\n");
		return PTR_ERR(vpu->aclk);
	}

	vpu->hclk = devm_clk_get(vpu->dev, "hclk");
	if (IS_ERR(vpu->hclk)) {
		dev_err(vpu->dev, "failed to get hclk\n");
		return PTR_ERR(vpu->hclk);
	}

	res = platform_get_resource(vpu->pdev, IORESOURCE_MEM, 0);
	vpu->base = devm_ioremap_resource(vpu->dev, res);
	if (IS_ERR(vpu->base))
		return PTR_ERR(vpu->base);

	/* clk_prepare_enable() can fail; don't assume success. */
	ret = clk_prepare_enable(vpu->aclk);
	if (ret) {
		dev_err(vpu->dev, "could not enable aclk\n");
		return ret;
	}

	ret = clk_prepare_enable(vpu->hclk);
	if (ret) {
		dev_err(vpu->dev, "could not enable hclk\n");
		goto err_aclk;
	}

	vpu->enc_base = vpu->base + vpu->variant->enc_offset;
	vpu->dec_base = vpu->base + vpu->variant->dec_offset;

	ret = dma_set_coherent_mask(vpu->dev, DMA_BIT_MASK(32));
	if (ret) {
		dev_err(vpu->dev, "could not set DMA coherent mask\n");
		goto err_power;
	}

	ret = rockchip_vpu_iommu_init(vpu);
	if (ret)
		goto err_power;

	irq_enc = platform_get_irq_byname(vpu->pdev, "vepu");
	if (irq_enc <= 0) {
		dev_err(vpu->dev, "could not get vepu IRQ\n");
		/* Propagate the real error code instead of blanket -ENXIO. */
		ret = irq_enc ? irq_enc : -ENXIO;
		goto err_iommu;
	}
	ret = devm_request_threaded_irq(vpu->dev, irq_enc, NULL, vepu_irq,
					IRQF_ONESHOT, dev_name(vpu->dev), vpu);
	if (ret) {
		dev_err(vpu->dev, "could not request vepu IRQ\n");
		goto err_iommu;
	}

	irq_dec = platform_get_irq_byname(vpu->pdev, "vdpu");
	if (irq_dec <= 0) {
		dev_err(vpu->dev, "could not get vdpu IRQ\n");
		ret = irq_dec ? irq_dec : -ENXIO;
		goto err_iommu;
	}
	ret = devm_request_threaded_irq(vpu->dev, irq_dec, NULL, vdpu_irq,
					IRQF_ONESHOT, dev_name(vpu->dev), vpu);
	if (ret) {
		dev_err(vpu->dev, "could not request vdpu IRQ\n");
		goto err_iommu;
	}

	pm_runtime_set_autosuspend_delay(vpu->dev, 100);
	pm_runtime_use_autosuspend(vpu->dev);
	pm_runtime_enable(vpu->dev);

	return 0;

err_iommu:
	rockchip_vpu_iommu_cleanup(vpu);
err_power:
	clk_disable_unprepare(vpu->hclk);
err_aclk:
	clk_disable_unprepare(vpu->aclk);
	return ret;
}
/*
 * Undo rockchip_vpu_hw_probe(): release the IOMMU, disable runtime PM
 * and gate the clocks. Keep this teardown order in sync with probe.
 */
void rockchip_vpu_hw_remove(struct rockchip_vpu_dev *vpu)
{
	rockchip_vpu_iommu_cleanup(vpu);

	pm_runtime_disable(vpu->dev);

	clk_disable_unprepare(vpu->hclk);
	clk_disable_unprepare(vpu->aclk);
}
/*
 * Per-codec-mode operation tables, looked up by codec_mode in
 * rockchip_vpu_init(). Decoders share the generic run_done callback;
 * encoders need codec-specific bitstream post-processing in ->done.
 */
static const struct rockchip_vpu_codec_ops mode_ops[] = {
	{
		/* VP8 encoder */
		.codec_mode = RK3288_VPU_CODEC_VP8E,
		.init = rk3288_vpu_vp8e_init,
		.exit = rk3288_vpu_vp8e_exit,
		.irq = rk3288_vpu_enc_irq,
		.run = rk3288_vpu_vp8e_run,
		.done = rk3288_vpu_vp8e_done,
		.reset = rk3288_vpu_enc_reset,
	},
	{
		/* VP8 decoder */
		.codec_mode = RK3288_VPU_CODEC_VP8D,
		.init = rk3288_vpu_vp8d_init,
		.exit = rk3288_vpu_vp8d_exit,
		.irq = rk3288_vpu_dec_irq,
		.run = rk3288_vpu_vp8d_run,
		.done = rockchip_vpu_run_done,
		.reset = rk3288_vpu_dec_reset,
	},
	{
		/* H.264 encoder */
		.codec_mode = RK3288_VPU_CODEC_H264E,
		.init = rk3288_vpu_h264e_init,
		.exit = rk3288_vpu_h264e_exit,
		.irq = rk3288_vpu_enc_irq,
		.run = rk3288_vpu_h264e_run,
		.done = rk3288_vpu_h264e_done,
		.reset = rk3288_vpu_enc_reset,
	},
	{
		/* H.264 decoder */
		.codec_mode = RK3288_VPU_CODEC_H264D,
		.init = rk3288_vpu_h264d_init,
		.exit = rk3288_vpu_h264d_exit,
		.irq = rk3288_vpu_dec_irq,
		.run = rk3288_vpu_h264d_run,
		.done = rockchip_vpu_run_done,
		.reset = rk3288_vpu_dec_reset,
	},
};
/* Kick off a single coding run via the context's codec-specific op. */
void rockchip_vpu_run(struct rockchip_vpu_ctx *ctx)
{
	ctx->hw.codec_ops->run(ctx);
}
int rockchip_vpu_init(struct rockchip_vpu_ctx *ctx)
{
enum rockchip_vpu_codec_mode codec_mode;
int i;
if (rockchip_vpu_ctx_is_encoder(ctx))
codec_mode = ctx->vpu_dst_fmt->codec_mode; /* Encoder */
else
codec_mode = ctx->vpu_src_fmt->codec_mode; /* Decoder */
for (i = 0; i < ARRAY_SIZE(mode_ops); i++) {
if (mode_ops[i].codec_mode == codec_mode) {
ctx->hw.codec_ops = &mode_ops[i];
break;
}
}
if (!ctx->hw.codec_ops)
return -1;
return ctx->hw.codec_ops->init(ctx);
}
/* Run the codec-specific clean-up hook selected by rockchip_vpu_init(). */
void rockchip_vpu_deinit(struct rockchip_vpu_ctx *ctx)
{
	ctx->hw.codec_ops->exit(ctx);
}
/*
* The hardware takes care only of ext hdr and dct partition. The software
* must take care of frame header.
*
* Buffer layout as received from hardware:
* |<--gap-->|<--ext hdr-->|<-gap->|<---dct part---
* |<-------dct part offset------->|
*
* Required buffer layout:
* |<--hdr-->|<--ext hdr-->|<---dct part---
*/
/*
 * Assemble the final VP8 bitstream in the destination buffer: move the
 * hardware-written DCT partition up against the ext header, prepend the
 * software-built frame header, then patch the frame tag (keyframe bit
 * and first-partition length) in the first 32-bit word.
 *
 * See the buffer layout diagram above for the gaps being closed.
 */
void rockchip_vpu_vp8e_assemble_bitstream(struct rockchip_vpu_ctx *ctx,
					  struct rockchip_vpu_buf *dst_buf)
{
	struct vb2_v4l2_buffer *vb2_dst = to_vb2_v4l2_buffer(&dst_buf->vb.vb2_buf);
	size_t ext_hdr_size = dst_buf->vp8e.ext_hdr_size;
	size_t dct_size = dst_buf->vp8e.dct_size;
	size_t hdr_size = dst_buf->vp8e.hdr_size;
	size_t dst_size;
	size_t tag_size;
	void *dst;
	u32 *tag;

	dst_size = vb2_plane_size(&dst_buf->vb.vb2_buf, 0);
	dst = vb2_plane_vaddr(&dst_buf->vb.vb2_buf, 0);
	tag = dst; /* To access frame tag words. */

	if (WARN_ON(hdr_size + ext_hdr_size + dct_size > dst_size))
		return;
	if (WARN_ON(dst_buf->vp8e.dct_offset + dct_size > dst_size))
		return;

	/* Fix format specifiers: these are size_t values, so use %zu. */
	vpu_debug(1, "%s: hdr_size = %zu, ext_hdr_size = %zu, dct_size = %zu\n",
		  __func__, hdr_size, ext_hdr_size, dct_size);

	/* Regions may overlap, hence memmove for the DCT partition. */
	memmove(dst + hdr_size + ext_hdr_size,
		dst + dst_buf->vp8e.dct_offset, dct_size);
	memcpy(dst, dst_buf->vp8e.header, hdr_size);

	/* Patch frame tag at first 32-bit word of the frame.
	 * VP8 frame tag bit 0 is 0 for a key frame, 1 for an inter frame. */
	if (vb2_dst->flags & V4L2_BUF_FLAG_KEYFRAME) {
		tag_size = VP8_KEY_FRAME_HDR_SIZE;
		tag[0] &= ~VP8_FRAME_TAG_KEY_FRAME_BIT;
	} else {
		tag_size = VP8_INTER_FRAME_HDR_SIZE;
		tag[0] |= VP8_FRAME_TAG_KEY_FRAME_BIT;
	}

	tag[0] &= ~VP8_FRAME_TAG_LENGTH_MASK;
	tag[0] |= (hdr_size + ext_hdr_size - tag_size)
		   << VP8_FRAME_TAG_LENGTH_SHIFT;

	vb2_set_plane_payload(&dst_buf->vb.vb2_buf, 0,
			      hdr_size + ext_hdr_size + dct_size);
}
/*
 * Assemble the final H.264 bitstream: copy the software-built SPS and
 * PPS NAL units to the start of the destination plane and set the total
 * payload.
 *
 * NOTE(review): slices_size is counted into the payload but no slice
 * data is copied here — presumably the hardware writes slice data in
 * place after the headers; confirm against the encoder run path.
 */
void rockchip_vpu_h264e_assemble_bitstream(struct rockchip_vpu_ctx *ctx,
					   struct rockchip_vpu_buf *dst_buf)
{
	size_t sps_size = dst_buf->h264e.sps_size;
	size_t pps_size = dst_buf->h264e.pps_size;
	size_t slices_size = dst_buf->h264e.slices_size;
	size_t dst_size;
	void *dst;
	struct stream_s *sps = &ctx->run.h264e.sps;
	struct stream_s *pps = &ctx->run.h264e.pps;

	dst_size = vb2_plane_size(&dst_buf->vb.vb2_buf, 0);
	dst = vb2_plane_vaddr(&dst_buf->vb.vb2_buf, 0);

	if (WARN_ON(sps_size + pps_size + slices_size > dst_size))
		return;

	/* Fix format specifiers: these are size_t values, so use %zu. */
	vpu_debug(1, "%s: sps_size = %zu, pps_size = %zu, slices_size = %zu\n",
		  __func__, sps_size, pps_size, slices_size);

	memcpy(dst, sps->buffer, sps_size);
	memcpy(dst + sps_size, pps->buffer, pps_size);

	vb2_set_plane_payload(&dst_buf->vb.vb2_buf, 0,
			      sps_size + pps_size + slices_size);
}