2022-03-07 18:26:34

by Melissa Wen

[permalink] [raw]
Subject: [PATCH 0/3] Move FPU related code from DCN3.1x drivers to DML folder

This series moves FPU code from the DCN 3.1x drivers to the dml/dcn31 folder to
isolate FPU operations. To do so, it creates dcn31_fpu files that centralize
the FPU operations and structs of the dcn31x drivers, which include:
- _vcs_dpi_ip_params_st and _vcs_dpi_soc_bounding_box_st structs
- dcn31x_update_bw_bounding_box() functions
- dcn31_calculate_wm_and_dlg_fp()

It also adds dc_assert_fp_enabled() to the public dml-fpu functions, as required.
I've checked that their calls are properly wrapped by DC_FP_START/END (and
removed those wrappers where they were inside dml/fpu files).

Melissa Wen (3):
drm/amd/display: move FPU related code from dcn31 to dml/dcn31 folder
drm/amd/display: move FPU related code from dcn315 to dml/dcn31 folder
drm/amd/display: move FPU related code from dcn316 to dml/dcn31 folder

drivers/gpu/drm/amd/display/dc/dcn31/Makefile | 26 -
.../drm/amd/display/dc/dcn31/dcn31_resource.c | 355 +------
.../drm/amd/display/dc/dcn31/dcn31_resource.h | 4 +-
.../gpu/drm/amd/display/dc/dcn315/Makefile | 26 -
.../amd/display/dc/dcn315/dcn315_resource.c | 232 +----
.../amd/display/dc/dcn315/dcn315_resource.h | 3 +
.../gpu/drm/amd/display/dc/dcn316/Makefile | 26 -
.../amd/display/dc/dcn316/dcn316_resource.c | 231 +----
.../amd/display/dc/dcn316/dcn316_resource.h | 3 +
drivers/gpu/drm/amd/display/dc/dml/Makefile | 2 +
.../drm/amd/display/dc/dml/dcn31/dcn31_fpu.c | 863 ++++++++++++++++++
.../drm/amd/display/dc/dml/dcn31/dcn31_fpu.h | 44 +
12 files changed, 921 insertions(+), 894 deletions(-)
create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h

--
2.34.1


2022-03-07 21:17:30

by Melissa Wen

[permalink] [raw]
Subject: [PATCH 3/3] drm/amd/display: move FPU related code from dcn316 to dml/dcn31 folder

Moves FPU-related structs and dcn316_update_bw_bounding_box from the dcn316
driver to dml/dcn31, which centralizes FPU operations for DCN 3.1x.

Signed-off-by: Melissa Wen <[email protected]>
---
.../gpu/drm/amd/display/dc/dcn316/Makefile | 26 --
.../amd/display/dc/dcn316/dcn316_resource.c | 231 +-----------------
.../amd/display/dc/dcn316/dcn316_resource.h | 3 +
.../drm/amd/display/dc/dml/dcn31/dcn31_fpu.c | 229 +++++++++++++++++
.../drm/amd/display/dc/dml/dcn31/dcn31_fpu.h | 2 +
5 files changed, 235 insertions(+), 256 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/Makefile b/drivers/gpu/drm/amd/display/dc/dcn316/Makefile
index cd87b687c5e2..819d44a9439b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn316/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn316/Makefile
@@ -25,32 +25,6 @@

DCN316 = dcn316_resource.o

-ifdef CONFIG_X86
-CFLAGS_$(AMDDALPATH)/dc/dcn316/dcn316_resource.o := -msse
-endif
-
-ifdef CONFIG_PPC64
-CFLAGS_$(AMDDALPATH)/dc/dcn316/dcn316_resource.o := -mhard-float -maltivec
-endif
-
-ifdef CONFIG_CC_IS_GCC
-ifeq ($(call cc-ifversion, -lt, 0701, y), y)
-IS_OLD_GCC = 1
-endif
-CFLAGS_$(AMDDALPATH)/dc/dcn316/dcn316_resource.o += -mhard-float
-endif
-
-ifdef CONFIG_X86
-ifdef IS_OLD_GCC
-# Stack alignment mismatch, proceed with caution.
-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
-# (8B stack alignment).
-CFLAGS_$(AMDDALPATH)/dc/dcn316/dcn316_resource.o += -mpreferred-stack-boundary=4
-else
-CFLAGS_$(AMDDALPATH)/dc/dcn316/dcn316_resource.o += -msse2
-endif
-endif
-
AMD_DAL_DCN316 = $(addprefix $(AMDDALPATH)/dc/dcn316/,$(DCN316))

AMD_DISPLAY_FILES += $(AMD_DAL_DCN316)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c
index 90c17c44dd7c..1e451d069bc3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c
@@ -66,6 +66,7 @@
#include "virtual/virtual_stream_encoder.h"
#include "dce110/dce110_resource.h"
#include "dml/display_mode_vba.h"
+#include "dml/dcn31/dcn31_fpu.h"
#include "dcn31/dcn31_dccg.h"
#include "dcn10/dcn10_resource.h"
#include "dcn31/dcn31_panel_cntl.h"
@@ -123,157 +124,10 @@

#include "link_enc_cfg.h"

-#define DC_LOGGER_INIT(logger)
-
-#define DCN3_16_DEFAULT_DET_SIZE 192
#define DCN3_16_MAX_DET_SIZE 384
#define DCN3_16_MIN_COMPBUF_SIZE_KB 128
#define DCN3_16_CRB_SEGMENT_SIZE_KB 64

-struct _vcs_dpi_ip_params_st dcn3_16_ip = {
- .gpuvm_enable = 1,
- .gpuvm_max_page_table_levels = 1,
- .hostvm_enable = 1,
- .hostvm_max_page_table_levels = 2,
- .rob_buffer_size_kbytes = 64,
- .det_buffer_size_kbytes = DCN3_16_DEFAULT_DET_SIZE,
- .config_return_buffer_size_in_kbytes = 1024,
- .compressed_buffer_segment_size_in_kbytes = 64,
- .meta_fifo_size_in_kentries = 32,
- .zero_size_buffer_entries = 512,
- .compbuf_reserved_space_64b = 256,
- .compbuf_reserved_space_zs = 64,
- .dpp_output_buffer_pixels = 2560,
- .opp_output_buffer_lines = 1,
- .pixel_chunk_size_kbytes = 8,
- .meta_chunk_size_kbytes = 2,
- .min_meta_chunk_size_bytes = 256,
- .writeback_chunk_size_kbytes = 8,
- .ptoi_supported = false,
- .num_dsc = 3,
- .maximum_dsc_bits_per_component = 10,
- .dsc422_native_support = false,
- .is_line_buffer_bpp_fixed = true,
- .line_buffer_fixed_bpp = 48,
- .line_buffer_size_bits = 789504,
- .max_line_buffer_lines = 12,
- .writeback_interface_buffer_size_kbytes = 90,
- .max_num_dpp = 4,
- .max_num_otg = 4,
- .max_num_hdmi_frl_outputs = 1,
- .max_num_wb = 1,
- .max_dchub_pscl_bw_pix_per_clk = 4,
- .max_pscl_lb_bw_pix_per_clk = 2,
- .max_lb_vscl_bw_pix_per_clk = 4,
- .max_vscl_hscl_bw_pix_per_clk = 4,
- .max_hscl_ratio = 6,
- .max_vscl_ratio = 6,
- .max_hscl_taps = 8,
- .max_vscl_taps = 8,
- .dpte_buffer_size_in_pte_reqs_luma = 64,
- .dpte_buffer_size_in_pte_reqs_chroma = 34,
- .dispclk_ramp_margin_percent = 1,
- .max_inter_dcn_tile_repeaters = 8,
- .cursor_buffer_size = 16,
- .cursor_chunk_size = 2,
- .writeback_line_buffer_buffer_size = 0,
- .writeback_min_hscl_ratio = 1,
- .writeback_min_vscl_ratio = 1,
- .writeback_max_hscl_ratio = 1,
- .writeback_max_vscl_ratio = 1,
- .writeback_max_hscl_taps = 1,
- .writeback_max_vscl_taps = 1,
- .dppclk_delay_subtotal = 46,
- .dppclk_delay_scl = 50,
- .dppclk_delay_scl_lb_only = 16,
- .dppclk_delay_cnvc_formatter = 27,
- .dppclk_delay_cnvc_cursor = 6,
- .dispclk_delay_subtotal = 119,
- .dynamic_metadata_vm_enabled = false,
- .odm_combine_4to1_supported = false,
- .dcc_supported = true,
-};
-
-struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = {
- /*TODO: correct dispclk/dppclk voltage level determination*/
- .clock_limits = {
- {
- .state = 0,
- .dispclk_mhz = 556.0,
- .dppclk_mhz = 556.0,
- .phyclk_mhz = 600.0,
- .phyclk_d18_mhz = 445.0,
- .dscclk_mhz = 186.0,
- .dtbclk_mhz = 625.0,
- },
- {
- .state = 1,
- .dispclk_mhz = 625.0,
- .dppclk_mhz = 625.0,
- .phyclk_mhz = 810.0,
- .phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 209.0,
- .dtbclk_mhz = 625.0,
- },
- {
- .state = 2,
- .dispclk_mhz = 625.0,
- .dppclk_mhz = 625.0,
- .phyclk_mhz = 810.0,
- .phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 209.0,
- .dtbclk_mhz = 625.0,
- },
- {
- .state = 3,
- .dispclk_mhz = 1112.0,
- .dppclk_mhz = 1112.0,
- .phyclk_mhz = 810.0,
- .phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 371.0,
- .dtbclk_mhz = 625.0,
- },
- {
- .state = 4,
- .dispclk_mhz = 1250.0,
- .dppclk_mhz = 1250.0,
- .phyclk_mhz = 810.0,
- .phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 417.0,
- .dtbclk_mhz = 625.0,
- },
- },
- .num_states = 5,
- .sr_exit_time_us = 9.0,
- .sr_enter_plus_exit_time_us = 11.0,
- .sr_exit_z8_time_us = 442.0,
- .sr_enter_plus_exit_z8_time_us = 560.0,
- .writeback_latency_us = 12.0,
- .dram_channel_width_bytes = 4,
- .round_trip_ping_latency_dcfclk_cycles = 106,
- .urgent_latency_pixel_data_only_us = 4.0,
- .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
- .urgent_latency_vm_data_only_us = 4.0,
- .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
- .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
- .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
- .pct_ideal_sdp_bw_after_urgent = 80.0,
- .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0,
- .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
- .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0,
- .max_avg_sdp_bw_use_normal_percent = 60.0,
- .max_avg_dram_bw_use_normal_percent = 60.0,
- .fabric_datapath_to_dcn_data_return_bytes = 32,
- .return_bus_width_bytes = 64,
- .downspread_percent = 0.38,
- .dcn_downspread_percent = 0.5,
- .gpuvm_min_page_size_bytes = 4096,
- .hostvm_min_page_size_bytes = 4096,
- .do_urgent_latency_adjustment = false,
- .urgent_latency_adjustment_fabric_clock_component_us = 0,
- .urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
-};
-
enum dcn31_clk_src_array_id {
DCN31_CLK_SRC_PLL0,
DCN31_CLK_SRC_PLL1,
@@ -1859,89 +1713,6 @@ static struct dc_cap_funcs cap_funcs = {
.get_dcc_compression_cap = dcn20_get_dcc_compression_cap
};

-static void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
-{
- struct clk_limit_table *clk_table = &bw_params->clk_table;
- struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES];
- unsigned int i, closest_clk_lvl;
- int max_dispclk_mhz = 0, max_dppclk_mhz = 0;
- int j;
-
- // Default clock levels are used for diags, which may lead to overclocking.
- if (!IS_DIAG_DC(dc->ctx->dce_environment)) {
-
- dcn3_16_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator;
- dcn3_16_ip.max_num_dpp = dc->res_pool->pipe_count;
- dcn3_16_soc.num_chans = bw_params->num_channels;
-
- ASSERT(clk_table->num_entries);
-
- /* Prepass to find max clocks independent of voltage level. */
- for (i = 0; i < clk_table->num_entries; ++i) {
- if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz)
- max_dispclk_mhz = clk_table->entries[i].dispclk_mhz;
- if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz)
- max_dppclk_mhz = clk_table->entries[i].dppclk_mhz;
- }
-
- for (i = 0; i < clk_table->num_entries; i++) {
- /* loop backwards*/
- for (closest_clk_lvl = 0, j = dcn3_16_soc.num_states - 1; j >= 0; j--) {
- if ((unsigned int) dcn3_16_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) {
- closest_clk_lvl = j;
- break;
- }
- }
- // Ported from DCN315
- if (clk_table->num_entries == 1) {
- /*smu gives one DPM level, let's take the highest one*/
- closest_clk_lvl = dcn3_16_soc.num_states - 1;
- }
-
- clock_limits[i].state = i;
-
- /* Clocks dependent on voltage level. */
- clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
- if (clk_table->num_entries == 1 &&
- clock_limits[i].dcfclk_mhz < dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) {
- /*SMU fix not released yet*/
- clock_limits[i].dcfclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz;
- }
- clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
- clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
- clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio;
-
- /* Clocks independent of voltage level. */
- clock_limits[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz :
- dcn3_16_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
-
- clock_limits[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz :
- dcn3_16_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
-
- clock_limits[i].dram_bw_per_chan_gbps = dcn3_16_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
- clock_limits[i].dscclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
- clock_limits[i].dtbclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
- clock_limits[i].phyclk_d18_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
- clock_limits[i].phyclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
- }
- for (i = 0; i < clk_table->num_entries; i++)
- dcn3_16_soc.clock_limits[i] = clock_limits[i];
- if (clk_table->num_entries) {
- dcn3_16_soc.num_states = clk_table->num_entries;
- }
- }
-
- if (max_dispclk_mhz) {
- dcn3_16_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
- dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
- }
-
- if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment))
- dml_init_instance(&dc->dml, &dcn3_16_soc, &dcn3_16_ip, DML_PROJECT_DCN31);
- else
- dml_init_instance(&dc->dml, &dcn3_16_soc, &dcn3_16_ip, DML_PROJECT_DCN31_FPGA);
-}
-
static struct resource_funcs dcn316_res_pool_funcs = {
.destroy = dcn316_destroy_resource_pool,
.link_enc_create = dcn31_link_encoder_create,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h
index 9d0d60cb9482..0dc5a6c13ae7 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h
@@ -31,6 +31,9 @@
#define TO_DCN316_RES_POOL(pool)\
container_of(pool, struct dcn316_resource_pool, base)

+extern struct _vcs_dpi_ip_params_st dcn3_16_ip;
+extern struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc;
+
struct dcn316_resource_pool {
struct resource_pool base;
};
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
index f70b47ef850c..a0a2e125c9c8 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
@@ -339,6 +339,150 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_15_soc = {
.urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
};

+struct _vcs_dpi_ip_params_st dcn3_16_ip = {
+ .gpuvm_enable = 1,
+ .gpuvm_max_page_table_levels = 1,
+ .hostvm_enable = 1,
+ .hostvm_max_page_table_levels = 2,
+ .rob_buffer_size_kbytes = 64,
+ .det_buffer_size_kbytes = DCN3_16_DEFAULT_DET_SIZE,
+ .config_return_buffer_size_in_kbytes = 1024,
+ .compressed_buffer_segment_size_in_kbytes = 64,
+ .meta_fifo_size_in_kentries = 32,
+ .zero_size_buffer_entries = 512,
+ .compbuf_reserved_space_64b = 256,
+ .compbuf_reserved_space_zs = 64,
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_kbytes = 8,
+ .meta_chunk_size_kbytes = 2,
+ .min_meta_chunk_size_bytes = 256,
+ .writeback_chunk_size_kbytes = 8,
+ .ptoi_supported = false,
+ .num_dsc = 3,
+ .maximum_dsc_bits_per_component = 10,
+ .dsc422_native_support = false,
+ .is_line_buffer_bpp_fixed = true,
+ .line_buffer_fixed_bpp = 48,
+ .line_buffer_size_bits = 789504,
+ .max_line_buffer_lines = 12,
+ .writeback_interface_buffer_size_kbytes = 90,
+ .max_num_dpp = 4,
+ .max_num_otg = 4,
+ .max_num_hdmi_frl_outputs = 1,
+ .max_num_wb = 1,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 6,
+ .max_vscl_ratio = 6,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dpte_buffer_size_in_pte_reqs_luma = 64,
+ .dpte_buffer_size_in_pte_reqs_chroma = 34,
+ .dispclk_ramp_margin_percent = 1,
+ .max_inter_dcn_tile_repeaters = 8,
+ .cursor_buffer_size = 16,
+ .cursor_chunk_size = 2,
+ .writeback_line_buffer_buffer_size = 0,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 1,
+ .writeback_max_vscl_taps = 1,
+ .dppclk_delay_subtotal = 46,
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_cnvc_formatter = 27,
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 119,
+ .dynamic_metadata_vm_enabled = false,
+ .odm_combine_4to1_supported = false,
+ .dcc_supported = true,
+};
+
+struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = {
+ /*TODO: correct dispclk/dppclk voltage level determination*/
+ .clock_limits = {
+ {
+ .state = 0,
+ .dispclk_mhz = 556.0,
+ .dppclk_mhz = 556.0,
+ .phyclk_mhz = 600.0,
+ .phyclk_d18_mhz = 445.0,
+ .dscclk_mhz = 186.0,
+ .dtbclk_mhz = 625.0,
+ },
+ {
+ .state = 1,
+ .dispclk_mhz = 625.0,
+ .dppclk_mhz = 625.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 209.0,
+ .dtbclk_mhz = 625.0,
+ },
+ {
+ .state = 2,
+ .dispclk_mhz = 625.0,
+ .dppclk_mhz = 625.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 209.0,
+ .dtbclk_mhz = 625.0,
+ },
+ {
+ .state = 3,
+ .dispclk_mhz = 1112.0,
+ .dppclk_mhz = 1112.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 371.0,
+ .dtbclk_mhz = 625.0,
+ },
+ {
+ .state = 4,
+ .dispclk_mhz = 1250.0,
+ .dppclk_mhz = 1250.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 417.0,
+ .dtbclk_mhz = 625.0,
+ },
+ },
+ .num_states = 5,
+ .sr_exit_time_us = 9.0,
+ .sr_enter_plus_exit_time_us = 11.0,
+ .sr_exit_z8_time_us = 442.0,
+ .sr_enter_plus_exit_z8_time_us = 560.0,
+ .writeback_latency_us = 12.0,
+ .dram_channel_width_bytes = 4,
+ .round_trip_ping_latency_dcfclk_cycles = 106,
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+ .pct_ideal_sdp_bw_after_urgent = 80.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0,
+ .max_avg_sdp_bw_use_normal_percent = 60.0,
+ .max_avg_dram_bw_use_normal_percent = 60.0,
+ .fabric_datapath_to_dcn_data_return_bytes = 32,
+ .return_bus_width_bytes = 64,
+ .downspread_percent = 0.38,
+ .dcn_downspread_percent = 0.5,
+ .gpuvm_min_page_size_bytes = 4096,
+ .hostvm_min_page_size_bytes = 4096,
+ .do_urgent_latency_adjustment = false,
+ .urgent_latency_adjustment_fabric_clock_component_us = 0,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
+};
+
void dcn31_calculate_wm_and_dlg_fp(
struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
@@ -632,3 +776,88 @@ void dcn315_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param
else
dml_init_instance(&dc->dml, &dcn3_15_soc, &dcn3_15_ip, DML_PROJECT_DCN31_FPGA);
}
+
+void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
+{
+ struct clk_limit_table *clk_table = &bw_params->clk_table;
+ struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES];
+ unsigned int i, closest_clk_lvl;
+ int max_dispclk_mhz = 0, max_dppclk_mhz = 0;
+ int j;
+
+ dc_assert_fp_enabled();
+
+ // Default clock levels are used for diags, which may lead to overclocking.
+ if (!IS_DIAG_DC(dc->ctx->dce_environment)) {
+
+ dcn3_16_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator;
+ dcn3_16_ip.max_num_dpp = dc->res_pool->pipe_count;
+ dcn3_16_soc.num_chans = bw_params->num_channels;
+
+ ASSERT(clk_table->num_entries);
+
+ /* Prepass to find max clocks independent of voltage level. */
+ for (i = 0; i < clk_table->num_entries; ++i) {
+ if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz)
+ max_dispclk_mhz = clk_table->entries[i].dispclk_mhz;
+ if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz)
+ max_dppclk_mhz = clk_table->entries[i].dppclk_mhz;
+ }
+
+ for (i = 0; i < clk_table->num_entries; i++) {
+ /* loop backwards*/
+ for (closest_clk_lvl = 0, j = dcn3_16_soc.num_states - 1; j >= 0; j--) {
+ if ((unsigned int) dcn3_16_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) {
+ closest_clk_lvl = j;
+ break;
+ }
+ }
+ // Ported from DCN315
+ if (clk_table->num_entries == 1) {
+ /*smu gives one DPM level, let's take the highest one*/
+ closest_clk_lvl = dcn3_16_soc.num_states - 1;
+ }
+
+ clock_limits[i].state = i;
+
+ /* Clocks dependent on voltage level. */
+ clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
+ if (clk_table->num_entries == 1 &&
+ clock_limits[i].dcfclk_mhz < dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) {
+ /*SMU fix not released yet*/
+ clock_limits[i].dcfclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz;
+ }
+ clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
+ clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
+ clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio;
+
+ /* Clocks independent of voltage level. */
+ clock_limits[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz :
+ dcn3_16_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
+
+ clock_limits[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz :
+ dcn3_16_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
+
+ clock_limits[i].dram_bw_per_chan_gbps = dcn3_16_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
+ clock_limits[i].dscclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
+ clock_limits[i].dtbclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
+ clock_limits[i].phyclk_d18_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
+ clock_limits[i].phyclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
+ }
+ for (i = 0; i < clk_table->num_entries; i++)
+ dcn3_16_soc.clock_limits[i] = clock_limits[i];
+ if (clk_table->num_entries) {
+ dcn3_16_soc.num_states = clk_table->num_entries;
+ }
+ }
+
+ if (max_dispclk_mhz) {
+ dcn3_16_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
+ dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
+ }
+
+ if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment))
+ dml_init_instance(&dc->dml, &dcn3_16_soc, &dcn3_16_ip, DML_PROJECT_DCN31);
+ else
+ dml_init_instance(&dc->dml, &dcn3_16_soc, &dcn3_16_ip, DML_PROJECT_DCN31_FPGA);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h
index b15b587cf8c4..24ac19c83687 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h
@@ -29,6 +29,7 @@
#define DCN3_1_DEFAULT_DET_SIZE 384
#define DCN3_15_DEFAULT_DET_SIZE 192
#define DCN3_15_MIN_COMPBUF_SIZE_KB 128
+#define DCN3_16_DEFAULT_DET_SIZE 192

void dcn31_calculate_wm_and_dlg_fp(
struct dc *dc, struct dc_state *context,
@@ -38,5 +39,6 @@ void dcn31_calculate_wm_and_dlg_fp(

void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
void dcn315_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
+void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);

#endif /* __DCN31_FPU_H__*/
--
2.34.1

2022-03-08 23:48:37

by Melissa Wen

[permalink] [raw]
Subject: [PATCH 2/3] drm/amd/display: move FPU related code from dcn315 to dml/dcn31 folder

Moves related structs and dcn315_update_bw_bounding_box from dcn315
driver code to dml/dcn31_fpu that centralizes FPU code for DCN 3.1x.

Signed-off-by: Melissa Wen <[email protected]>
---
.../gpu/drm/amd/display/dc/dcn315/Makefile | 26 --
.../amd/display/dc/dcn315/dcn315_resource.c | 232 +-----------------
.../amd/display/dc/dcn315/dcn315_resource.h | 3 +
.../drm/amd/display/dc/dml/dcn31/dcn31_fpu.c | 228 +++++++++++++++++
.../drm/amd/display/dc/dml/dcn31/dcn31_fpu.h | 3 +
5 files changed, 235 insertions(+), 257 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/Makefile b/drivers/gpu/drm/amd/display/dc/dcn315/Makefile
index c831ad46e81c..59381d24800b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn315/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn315/Makefile
@@ -25,32 +25,6 @@

DCN315 = dcn315_resource.o

-ifdef CONFIG_X86
-CFLAGS_$(AMDDALPATH)/dc/dcn315/dcn315_resource.o := -msse
-endif
-
-ifdef CONFIG_PPC64
-CFLAGS_$(AMDDALPATH)/dc/dcn315/dcn315_resource.o := -mhard-float -maltivec
-endif
-
-ifdef CONFIG_CC_IS_GCC
-ifeq ($(call cc-ifversion, -lt, 0701, y), y)
-IS_OLD_GCC = 1
-endif
-CFLAGS_$(AMDDALPATH)/dc/dcn315/dcn315_resource.o += -mhard-float
-endif
-
-ifdef CONFIG_X86
-ifdef IS_OLD_GCC
-# Stack alignment mismatch, proceed with caution.
-# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
-# (8B stack alignment).
-CFLAGS_$(AMDDALPATH)/dc/dcn315/dcn315_resource.o += -mpreferred-stack-boundary=4
-else
-CFLAGS_$(AMDDALPATH)/dc/dcn315/dcn315_resource.o += -msse2
-endif
-endif
-
AMD_DAL_DCN315 = $(addprefix $(AMDDALPATH)/dc/dcn315/,$(DCN315))

AMD_DISPLAY_FILES += $(AMD_DAL_DCN315)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c
index 756fec81b9ad..51a712958dbd 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c
@@ -66,6 +66,7 @@
#include "virtual/virtual_stream_encoder.h"
#include "dce110/dce110_resource.h"
#include "dml/display_mode_vba.h"
+#include "dml/dcn31/dcn31_fpu.h"
#include "dcn31/dcn31_dccg.h"
#include "dcn10/dcn10_resource.h"
#include "dcn31/dcn31_panel_cntl.h"
@@ -133,158 +134,9 @@

#include "link_enc_cfg.h"

-#define DC_LOGGER_INIT(logger)
-
-#define DCN3_15_DEFAULT_DET_SIZE 192
#define DCN3_15_MAX_DET_SIZE 384
-#define DCN3_15_MIN_COMPBUF_SIZE_KB 128
#define DCN3_15_CRB_SEGMENT_SIZE_KB 64

-struct _vcs_dpi_ip_params_st dcn3_15_ip = {
- .gpuvm_enable = 1,
- .gpuvm_max_page_table_levels = 1,
- .hostvm_enable = 1,
- .hostvm_max_page_table_levels = 2,
- .rob_buffer_size_kbytes = 64,
- .det_buffer_size_kbytes = DCN3_15_DEFAULT_DET_SIZE,
- .min_comp_buffer_size_kbytes = DCN3_15_MIN_COMPBUF_SIZE_KB,
- .config_return_buffer_size_in_kbytes = 1024,
- .compressed_buffer_segment_size_in_kbytes = 64,
- .meta_fifo_size_in_kentries = 32,
- .zero_size_buffer_entries = 512,
- .compbuf_reserved_space_64b = 256,
- .compbuf_reserved_space_zs = 64,
- .dpp_output_buffer_pixels = 2560,
- .opp_output_buffer_lines = 1,
- .pixel_chunk_size_kbytes = 8,
- .meta_chunk_size_kbytes = 2,
- .min_meta_chunk_size_bytes = 256,
- .writeback_chunk_size_kbytes = 8,
- .ptoi_supported = false,
- .num_dsc = 3,
- .maximum_dsc_bits_per_component = 10,
- .dsc422_native_support = false,
- .is_line_buffer_bpp_fixed = true,
- .line_buffer_fixed_bpp = 49,
- .line_buffer_size_bits = 789504,
- .max_line_buffer_lines = 12,
- .writeback_interface_buffer_size_kbytes = 90,
- .max_num_dpp = 4,
- .max_num_otg = 4,
- .max_num_hdmi_frl_outputs = 1,
- .max_num_wb = 1,
- .max_dchub_pscl_bw_pix_per_clk = 4,
- .max_pscl_lb_bw_pix_per_clk = 2,
- .max_lb_vscl_bw_pix_per_clk = 4,
- .max_vscl_hscl_bw_pix_per_clk = 4,
- .max_hscl_ratio = 6,
- .max_vscl_ratio = 6,
- .max_hscl_taps = 8,
- .max_vscl_taps = 8,
- .dpte_buffer_size_in_pte_reqs_luma = 64,
- .dpte_buffer_size_in_pte_reqs_chroma = 34,
- .dispclk_ramp_margin_percent = 1,
- .max_inter_dcn_tile_repeaters = 9,
- .cursor_buffer_size = 16,
- .cursor_chunk_size = 2,
- .writeback_line_buffer_buffer_size = 0,
- .writeback_min_hscl_ratio = 1,
- .writeback_min_vscl_ratio = 1,
- .writeback_max_hscl_ratio = 1,
- .writeback_max_vscl_ratio = 1,
- .writeback_max_hscl_taps = 1,
- .writeback_max_vscl_taps = 1,
- .dppclk_delay_subtotal = 46,
- .dppclk_delay_scl = 50,
- .dppclk_delay_scl_lb_only = 16,
- .dppclk_delay_cnvc_formatter = 27,
- .dppclk_delay_cnvc_cursor = 6,
- .dispclk_delay_subtotal = 119,
- .dynamic_metadata_vm_enabled = false,
- .odm_combine_4to1_supported = false,
- .dcc_supported = true,
-};
-
-struct _vcs_dpi_soc_bounding_box_st dcn3_15_soc = {
- /*TODO: correct dispclk/dppclk voltage level determination*/
- .clock_limits = {
- {
- .state = 0,
- .dispclk_mhz = 1372.0,
- .dppclk_mhz = 1372.0,
- .phyclk_mhz = 810.0,
- .phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 417.0,
- .dtbclk_mhz = 600.0,
- },
- {
- .state = 1,
- .dispclk_mhz = 1372.0,
- .dppclk_mhz = 1372.0,
- .phyclk_mhz = 810.0,
- .phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 417.0,
- .dtbclk_mhz = 600.0,
- },
- {
- .state = 2,
- .dispclk_mhz = 1372.0,
- .dppclk_mhz = 1372.0,
- .phyclk_mhz = 810.0,
- .phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 417.0,
- .dtbclk_mhz = 600.0,
- },
- {
- .state = 3,
- .dispclk_mhz = 1372.0,
- .dppclk_mhz = 1372.0,
- .phyclk_mhz = 810.0,
- .phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 417.0,
- .dtbclk_mhz = 600.0,
- },
- {
- .state = 4,
- .dispclk_mhz = 1372.0,
- .dppclk_mhz = 1372.0,
- .phyclk_mhz = 810.0,
- .phyclk_d18_mhz = 667.0,
- .dscclk_mhz = 417.0,
- .dtbclk_mhz = 600.0,
- },
- },
- .num_states = 5,
- .sr_exit_time_us = 9.0,
- .sr_enter_plus_exit_time_us = 11.0,
- .sr_exit_z8_time_us = 50.0,
- .sr_enter_plus_exit_z8_time_us = 50.0,
- .writeback_latency_us = 12.0,
- .dram_channel_width_bytes = 4,
- .round_trip_ping_latency_dcfclk_cycles = 106,
- .urgent_latency_pixel_data_only_us = 4.0,
- .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
- .urgent_latency_vm_data_only_us = 4.0,
- .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
- .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
- .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
- .pct_ideal_sdp_bw_after_urgent = 80.0,
- .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0,
- .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
- .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0,
- .max_avg_sdp_bw_use_normal_percent = 60.0,
- .max_avg_dram_bw_use_normal_percent = 60.0,
- .fabric_datapath_to_dcn_data_return_bytes = 32,
- .return_bus_width_bytes = 64,
- .downspread_percent = 0.38,
- .dcn_downspread_percent = 0.38,
- .gpuvm_min_page_size_bytes = 4096,
- .hostvm_min_page_size_bytes = 4096,
- .do_urgent_latency_adjustment = false,
- .urgent_latency_adjustment_fabric_clock_component_us = 0,
- .urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
-};
-
enum dcn31_clk_src_array_id {
DCN31_CLK_SRC_PLL0,
DCN31_CLK_SRC_PLL1,
@@ -1859,88 +1711,6 @@ static struct dc_cap_funcs cap_funcs = {
.get_dcc_compression_cap = dcn20_get_dcc_compression_cap
};

-static void dcn315_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
-{
- struct clk_limit_table *clk_table = &bw_params->clk_table;
- struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES];
- unsigned int i, closest_clk_lvl;
- int max_dispclk_mhz = 0, max_dppclk_mhz = 0;
- int j;
-
- // Default clock levels are used for diags, which may lead to overclocking.
- if (!IS_DIAG_DC(dc->ctx->dce_environment)) {
-
- dcn3_15_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator;
- dcn3_15_ip.max_num_dpp = dc->res_pool->pipe_count;
- dcn3_15_soc.num_chans = bw_params->num_channels;
-
- ASSERT(clk_table->num_entries);
-
- /* Prepass to find max clocks independent of voltage level. */
- for (i = 0; i < clk_table->num_entries; ++i) {
- if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz)
- max_dispclk_mhz = clk_table->entries[i].dispclk_mhz;
- if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz)
- max_dppclk_mhz = clk_table->entries[i].dppclk_mhz;
- }
-
- for (i = 0; i < clk_table->num_entries; i++) {
- /* loop backwards*/
- for (closest_clk_lvl = 0, j = dcn3_15_soc.num_states - 1; j >= 0; j--) {
- if ((unsigned int) dcn3_15_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) {
- closest_clk_lvl = j;
- break;
- }
- }
- if (clk_table->num_entries == 1) {
- /*smu gives one DPM level, let's take the highest one*/
- closest_clk_lvl = dcn3_15_soc.num_states - 1;
- }
-
- clock_limits[i].state = i;
-
- /* Clocks dependent on voltage level. */
- clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
- if (clk_table->num_entries == 1 &&
- clock_limits[i].dcfclk_mhz < dcn3_15_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) {
- /*SMU fix not released yet*/
- clock_limits[i].dcfclk_mhz = dcn3_15_soc.clock_limits[closest_clk_lvl].dcfclk_mhz;
- }
- clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
- clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
- clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio;
-
- /* Clocks independent of voltage level. */
- clock_limits[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz :
- dcn3_15_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
-
- clock_limits[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz :
- dcn3_15_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
-
- clock_limits[i].dram_bw_per_chan_gbps = dcn3_15_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
- clock_limits[i].dscclk_mhz = dcn3_15_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
- clock_limits[i].dtbclk_mhz = dcn3_15_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
- clock_limits[i].phyclk_d18_mhz = dcn3_15_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
- clock_limits[i].phyclk_mhz = dcn3_15_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
- }
- for (i = 0; i < clk_table->num_entries; i++)
- dcn3_15_soc.clock_limits[i] = clock_limits[i];
- if (clk_table->num_entries) {
- dcn3_15_soc.num_states = clk_table->num_entries;
- }
- }
-
- if (max_dispclk_mhz) {
- dcn3_15_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
- dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
- }
-
- if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment))
- dml_init_instance(&dc->dml, &dcn3_15_soc, &dcn3_15_ip, DML_PROJECT_DCN31);
- else
- dml_init_instance(&dc->dml, &dcn3_15_soc, &dcn3_15_ip, DML_PROJECT_DCN31_FPGA);
-}
-
static struct resource_funcs dcn315_res_pool_funcs = {
.destroy = dcn315_destroy_resource_pool,
.link_enc_create = dcn31_link_encoder_create,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h
index f3a36820a31f..39929fa67a51 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h
@@ -31,6 +31,9 @@
#define TO_DCN315_RES_POOL(pool)\
container_of(pool, struct dcn315_resource_pool, base)

+extern struct _vcs_dpi_ip_params_st dcn3_15_ip;
+extern struct _vcs_dpi_soc_bounding_box_st dcn3_15_soc; /* type must match the definition in dml/dcn31/dcn31_fpu.c */
+
struct dcn315_resource_pool {
struct resource_pool base;
};
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
index 7ff8fe9e8712..f70b47ef850c 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
@@ -194,6 +194,150 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = {
.urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
};

+struct _vcs_dpi_ip_params_st dcn3_15_ip = { /* static IP parameters; max_num_otg and max_num_dpp are overwritten by dcn315_update_bw_bounding_box() outside diag environments */
+ .gpuvm_enable = 1,
+ .gpuvm_max_page_table_levels = 1,
+ .hostvm_enable = 1,
+ .hostvm_max_page_table_levels = 2,
+ .rob_buffer_size_kbytes = 64,
+ .det_buffer_size_kbytes = DCN3_15_DEFAULT_DET_SIZE,
+ .min_comp_buffer_size_kbytes = DCN3_15_MIN_COMPBUF_SIZE_KB,
+ .config_return_buffer_size_in_kbytes = 1024,
+ .compressed_buffer_segment_size_in_kbytes = 64,
+ .meta_fifo_size_in_kentries = 32,
+ .zero_size_buffer_entries = 512,
+ .compbuf_reserved_space_64b = 256,
+ .compbuf_reserved_space_zs = 64,
+ .dpp_output_buffer_pixels = 2560,
+ .opp_output_buffer_lines = 1,
+ .pixel_chunk_size_kbytes = 8,
+ .meta_chunk_size_kbytes = 2,
+ .min_meta_chunk_size_bytes = 256,
+ .writeback_chunk_size_kbytes = 8,
+ .ptoi_supported = false,
+ .num_dsc = 3,
+ .maximum_dsc_bits_per_component = 10,
+ .dsc422_native_support = false,
+ .is_line_buffer_bpp_fixed = true,
+ .line_buffer_fixed_bpp = 49,
+ .line_buffer_size_bits = 789504,
+ .max_line_buffer_lines = 12,
+ .writeback_interface_buffer_size_kbytes = 90,
+ .max_num_dpp = 4,
+ .max_num_otg = 4,
+ .max_num_hdmi_frl_outputs = 1,
+ .max_num_wb = 1,
+ .max_dchub_pscl_bw_pix_per_clk = 4,
+ .max_pscl_lb_bw_pix_per_clk = 2,
+ .max_lb_vscl_bw_pix_per_clk = 4,
+ .max_vscl_hscl_bw_pix_per_clk = 4,
+ .max_hscl_ratio = 6,
+ .max_vscl_ratio = 6,
+ .max_hscl_taps = 8,
+ .max_vscl_taps = 8,
+ .dpte_buffer_size_in_pte_reqs_luma = 64,
+ .dpte_buffer_size_in_pte_reqs_chroma = 34,
+ .dispclk_ramp_margin_percent = 1,
+ .max_inter_dcn_tile_repeaters = 9,
+ .cursor_buffer_size = 16,
+ .cursor_chunk_size = 2,
+ .writeback_line_buffer_buffer_size = 0,
+ .writeback_min_hscl_ratio = 1,
+ .writeback_min_vscl_ratio = 1,
+ .writeback_max_hscl_ratio = 1,
+ .writeback_max_vscl_ratio = 1,
+ .writeback_max_hscl_taps = 1,
+ .writeback_max_vscl_taps = 1,
+ .dppclk_delay_subtotal = 46,
+ .dppclk_delay_scl = 50,
+ .dppclk_delay_scl_lb_only = 16,
+ .dppclk_delay_cnvc_formatter = 27,
+ .dppclk_delay_cnvc_cursor = 6,
+ .dispclk_delay_subtotal = 119,
+ .dynamic_metadata_vm_enabled = false,
+ .odm_combine_4to1_supported = false,
+ .dcc_supported = true,
+};
+
+struct _vcs_dpi_soc_bounding_box_st dcn3_15_soc = { /* static bounding box; clock_limits, num_states and num_chans are replaced by dcn315_update_bw_bounding_box() outside diag environments */
+ /*TODO: correct dispclk/dppclk voltage level determination*/
+ .clock_limits = {
+ {
+ .state = 0,
+ .dispclk_mhz = 1372.0,
+ .dppclk_mhz = 1372.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 417.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 1,
+ .dispclk_mhz = 1372.0,
+ .dppclk_mhz = 1372.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 417.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 2,
+ .dispclk_mhz = 1372.0,
+ .dppclk_mhz = 1372.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 417.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 3,
+ .dispclk_mhz = 1372.0,
+ .dppclk_mhz = 1372.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 417.0,
+ .dtbclk_mhz = 600.0,
+ },
+ {
+ .state = 4,
+ .dispclk_mhz = 1372.0,
+ .dppclk_mhz = 1372.0,
+ .phyclk_mhz = 810.0,
+ .phyclk_d18_mhz = 667.0,
+ .dscclk_mhz = 417.0,
+ .dtbclk_mhz = 600.0,
+ },
+ },
+ .num_states = 5,
+ .sr_exit_time_us = 9.0,
+ .sr_enter_plus_exit_time_us = 11.0,
+ .sr_exit_z8_time_us = 50.0,
+ .sr_enter_plus_exit_z8_time_us = 50.0,
+ .writeback_latency_us = 12.0,
+ .dram_channel_width_bytes = 4,
+ .round_trip_ping_latency_dcfclk_cycles = 106,
+ .urgent_latency_pixel_data_only_us = 4.0,
+ .urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
+ .urgent_latency_vm_data_only_us = 4.0,
+ .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096,
+ .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096,
+ .pct_ideal_sdp_bw_after_urgent = 80.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0,
+ .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0,
+ .max_avg_sdp_bw_use_normal_percent = 60.0,
+ .max_avg_dram_bw_use_normal_percent = 60.0,
+ .fabric_datapath_to_dcn_data_return_bytes = 32,
+ .return_bus_width_bytes = 64,
+ .downspread_percent = 0.38,
+ .dcn_downspread_percent = 0.38,
+ .gpuvm_min_page_size_bytes = 4096,
+ .hostvm_min_page_size_bytes = 4096,
+ .do_urgent_latency_adjustment = false,
+ .urgent_latency_adjustment_fabric_clock_component_us = 0,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
+};

void dcn31_calculate_wm_and_dlg_fp(
struct dc *dc, struct dc_state *context,
@@ -404,3 +548,87 @@ void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params
else
dml_init_instance(&dc->dml, &dcn3_1_soc, &dcn3_1_ip, DML_PROJECT_DCN31_FPGA);
}
+/* Rebuild dcn3_15_soc/dcn3_15_ip from bw_params->clk_table and re-initialize dc->dml; asserts that the FPU is enabled. */
+void dcn315_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
+{
+ struct clk_limit_table *clk_table = &bw_params->clk_table;
+ struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES];
+ unsigned int i, closest_clk_lvl;
+ int max_dispclk_mhz = 0, max_dppclk_mhz = 0;
+ int j;
+
+ dc_assert_fp_enabled();
+
+ // Default clock levels are used for diags, which may lead to overclocking.
+ if (!IS_DIAG_DC(dc->ctx->dce_environment)) {
+ /* Take OTG/pipe counts from the resource pool and the channel count from bw_params. */
+ dcn3_15_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator;
+ dcn3_15_ip.max_num_dpp = dc->res_pool->pipe_count;
+ dcn3_15_soc.num_chans = bw_params->num_channels;
+
+ ASSERT(clk_table->num_entries);
+
+ /* Prepass to find max clocks independent of voltage level. */
+ for (i = 0; i < clk_table->num_entries; ++i) {
+ if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz)
+ max_dispclk_mhz = clk_table->entries[i].dispclk_mhz;
+ if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz)
+ max_dppclk_mhz = clk_table->entries[i].dppclk_mhz;
+ }
+
+ for (i = 0; i < clk_table->num_entries; i++) {
+ /* Loop backwards: pick the highest default state whose dcfclk does not exceed this entry's; stays 0 if none matches. */
+ for (closest_clk_lvl = 0, j = dcn3_15_soc.num_states - 1; j >= 0; j--) {
+ if ((unsigned int) dcn3_15_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) {
+ closest_clk_lvl = j;
+ break;
+ }
+ }
+ if (clk_table->num_entries == 1) {
+ /* SMU gives one DPM level, let's take the highest one. */
+ closest_clk_lvl = dcn3_15_soc.num_states - 1;
+ }
+
+ clock_limits[i].state = i;
+
+ /* Clocks dependent on voltage level. */
+ clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz;
+ if (clk_table->num_entries == 1 &&
+ clock_limits[i].dcfclk_mhz < dcn3_15_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) {
+ /* SMU fix not released yet: with a single DPM level, do not go below the default dcfclk. */
+ clock_limits[i].dcfclk_mhz = dcn3_15_soc.clock_limits[closest_clk_lvl].dcfclk_mhz;
+ }
+ clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz;
+ clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz;
+ clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio;
+
+ /* Clocks independent of voltage level. */
+ clock_limits[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz :
+ dcn3_15_soc.clock_limits[closest_clk_lvl].dispclk_mhz;
+
+ clock_limits[i].dppclk_mhz = max_dppclk_mhz ? max_dppclk_mhz :
+ dcn3_15_soc.clock_limits[closest_clk_lvl].dppclk_mhz;
+
+ clock_limits[i].dram_bw_per_chan_gbps = dcn3_15_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps;
+ clock_limits[i].dscclk_mhz = dcn3_15_soc.clock_limits[closest_clk_lvl].dscclk_mhz;
+ clock_limits[i].dtbclk_mhz = dcn3_15_soc.clock_limits[closest_clk_lvl].dtbclk_mhz;
+ clock_limits[i].phyclk_d18_mhz = dcn3_15_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz;
+ clock_limits[i].phyclk_mhz = dcn3_15_soc.clock_limits[closest_clk_lvl].phyclk_mhz;
+ }
+ for (i = 0; i < clk_table->num_entries; i++) /* copy the rebuilt states over the static table */
+ dcn3_15_soc.clock_limits[i] = clock_limits[i];
+ if (clk_table->num_entries) {
+ dcn3_15_soc.num_states = clk_table->num_entries;
+ }
+ }
+
+ if (max_dispclk_mhz) { /* stays 0 in diag environments, so the VCO speed is left untouched there */
+ dcn3_15_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
+ dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
+ }
+
+ if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment))
+ dml_init_instance(&dc->dml, &dcn3_15_soc, &dcn3_15_ip, DML_PROJECT_DCN31);
+ else
+ dml_init_instance(&dc->dml, &dcn3_15_soc, &dcn3_15_ip, DML_PROJECT_DCN31_FPGA);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h
index baadb5150e7d..b15b587cf8c4 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h
@@ -27,6 +27,8 @@
#define __DCN31_FPU_H__

#define DCN3_1_DEFAULT_DET_SIZE 384
+#define DCN3_15_DEFAULT_DET_SIZE 192 /* initializes dcn3_15_ip.det_buffer_size_kbytes */
+#define DCN3_15_MIN_COMPBUF_SIZE_KB 128 /* initializes dcn3_15_ip.min_comp_buffer_size_kbytes */

void dcn31_calculate_wm_and_dlg_fp(
struct dc *dc, struct dc_state *context,
@@ -35,5 +37,6 @@ void dcn31_calculate_wm_and_dlg_fp(
int vlevel);

void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
+void dcn315_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); /* calls dc_assert_fp_enabled(): invoke only with the FPU enabled */

#endif /* __DCN31_FPU_H__*/
--
2.34.1

2022-03-17 06:00:28

by Rodrigo Siqueira Jordao

[permalink] [raw]
Subject: Re: [PATCH 0/3] Move FPU related code from DCN3.1x drivers to DML folder



On 2022-03-07 10:47, Melissa Wen wrote:
> This series moves FPU code from DCN 3.1x drivers to dml/dcn31 folder to
> isolate FPU operations. For this, it creates dcn31_fpu files to centralize
> FPU operations and structs from dcn31x drivers, that include:
> - _vcs_dpi_ip_params_st and _vcs_dpi_soc_bounding_box_st structs
> - dcn31x_update_bw_bounding_box() functions
> - dcn31_calculate_wm_and_dlg_fp()
>
> Also, it adds dc_assert_fp_enabled() in public dml-fpu functions, as required,
> and I've checked if their calls are properly wrapped by DC_FP_START/END (and
> removed when inside dml/fpu files too).
>
> Melissa Wen (3):
> drm/amd/display: move FPU related code from dcn31 to dml/dcn31 folder
> drm/amd/display: move FPU related code from dcn315 to dml/dcn31 folder
> drm/amd/display: move FPU related code from dcn316 to dml/dcn31 folder
>
> drivers/gpu/drm/amd/display/dc/dcn31/Makefile | 26 -
> .../drm/amd/display/dc/dcn31/dcn31_resource.c | 355 +------
> .../drm/amd/display/dc/dcn31/dcn31_resource.h | 4 +-
> .../gpu/drm/amd/display/dc/dcn315/Makefile | 26 -
> .../amd/display/dc/dcn315/dcn315_resource.c | 232 +----
> .../amd/display/dc/dcn315/dcn315_resource.h | 3 +
> .../gpu/drm/amd/display/dc/dcn316/Makefile | 26 -
> .../amd/display/dc/dcn316/dcn316_resource.c | 231 +----
> .../amd/display/dc/dcn316/dcn316_resource.h | 3 +
> drivers/gpu/drm/amd/display/dc/dml/Makefile | 2 +
> .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.c | 863 ++++++++++++++++++
> .../drm/amd/display/dc/dml/dcn31/dcn31_fpu.h | 44 +
> 12 files changed, 921 insertions(+), 894 deletions(-)
> create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
> create mode 100644 drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h
>

Hi Melissa,

Thanks a lot for your patchset. We have already tested it with IGT, and
we are running some manual tests to ensure that everything is fine. We
will include your patch in this week's DC upstream, and if everything is
alright, we will merge it next Monday.