/* * Copyright (c) 2012-2013 Etnaviv Project * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sub license, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ /* Gallium pipe driver */ #include "etna_pipe.h" #include "etna_translate.h" #include #include #include #include #include #include #include "etna_blend.h" #include "etna_clear_blit.h" #include "etna_compiler.h" #include "etna_debug.h" #include "etna_fence.h" #include "etna_rasterizer.h" #include "etna_resource.h" #include "etna_shader.h" #include "etna_surface.h" #include "etna_texture.h" #include "etna_transfer.h" #include "etna_zsa.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" #include "util/u_math.h" #include "util/u_inlines.h" #include "util/u_memory.h" #include "util/u_prim.h" #include #include #include #include #include #include #include #include #include #include #include #include /*********************************************************************/ /* Context handling */ #define ETNA_3D_CONTEXT_SIZE (400) /* keep this number above "Total state updates (fixed)" from gen_weave_state tool */ /* Create bit field that specifies which samplers are active and thus need to be programmed * 32 bits is enough for 32 samplers. As far as I know this is the upper bound supported on any Vivante hw * up to GC4000. */ static uint32_t active_samplers_bits(struct pipe_context *pipe) { struct etna_pipe_context *restrict e = etna_pipe_context(pipe); unsigned num_fragment_samplers = MIN2(e->num_fragment_samplers, e->num_fragment_sampler_views); unsigned num_vertex_samplers = MIN2(e->num_vertex_samplers, e->num_vertex_sampler_views); uint32_t active_samplers = etna_bits_ones(num_fragment_samplers) | etna_bits_ones(num_vertex_samplers) << e->specs.vertex_sampler_offset; return active_samplers; } /* Reset / re-upload context. * * This pushes the current register state in pipe->gpu3d to the GPU. * The function is used to initialize the GPU in a predictable state * at the beginning of rendering, as well as to create a context * buffer for the kernel driver. */ static void reset_context(struct pipe_context *restrict pipe) { struct etna_pipe_context *restrict e = etna_pipe_context(pipe); struct etna_ctx *restrict ctx = e->ctx; #define EMIT_STATE(state_name, dest_field) \ ETNA_COALESCE_STATE_UPDATE(state_name, e->gpu3d.dest_field, 0) #define EMIT_STATE_FIXP(state_name, dest_field) \ ETNA_COALESCE_STATE_UPDATE(state_name, e->gpu3d.dest_field, 1) uint32_t last_reg, last_fixp, span_start; ETNA_COALESCE_STATE_OPEN(ETNA_3D_CONTEXT_SIZE); /* multi sample config is set first, and outside of the normal sorting * order, as changing the multisample state clobbers PS.INPUT_COUNT (and * possibly PS.TEMP_REGISTER_CONTROL). */ /*03818*/ EMIT_STATE(GL_MULTI_SAMPLE_CONFIG, GL_MULTI_SAMPLE_CONFIG); /* below code generated by gen_weave_state.py, keep this in sync with sync_context! */ /* begin only EMIT_STATE -- make sure no new etna_reserve calls are done here directly * or indirectly */ for(int x=0; xgpu3d.num_vertex_elements; ++x) { /*00600*/ EMIT_STATE(FE_VERTEX_ELEMENT_CONFIG(x), FE_VERTEX_ELEMENT_CONFIG[x]); } /*00644*/ EMIT_STATE(FE_INDEX_STREAM_BASE_ADDR, FE_INDEX_STREAM_BASE_ADDR); /*00648*/ EMIT_STATE(FE_INDEX_STREAM_CONTROL, FE_INDEX_STREAM_CONTROL); /*0064C*/ EMIT_STATE(FE_VERTEX_STREAM_BASE_ADDR, FE_VERTEX_STREAM_BASE_ADDR); /*00650*/ EMIT_STATE(FE_VERTEX_STREAM_CONTROL, FE_VERTEX_STREAM_CONTROL); for(int x=0; x<8; ++x) { /*00680*/ EMIT_STATE(FE_VERTEX_STREAMS_BASE_ADDR(x), FE_VERTEX_STREAMS_BASE_ADDR[x]); } for(int x=0; x<8; ++x) { /*006A0*/ EMIT_STATE(FE_VERTEX_STREAMS_CONTROL(x), FE_VERTEX_STREAMS_CONTROL[x]); } /*00800*/ EMIT_STATE(VS_END_PC, VS_END_PC); /*00804*/ EMIT_STATE(VS_OUTPUT_COUNT, VS_OUTPUT_COUNT); /*00808*/ EMIT_STATE(VS_INPUT_COUNT, VS_INPUT_COUNT); /*0080C*/ EMIT_STATE(VS_TEMP_REGISTER_CONTROL, VS_TEMP_REGISTER_CONTROL); for(int x=0; x<4; ++x) { /*00810*/ EMIT_STATE(VS_OUTPUT(x), VS_OUTPUT[x]); } for(int x=0; x<4; ++x) { /*00820*/ EMIT_STATE(VS_INPUT(x), VS_INPUT[x]); } /*00830*/ EMIT_STATE(VS_LOAD_BALANCING, VS_LOAD_BALANCING); /*00838*/ EMIT_STATE(VS_START_PC, VS_START_PC); if (e->specs.has_shader_range_registers) { /*0085C*/ EMIT_STATE(VS_RANGE, VS_RANGE); } /*00A00*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_X, PA_VIEWPORT_SCALE_X); /*00A04*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_Y, PA_VIEWPORT_SCALE_Y); /*00A08*/ EMIT_STATE(PA_VIEWPORT_SCALE_Z, PA_VIEWPORT_SCALE_Z); /*00A0C*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_X, PA_VIEWPORT_OFFSET_X); /*00A10*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_Y, PA_VIEWPORT_OFFSET_Y); /*00A14*/ EMIT_STATE(PA_VIEWPORT_OFFSET_Z, PA_VIEWPORT_OFFSET_Z); /*00A18*/ EMIT_STATE(PA_LINE_WIDTH, PA_LINE_WIDTH); /*00A1C*/ EMIT_STATE(PA_POINT_SIZE, PA_POINT_SIZE); /*00A28*/ EMIT_STATE(PA_SYSTEM_MODE, PA_SYSTEM_MODE); /*00A2C*/ EMIT_STATE(PA_W_CLIP_LIMIT, PA_W_CLIP_LIMIT); /*00A30*/ EMIT_STATE(PA_ATTRIBUTE_ELEMENT_COUNT, PA_ATTRIBUTE_ELEMENT_COUNT); /*00A34*/ EMIT_STATE(PA_CONFIG, PA_CONFIG); for(int x=0; x<10; ++x) { /*00A40*/ EMIT_STATE(PA_SHADER_ATTRIBUTES(x), PA_SHADER_ATTRIBUTES[x]); } /*00C00*/ EMIT_STATE_FIXP(SE_SCISSOR_LEFT, SE_SCISSOR_LEFT); /*00C04*/ EMIT_STATE_FIXP(SE_SCISSOR_TOP, SE_SCISSOR_TOP); /*00C08*/ EMIT_STATE_FIXP(SE_SCISSOR_RIGHT, SE_SCISSOR_RIGHT); /*00C0C*/ EMIT_STATE_FIXP(SE_SCISSOR_BOTTOM, SE_SCISSOR_BOTTOM); /*00C10*/ EMIT_STATE(SE_DEPTH_SCALE, SE_DEPTH_SCALE); /*00C14*/ EMIT_STATE(SE_DEPTH_BIAS, SE_DEPTH_BIAS); /*00C18*/ EMIT_STATE(SE_CONFIG, SE_CONFIG); /*00E00*/ EMIT_STATE(RA_CONTROL, RA_CONTROL); /*00E04*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E04, RA_MULTISAMPLE_UNK00E04); /*00E08*/ EMIT_STATE(RA_EARLY_DEPTH, RA_EARLY_DEPTH); for(int x=0; x<4; ++x) { /*00E10*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E10(x), RA_MULTISAMPLE_UNK00E10[x]); } for(int x=0; x<16; ++x) { /*00E40*/ EMIT_STATE(RA_CENTROID_TABLE(x), RA_CENTROID_TABLE[x]); } /*01000*/ EMIT_STATE(PS_END_PC, PS_END_PC); /*01004*/ EMIT_STATE(PS_OUTPUT_REG, PS_OUTPUT_REG); /*01008*/ EMIT_STATE(PS_INPUT_COUNT, PS_INPUT_COUNT); /*0100C*/ EMIT_STATE(PS_TEMP_REGISTER_CONTROL, PS_TEMP_REGISTER_CONTROL); /*01010*/ EMIT_STATE(PS_CONTROL, PS_CONTROL); /*01018*/ EMIT_STATE(PS_START_PC, PS_START_PC); if (e->specs.has_shader_range_registers) { /*0101C*/ EMIT_STATE(PS_RANGE, PS_RANGE); } /*01400*/ EMIT_STATE(PE_DEPTH_CONFIG, PE_DEPTH_CONFIG); /*01404*/ EMIT_STATE(PE_DEPTH_NEAR, PE_DEPTH_NEAR); /*01408*/ EMIT_STATE(PE_DEPTH_FAR, PE_DEPTH_FAR); /*0140C*/ EMIT_STATE(PE_DEPTH_NORMALIZE, PE_DEPTH_NORMALIZE); if (ctx->conn->chip.pixel_pipes == 1) { /*01410*/ EMIT_STATE(PE_DEPTH_ADDR, PE_DEPTH_ADDR); } /*01414*/ EMIT_STATE(PE_DEPTH_STRIDE, PE_DEPTH_STRIDE); /*01418*/ EMIT_STATE(PE_STENCIL_OP, PE_STENCIL_OP); /*0141C*/ EMIT_STATE(PE_STENCIL_CONFIG, PE_STENCIL_CONFIG); /*01420*/ EMIT_STATE(PE_ALPHA_OP, PE_ALPHA_OP); /*01424*/ EMIT_STATE(PE_ALPHA_BLEND_COLOR, PE_ALPHA_BLEND_COLOR); /*01428*/ EMIT_STATE(PE_ALPHA_CONFIG, PE_ALPHA_CONFIG); /*0142C*/ EMIT_STATE(PE_COLOR_FORMAT, PE_COLOR_FORMAT); if (ctx->conn->chip.pixel_pipes == 1) { /*01430*/ EMIT_STATE(PE_COLOR_ADDR, PE_COLOR_ADDR); } /*01434*/ EMIT_STATE(PE_COLOR_STRIDE, PE_COLOR_STRIDE); /*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL, PE_HDEPTH_CONTROL); if (ctx->conn->chip.pixel_pipes != 1) { for(int x=0; xconn->chip.pixel_pipes; ++x) { /*01460*/ EMIT_STATE(PE_PIPE_COLOR_ADDR(x), PE_PIPE_COLOR_ADDR[x]); } for(int x=0; xconn->chip.pixel_pipes; ++x) { /*01480*/ EMIT_STATE(PE_PIPE_DEPTH_ADDR(x), PE_PIPE_DEPTH_ADDR[x]); } } /*014A0*/ EMIT_STATE(PE_STENCIL_CONFIG_EXT, PE_STENCIL_CONFIG_EXT); /*014A4*/ EMIT_STATE(PE_LOGIC_OP, PE_LOGIC_OP); for(int x=0; x<2; ++x) { /*014A8*/ EMIT_STATE(PE_DITHER(x), PE_DITHER[x]); } /*01654*/ EMIT_STATE(TS_MEM_CONFIG, TS_MEM_CONFIG); /*01658*/ EMIT_STATE(TS_COLOR_STATUS_BASE, TS_COLOR_STATUS_BASE); /*0165C*/ EMIT_STATE(TS_COLOR_SURFACE_BASE, TS_COLOR_SURFACE_BASE); /*01660*/ EMIT_STATE(TS_COLOR_CLEAR_VALUE, TS_COLOR_CLEAR_VALUE); /*01664*/ EMIT_STATE(TS_DEPTH_STATUS_BASE, TS_DEPTH_STATUS_BASE); /*01668*/ EMIT_STATE(TS_DEPTH_SURFACE_BASE, TS_DEPTH_SURFACE_BASE); /*0166C*/ EMIT_STATE(TS_DEPTH_CLEAR_VALUE, TS_DEPTH_CLEAR_VALUE); for(int x=0; x<12; ++x) { /*02000*/ EMIT_STATE(TE_SAMPLER_CONFIG0(x), TE_SAMPLER_CONFIG0[x]); } for(int x=0; x<12; ++x) { /*02040*/ EMIT_STATE(TE_SAMPLER_SIZE(x), TE_SAMPLER_SIZE[x]); } for(int x=0; x<12; ++x) { /*02080*/ EMIT_STATE(TE_SAMPLER_LOG_SIZE(x), TE_SAMPLER_LOG_SIZE[x]); } for(int x=0; x<12; ++x) { /*020C0*/ EMIT_STATE(TE_SAMPLER_LOD_CONFIG(x), TE_SAMPLER_LOD_CONFIG[x]); } for(int x=0; x<12; ++x) { /*021C0*/ EMIT_STATE(TE_SAMPLER_CONFIG1(x), TE_SAMPLER_CONFIG1[x]); } for(int y=0; y<14; ++y) { for(int x=0; x<12; ++x) { /*02400*/ EMIT_STATE(TE_SAMPLER_LOD_ADDR(x, y), TE_SAMPLER_LOD_ADDR[y][x]); } } /*03814*/ EMIT_STATE(GL_VERTEX_ELEMENT_CONFIG, GL_VERTEX_ELEMENT_CONFIG); /*0381C*/ EMIT_STATE(GL_VARYING_TOTAL_COMPONENTS, GL_VARYING_TOTAL_COMPONENTS); /*03820*/ EMIT_STATE(GL_VARYING_NUM_COMPONENTS, GL_VARYING_NUM_COMPONENTS); for(int x=0; x<2; ++x) { /*03828*/ EMIT_STATE(GL_VARYING_COMPONENT_USE(x), GL_VARYING_COMPONENT_USE[x]); } /*0384C*/ EMIT_STATE(GL_API_MODE, GL_API_MODE); ETNA_COALESCE_STATE_CLOSE(); /* end only EMIT_STATE */ #undef EMIT_STATE #undef EMIT_STATE_FIXP /* re-submit current shader program and uniforms */ /*04000 or 0C000*/ etna_set_state_multi(ctx, e->specs.vs_offset, e->gpu3d.vs_inst_mem_size, e->gpu3d.VS_INST_MEM); /*06000 or 0D000*/ etna_set_state_multi(ctx, e->specs.ps_offset, e->gpu3d.ps_inst_mem_size, e->gpu3d.PS_INST_MEM); /*05000*/ etna_set_state_multi(ctx, VIVS_VS_UNIFORMS(0), e->gpu3d.vs_uniforms_size, e->gpu3d.VS_UNIFORMS); /*07000*/ etna_set_state_multi(ctx, VIVS_PS_UNIFORMS(0), e->gpu3d.ps_uniforms_size, e->gpu3d.PS_UNIFORMS); } /* Weave state before draw operation. This function merges all the compiled state blocks under * the context into one device register state. Parts of this state that are changed since * last call (dirty) will be uploaded as state changes in the command buffer. */ static void sync_context(struct pipe_context *restrict pipe) { struct etna_pipe_context *restrict e = etna_pipe_context(pipe); struct etna_ctx *restrict ctx = e->ctx; uint32_t active_samplers = active_samplers_bits(pipe); uint32_t dirty = e->dirty_bits; /* CSOs must be bound before calling this */ assert(e->blend_p && e->rasterizer_p && e->depth_stencil_alpha_p && e->vertex_elements_p); /* Pre-processing: re-link shader if needed. */ if(unlikely((dirty & ETNA_STATE_SHADER)) && e->vs && e->fs) { /* re-link vs and fs if needed */ etna_link_shaders(pipe, &e->shader_state, e->vs, e->fs); } /* Pre-processing: see what caches we need to flush before making state * changes. */ uint32_t to_flush = 0; if(unlikely(dirty & (ETNA_STATE_BLEND))) { /* Need flush COLOR when changing PE.COLOR_FORMAT.OVERWRITE. */ if((e->gpu3d.PE_COLOR_FORMAT & VIVS_PE_COLOR_FORMAT_OVERWRITE) != (e->blend.PE_COLOR_FORMAT & VIVS_PE_COLOR_FORMAT_OVERWRITE)) to_flush |= VIVS_GL_FLUSH_CACHE_COLOR; } if(unlikely(dirty & (ETNA_STATE_TEXTURE_CACHES))) to_flush |= VIVS_GL_FLUSH_CACHE_TEXTURE; if(unlikely(dirty & (ETNA_STATE_FRAMEBUFFER))) /* Framebuffer config changed? */ to_flush |= VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH; if(DBG_ENABLED(ETNA_DBG_CFLUSH_ALL)) to_flush |= VIVS_GL_FLUSH_CACHE_TEXTURE | VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH; if(to_flush) { etna_set_state(ctx, VIVS_GL_FLUSH_CACHE, to_flush); etna_stall(ctx, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE); } /* If MULTI_SAMPLE_CONFIG.MSAA_SAMPLES changed, clobber affected shader * state to make sure it is always rewritten. */ if(unlikely(dirty & (ETNA_STATE_FRAMEBUFFER))) { if((e->gpu3d.GL_MULTI_SAMPLE_CONFIG & VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES__MASK) != (e->framebuffer.GL_MULTI_SAMPLE_CONFIG & VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES__MASK)) { /* XXX what does the GPU set these states to on MSAA samples change? Does it do the right thing? * (increase/decrease as necessary) or something else? Just set some invalid value until we know for * sure. */ e->gpu3d.PS_INPUT_COUNT = 0xffffffff; e->gpu3d.PS_TEMP_REGISTER_CONTROL = 0xffffffff; } } /* * Cached state update emission. * The etna_3d_state structure e->gpu3d is used to keep the current context. * State is only emitted if the new value of the register is different from the cached value * in the context. Update the state afterwards. */ #define EMIT_STATE(state_name, dest_field, src_value) \ if(e->gpu3d.dest_field != (src_value)) { \ ETNA_COALESCE_STATE_UPDATE(state_name, src_value, 0) \ e->gpu3d.dest_field = (src_value); \ } #define EMIT_STATE_FIXP(state_name, dest_field, src_value) \ if(e->gpu3d.dest_field != (src_value)) { \ ETNA_COALESCE_STATE_UPDATE(state_name, src_value, 1) \ e->gpu3d.dest_field = (src_value); \ } /* Update vertex elements. This is different from any of the other states, in that * a) the number of vertex elements written matters: so write only active ones * b) the vertex element states must all be written: do not skip entries that stay the same */ if(dirty & (ETNA_STATE_VERTEX_ELEMENTS)) { if(e->gpu3d.num_vertex_elements != e->vertex_elements.num_elements || memcmp(e->gpu3d.FE_VERTEX_ELEMENT_CONFIG, e->vertex_elements.FE_VERTEX_ELEMENT_CONFIG, e->gpu3d.num_vertex_elements * 4)) { /* Special case: vertex elements must always be sent in full if changed */ /*00600*/ etna_set_state_multi(ctx, VIVS_FE_VERTEX_ELEMENT_CONFIG(0), e->vertex_elements.num_elements, e->vertex_elements.FE_VERTEX_ELEMENT_CONFIG); memcpy(e->gpu3d.FE_VERTEX_ELEMENT_CONFIG, e->vertex_elements.FE_VERTEX_ELEMENT_CONFIG, e->vertex_elements.num_elements * 4); e->gpu3d.num_vertex_elements = e->vertex_elements.num_elements; } } /* The following code is originally generated by gen_merge_state.py, to * emit state in increasing order of address (this makes it possible to merge * consecutive register updates into one SET_STATE command) * * There have been some manual changes, where the weaving operation is not * simply bitwise or: * - scissor fixp * - num vertex elements * - scissor handling * - num samplers * - texture lod * - ETNA_STATE_TS * - removed ETNA_STATE_BASE_SETUP statements -- these are guaranteed to not change anyway * - PS / framebuffer interaction for MSAA * - move update of GL_MULTI_SAMPLE_CONFIG first * - add unlikely()/likely() */ uint32_t last_reg, last_fixp, span_start; ETNA_COALESCE_STATE_OPEN(ETNA_3D_CONTEXT_SIZE); /* begin only EMIT_STATE -- make sure no new etna_reserve calls are done here directly * or indirectly */ /* multi sample config is set first, and outside of the normal sorting * order, as changing the multisample state clobbers PS.INPUT_COUNT (and * possibly PS.TEMP_REGISTER_CONTROL). */ if(unlikely(dirty & (ETNA_STATE_FRAMEBUFFER | ETNA_STATE_SAMPLE_MASK))) { /*03818*/ EMIT_STATE(GL_MULTI_SAMPLE_CONFIG, GL_MULTI_SAMPLE_CONFIG, e->sample_mask.GL_MULTI_SAMPLE_CONFIG | e->framebuffer.GL_MULTI_SAMPLE_CONFIG); } if(likely(dirty & (ETNA_STATE_INDEX_BUFFER))) { /*00644*/ EMIT_STATE(FE_INDEX_STREAM_BASE_ADDR, FE_INDEX_STREAM_BASE_ADDR, e->index_buffer.FE_INDEX_STREAM_BASE_ADDR); /*00648*/ EMIT_STATE(FE_INDEX_STREAM_CONTROL, FE_INDEX_STREAM_CONTROL, e->index_buffer.FE_INDEX_STREAM_CONTROL); } if(likely(dirty & (ETNA_STATE_VERTEX_BUFFERS))) { /*0064C*/ EMIT_STATE(FE_VERTEX_STREAM_BASE_ADDR, FE_VERTEX_STREAM_BASE_ADDR, e->vertex_buffer[0].FE_VERTEX_STREAM_BASE_ADDR); /*00650*/ EMIT_STATE(FE_VERTEX_STREAM_CONTROL, FE_VERTEX_STREAM_CONTROL, e->vertex_buffer[0].FE_VERTEX_STREAM_CONTROL); if (e->specs.has_shader_range_registers) { for(int x=0; x<8; ++x) { /*00680*/ EMIT_STATE(FE_VERTEX_STREAMS_BASE_ADDR(x), FE_VERTEX_STREAMS_BASE_ADDR[x], e->vertex_buffer[x].FE_VERTEX_STREAM_BASE_ADDR); } for(int x=0; x<8; ++x) { /*006A0*/ EMIT_STATE(FE_VERTEX_STREAMS_CONTROL(x), FE_VERTEX_STREAMS_CONTROL[x], e->vertex_buffer[x].FE_VERTEX_STREAM_CONTROL); } } } if(unlikely(dirty & (ETNA_STATE_SHADER))) { /*00800*/ EMIT_STATE(VS_END_PC, VS_END_PC, e->shader_state.VS_END_PC); } if(unlikely(dirty & (ETNA_STATE_SHADER | ETNA_STATE_RASTERIZER))) { /*00804*/ EMIT_STATE(VS_OUTPUT_COUNT, VS_OUTPUT_COUNT, e->rasterizer.point_size_per_vertex ? e->shader_state.VS_OUTPUT_COUNT_PSIZE : e->shader_state.VS_OUTPUT_COUNT); } if(unlikely(dirty & (ETNA_STATE_VERTEX_ELEMENTS | ETNA_STATE_SHADER))) { /*00808*/ EMIT_STATE(VS_INPUT_COUNT, VS_INPUT_COUNT, VIVS_VS_INPUT_COUNT_COUNT(e->vertex_elements.num_elements) | e->shader_state.VS_INPUT_COUNT); } if(unlikely(dirty & (ETNA_STATE_SHADER))) { /*0080C*/ EMIT_STATE(VS_TEMP_REGISTER_CONTROL, VS_TEMP_REGISTER_CONTROL, e->shader_state.VS_TEMP_REGISTER_CONTROL); for(int x=0; x<4; ++x) { /*00810*/ EMIT_STATE(VS_OUTPUT(x), VS_OUTPUT[x], e->shader_state.VS_OUTPUT[x]); } for(int x=0; x<4; ++x) { /*00820*/ EMIT_STATE(VS_INPUT(x), VS_INPUT[x], e->shader_state.VS_INPUT[x]); } /*00830*/ EMIT_STATE(VS_LOAD_BALANCING, VS_LOAD_BALANCING, e->shader_state.VS_LOAD_BALANCING); /*00838*/ EMIT_STATE(VS_START_PC, VS_START_PC, e->shader_state.VS_START_PC); if (e->specs.has_shader_range_registers) { /*0085C*/ EMIT_STATE(VS_RANGE, VS_RANGE, (e->shader_state.vs_inst_mem_size/4-1)<<16); } } if(unlikely(dirty & (ETNA_STATE_VIEWPORT))) { /*00A00*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_X, PA_VIEWPORT_SCALE_X, e->viewport.PA_VIEWPORT_SCALE_X); /*00A04*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_Y, PA_VIEWPORT_SCALE_Y, e->viewport.PA_VIEWPORT_SCALE_Y); /*00A08*/ EMIT_STATE(PA_VIEWPORT_SCALE_Z, PA_VIEWPORT_SCALE_Z, e->viewport.PA_VIEWPORT_SCALE_Z); /*00A0C*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_X, PA_VIEWPORT_OFFSET_X, e->viewport.PA_VIEWPORT_OFFSET_X); /*00A10*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_Y, PA_VIEWPORT_OFFSET_Y, e->viewport.PA_VIEWPORT_OFFSET_Y); /*00A14*/ EMIT_STATE(PA_VIEWPORT_OFFSET_Z, PA_VIEWPORT_OFFSET_Z, e->viewport.PA_VIEWPORT_OFFSET_Z); } if(unlikely(dirty & (ETNA_STATE_RASTERIZER))) { /*00A18*/ EMIT_STATE(PA_LINE_WIDTH, PA_LINE_WIDTH, e->rasterizer.PA_LINE_WIDTH); /*00A1C*/ EMIT_STATE(PA_POINT_SIZE, PA_POINT_SIZE, e->rasterizer.PA_POINT_SIZE); /*00A28*/ EMIT_STATE(PA_SYSTEM_MODE, PA_SYSTEM_MODE, e->rasterizer.PA_SYSTEM_MODE); } if(unlikely(dirty & (ETNA_STATE_SHADER))) { /*00A30*/ EMIT_STATE(PA_ATTRIBUTE_ELEMENT_COUNT, PA_ATTRIBUTE_ELEMENT_COUNT, e->shader_state.PA_ATTRIBUTE_ELEMENT_COUNT); } if(unlikely(dirty & (ETNA_STATE_RASTERIZER | ETNA_STATE_SHADER))) { /*00A34*/ EMIT_STATE(PA_CONFIG, PA_CONFIG, e->rasterizer.PA_CONFIG & e->shader_state.PA_CONFIG); } if(unlikely(dirty & (ETNA_STATE_SHADER))) { for(int x=0; x<10; ++x) { /*00A40*/ EMIT_STATE(PA_SHADER_ATTRIBUTES(x), PA_SHADER_ATTRIBUTES[x], e->shader_state.PA_SHADER_ATTRIBUTES[x]); } } if(unlikely(dirty & (ETNA_STATE_SCISSOR | ETNA_STATE_FRAMEBUFFER | ETNA_STATE_RASTERIZER | ETNA_STATE_VIEWPORT))) { /* this is a bit of a mess: rasterizer.scissor determines whether to use only the * framebuffer scissor, or specific scissor state, and the viewport clips too so the logic * spans four CSOs */ uint32_t scissor_left = MAX2(e->framebuffer.SE_SCISSOR_LEFT, e->viewport.SE_SCISSOR_LEFT); uint32_t scissor_top = MAX2(e->framebuffer.SE_SCISSOR_TOP, e->viewport.SE_SCISSOR_TOP); uint32_t scissor_right = MIN2(e->framebuffer.SE_SCISSOR_RIGHT, e->viewport.SE_SCISSOR_RIGHT); uint32_t scissor_bottom = MIN2(e->framebuffer.SE_SCISSOR_BOTTOM, e->viewport.SE_SCISSOR_BOTTOM); if(e->rasterizer.scissor) { scissor_left = MAX2(e->scissor.SE_SCISSOR_LEFT, scissor_left); scissor_top = MAX2(e->scissor.SE_SCISSOR_TOP, scissor_top); scissor_right = MIN2(e->scissor.SE_SCISSOR_RIGHT, scissor_right); scissor_bottom = MIN2(e->scissor.SE_SCISSOR_BOTTOM, scissor_bottom); } /*00C00*/ EMIT_STATE_FIXP(SE_SCISSOR_LEFT, SE_SCISSOR_LEFT, scissor_left); /*00C04*/ EMIT_STATE_FIXP(SE_SCISSOR_TOP, SE_SCISSOR_TOP, scissor_top); /*00C08*/ EMIT_STATE_FIXP(SE_SCISSOR_RIGHT, SE_SCISSOR_RIGHT, scissor_right); /*00C0C*/ EMIT_STATE_FIXP(SE_SCISSOR_BOTTOM, SE_SCISSOR_BOTTOM, scissor_bottom); } if(unlikely(dirty & (ETNA_STATE_RASTERIZER))) { /*00C10*/ EMIT_STATE(SE_DEPTH_SCALE, SE_DEPTH_SCALE, e->rasterizer.SE_DEPTH_SCALE); /*00C14*/ EMIT_STATE(SE_DEPTH_BIAS, SE_DEPTH_BIAS, e->rasterizer.SE_DEPTH_BIAS); /*00C18*/ EMIT_STATE(SE_CONFIG, SE_CONFIG, e->rasterizer.SE_CONFIG); } if(unlikely(dirty & (ETNA_STATE_SHADER))) { /*00E00*/ EMIT_STATE(RA_CONTROL, RA_CONTROL, e->shader_state.RA_CONTROL); } if(unlikely(dirty & (ETNA_STATE_FRAMEBUFFER))) { /*00E04*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E04, RA_MULTISAMPLE_UNK00E04, e->framebuffer.RA_MULTISAMPLE_UNK00E04); for(int x=0; x<4; ++x) { /*00E10*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E10(x), RA_MULTISAMPLE_UNK00E10[x], e->framebuffer.RA_MULTISAMPLE_UNK00E10[x]); } for(int x=0; x<16; ++x) { /*00E40*/ EMIT_STATE(RA_CENTROID_TABLE(x), RA_CENTROID_TABLE[x], e->framebuffer.RA_CENTROID_TABLE[x]); } } if(unlikely(dirty & (ETNA_STATE_SHADER | ETNA_STATE_FRAMEBUFFER))) { /*01000*/ EMIT_STATE(PS_END_PC, PS_END_PC, e->shader_state.PS_END_PC); /*01004*/ EMIT_STATE(PS_OUTPUT_REG, PS_OUTPUT_REG, e->shader_state.PS_OUTPUT_REG); /*01008*/ EMIT_STATE(PS_INPUT_COUNT, PS_INPUT_COUNT, e->framebuffer.msaa_mode ? e->shader_state.PS_INPUT_COUNT_MSAA : e->shader_state.PS_INPUT_COUNT); /*0100C*/ EMIT_STATE(PS_TEMP_REGISTER_CONTROL, PS_TEMP_REGISTER_CONTROL, e->framebuffer.msaa_mode ? e->shader_state.PS_TEMP_REGISTER_CONTROL_MSAA : e->shader_state.PS_TEMP_REGISTER_CONTROL); /*01010*/ EMIT_STATE(PS_CONTROL, PS_CONTROL, e->shader_state.PS_CONTROL); /*01018*/ EMIT_STATE(PS_START_PC, PS_START_PC, e->shader_state.PS_START_PC); if (e->specs.has_shader_range_registers) { /*0101C*/ EMIT_STATE(PS_RANGE, PS_RANGE, ((e->shader_state.ps_inst_mem_size/4-1+0x100)<<16) | 0x100); } } if(unlikely(dirty & (ETNA_STATE_DSA | ETNA_STATE_FRAMEBUFFER))) { /*01400*/ EMIT_STATE(PE_DEPTH_CONFIG, PE_DEPTH_CONFIG, e->depth_stencil_alpha.PE_DEPTH_CONFIG | e->framebuffer.PE_DEPTH_CONFIG); } if(unlikely(dirty & (ETNA_STATE_VIEWPORT))) { /*01404*/ EMIT_STATE(PE_DEPTH_NEAR, PE_DEPTH_NEAR, e->viewport.PE_DEPTH_NEAR); /*01408*/ EMIT_STATE(PE_DEPTH_FAR, PE_DEPTH_FAR, e->viewport.PE_DEPTH_FAR); } if(unlikely(dirty & (ETNA_STATE_FRAMEBUFFER))) { /*0140C*/ EMIT_STATE(PE_DEPTH_NORMALIZE, PE_DEPTH_NORMALIZE, e->framebuffer.PE_DEPTH_NORMALIZE); if (ctx->conn->chip.pixel_pipes == 1) { /*01410*/ EMIT_STATE(PE_DEPTH_ADDR, PE_DEPTH_ADDR, e->framebuffer.PE_DEPTH_ADDR); } /*01414*/ EMIT_STATE(PE_DEPTH_STRIDE, PE_DEPTH_STRIDE, e->framebuffer.PE_DEPTH_STRIDE); } if(unlikely(dirty & (ETNA_STATE_DSA))) { /*01418*/ EMIT_STATE(PE_STENCIL_OP, PE_STENCIL_OP, e->depth_stencil_alpha.PE_STENCIL_OP); } if(unlikely(dirty & (ETNA_STATE_DSA | ETNA_STATE_STENCIL_REF))) { /*0141C*/ EMIT_STATE(PE_STENCIL_CONFIG, PE_STENCIL_CONFIG, e->depth_stencil_alpha.PE_STENCIL_CONFIG | e->stencil_ref.PE_STENCIL_CONFIG); } if(unlikely(dirty & (ETNA_STATE_DSA))) { /*01420*/ EMIT_STATE(PE_ALPHA_OP, PE_ALPHA_OP, e->depth_stencil_alpha.PE_ALPHA_OP); } if(unlikely(dirty & (ETNA_STATE_BLEND_COLOR))) { /*01424*/ EMIT_STATE(PE_ALPHA_BLEND_COLOR, PE_ALPHA_BLEND_COLOR, e->blend_color.PE_ALPHA_BLEND_COLOR); } if(unlikely(dirty & (ETNA_STATE_BLEND))) { /*01428*/ EMIT_STATE(PE_ALPHA_CONFIG, PE_ALPHA_CONFIG, e->blend.PE_ALPHA_CONFIG); } if(unlikely(dirty & (ETNA_STATE_BLEND | ETNA_STATE_FRAMEBUFFER))) { /*0142C*/ EMIT_STATE(PE_COLOR_FORMAT, PE_COLOR_FORMAT, e->blend.PE_COLOR_FORMAT | e->framebuffer.PE_COLOR_FORMAT); } if(unlikely(dirty & (ETNA_STATE_FRAMEBUFFER))) { if (ctx->conn->chip.pixel_pipes == 1) { /*01430*/ EMIT_STATE(PE_COLOR_ADDR, PE_COLOR_ADDR, e->framebuffer.PE_COLOR_ADDR); /*01434*/ EMIT_STATE(PE_COLOR_STRIDE, PE_COLOR_STRIDE, e->framebuffer.PE_COLOR_STRIDE); /*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL, PE_HDEPTH_CONTROL, e->framebuffer.PE_HDEPTH_CONTROL); } else if (ctx->conn->chip.pixel_pipes == 2) { /*01434*/ EMIT_STATE(PE_COLOR_STRIDE, PE_COLOR_STRIDE, e->framebuffer.PE_COLOR_STRIDE); /*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL, PE_HDEPTH_CONTROL, e->framebuffer.PE_HDEPTH_CONTROL); /*01460*/ EMIT_STATE(PE_PIPE_COLOR_ADDR(0), PE_PIPE_COLOR_ADDR[0], e->framebuffer.PE_PIPE_COLOR_ADDR[0]); /*01464*/ EMIT_STATE(PE_PIPE_COLOR_ADDR(1), PE_PIPE_COLOR_ADDR[1], e->framebuffer.PE_PIPE_COLOR_ADDR[1]); /*01480*/ EMIT_STATE(PE_PIPE_DEPTH_ADDR(0), PE_PIPE_DEPTH_ADDR[0], e->framebuffer.PE_PIPE_DEPTH_ADDR[0]); /*01484*/ EMIT_STATE(PE_PIPE_DEPTH_ADDR(1), PE_PIPE_DEPTH_ADDR[1], e->framebuffer.PE_PIPE_DEPTH_ADDR[1]); } } if(unlikely(dirty & (ETNA_STATE_STENCIL_REF))) { /*014A0*/ EMIT_STATE(PE_STENCIL_CONFIG_EXT, PE_STENCIL_CONFIG_EXT, e->stencil_ref.PE_STENCIL_CONFIG_EXT); } if(unlikely(dirty & (ETNA_STATE_BLEND))) { /*014A4*/ EMIT_STATE(PE_LOGIC_OP, PE_LOGIC_OP, e->blend.PE_LOGIC_OP); for(int x=0; x<2; ++x) { /*014A8*/ EMIT_STATE(PE_DITHER(x), PE_DITHER[x], e->blend.PE_DITHER[x]); } } if(unlikely(dirty & (ETNA_STATE_FRAMEBUFFER | ETNA_STATE_TS))) { /*01654*/ EMIT_STATE(TS_MEM_CONFIG, TS_MEM_CONFIG, e->framebuffer.TS_MEM_CONFIG); /*01658*/ EMIT_STATE(TS_COLOR_STATUS_BASE, TS_COLOR_STATUS_BASE, e->framebuffer.TS_COLOR_STATUS_BASE); /*0165C*/ EMIT_STATE(TS_COLOR_SURFACE_BASE, TS_COLOR_SURFACE_BASE, e->framebuffer.TS_COLOR_SURFACE_BASE); /*01660*/ EMIT_STATE(TS_COLOR_CLEAR_VALUE, TS_COLOR_CLEAR_VALUE, e->framebuffer.TS_COLOR_CLEAR_VALUE); /*01664*/ EMIT_STATE(TS_DEPTH_STATUS_BASE, TS_DEPTH_STATUS_BASE, e->framebuffer.TS_DEPTH_STATUS_BASE); /*01668*/ EMIT_STATE(TS_DEPTH_SURFACE_BASE, TS_DEPTH_SURFACE_BASE, e->framebuffer.TS_DEPTH_SURFACE_BASE); /*0166C*/ EMIT_STATE(TS_DEPTH_CLEAR_VALUE, TS_DEPTH_CLEAR_VALUE, e->framebuffer.TS_DEPTH_CLEAR_VALUE); } if(unlikely(dirty & (ETNA_STATE_SAMPLER_VIEWS | ETNA_STATE_SAMPLERS))) { for(int x=0; xsampler[x].TE_SAMPLER_CONFIG0 & e->sampler_view[x].TE_SAMPLER_CONFIG0_MASK) | e->sampler_view[x].TE_SAMPLER_CONFIG0):0); } } if(unlikely(dirty & (ETNA_STATE_SAMPLER_VIEWS))) { for(int x=0; xsampler_view[x].TE_SAMPLER_SIZE); } } for(int x=0; xsampler_view[x].TE_SAMPLER_LOG_SIZE); } } } if(unlikely(dirty & (ETNA_STATE_SAMPLER_VIEWS | ETNA_STATE_SAMPLERS))) { for(int x=0; xsampler[x].TE_SAMPLER_LOD_CONFIG | VIVS_TE_SAMPLER_LOD_CONFIG_MAX(MIN2(e->sampler[x].max_lod, e->sampler_view[x].max_lod)) | VIVS_TE_SAMPLER_LOD_CONFIG_MIN(MAX2(e->sampler[x].min_lod, e->sampler_view[x].min_lod))); } } for(int x=0; xsampler[x].TE_SAMPLER_CONFIG1 | e->sampler_view[x].TE_SAMPLER_CONFIG1); } } } if(unlikely(dirty & (ETNA_STATE_SAMPLER_VIEWS))) { for(int y=0; ysampler_view[x].TE_SAMPLER_LOD_ADDR[y]); } } } } if(unlikely(dirty & (ETNA_STATE_SHADER))) { /*0381C*/ EMIT_STATE(GL_VARYING_TOTAL_COMPONENTS, GL_VARYING_TOTAL_COMPONENTS, e->shader_state.GL_VARYING_TOTAL_COMPONENTS); /*03820*/ EMIT_STATE(GL_VARYING_NUM_COMPONENTS, GL_VARYING_NUM_COMPONENTS, e->shader_state.GL_VARYING_NUM_COMPONENTS); for(int x=0; x<2; ++x) { /*03828*/ EMIT_STATE(GL_VARYING_COMPONENT_USE(x), GL_VARYING_COMPONENT_USE[x], e->shader_state.GL_VARYING_COMPONENT_USE[x]); } } ETNA_COALESCE_STATE_CLOSE(); /* end only EMIT_STATE */ /**** Large dynamically-sized state ****/ if(dirty & (ETNA_STATE_SHADER)) { /* Special case: a new shader was loaded; simply re-load all uniforms and shader code at once */ /*04000 or 0C000*/ etna_set_state_multi(ctx, e->specs.vs_offset, e->shader_state.vs_inst_mem_size, e->shader_state.VS_INST_MEM); /*06000 or 0D000*/ etna_set_state_multi(ctx, e->specs.ps_offset, e->shader_state.ps_inst_mem_size, e->shader_state.PS_INST_MEM); /*05000*/ etna_set_state_multi(ctx, VIVS_VS_UNIFORMS(0), e->shader_state.vs_uniforms_size, e->shader_state.VS_UNIFORMS); /*07000*/ etna_set_state_multi(ctx, VIVS_PS_UNIFORMS(0), e->shader_state.ps_uniforms_size, e->shader_state.PS_UNIFORMS); /* Copy uniforms to gpu3d, so that incremental updates to uniforms are possible as long as the * same shader remains bound */ e->gpu3d.vs_uniforms_size = e->shader_state.vs_uniforms_size; e->gpu3d.ps_uniforms_size = e->shader_state.ps_uniforms_size; e->gpu3d.vs_inst_mem_size = e->shader_state.vs_inst_mem_size; e->gpu3d.ps_inst_mem_size = e->shader_state.ps_inst_mem_size; memcpy(e->gpu3d.VS_UNIFORMS, e->shader_state.VS_UNIFORMS, e->shader_state.vs_uniforms_size * 4); memcpy(e->gpu3d.PS_UNIFORMS, e->shader_state.PS_UNIFORMS, e->shader_state.ps_uniforms_size * 4); memcpy(e->gpu3d.VS_INST_MEM, e->shader_state.VS_INST_MEM, e->shader_state.vs_inst_mem_size * 4); memcpy(e->gpu3d.PS_INST_MEM, e->shader_state.PS_INST_MEM, e->shader_state.ps_inst_mem_size * 4); } else { /* If new uniforms loaded with current shader, only submit what changed */ if(dirty & (ETNA_STATE_VS_UNIFORMS)) { ETNA_COALESCE_STATE_OPEN(e->shader_state.vs_uniforms_size); /* worst case */ for(int x=0; xshader_state.vs_uniforms_size; ++x) { /*05000*/ EMIT_STATE(VS_UNIFORMS(x), VS_UNIFORMS[x], e->shader_state.VS_UNIFORMS[x]); } ETNA_COALESCE_STATE_CLOSE(); } if(dirty & (ETNA_STATE_PS_UNIFORMS)) { ETNA_COALESCE_STATE_OPEN(e->shader_state.ps_uniforms_size); /* worst case */ for(int x=0; xshader_state.ps_uniforms_size; ++x) { /*07000*/ EMIT_STATE(PS_UNIFORMS(x), PS_UNIFORMS[x], e->shader_state.PS_UNIFORMS[x]); } ETNA_COALESCE_STATE_CLOSE(); } } /**** End of state update ****/ #undef EMIT_STATE #undef EMIT_STATE_FIXP e->dirty_bits = 0; } /** Build new explicit context for etna. This is a command buffer that contains * all commands needed to set up the GPU to current state, to be used after a context * switch (when multiple processes are using the GPU at once). * * This function is called as callback by etna_flush for kernel drivers * that require an explicit context) */ static int update_context(void *pipe, struct etna_ctx *ctx, enum etna_pipe *initial_pipe, enum etna_pipe *final_pipe) { reset_context((struct pipe_context*) pipe); *initial_pipe = ETNA_PIPE_3D; *final_pipe = ETNA_PIPE_3D; return ETNA_OK; } /*********************************************************************/ /** Destroy etna pipe. After calling this the pipe object must never be * used again. */ static void etna_pipe_destroy(struct pipe_context *pipe) { struct etna_pipe_context *priv = etna_pipe_context(pipe); etna_pipe_clear_blit_destroy(pipe); etna_pipe_transfer_destroy(pipe); etna_free(priv->ctx); FREE(pipe); } /** Main draw function. Draw primitives from a vertex buffer object, * using optonally an index buffer. */ static void etna_pipe_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) { struct etna_pipe_context *priv = etna_pipe_context(pipe); if(priv->vertex_elements_p == NULL || priv->vertex_elements.num_elements == 0) return; /* Nothing to do */ int prims = u_decomposed_prims_for_vertices(info->mode, info->count); if(unlikely(prims <= 0)) { DBG("Invalid draw primitive mode=%i or no primitives to be drawn", info->mode); return; } /* First, sync state, then emit DRAW_PRIMITIVES or DRAW_INDEXED_PRIMITIVES */ sync_context(pipe); if(priv->vs && priv->vertex_elements.num_elements != priv->vs->num_inputs) { BUG("Number of elements %i does not match the number of VS inputs %i", priv->vertex_elements.num_elements, priv->vs->num_inputs); return; } if(info->indexed) { etna_draw_indexed_primitives(priv->ctx, translate_draw_mode(info->mode), info->start, prims, info->index_bias); } else { etna_draw_primitives(priv->ctx, translate_draw_mode(info->mode), info->start, prims); } if(DBG_ENABLED(ETNA_DBG_FLUSH_ALL)) { pipe->flush(pipe, NULL, 0); } } /** Create vertex element states, which define a layout for fetching * vertices for rendering. */ static void *etna_pipe_create_vertex_elements_state(struct pipe_context *pipe, unsigned num_elements, const struct pipe_vertex_element *elements) { struct etna_pipe_context *priv = etna_pipe_context(pipe); struct compiled_vertex_elements_state *cs = CALLOC_STRUCT(compiled_vertex_elements_state); /* XXX could minimize number of consecutive stretches here by sorting, and * permuting the inputs in shader or does Mesa do this already? */ /* Check that vertex element binding is compatible with hardware; thus * elements[idx].vertex_buffer_index are < stream_count. If not, the binding * uses more streams than is supported, and u_vbuf should have done some reorganization * for compatibility. */ bool incompatible = false; for(unsigned idx=0; idx= priv->specs.stream_count || elements[idx].instance_divisor > 0) incompatible = true; } cs->num_elements = num_elements; if(incompatible || num_elements == 0) { DBG("Error: zero vertex elements, or more vertex buffers used than supported"); FREE(cs); return NULL; } unsigned start_offset = 0; /* start of current consecutive stretch */ bool nonconsecutive = true; /* previous value of nonconsecutive */ for(unsigned idx=0; idxFE_VERTEX_ELEMENT_CONFIG[idx] = (nonconsecutive ? VIVS_FE_VERTEX_ELEMENT_CONFIG_NONCONSECUTIVE : 0) | translate_vertex_format_type(elements[idx].src_format, false) | VIVS_FE_VERTEX_ELEMENT_CONFIG_NUM(util_format_get_nr_components(elements[idx].src_format)) | translate_vertex_format_normalize(elements[idx].src_format) | VIVS_FE_VERTEX_ELEMENT_CONFIG_ENDIAN(ENDIAN_MODE_NO_SWAP) | VIVS_FE_VERTEX_ELEMENT_CONFIG_STREAM(elements[idx].vertex_buffer_index) | VIVS_FE_VERTEX_ELEMENT_CONFIG_START(elements[idx].src_offset) | VIVS_FE_VERTEX_ELEMENT_CONFIG_END(end_offset - start_offset); } return cs; } static void etna_pipe_bind_vertex_elements_state(struct pipe_context *pipe, void *ve) { struct etna_pipe_context *priv = etna_pipe_context(pipe); priv->dirty_bits |= ETNA_STATE_VERTEX_ELEMENTS; priv->vertex_elements_p = ve; if(ve) priv->vertex_elements = *(struct compiled_vertex_elements_state*)ve; } static void etna_pipe_delete_vertex_elements_state(struct pipe_context *pipe, void *ve) { struct compiled_vertex_elements_state *cs = (struct compiled_vertex_elements_state*)ve; //struct etna_pipe_context *priv = etna_pipe_context(pipe); FREE(cs); } static void etna_pipe_set_blend_color(struct pipe_context *pipe, const struct pipe_blend_color *bc) { struct etna_pipe_context *priv = etna_pipe_context(pipe); struct compiled_blend_color *cs = &priv->blend_color; cs->PE_ALPHA_BLEND_COLOR = VIVS_PE_ALPHA_BLEND_COLOR_R(etna_cfloat_to_uint8(bc->color[0])) | VIVS_PE_ALPHA_BLEND_COLOR_G(etna_cfloat_to_uint8(bc->color[1])) | VIVS_PE_ALPHA_BLEND_COLOR_B(etna_cfloat_to_uint8(bc->color[2])) | VIVS_PE_ALPHA_BLEND_COLOR_A(etna_cfloat_to_uint8(bc->color[3])); priv->dirty_bits |= ETNA_STATE_BLEND_COLOR; } static void etna_pipe_set_stencil_ref(struct pipe_context *pipe, const struct pipe_stencil_ref *sr) { struct etna_pipe_context *priv = etna_pipe_context(pipe); struct compiled_stencil_ref *cs = &priv->stencil_ref; priv->stencil_ref_s = *sr; cs->PE_STENCIL_CONFIG = VIVS_PE_STENCIL_CONFIG_REF_FRONT(sr->ref_value[0]); /* rest of bits weaved in from depth_stencil_alpha */ cs->PE_STENCIL_CONFIG_EXT = VIVS_PE_STENCIL_CONFIG_EXT_REF_BACK(sr->ref_value[0]); priv->dirty_bits |= ETNA_STATE_STENCIL_REF; } static void etna_pipe_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask) { struct etna_pipe_context *priv = etna_pipe_context(pipe); struct compiled_sample_mask *cs = &priv->sample_mask; priv->sample_mask_s = sample_mask; cs->GL_MULTI_SAMPLE_CONFIG = /* to be merged with render target state */ VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_ENABLES(sample_mask); priv->dirty_bits |= ETNA_STATE_SAMPLE_MASK; } static void etna_pipe_set_framebuffer_state(struct pipe_context *pipe, const struct pipe_framebuffer_state *sv) { struct etna_pipe_context *priv = etna_pipe_context(pipe); struct compiled_framebuffer_state *cs = &priv->framebuffer; int nr_samples_color = -1; int nr_samples_depth = -1; /* Set up TS as well. Warning: this state is used by both the RS and PE */ uint32_t ts_mem_config = 0; if(sv->nr_cbufs > 0) /* at least one color buffer? */ { struct etna_surface *cbuf = etna_surface(sv->cbufs[0]); bool color_supertiled = (cbuf->layout & 2)!=0; assert(cbuf->layout & 1); /* Cannot render to linear surfaces */ pipe_surface_reference(&cs->cbuf, &cbuf->base); cs->PE_COLOR_FORMAT = VIVS_PE_COLOR_FORMAT_FORMAT(translate_rt_format(cbuf->base.format, false)) | (color_supertiled ? VIVS_PE_COLOR_FORMAT_SUPER_TILED : 0); /* XXX VIVS_PE_COLOR_FORMAT_OVERWRITE and the rest comes from blend_state / depth_stencil_alpha */ /* merged with depth_stencil_alpha */ if((cbuf->surf.offset & 63) || (((cbuf->surf.stride*4) & 63) && cbuf->surf.height > 4)) { /* XXX Must make temporary surface here. * Need the same mechanism on gc2000 when we want to do mipmap generation by * rendering to levels > 1 due to multitiled / tiled conversion. */ BUG("Alignment error, trying to render to offset %08x with tile stride %i", cbuf->surf.offset, cbuf->surf.stride*4); } struct etna_resource *res = etna_resource(cbuf->base.texture); struct etna_bo *bo = res->bo; if (priv->ctx->conn->chip.pixel_pipes == 1) { cs->PE_COLOR_ADDR = etna_bo_gpu_address(bo) + cbuf->surf.offset; } else if (priv->ctx->conn->chip.pixel_pipes == 2) { cs->PE_PIPE_COLOR_ADDR[0] = res->pipe_addr[0]; cs->PE_PIPE_COLOR_ADDR[1] = res->pipe_addr[1]; } cs->PE_COLOR_STRIDE = cbuf->surf.stride; if(cbuf->surf.ts_size) { struct etna_bo *ts_bo = etna_resource(cbuf->base.texture)->ts_bo; ts_mem_config |= VIVS_TS_MEM_CONFIG_COLOR_FAST_CLEAR; cs->TS_COLOR_CLEAR_VALUE = cbuf->level->clear_value; cs->TS_COLOR_STATUS_BASE = etna_bo_gpu_address(ts_bo) + cbuf->surf.ts_offset; cs->TS_COLOR_SURFACE_BASE = etna_bo_gpu_address(bo) + cbuf->surf.offset; } /* MSAA */ if(cbuf->base.texture->nr_samples > 1) ts_mem_config |= VIVS_TS_MEM_CONFIG_MSAA | translate_msaa_format(cbuf->base.format, false); nr_samples_color = cbuf->base.texture->nr_samples; } else { pipe_surface_reference(&cs->cbuf, NULL); cs->PE_COLOR_FORMAT = 0; /* Is this enough to render without color? */ } if(sv->zsbuf != NULL) { struct etna_surface *zsbuf = etna_surface(sv->zsbuf); pipe_surface_reference(&cs->zsbuf, &zsbuf->base); assert(zsbuf->layout & 1); /* Cannot render to linear surfaces */ uint32_t depth_format = translate_depth_format(zsbuf->base.format, false); unsigned depth_bits = depth_format == VIVS_PE_DEPTH_CONFIG_DEPTH_FORMAT_D16 ? 16 : 24; bool depth_supertiled = (zsbuf->layout & 2)!=0; cs->PE_DEPTH_CONFIG = depth_format | (depth_supertiled ? VIVS_PE_DEPTH_CONFIG_SUPER_TILED : 0) | VIVS_PE_DEPTH_CONFIG_DEPTH_MODE_Z; /* VIVS_PE_DEPTH_CONFIG_ONLY_DEPTH */ /* merged with depth_stencil_alpha */ struct etna_resource *res = etna_resource(zsbuf->base.texture); struct etna_bo *bo = res->bo; if (priv->ctx->conn->chip.pixel_pipes == 1) { cs->PE_DEPTH_ADDR = etna_bo_gpu_address(bo) + zsbuf->surf.offset; } else if (priv->ctx->conn->chip.pixel_pipes == 2) { cs->PE_PIPE_DEPTH_ADDR[0] = res->pipe_addr[0]; cs->PE_PIPE_DEPTH_ADDR[1] = res->pipe_addr[1]; } cs->PE_DEPTH_STRIDE = zsbuf->surf.stride; cs->PE_HDEPTH_CONTROL = VIVS_PE_HDEPTH_CONTROL_FORMAT_DISABLED; cs->PE_DEPTH_NORMALIZE = etna_f32_to_u32(exp2f(depth_bits) - 1.0f); if(zsbuf->surf.ts_size) { struct etna_bo *ts_bo = etna_resource(zsbuf->base.texture)->ts_bo; ts_mem_config |= VIVS_TS_MEM_CONFIG_DEPTH_FAST_CLEAR; cs->TS_DEPTH_CLEAR_VALUE = zsbuf->level->clear_value; cs->TS_DEPTH_STATUS_BASE = etna_bo_gpu_address(ts_bo) + zsbuf->surf.ts_offset; cs->TS_DEPTH_SURFACE_BASE = etna_bo_gpu_address(bo) + zsbuf->surf.offset; } ts_mem_config |= (depth_bits == 16 ? VIVS_TS_MEM_CONFIG_DEPTH_16BPP : 0); /* MSAA */ if(zsbuf->base.texture->nr_samples > 1) /* XXX VIVS_TS_MEM_CONFIG_DEPTH_COMPRESSION; * Disable without MSAA for now, as it causes corruption in glquake. */ ts_mem_config |= VIVS_TS_MEM_CONFIG_DEPTH_COMPRESSION; nr_samples_depth = zsbuf->base.texture->nr_samples; } else { pipe_surface_reference(&cs->zsbuf, NULL); cs->PE_DEPTH_CONFIG = VIVS_PE_DEPTH_CONFIG_DEPTH_MODE_NONE; } /* MSAA setup */ if(nr_samples_depth != -1 && nr_samples_color != -1 && nr_samples_depth != nr_samples_color) { BUG("Number of samples in color and depth texture must match (%i and %i respectively)", nr_samples_color, nr_samples_depth); } switch(MAX2(nr_samples_depth, nr_samples_color)) { case 0: case 1: /* Are 0 and 1 samples allowed? */ cs->GL_MULTI_SAMPLE_CONFIG = VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES_NONE; cs->msaa_mode = false; break; case 2: cs->GL_MULTI_SAMPLE_CONFIG = VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES_2X; cs->msaa_mode = true; /* Add input to PS */ cs->RA_MULTISAMPLE_UNK00E04 = 0x0; cs->RA_MULTISAMPLE_UNK00E10[0] = 0x0000aa22; cs->RA_CENTROID_TABLE[0] = 0x66aa2288; cs->RA_CENTROID_TABLE[1] = 0x88558800; cs->RA_CENTROID_TABLE[2] = 0x88881100; cs->RA_CENTROID_TABLE[3] = 0x33888800; break; case 4: cs->GL_MULTI_SAMPLE_CONFIG = VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_SAMPLES_4X; cs->msaa_mode = true; /* Add input to PS */ cs->RA_MULTISAMPLE_UNK00E04 = 0x0; cs->RA_MULTISAMPLE_UNK00E10[0] = 0xeaa26e26; cs->RA_MULTISAMPLE_UNK00E10[1] = 0xe6ae622a; cs->RA_MULTISAMPLE_UNK00E10[2] = 0xaaa22a22; cs->RA_CENTROID_TABLE[0] = 0x4a6e2688; cs->RA_CENTROID_TABLE[1] = 0x888888a2; cs->RA_CENTROID_TABLE[2] = 0x888888ea; cs->RA_CENTROID_TABLE[3] = 0x888888c6; cs->RA_CENTROID_TABLE[4] = 0x46622a88; cs->RA_CENTROID_TABLE[5] = 0x888888ae; cs->RA_CENTROID_TABLE[6] = 0x888888e6; cs->RA_CENTROID_TABLE[7] = 0x888888ca; cs->RA_CENTROID_TABLE[8] = 0x262a2288; cs->RA_CENTROID_TABLE[9] = 0x886688a2; cs->RA_CENTROID_TABLE[10] = 0x888866aa; cs->RA_CENTROID_TABLE[11] = 0x668888a6; break; } /* Scissor setup */ cs->SE_SCISSOR_LEFT = 0; /* affected by rasterizer and scissor state as well */ cs->SE_SCISSOR_TOP = 0; cs->SE_SCISSOR_RIGHT = (sv->width << 16)-1; cs->SE_SCISSOR_BOTTOM = (sv->height << 16)-1; cs->TS_MEM_CONFIG = ts_mem_config; priv->dirty_bits |= ETNA_STATE_FRAMEBUFFER; priv->framebuffer_s = *sv; /* keep copy of original structure */ } static void etna_pipe_set_scissor_states( struct pipe_context *pipe, unsigned start_slot, unsigned num_scissors, const struct pipe_scissor_state *ss) { struct etna_pipe_context *priv = etna_pipe_context(pipe); struct compiled_scissor_state *cs = &priv->scissor; priv->scissor_s = *ss; cs->SE_SCISSOR_LEFT = (ss->minx << 16); cs->SE_SCISSOR_TOP = (ss->miny << 16); cs->SE_SCISSOR_RIGHT = (ss->maxx << 16)-1; cs->SE_SCISSOR_BOTTOM = (ss->maxy << 16)-1; /* note that this state is only used when rasterizer_state->scissor is on */ priv->dirty_bits |= ETNA_STATE_SCISSOR; } static void etna_pipe_set_viewport_states( struct pipe_context *pipe, unsigned start_slot, unsigned num_scissors, const struct pipe_viewport_state *vs) { struct etna_pipe_context *priv = etna_pipe_context(pipe); struct compiled_viewport_state *cs = &priv->viewport; priv->viewport_s = *vs; /** * For Vivante GPU, viewport z transformation is 0..1 to 0..1 instead of -1..1 to 0..1. * scaling and translation to 0..1 already happened, so remove that * * z' = (z * 2 - 1) * scale + translate * = z * (2 * scale) + (translate - scale) * * scale' = 2 * scale * translate' = translate - scale */ cs->PA_VIEWPORT_SCALE_X = etna_f32_to_fixp16(vs->scale[0]); /* must be fixp as v4 state deltas assume it is */ cs->PA_VIEWPORT_SCALE_Y = etna_f32_to_fixp16(vs->scale[1]); cs->PA_VIEWPORT_SCALE_Z = etna_f32_to_u32(vs->scale[2] * 2.0f); cs->PA_VIEWPORT_OFFSET_X = etna_f32_to_fixp16(vs->translate[0]); cs->PA_VIEWPORT_OFFSET_Y = etna_f32_to_fixp16(vs->translate[1]); cs->PA_VIEWPORT_OFFSET_Z = etna_f32_to_u32(vs->translate[2] - vs->scale[2]); /* Compute scissor rectangle (fixp) from viewport. * Make sure left is always < right and top always < bottom. */ cs->SE_SCISSOR_LEFT = etna_f32_to_fixp16(MAX2(vs->translate[0] - vs->scale[0], 0.0f)); cs->SE_SCISSOR_TOP = etna_f32_to_fixp16(MAX2(vs->translate[1] - vs->scale[1], 0.0f)); cs->SE_SCISSOR_RIGHT = etna_f32_to_fixp16(MAX2(vs->translate[0] + vs->scale[0], 0.0f)); cs->SE_SCISSOR_BOTTOM = etna_f32_to_fixp16(MAX2(vs->translate[1] + vs->scale[1], 0.0f)); if(cs->SE_SCISSOR_LEFT > cs->SE_SCISSOR_RIGHT) { uint32_t tmp = cs->SE_SCISSOR_RIGHT; cs->SE_SCISSOR_RIGHT = cs->SE_SCISSOR_LEFT; cs->SE_SCISSOR_LEFT = tmp; } if(cs->SE_SCISSOR_TOP > cs->SE_SCISSOR_BOTTOM) { uint32_t tmp = cs->SE_SCISSOR_BOTTOM; cs->SE_SCISSOR_BOTTOM = cs->SE_SCISSOR_TOP; cs->SE_SCISSOR_TOP = tmp; } cs->PE_DEPTH_NEAR = etna_f32_to_u32(0.0); /* not affected if depth mode is Z (as in GL) */ cs->PE_DEPTH_FAR = etna_f32_to_u32(1.0); priv->dirty_bits |= ETNA_STATE_VIEWPORT; } static void etna_pipe_set_vertex_buffers( struct pipe_context *pipe, unsigned start_slot, unsigned num_buffers, const struct pipe_vertex_buffer *vb) { struct etna_pipe_context *priv = etna_pipe_context(pipe); assert((start_slot + num_buffers) <= PIPE_MAX_ATTRIBS); struct pipe_vertex_buffer zero_vb = {}; for(unsigned idx=0; idx...[slot] */ const struct pipe_vertex_buffer *vbi = vb ? &vb[idx] : &zero_vb; struct compiled_set_vertex_buffer *cs = &priv->vertex_buffer[slot]; assert(!vbi->user_buffer); /* XXX support user_buffer using etna_usermem_map */ /* copy pipe_vertex_buffer structure and take reference */ priv->vertex_buffer_s[slot].stride = vbi->stride; priv->vertex_buffer_s[slot].buffer_offset = vbi->buffer_offset; pipe_resource_reference(&priv->vertex_buffer_s[slot].buffer, vbi->buffer); priv->vertex_buffer_s[slot].user_buffer = vbi->user_buffer; /* determine addresses */ viv_addr_t gpu_addr = 0; if(vbi->buffer) /* GPU buffer */ { struct etna_bo *bo = etna_resource(vbi->buffer)->bo; gpu_addr = etna_bo_gpu_address(bo) + vbi->buffer_offset; } /* compiled state */ cs->FE_VERTEX_STREAM_CONTROL = FE_VERTEX_STREAM_CONTROL_VERTEX_STRIDE(vbi->stride); cs->FE_VERTEX_STREAM_BASE_ADDR = gpu_addr; etna_resource_touch(pipe, vbi->buffer); } priv->dirty_bits |= ETNA_STATE_VERTEX_BUFFERS; } static void etna_pipe_set_index_buffer( struct pipe_context *pipe, const struct pipe_index_buffer *ib) { struct etna_pipe_context *priv = etna_pipe_context(pipe); struct compiled_set_index_buffer *cs = &priv->index_buffer; if(ib == NULL) { pipe_resource_reference(&priv->index_buffer_s.buffer, NULL); /* update reference to buffer */ cs->FE_INDEX_STREAM_CONTROL = 0; cs->FE_INDEX_STREAM_BASE_ADDR = 0; } else { assert(ib->buffer); /* XXX user_buffer using etna_usermem_map */ pipe_resource_reference(&priv->index_buffer_s.buffer, ib->buffer); /* update reference to buffer */ priv->index_buffer_s.index_size = ib->index_size; priv->index_buffer_s.offset = ib->offset; priv->index_buffer_s.user_buffer = ib->user_buffer; struct etna_bo *bo = etna_resource(ib->buffer)->bo; cs->FE_INDEX_STREAM_CONTROL = translate_index_size(ib->index_size); cs->FE_INDEX_STREAM_BASE_ADDR = etna_bo_gpu_address(bo) + ib->offset; etna_resource_touch(pipe, ib->buffer); } priv->dirty_bits |= ETNA_STATE_INDEX_BUFFER; } static void etna_pipe_flush(struct pipe_context *pipe, struct pipe_fence_handle **fence_out, enum pipe_flush_flags flags) { struct etna_pipe_context *priv = etna_pipe_context(pipe); uint32_t _fence_tmp; /* just pass through fence, though we have to convert the type... */ uint32_t *fence_in = (fence_out == NULL) ? NULL : (&_fence_tmp); if(etna_flush(priv->ctx, fence_in) != ETNA_OK) { BUG("Error: etna_flush failed, GPU may be in unpredictable state"); } if(fence_out) *fence_out = ETNA_FENCE_TO_PIPE_HANDLE(*fence_in); if(DBG_ENABLED(ETNA_DBG_FINISH_ALL)) { if(etna_finish(priv->ctx) != ETNA_OK) { BUG("Error: etna_finish failed, GPU may be in unpredictable state"); abort(); } } } static void etna_pipe_set_clip_state(struct pipe_context *pipe, const struct pipe_clip_state *pcs) { /* NOOP */ } static void etna_pipe_set_polygon_stipple(struct pipe_context *pctx, const struct pipe_poly_stipple *stipple) { /* NOP */ } struct pipe_context *etna_new_pipe_context(struct viv_conn *dev, const struct etna_pipe_specs *specs, struct pipe_screen *screen, void *priv) { struct etna_pipe_context *ectx = CALLOC_STRUCT(etna_pipe_context); if(ectx == NULL) return NULL; struct pipe_context *pc = &ectx->base; pc->priv = priv; pc->screen = screen; if(etna_create(dev, &ectx->ctx) < 0) { FREE(pc); return NULL; } etna_set_context_cb(ectx->ctx, update_context, ectx); /* context ctxate setup */ ectx->dirty_bits = 0xffffffff; ectx->conn = dev; ectx->specs = *specs; /* Set sensible defaults for state */ ectx->gpu3d.PA_W_CLIP_LIMIT = 0x34000001; ectx->gpu3d.GL_VERTEX_ELEMENT_CONFIG = 0x1; ectx->gpu3d.GL_API_MODE = VIVS_GL_API_MODE_OPENGL; ectx->gpu3d.RA_EARLY_DEPTH = 0x00000031; /* enable */ /* fill in vtable entries one by one */ pc->destroy = etna_pipe_destroy; pc->draw_vbo = etna_pipe_draw_vbo; /* XXX render_condition */ /* XXX create_query */ /* XXX destroy_query */ /* XXX begin_query */ /* XXX end_query */ /* XXX get_query_result */ pc->create_vertex_elements_state = etna_pipe_create_vertex_elements_state; pc->bind_vertex_elements_state = etna_pipe_bind_vertex_elements_state; pc->delete_vertex_elements_state = etna_pipe_delete_vertex_elements_state; pc->set_blend_color = etna_pipe_set_blend_color; pc->set_stencil_ref = etna_pipe_set_stencil_ref; pc->set_sample_mask = etna_pipe_set_sample_mask; pc->set_clip_state = etna_pipe_set_clip_state; pc->set_framebuffer_state = etna_pipe_set_framebuffer_state; pc->set_polygon_stipple = etna_pipe_set_polygon_stipple; pc->set_scissor_states = etna_pipe_set_scissor_states; pc->set_viewport_states = etna_pipe_set_viewport_states; pc->set_vertex_buffers = etna_pipe_set_vertex_buffers; pc->set_index_buffer = etna_pipe_set_index_buffer; /* XXX create_stream_output_target */ /* XXX stream_output_target_destroy */ /* XXX set_stream_output_targets */ pc->flush = etna_pipe_flush; /* XXX create_video_decoder */ /* XXX create_video_buffer */ /* XXX create_compute_state */ /* XXX bind_compute_state */ /* XXX delete_compute_state */ /* XXX set_compute_resources */ /* XXX set_global_binding */ /* XXX launch_grid */ etna_pipe_blend_init(pc); etna_pipe_rasterizer_init(pc); etna_pipe_shader_init(pc); etna_pipe_surface_init(pc); etna_pipe_texture_init(pc); etna_pipe_transfer_init(pc); etna_pipe_zsa_init(pc); etna_pipe_clear_blit_init(pc); /* Reset GPU to initial state */ reset_context(pc); return pc; }