diff options
author | Wladimir J. van der Laan <laanwj@gmail.com> | 2013-10-08 12:30:38 +0200 |
---|---|---|
committer | Wladimir J. van der Laan <laanwj@gmail.com> | 2013-10-08 12:30:38 +0200 |
commit | 349ecb6b9d0acd759308f369400c5976d33705c7 (patch) | |
tree | fa4033a8d6d73a5870d0d56348c241d54df93269 | |
parent | 7b506a7c0835437ef39c08db3fc57a68c2a1cc97 (diff) |
driver: comments updates and small cleanups
-rw-r--r-- | README.md | 31 | ||||
-rw-r--r-- | src/driver/etna_asm.c | 9 | ||||
-rw-r--r-- | src/driver/etna_asm.h | 28 | ||||
-rw-r--r-- | src/driver/etna_blend.c | 25 | ||||
-rw-r--r-- | src/driver/etna_clear_blit.c | 19 | ||||
-rw-r--r-- | src/driver/etna_compiler.c | 67 |
6 files changed, 96 insertions, 83 deletions
@@ -72,9 +72,9 @@ to find which of the above sets of headers is most similar, and use or adapt tha General -------- If the goal is to build Mesa and you are not planning to do reverse engineering, only `libetnaviv.a` needs to be built. -In this case it is sufficient to run make in `native/etnaviv`. +In this case it is sufficient to run make in `src/etnaviv`. -Otherwise, run `make` and `make rev` in `native/` (see the README.md in `native` for a description of all the directories contained within). +Otherwise, run `make` and `make rev` in `src/` (see the README.md in `src` for a description of all the directories contained within). `gc_abi.h` ----------- @@ -135,7 +135,7 @@ environment variables, for example like this: export GCABI="arnova" To build the egl samples (for command stream interception), you need to copy `libEGL_VIVANTE.so` `libGLESv2_VIVANTE.so` from -the device `/system/lib/egl` to `native/lib/egl`. This is not needed if you just want to build the `replay`, `etna` or `fb` +the device `/system/lib/egl` to `src/lib/egl`. This is not needed if you just want to build the `replay`, `etna` or `fb` tests, which do not rely in any way on the userspace blob. Contents @@ -147,15 +147,13 @@ program Vivante GCxxx GPU cores. Framebuffer tests ------------------ - - + +   -To exercise the gallium driver there are a few framebuffer tests in: - - native/fb/ +To exercise the gallium driver there are a few framebuffer tests in `src/fb`. These demos do double-buffered animated rendering of 1000 frames to the framebuffer using the proof-of-concept `etna` rendering and command stream building API. The goal of this API is to provide a Gallium-like @@ -213,7 +211,7 @@ from the madness of kernel-specific headers and defines. - register description headers - converting surfaces and textures from and to Vivante specific tiling formats -Currently used only by the 3D driver in `native/driver`. A future 2D, SVG or OpenCL driver can share this code. 
+Currently used only by the 3D driver in `src/driver`. A future 2D, SVG or OpenCL driver can share this code. Debugging support ------------------ @@ -348,24 +346,11 @@ Replay tests The replay tests replay the command stream and ioctl commands of the EGL demos, to get the same output. -They can be found in: - - native/replay/ +They can be found in `src/replay`. Currently this is available for the `cube` example that renders a smoothed cube, and the `cube_companion` example that renders a textured cube. -Command stream builder ------------------------ - -A beginning has been made of a simple driver that builds the command stream from scratch and submits -it to the kernel driver: - - native/lib/viv.(c|h) - native/replay/etna.(c|h) - native/replay/etna_test.c (to experiment with shaders) - native/replay/cube_etna.c (renders the GLES2 smoothed cube) - Vivante GPL kernel drivers --------------------------- diff --git a/src/driver/etna_asm.c b/src/driver/etna_asm.c index c4a4802..1a760ee 100644 --- a/src/driver/etna_asm.c +++ b/src/driver/etna_asm.c @@ -23,15 +23,6 @@ #include "etna_asm.h" #include "etna_debug.h" -#include <etnaviv/isa.xml.h> - -/* Return whether the rgroup is one of the uniforms */ -int etna_rgroup_is_uniform(unsigned rgroup) -{ - return rgroup == INST_RGROUP_UNIFORM_0 || - rgroup == INST_RGROUP_UNIFORM_1; -} - /** An instruction can only read from one distinct uniform. * This function verifies this property and returns true if the instruction * is deemed correct and false otherwise. 
diff --git a/src/driver/etna_asm.h b/src/driver/etna_asm.h index ad2e075..8ace6ca 100644 --- a/src/driver/etna_asm.h +++ b/src/driver/etna_asm.h @@ -25,11 +25,23 @@ #define H_ETNA_ASM #include <stdint.h> +#include <etnaviv/isa.xml.h> + /* Size of an instruction in 32-bit words */ #define ETNA_INST_SIZE (4) /* Number of source operands per instruction */ #define ETNA_NUM_SRC (3) +/* Broadcast swizzle to all four components */ +#define INST_SWIZ_BROADCAST(x) \ + (INST_SWIZ_X(x) | INST_SWIZ_Y(x) | INST_SWIZ_Z(x) | INST_SWIZ_W(x)) +/* Identity (NOP) swizzle */ +#define INST_SWIZ_IDENTITY \ + (INST_SWIZ_X(0) | INST_SWIZ_Y(1) | INST_SWIZ_Z(2) | INST_SWIZ_W(3)) +/* Fully specified swizzle */ +#define INST_SWIZ(x,y,z,w) \ + (INST_SWIZ_X(x) | INST_SWIZ_Y(y) | INST_SWIZ_Z(z) | INST_SWIZ_W(w)) + /*** operands ***/ /* destination operand */ @@ -73,6 +85,22 @@ struct etna_inst unsigned imm; /* takes place of src[2] for BRANCH/CALL */ }; +/* Compose two swizzles (computes swz1.swz2) */ +static inline uint32_t inst_swiz_compose(uint32_t swz1, uint32_t swz2) +{ + return INST_SWIZ_X((swz1 >> (((swz2 >> 0)&3)*2))&3) | + INST_SWIZ_Y((swz1 >> (((swz2 >> 2)&3)*2))&3) | + INST_SWIZ_Z((swz1 >> (((swz2 >> 4)&3)*2))&3) | + INST_SWIZ_W((swz1 >> (((swz2 >> 6)&3)*2))&3); +} + +/* Return whether the rgroup is one of the uniforms */ +static inline int etna_rgroup_is_uniform(unsigned rgroup) +{ + return rgroup == INST_RGROUP_UNIFORM_0 || + rgroup == INST_RGROUP_UNIFORM_1; +} + /** * Build vivante instruction from structure with * opcode, cond, sat, dst_use, dst_amode, diff --git a/src/driver/etna_blend.c b/src/driver/etna_blend.c index ebc69bb..9536e49 100644 --- a/src/driver/etna_blend.c +++ b/src/driver/etna_blend.c @@ -40,11 +40,27 @@ static void *etna_pipe_create_blend_state(struct pipe_context *pipe, //struct etna_pipe_context *priv = etna_pipe_context(pipe); struct compiled_blend_state *cs = CALLOC_STRUCT(compiled_blend_state); const struct pipe_rt_blend_state *rt0 = &bs->rt[0]; - bool 
enable = rt0->blend_enable && !(rt0->rgb_src_factor == PIPE_BLENDFACTOR_ONE && rt0->rgb_dst_factor == PIPE_BLENDFACTOR_ZERO && - rt0->alpha_src_factor == PIPE_BLENDFACTOR_ONE && rt0->alpha_dst_factor == PIPE_BLENDFACTOR_ZERO); + /* Enable blending if + * - blend enabled in blend state + * - NOT source factor is ONE and destination factor ZERO for both rgb and + * alpha (which would mean that blending is effectively disabled) + */ + bool enable = rt0->blend_enable && + !(rt0->rgb_src_factor == PIPE_BLENDFACTOR_ONE && rt0->rgb_dst_factor == PIPE_BLENDFACTOR_ZERO && + rt0->alpha_src_factor == PIPE_BLENDFACTOR_ONE && rt0->alpha_dst_factor == PIPE_BLENDFACTOR_ZERO); + /* Enable separate alpha if + * - Blending enabled (see above) + * - NOT rgb factor is equal to alpha factor for both source and + * destination (which would effectively mean that alpha is not separate) + */ bool separate_alpha = enable && !(rt0->rgb_src_factor == rt0->alpha_src_factor && rt0->rgb_dst_factor == rt0->alpha_dst_factor); + /* If the complete render target is written, set full_overwrite: + * - The color mask is 1111 + * - No blending is used + */ bool full_overwrite = (rt0->colormask == 15) && !enable; + if(enable) { cs->PE_ALPHA_CONFIG = @@ -59,7 +75,6 @@ static void *etna_pipe_create_blend_state(struct pipe_context *pipe, } else { cs->PE_ALPHA_CONFIG = 0; } - /* XXX should colormask be used if enable==false? */ cs->PE_COLOR_FORMAT = VIVS_PE_COLOR_FORMAT_COMPONENTS(rt0->colormask) | (full_overwrite ? VIVS_PE_COLOR_FORMAT_OVERWRITE : 0); @@ -68,7 +83,9 @@ static void *etna_pipe_create_blend_state(struct pipe_context *pipe, 0x000E4000 /* ??? */; /* independent_blend_enable not needed: only one rt supported */ /* XXX alpha_to_coverage / alpha_to_one? */ - /* XXX dither? VIVS_PE_DITHER(...) and/or VIVS_RS_DITHER(...) on resolve */ + /* Set dither registers based on dither status. These registers set the dither pattern, + * for now, set the same values as the blob. 
+ */ if(bs->dither) { cs->PE_DITHER[0] = 0x6e4ca280; diff --git a/src/driver/etna_clear_blit.c b/src/driver/etna_clear_blit.c index 95579f0..fe356ba 100644 --- a/src/driver/etna_clear_blit.c +++ b/src/driver/etna_clear_blit.c @@ -61,7 +61,7 @@ static void etna_pipe_blit_save_state(struct pipe_context *pipe) priv->num_fragment_sampler_views, priv->sampler_view_s); } -/* Generate clear command for a surface (non-TS case) */ +/* Generate clear command for a surface (non-fast clear case) */ void etna_rs_gen_clear_surface(struct compiled_rs_state *rs_state, struct etna_surface *surf, uint32_t clear_value) { uint bs = util_format_get_blocksize(surf->base.format); @@ -105,8 +105,8 @@ static void etna_pipe_clear(struct pipe_context *pipe, */ etna_set_state(priv->ctx, VIVS_GL_FLUSH_CACHE, VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH); etna_stall(priv->ctx, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE); - /* Preparation: Flush the TS. This must be done after flushing color and depth, otherwise it can - * result in crashes */ + /* Preparation: Flush the TS if needed. This must be done after flushing + * color and depth, otherwise it can result in crashes */ bool need_ts_flush = false; if((buffers & PIPE_CLEAR_COLOR) && priv->framebuffer_s.nr_cbufs) { @@ -124,11 +124,8 @@ static void etna_pipe_clear(struct pipe_context *pipe, { etna_set_state(priv->ctx, VIVS_TS_FLUSH_CACHE, VIVS_TS_FLUSH_CACHE_FLUSH); } - /* No need to set up the TS here with sync_context. - * RS clear operations (in contrast to resolve and copy) do not require the TS state. - */ - /* Need to update clear command in non-TS (fast clear) case *if* - * clear value is different from previous time. + /* No need to set up the TS here as RS clear operations (in contrast to + * resolve and copy) do not require the TS state. 
*/ if(buffers & PIPE_CLEAR_COLOR) { @@ -150,6 +147,7 @@ static void etna_pipe_clear(struct pipe_context *pipe, } else if(unlikely(new_clear_value != surf->level->clear_value)) /* Queue normal RS clear for non-TS surfaces */ { + /* If clear color changed, re-generate stored command */ etna_rs_gen_clear_surface(&surf->clear_command, surf, new_clear_value); } etna_submit_rs_state(priv->ctx, &surf->clear_command); @@ -162,7 +160,7 @@ static void etna_pipe_clear(struct pipe_context *pipe, uint32_t new_clear_value = translate_clear_depth_stencil(surf->base.format, depth, stencil); if(surf->surf.ts_address) /* TS: use precompiled clear command */ { - /* Set new clear color */ + /* Set new clear depth value */ priv->framebuffer.TS_DEPTH_CLEAR_VALUE = new_clear_value; if(!DBG_ENABLED(ETNA_DBG_NO_AUTODISABLE)) { @@ -173,6 +171,7 @@ static void etna_pipe_clear(struct pipe_context *pipe, priv->dirty_bits |= ETNA_STATE_TS; } else if(unlikely(new_clear_value != surf->level->clear_value)) /* Queue normal RS clear for non-TS surfaces */ { + /* If clear depth value changed, re-generate stored command */ etna_rs_gen_clear_surface(&surf->clear_command, surf, new_clear_value); } etna_submit_rs_state(priv->ctx, &surf->clear_command); @@ -237,7 +236,7 @@ static void etna_pipe_resource_copy_region(struct pipe_context *pipe, static void etna_pipe_blit(struct pipe_context *pipe, const struct pipe_blit_info *blit_info) { /* This is a more extended version of resource_copy_region */ - /* TODO Some cases can be handled by RS; if not, fall back to rendering */ + /* TODO Some cases can be handled by RS; if not, fall back to rendering or even CPU */ /* copy block of pixels from info->src to info->dst (resource, level, box, format); * function is used for scaling, flipping in x and y direction (negative width/height), format conversion, mask and filter * and even a scissor rectangle diff --git a/src/driver/etna_compiler.c b/src/driver/etna_compiler.c index d27ab54..fe4fc91 100644 --- 
a/src/driver/etna_compiler.c +++ b/src/driver/etna_compiler.c @@ -41,7 +41,7 @@ * TODO * * Allow loops * * Use an instruction scheduler - * * Avoid using more than one uniform in one instruction (can be used in multiple arguments) + * * Indirect access to uniforms / temporaries using amode */ #include "etna_compiler.h" #include "etna_asm.h" @@ -66,22 +66,6 @@ #include <sys/stat.h> #include <fcntl.h> -/* Broadcast swizzle to all four components */ -#define INST_SWIZ_BROADCAST(x) \ - (INST_SWIZ_X(x) | INST_SWIZ_Y(x) | INST_SWIZ_Z(x) | INST_SWIZ_W(x)) -/* Identity (NOP) swizzle */ -#define INST_SWIZ_IDENTITY \ - (INST_SWIZ_X(0) | INST_SWIZ_Y(1) | INST_SWIZ_Z(2) | INST_SWIZ_W(3)) - -/* Compose two swizzles (computes swz1.swz2) */ -static inline uint32_t inst_swiz_compose(uint32_t swz1, uint32_t swz2) -{ - return INST_SWIZ_X((swz1 >> (((swz2 >> 0)&3)*2))&3) | - INST_SWIZ_Y((swz1 >> (((swz2 >> 2)&3)*2))&3) | - INST_SWIZ_Z((swz1 >> (((swz2 >> 4)&3)*2))&3) | - INST_SWIZ_W((swz1 >> (((swz2 >> 6)&3)*2))&3); -} - /* Native register description structure */ struct etna_native_reg { @@ -91,6 +75,7 @@ struct etna_native_reg unsigned id:9; }; +/* Register description */ struct etna_reg_desc { enum tgsi_file_type file; /* IN, OUT, TEMP, ... 
*/ @@ -141,7 +126,7 @@ struct etna_compile_data uint total_decls; /* Bitmap of dead instructions which are removed in a separate pass */ - bool dead_inst[ETNA_MAX_TOKENS]; /* mark dead input instructions */ + bool dead_inst[ETNA_MAX_TOKENS]; /* Immediate data */ uint32_t imm_data[ETNA_MAX_IMM]; @@ -166,7 +151,7 @@ struct etna_compile_data /* Code generation */ int inst_ptr; /* current instruction pointer */ - uint32_t code[ETNA_MAX_INSTRUCTIONS*4]; + uint32_t code[ETNA_MAX_INSTRUCTIONS*ETNA_INST_SIZE]; /* I/O */ @@ -354,6 +339,7 @@ static void etna_compile_parse_declarations(struct etna_compile_data *cd, const { case TGSI_TOKEN_TYPE_DECLARATION: { const struct tgsi_full_declaration *decl = &ctx.FullToken.FullDeclaration; + /* Extend size of register file to encompass entire declaration */ cd->file_size[decl->Declaration.File] = MAX2(cd->file_size[decl->Declaration.File], decl->Range.Last+1); } break; case TGSI_TOKEN_TYPE_IMMEDIATE: { /* immediates are handled differently from other files; they are not declared @@ -371,7 +357,8 @@ static void etna_compile_parse_declarations(struct etna_compile_data *cd, const tgsi_parse_free(&ctx); } -static void etna_assign_decls(struct etna_compile_data *cd) +/* Allocate register declarations for the registers in all register files */ +static void etna_allocate_decls(struct etna_compile_data *cd) { uint idx=0; for(int x=0; x<TGSI_FILE_COUNT; ++x) @@ -387,7 +374,7 @@ static void etna_assign_decls(struct etna_compile_data *cd) cd->total_decls = idx; } -/* Pass -- check usage of temporaries, inputs, outputs */ +/* Pass -- check and record usage of temporaries, inputs, outputs */ static void etna_compile_pass_check_usage(struct etna_compile_data *cd, const struct tgsi_token *tokens) { struct tgsi_parse_context ctx = {}; @@ -410,9 +397,10 @@ static void etna_compile_pass_check_usage(struct etna_compile_data *cd, const st * used this allows finding ranges where the temporary can be borrowed * as input and/or output register * - * XXX in 
the case of loops this needs special care, as the last usage of a register - * inside a loop means it can still be used on next loop iteration (execution is no longer - * chronological). The register can only be declared "free" after the loop finishes. + * XXX in the case of loops this needs special care, or even to be completely disabled, as + * the last usage of a register inside a loop means it can still be used on next loop + * iteration (execution is no longer chronological). The register can only be + * declared "free" after the loop finishes. * * Same for inputs: the first usage of a register inside a loop doesn't mean that the register * won't have been overwritten in previous iteration. The register can only be declared free before the loop @@ -471,7 +459,7 @@ static void assign_special_inputs(struct etna_compile_data *cd) { if(cd->processor == TGSI_PROCESSOR_FRAGMENT) { - /* never assign t0; writing to it causes fragment to be discarded? */ + /* never assign t0 as it is the position output, start assigning at t1 */ cd->next_free_native = 1; /* hardwire TGSI_SEMANTIC_POSITION (input and output) to t0 */ for(int idx=0; idx<cd->total_decls; ++idx) @@ -533,6 +521,7 @@ static void etna_compile_pass_optimize_outputs(struct etna_compile_data *cd, con /* assignment of temporary to output -- * and the output doesn't yet have a native register assigned * and the last use of the temporary is this instruction + * and the MOV does not do a swizzle */ if(inst->Dst[0].Register.File == TGSI_FILE_OUTPUT && inst->Src[0].Register.File == TGSI_FILE_TEMPORARY && @@ -549,7 +538,9 @@ static void etna_compile_pass_optimize_outputs(struct etna_compile_data *cd, con /* direct assignment of input to output -- * and the input or output doesn't yet have a native register assigned * and the output is only used in this instruction, - * allocate a new register, and associate both input and output to it */ + * allocate a new register, and associate both input and output to it + * and the 
MOV does not do a swizzle + */ if(inst->Dst[0].Register.File == TGSI_FILE_OUTPUT && inst->Src[0].Register.File == TGSI_FILE_INPUT && !cd->file[TGSI_FILE_INPUT][in_idx].native.valid && @@ -558,8 +549,9 @@ static void etna_compile_pass_optimize_outputs(struct etna_compile_data *cd, con cd->file[TGSI_FILE_OUTPUT][out_idx].first_use == inst_idx && etna_mov_check_no_swizzle(inst->Dst[0].Register, inst->Src[0].Register)) { - cd->file[TGSI_FILE_OUTPUT][out_idx].native = cd->file[TGSI_FILE_INPUT][in_idx].native = - alloc_new_native_reg(cd); + cd->file[TGSI_FILE_OUTPUT][out_idx].native = + cd->file[TGSI_FILE_INPUT][in_idx].native = + alloc_new_native_reg(cd); /* mark this MOV instruction as a no-op */ cd->dead_inst[inst_idx] = true; } @@ -573,7 +565,7 @@ static void etna_compile_pass_optimize_outputs(struct etna_compile_data *cd, con tgsi_parse_free(&ctx); } -/* Get temporary to be used within one TGSI instruction. +/* Get a temporary to be used within one TGSI instruction. * The first time that this function is called the temporary will be allocated. * Each call to this function will return the same temporary. 
*/ @@ -587,7 +579,7 @@ static struct etna_native_reg etna_compile_get_inner_temp(struct etna_compile_da return cd->inner_temp; } -/* emit instruction and append to program */ +/* Emit instruction and append it to program */ static void emit_inst(struct etna_compile_data *cd, struct etna_inst *inst) { assert(cd->inst_ptr <= ETNA_MAX_INSTRUCTIONS); @@ -674,8 +666,7 @@ static struct etna_inst_src convert_src(struct etna_compile_data *cd, const stru struct etna_inst_src rv = { .use = 1, .swiz = inst_swiz_compose( - INST_SWIZ_X(in->Register.SwizzleX) | INST_SWIZ_Y(in->Register.SwizzleY) | - INST_SWIZ_Z(in->Register.SwizzleZ) | INST_SWIZ_W(in->Register.SwizzleW), + INST_SWIZ(in->Register.SwizzleX, in->Register.SwizzleY, in->Register.SwizzleZ, in->Register.SwizzleW), swizzle), .neg = in->Register.Negate, .abs = in->Register.Absolute, @@ -806,8 +797,7 @@ static void etna_compile_pass_generate_code(struct etna_compile_data *cd, const .opcode = INST_OPCODE_LITP, .sat = 0, .dst = convert_dst(cd, &inst->Dst[0]), - .src[0] = convert_src(cd, &inst->Src[0], - (INST_SWIZ_X(0) | INST_SWIZ_Y(0) | INST_SWIZ_Z(1) | INST_SWIZ_W(1))), /* src.xxyy */ + .src[0] = convert_src(cd, &inst->Src[0], INST_SWIZ(0,0,1,1)), /* src.xxyy */ .src[1] = convert_src(cd, &inst->Src[0], INST_SWIZ_BROADCAST(0)), /* src.xxxx */ .src[2].use = 1, .src[2].swiz = INST_SWIZ_BROADCAST(0), /* tmp.xxxx */ @@ -1265,7 +1255,7 @@ static void etna_compile_add_z_div_if_needed(struct etna_compile_data *cd) /** add a NOP to the shader if * a) the shader is empty * or - * b) there is a label at the end if the shader + * b) there is a label at the end of the shader */ static void etna_compile_add_nop_if_needed(struct etna_compile_data *cd) { @@ -1325,7 +1315,10 @@ static void assign_texture_units(struct etna_compile_data *cd) } } -/* additional pass to fill in branch targets */ +/* Additional pass to fill in branch targets. 
This pass should be last + * as no instruction reordering or removing/addition can be done anymore + * once the branch targets are computed. + */ static void etna_compile_fill_in_labels(struct etna_compile_data *cd) { for(int idx=0; idx<cd->inst_ptr ; ++idx) @@ -1572,7 +1565,7 @@ int etna_compile_shader_object(const struct etna_pipe_specs *specs, const struct /* Pass one -- check register file declarations and immediates */ etna_compile_parse_declarations(cd, tokens); - etna_assign_decls(cd); + etna_allocate_decls(cd); /* Pass two -- check usage of temporaries, inputs, outputs */ etna_compile_pass_check_usage(cd, tokens); |