/*
 * Copyright (c) 2012-2013 Etnaviv Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
/* TGSI->Vivante shader ISA conversion */

/* What does the compiler return (see etna_shader_object)?
 * 1) instruction data
 * 2) input-to-temporary mapping (fixed for ps)
 *    *) in case of ps, semantic -> varying id mapping
 *    *) for each varying: number of components used (r, rg, rgb, rgba)
 * 3) temporary-to-output mapping (in case of vs, fixed for ps)
 * 4) for each input/output: possible semantic (position, color, glpointcoord, ...)
 * 5) immediates base offset, immediates data
 * 6) used texture units (and possibly the TGSI_TEXTURE_* type); not needed to
 *    configure the hw, but useful for error checking
 * 7) enough information to add the z=(z+w)/2.0 necessary for older chips
 *    (output reg id is enough)
 *
 * Empty shaders are not allowed; always generate at least a NOP. Also, if there
 * is a label at the end of the shader, an extra NOP should be generated as jump
 * target.
 *
 * TODO
 * * Allow loops
 * * Use an instruction scheduler
 * * Indirect access to uniforms / temporaries using amode
 */
#include "etna_compiler.h"
#include "etna_asm.h"
#include "etna_internal.h"
#include "etna_debug.h"

#include "tgsi/tgsi_iterate.h"
#include "tgsi/tgsi_strings.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_info.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_memory.h"
#include "util/u_math.h"

#include <assert.h>
#include <math.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Native register description structure */
struct etna_native_reg
{
    unsigned valid:1;
    unsigned is_tex:1; /* is texture unit, overrides rgroup */
    unsigned rgroup:3;
    unsigned id:9;
};

/* Register description */
struct etna_reg_desc
{
    enum tgsi_file_type file; /* IN, OUT, TEMP, ... */
    int idx; /* index into file */
    bool active; /* used in program */
    int first_use; /* instruction id of first use (scope begin) */
    int last_use; /* instruction id of last use (scope end, inclusive) */

    struct etna_native_reg native; /* native register to map to */
    unsigned usage_mask:4; /* usage, per channel */
    bool has_semantic; /* register has associated TGSI semantic */
    struct tgsi_declaration_semantic semantic; /* TGSI semantic */
    struct tgsi_declaration_interp interp; /* Interpolation type */
};

/* Label information structure */
struct etna_compile_label
{
    int inst_idx; /* Instruction id that label points to */
};

enum etna_compile_frame_type
{
    ETNA_COMPILE_FRAME_IF, /* IF/ELSE/ENDIF */
};

/* nesting scope frame (LOOP, IF, ...) during compilation */
struct etna_compile_frame
{
    enum etna_compile_frame_type type;
    struct etna_compile_label *lbl_else;
    struct etna_compile_label *lbl_endif;
};

/* scratch area for compiling shader, freed after compilation finishes */
struct etna_compile_data
{
    uint processor; /* TGSI_PROCESSOR_... */

    /* Register descriptions, per TGSI file, per register index */
    struct etna_reg_desc *file[TGSI_FILE_COUNT];
    /* Number of registers in each TGSI file (max register+1) */
    uint file_size[TGSI_FILE_COUNT];
    /* Keep track of TGSI register declarations */
    struct etna_reg_desc decl[ETNA_MAX_DECL];
    uint total_decls;
    /* Bitmap of dead instructions which are removed in a separate pass */
    bool dead_inst[ETNA_MAX_TOKENS];
    /* Immediate data */
    uint32_t imm_data[ETNA_MAX_IMM];
    uint32_t imm_base; /* base of immediates (in 32 bit units) */
    uint32_t imm_size; /* size of immediates (in 32 bit units) */
    /* Next free native register, for register allocation */
    uint32_t next_free_native;

    /* Temporary register for use within a translated TGSI instruction,
     * only allocated when needed.
     */
    int inner_temps; /* number of inner temps used; only up to one available at this point */
    struct etna_native_reg inner_temp;

    /* Fields for handling nested conditionals */
    struct etna_compile_frame frame_stack[ETNA_MAX_DEPTH];
    int frame_sp;
    struct etna_compile_label *lbl_usage[ETNA_MAX_INSTRUCTIONS]; /* label usage reference, per instruction */
    struct etna_compile_label labels[ETNA_MAX_LABELS]; /* XXX use subheap allocation */
    int num_labels;

    /* Code generation */
    int inst_ptr; /* current instruction pointer */
    uint32_t code[ETNA_MAX_INSTRUCTIONS*ETNA_INST_SIZE];

    /* I/O */
    /* Number of varyings (PS only) */
    int num_varyings;

    /* GPU hardware specs */
    const struct etna_pipe_specs *specs;
};

/** Register allocation **/
enum reg_sort_order
{
    FIRST_USE_ASC,
    FIRST_USE_DESC,
    LAST_USE_ASC,
    LAST_USE_DESC
};

/* Augmented register description for sorting */
struct sort_rec
{
    struct etna_reg_desc *ptr;
    int key;
};

static int sort_rec_compar(const struct sort_rec *a, const struct sort_rec *b)
{
    if(a->key < b->key) return -1;
    if(a->key > b->key) return 1;
    return 0;
}

/* create an index on a register set based on certain criteria. */
static int sort_registers(
        struct sort_rec *sorted,
        struct etna_reg_desc *regs, int count,
        enum reg_sort_order so)
{
    /* pre-populate keys from active registers */
    int ptr = 0;
    for(int idx=0; idx<count; ++idx)
    {
        if(regs[idx].active)
        {
            sorted[ptr].ptr = &regs[idx];
            switch(so)
            {
            case FIRST_USE_ASC:  sorted[ptr].key = regs[idx].first_use; break;
            case FIRST_USE_DESC: sorted[ptr].key = -regs[idx].first_use; break;
            case LAST_USE_ASC:   sorted[ptr].key = regs[idx].last_use; break;
            case LAST_USE_DESC:  sorted[ptr].key = -regs[idx].last_use; break;
            }
            ptr++;
        }
    }
    /* sort index by key */
    qsort(sorted, ptr, sizeof(struct sort_rec),
            (int (*) (const void *, const void *))sort_rec_compar);
    return ptr;
}

/* Allocate a new, unused native temporary register */
static struct etna_native_reg alloc_new_native_reg(struct etna_compile_data *cd)
{
    assert(cd->next_free_native < ETNA_MAX_TEMPS);
    int rv = cd->next_free_native;
    cd->next_free_native++;
    return (struct etna_native_reg){ .valid=1, .rgroup=INST_RGROUP_TEMP, .id=rv };
}

/* assign TEMPs to native registers */
static void assign_temporaries_to_native(struct etna_compile_data *cd, struct etna_reg_desc *temps, int num_temps)
{
    for(int idx=0; idx<num_temps; ++idx)
    {
        temps[idx].native = alloc_new_native_reg(cd);
    }
}

/* assign inputs and outputs to temporaries: inputs must live in temporaries at
 * the start of the shader, outputs at the end, so try to reuse the native
 * register of a temporary whose live range does not overlap.
 */
static void assign_inouts_to_temporaries(struct etna_compile_data *cd, uint file)
{
    bool mode_inputs = (file == TGSI_FILE_INPUT);
    int inout_ptr = 0, num_inouts;
    int temp_ptr = 0, num_temps;
    struct sort_rec inout_order[ETNA_MAX_TOKENS];
    struct sort_rec temps_order[ETNA_MAX_TOKENS];
    num_inouts = sort_registers(inout_order,
            cd->file[file], cd->file_size[file],
            mode_inputs ? LAST_USE_ASC : FIRST_USE_ASC);
    num_temps = sort_registers(temps_order,
            cd->file[TGSI_FILE_TEMPORARY], cd->file_size[TGSI_FILE_TEMPORARY],
            mode_inputs ? FIRST_USE_ASC : LAST_USE_ASC);

    while(inout_ptr < num_inouts && temp_ptr < num_temps)
    {
        struct etna_reg_desc *inout = inout_order[inout_ptr].ptr;
        struct etna_reg_desc *temp = temps_order[temp_ptr].ptr;
        if(!inout->active || inout->native.valid) /* Skip if already a native register assigned */
        {
            inout_ptr++;
            continue;
        }
        /* last usage of this input before or in the same instruction as the first use of the temporary? */
        if(mode_inputs ? (inout->last_use <= temp->first_use) :
                         (inout->first_use >= temp->last_use))
        {
            /* assign it and advance to next input */
            inout->native = temp->native;
            inout_ptr++;
        }
        temp_ptr++;
    }

    /* if we couldn't reuse current ones, allocate new temporaries */
    for(inout_ptr=0; inout_ptr<num_inouts; ++inout_ptr)
    {
        struct etna_reg_desc *inout = inout_order[inout_ptr].ptr;
        if(inout->active && !inout->native.valid)
        {
            inout->native = alloc_new_native_reg(cd);
        }
    }
}
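/* Illustration of the reuse rule above (hypothetical timeline, not from a real
 * shader): with instructions numbered 0..3, an input whose last_use is 1 may
 * share a native temp with a TEMP whose first_use is 2, since their live ranges
 * do not overlap:
 *
 *   inst:     0   1   2   3
 *   IN[0]:    x---x
 *   TEMP[0]:          x---x     -> both map onto the same native register
 *
 * Inputs are matched greedily against temporaries in sorted order; anything
 * left unmatched receives a freshly allocated native register.
 */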
/* Allocate an immediate with a certain value and return the index. If
 * there is already an immediate with that value, return that.
 */
static struct etna_inst_src alloc_imm_u32(struct etna_compile_data *cd, uint32_t value)
{
    int idx;
    /* Could use a hash table to speed this up */
    for(idx = 0; idx<cd->imm_size; ++idx)
    {
        if(cd->imm_data[idx] == value)
            break;
    }
    if(idx == cd->imm_size) /* allocate new immediate */
    {
        assert(cd->imm_size < ETNA_MAX_IMM);
        idx = cd->imm_size++;
        cd->imm_data[idx] = value;
    }

    /* swizzle so that component with value is returned in all components */
    idx += cd->imm_base;
    struct etna_inst_src imm_src = {
        .use = 1,
        .rgroup = INST_RGROUP_UNIFORM_0,
        .reg = idx/4,
        .swiz = INST_SWIZ_BROADCAST(idx & 3)
    };
    return imm_src;
}

/* Allocate immediate with a certain float value. If there is already an
 * immediate with that value, return that.
 */
static struct etna_inst_src alloc_imm_f32(struct etna_compile_data *cd, float value)
{
    return alloc_imm_u32(cd, etna_f32_to_u32(value));
}
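/* Worked example (hypothetical state): with imm_base == 8 and imm_data already
 * containing { 0x3f000000 }, a second request for 0.5f deduplicates to 32-bit
 * index 0. The returned source then addresses uniform register (8+0)/4 == 2
 * with a broadcast swizzle of component 0, so 0.5f appears in all channels.
 */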
/* Pass -- check register file declarations and immediates */
static void etna_compile_parse_declarations(struct etna_compile_data *cd, const struct tgsi_token *tokens)
{
    struct tgsi_parse_context ctx = {};
    unsigned status = tgsi_parse_init(&ctx, tokens);
    assert(status == TGSI_PARSE_OK);

    cd->processor = ctx.FullHeader.Processor.Processor;
    while(!tgsi_parse_end_of_tokens(&ctx))
    {
        tgsi_parse_token(&ctx);
        switch(ctx.FullToken.Token.Type)
        {
        case TGSI_TOKEN_TYPE_DECLARATION: {
            const struct tgsi_full_declaration *decl = &ctx.FullToken.FullDeclaration;
            /* Extend size of register file to encompass entire declaration */
            cd->file_size[decl->Declaration.File] = MAX2(cd->file_size[decl->Declaration.File], decl->Range.Last+1);
            } break;
        case TGSI_TOKEN_TYPE_IMMEDIATE: {
            /* immediates are handled differently from other files; they are not
             * declared explicitly, and always add four components */
            const struct tgsi_full_immediate *imm = &ctx.FullToken.FullImmediate;
            assert(cd->imm_size <= (ETNA_MAX_IMM-4));
            for(int i=0; i<4; ++i)
            {
                cd->imm_data[cd->imm_size++] = imm->u[i].Uint;
            }
            cd->file_size[TGSI_FILE_IMMEDIATE] = cd->imm_size / 4;
            } break;
        }
    }
    tgsi_parse_free(&ctx);
}

/* Allocate register declarations for the registers in all register files */
static void etna_allocate_decls(struct etna_compile_data *cd)
{
    uint idx=0;
    for(int x=0; x<TGSI_FILE_COUNT; ++x)
    {
        cd->file[x] = &cd->decl[idx];
        for(int sub=0; sub<cd->file_size[x]; ++sub)
        {
            cd->decl[idx].file = x;
            cd->decl[idx].idx = sub;
            idx++;
        }
    }
    cd->total_decls = idx;
}

/* Pass -- check and record usage of temporaries, inputs, outputs */
static void etna_compile_pass_check_usage(struct etna_compile_data *cd, const struct tgsi_token *tokens)
{
    struct tgsi_parse_context ctx = {};
    unsigned status = tgsi_parse_init(&ctx, tokens);
    assert(status == TGSI_PARSE_OK);

    for(int idx=0; idx<cd->total_decls; ++idx)
    {
        cd->decl[idx].active = false;
        cd->decl[idx].first_use = cd->decl[idx].last_use = -1;
    }

    int inst_idx = 0;
    while(!tgsi_parse_end_of_tokens(&ctx))
    {
        tgsi_parse_token(&ctx);
        /* find out max register #s used.
         * For every register, mark the first and last instruction index where it is
         * used; this allows finding ranges where a temporary can be borrowed as
         * input and/or output register.
         *
         * XXX in the case of loops this needs special care, or even has to be
         * completely disabled: the last usage of a register inside a loop means it
         * can still be used in the next loop iteration (execution is no longer
         * chronological). The register can only be declared "free" after the loop
         * finishes.
         *
         * Same for inputs: the first usage of a register inside a loop doesn't mean
         * that the register won't have been overwritten in a previous iteration.
         * The register can only be declared free before the loop starts.
         * The proper way would be to do full dominator / post-dominator analysis
         * (especially with more complicated control flow such as direct branch
         * instructions) but not for now...
         */
        switch(ctx.FullToken.Token.Type)
        {
        case TGSI_TOKEN_TYPE_DECLARATION: {
            /* Declaration: fill in file details */
            const struct tgsi_full_declaration *decl = &ctx.FullToken.FullDeclaration;
            for(int idx=decl->Range.First; idx<=decl->Range.Last; ++idx)
            {
                cd->file[decl->Declaration.File][idx].usage_mask = 0; // we'll compute this ourselves
                cd->file[decl->Declaration.File][idx].has_semantic = decl->Declaration.Semantic;
                cd->file[decl->Declaration.File][idx].semantic = decl->Semantic;
                cd->file[decl->Declaration.File][idx].interp = decl->Interp;
            }
            } break;
        case TGSI_TOKEN_TYPE_INSTRUCTION: {
            /* Instruction: iterate over operands of instruction */
            const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction;
            /* iterate over destination registers */
            for(int idx=0; idx<inst->Instruction.NumDstRegs; ++idx)
            {
                struct etna_reg_desc *reg_desc = &cd->file[inst->Dst[idx].Register.File][inst->Dst[idx].Register.Index];
                if(reg_desc->first_use == -1)
                    reg_desc->first_use = inst_idx;
                reg_desc->last_use = inst_idx;
                reg_desc->active = true;
            }
            /* iterate over source registers */
            for(int idx=0; idx<inst->Instruction.NumSrcRegs; ++idx)
            {
                struct etna_reg_desc *reg_desc = &cd->file[inst->Src[idx].Register.File][inst->Src[idx].Register.Index];
                if(reg_desc->first_use == -1)
                    reg_desc->first_use = inst_idx;
                reg_desc->last_use = inst_idx;
                reg_desc->active = true;
                /* accumulate usage mask for register; this is used to determine
                 * how many slots for varyings should be allocated */
                reg_desc->usage_mask |= tgsi_util_get_inst_usage_mask(inst, idx);
            }
            inst_idx += 1;
            } break;
        default:
            break;
        }
    }
    tgsi_parse_free(&ctx);
}

/* assign inputs that need to be assigned to specific registers */
static void assign_special_inputs(struct etna_compile_data *cd)
{
    if(cd->processor == TGSI_PROCESSOR_FRAGMENT)
    {
        /* never assign t0 as it is the position output, start assigning at t1 */
        cd->next_free_native = 1;
        /* hardwire TGSI_SEMANTIC_POSITION (input and output) to t0 */
        for(int idx=0; idx<cd->total_decls; ++idx)
        {
            struct etna_reg_desc *reg = &cd->decl[idx];
            if(reg->active && reg->semantic.Name == TGSI_SEMANTIC_POSITION)
            {
                reg->native.valid = 1;
                reg->native.rgroup = INST_RGROUP_TEMP;
                reg->native.id = 0;
            }
        }
    }
}
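/* For illustration (hypothetical TGSI, not from a real shader): a fragment
 * shader declaring
 *   DCL IN[0], POSITION, LINEAR
 * gets IN[0] pinned to native temp t0 by the pass above, and all further
 * allocation starts at t1, since t0 doubles as the position register.
 */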
/* Check that a move instruction does not swizzle any of the components
 * that it writes.
 */
static bool etna_mov_check_no_swizzle(const struct tgsi_dst_register dst, const struct tgsi_src_register src)
{
    return (!(dst.WriteMask & TGSI_WRITEMASK_X) || src.SwizzleX == TGSI_SWIZZLE_X) &&
           (!(dst.WriteMask & TGSI_WRITEMASK_Y) || src.SwizzleY == TGSI_SWIZZLE_Y) &&
           (!(dst.WriteMask & TGSI_WRITEMASK_Z) || src.SwizzleZ == TGSI_SWIZZLE_Z) &&
           (!(dst.WriteMask & TGSI_WRITEMASK_W) || src.SwizzleW == TGSI_SWIZZLE_W);
}

/* Pass -- optimize outputs
 * Mesa tends to generate code like this at the end of their shaders:
 *   MOV OUT[1], TEMP[2]
 *   MOV OUT[0], TEMP[0]
 *   MOV OUT[2], TEMP[1]
 * Recognize if
 * a) there is only a single assignment to an output register, and
 * b) the temporary is not used after that.
 * Also recognize direct assignment of IN to OUT (passthrough).
 */
static void etna_compile_pass_optimize_outputs(struct etna_compile_data *cd, const struct tgsi_token *tokens)
{
    struct tgsi_parse_context ctx = {};
    unsigned status = tgsi_parse_init(&ctx, tokens);
    assert(status == TGSI_PARSE_OK);

    int inst_idx = 0;
    while(!tgsi_parse_end_of_tokens(&ctx))
    {
        tgsi_parse_token(&ctx);
        switch(ctx.FullToken.Token.Type)
        {
        case TGSI_TOKEN_TYPE_INSTRUCTION: {
            const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction;
            /* iterate over operands */
            switch(inst->Instruction.Opcode)
            {
            case TGSI_OPCODE_MOV: {
                uint out_idx = inst->Dst[0].Register.Index;
                uint in_idx = inst->Src[0].Register.Index;
                /* assignment of temporary to output --
                 *   and the output doesn't yet have a native register assigned
                 *   and the last use of the temporary is this instruction
                 *   and the MOV does not do a swizzle
                 */
                if(inst->Dst[0].Register.File == TGSI_FILE_OUTPUT &&
                   inst->Src[0].Register.File == TGSI_FILE_TEMPORARY &&
                   !cd->file[TGSI_FILE_OUTPUT][out_idx].native.valid &&
                   cd->file[TGSI_FILE_TEMPORARY][in_idx].last_use == inst_idx &&
                   etna_mov_check_no_swizzle(inst->Dst[0].Register, inst->Src[0].Register))
                {
                    cd->file[TGSI_FILE_OUTPUT][out_idx].native = cd->file[TGSI_FILE_TEMPORARY][in_idx].native;
                    /* prevent temp from being re-used for the rest of the shader */
                    cd->file[TGSI_FILE_TEMPORARY][in_idx].last_use = ETNA_MAX_TOKENS;
                    /* mark this MOV instruction as a no-op */
                    cd->dead_inst[inst_idx] = true;
                }
                /* direct assignment of input to output --
                 *   and the input or output doesn't yet have a native register assigned
                 *   and the output is only used in this instruction:
                 * allocate a new register, and associate both input and output to it
                 *   and the MOV does not do a swizzle
                 */
                if(inst->Dst[0].Register.File == TGSI_FILE_OUTPUT &&
                   inst->Src[0].Register.File == TGSI_FILE_INPUT &&
                   !cd->file[TGSI_FILE_INPUT][in_idx].native.valid &&
                   !cd->file[TGSI_FILE_OUTPUT][out_idx].native.valid &&
                   cd->file[TGSI_FILE_OUTPUT][out_idx].last_use == inst_idx &&
                   cd->file[TGSI_FILE_OUTPUT][out_idx].first_use == inst_idx &&
                   etna_mov_check_no_swizzle(inst->Dst[0].Register, inst->Src[0].Register))
                {
                    cd->file[TGSI_FILE_OUTPUT][out_idx].native =
                        cd->file[TGSI_FILE_INPUT][in_idx].native = alloc_new_native_reg(cd);
                    /* mark this MOV instruction as a no-op */
                    cd->dead_inst[inst_idx] = true;
                }
                } break;
            default:
                break;
            }
            inst_idx += 1;
            } break;
        }
    }
    tgsi_parse_free(&ctx);
}
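/* Sketch of the effect of this pass (hypothetical input):
 *   0: ADD TEMP[0], IN[0], IN[1]
 *   1: MOV OUT[0], TEMP[0]
 * instruction 1 is marked dead and OUT[0] inherits TEMP[0]'s native register,
 * so the ADD writes the output directly and no copy is emitted.
 */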
/* Get a temporary to be used within one TGSI instruction.
 * The first time this function is called, the temporary will be allocated.
 * Each call to this function will return the same temporary.
 */
static struct etna_native_reg etna_compile_get_inner_temp(struct etna_compile_data *cd)
{
    if(cd->inner_temps)
        BUG("Multiple inner temporaries (%i) requested in one instruction", cd->inner_temps + 1);
    if(!cd->inner_temp.valid)
        cd->inner_temp = alloc_new_native_reg(cd);
    cd->inner_temps += 1;
    return cd->inner_temp;
}

/* Emit instruction and append it to program */
static void emit_inst(struct etna_compile_data *cd, struct etna_inst *inst)
{
    assert(cd->inst_ptr < ETNA_MAX_INSTRUCTIONS);
    /* Check for uniform conflicts (each instruction can only access one uniform);
     * if detected, use an intermediate temporary */
    unsigned uni_rgroup = -1;
    unsigned uni_reg = -1;
    for(int src=0; src<3; ++src) /* three source operands per instruction */
    {
        if(etna_rgroup_is_uniform(inst->src[src].rgroup))
        {
            if(uni_reg == -1) /* first unique uniform used */
            {
                uni_rgroup = inst->src[src].rgroup;
                uni_reg = inst->src[src].reg;
            } else { /* second or later; check that it is a re-use */
                if(uni_rgroup != inst->src[src].rgroup ||
                   uni_reg != inst->src[src].reg)
                {
                    DBG_F(ETNA_DBG_COMPILER_MSGS, "perf warning: instruction that accesses different uniforms, need to generate extra MOV");
                    struct etna_native_reg inner_temp = etna_compile_get_inner_temp(cd);
                    /* Generate move instruction to temporary */
                    etna_assemble(&cd->code[cd->inst_ptr*4], &(struct etna_inst) {
                            .opcode = INST_OPCODE_MOV,
                            .dst.use = 1,
                            .dst.comps = INST_COMPS_X | INST_COMPS_Y | INST_COMPS_Z | INST_COMPS_W,
                            .dst.reg = inner_temp.id,
                            .src[2] = inst->src[src]
                            });
                    cd->inst_ptr++;
                    /* Modify instruction to use temp register instead of uniform */
                    inst->src[src].use = 1;
                    inst->src[src].rgroup = INST_RGROUP_TEMP;
                    inst->src[src].reg = inner_temp.id;
                    inst->src[src].swiz = INST_SWIZ_IDENTITY; /* swizzling happens on MOV */
                    inst->src[src].neg = 0; /* negation happens on MOV */
                    inst->src[src].abs = 0; /* abs happens on MOV */
                    inst->src[src].amode = 0; /* amode effects happen on MOV */
                }
            }
        }
    }
    /* Finally assemble the actual instruction */
    etna_assemble(&cd->code[cd->inst_ptr*4], inst);
    cd->inst_ptr++;
}
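/* Example of the rewrite performed above (hypothetical encoding): an
 * instruction reading two different uniforms, such as
 *   ADD t0, u0, void, u1
 * is split into
 *   MOV tI, void, void, u1
 *   ADD t0, u0, void, tI
 * where tI is the per-instruction inner temporary.
 */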
/* convert destination operand */
static struct etna_inst_dst convert_dst(struct etna_compile_data *cd, const struct tgsi_full_dst_register *in)
{
    struct etna_inst_dst rv = {
        /// XXX .amode
        .use = 1,
        .comps = in->Register.WriteMask,
    };
    struct etna_native_reg native_reg = cd->file[in->Register.File][in->Register.Index].native;
    /* can only assign to temporaries */
    assert(native_reg.valid && !native_reg.is_tex && native_reg.rgroup == INST_RGROUP_TEMP);
    rv.reg = native_reg.id;
    return rv;
}

/* convert texture operand */
static struct etna_inst_tex convert_tex(struct etna_compile_data *cd, const struct tgsi_full_src_register *in, const struct tgsi_instruction_texture *tex)
{
    struct etna_inst_tex rv = {
        // XXX .amode (to allow for an array of samplers?)
        .swiz = INST_SWIZ_IDENTITY
    };
    struct etna_native_reg native_reg = cd->file[in->Register.File][in->Register.Index].native;
    assert(native_reg.is_tex && native_reg.valid);
    rv.id = native_reg.id;
    return rv;
}

/* convert source operand */
static struct etna_inst_src convert_src(struct etna_compile_data *cd, const struct tgsi_full_src_register *in, uint32_t swizzle)
{
    struct etna_inst_src rv = {
        .use = 1,
        .swiz = inst_swiz_compose(
                INST_SWIZ(in->Register.SwizzleX, in->Register.SwizzleY, in->Register.SwizzleZ, in->Register.SwizzleW),
                swizzle),
        .neg = in->Register.Negate,
        .abs = in->Register.Absolute,
        // XXX .amode
    };
    struct etna_native_reg native_reg = cd->file[in->Register.File][in->Register.Index].native;
    assert(native_reg.valid && !native_reg.is_tex);
    rv.rgroup = native_reg.rgroup;
    rv.reg = native_reg.id;
    return rv;
}

/* convert destination to source operand (for operating in place), i.e.
 *   MUL dst0.x__w, src0.xyzw, 2/PI
 *   SIN dst0.x__w, dst0.xyzw
 */
static struct etna_inst_src convert_dst_to_src(struct etna_compile_data *cd, const struct tgsi_full_dst_register *in)
{
    struct etna_inst_src rv = {
        .use = 1,
        .swiz = INST_SWIZ_IDENTITY, /* no swizzle needed, destination does selection */
        .neg = 0,
        .abs = 0,
    };
    struct etna_native_reg native_reg = cd->file[in->Register.File][in->Register.Index].native;
    assert(native_reg.valid && !native_reg.is_tex);
    rv.rgroup = native_reg.rgroup;
    rv.reg = native_reg.id;
    return rv;
}

/* create a new label */
static struct etna_compile_label *alloc_new_label(struct etna_compile_data *cd)
{
    assert(cd->num_labels < ETNA_MAX_LABELS);
    struct etna_compile_label *rv = &cd->labels[cd->num_labels++];
    rv->inst_idx = -1; /* start by pointing to no specific instruction */
    return rv;
}

/* place label at current instruction pointer */
static void label_place(struct etna_compile_data *cd, struct etna_compile_label *label)
{
    label->inst_idx = cd->inst_ptr;
}

/* mark label use at current instruction.
 * The target of the label will be filled in into the marked instruction's
 * src2.imm slot as soon as the value becomes known.
 */
static void label_mark_use(struct etna_compile_data *cd, struct etna_compile_label *label)
{
    assert(cd->inst_ptr < ETNA_MAX_INSTRUCTIONS);
    cd->lbl_usage[cd->inst_ptr] = label;
}
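/* Branch fixup happens in two steps (sketch): when a BRANCH is emitted its
 * target is not yet known, so label_mark_use() records "instruction N
 * references label L" in lbl_usage[N]. Once label_place() pins L to an
 * instruction index, a final pass (etna_compile_fill_in_labels below) patches
 * the src2 immediate of every recorded user. This avoids backpatching during
 * code generation itself.
 */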
/* Pass -- compile instructions */
static void etna_compile_pass_generate_code(struct etna_compile_data *cd, const struct tgsi_token *tokens)
{
    struct tgsi_parse_context ctx = {};
    unsigned status = tgsi_parse_init(&ctx, tokens);
    assert(status == TGSI_PARSE_OK);

    int inst_idx = 0;
    while(!tgsi_parse_end_of_tokens(&ctx))
    {
        tgsi_parse_token(&ctx);
        const struct tgsi_full_instruction *inst = 0;
        /* No inner temps used yet for this instruction, clear counter */
        cd->inner_temps = 0;
        switch(ctx.FullToken.Token.Type)
        {
        case TGSI_TOKEN_TYPE_INSTRUCTION:
            /* iterate over operands */
            inst = &ctx.FullToken.FullInstruction;
            if(cd->dead_inst[inst_idx]) /* skip dead instructions */
            {
                inst_idx++;
                continue;
            }
            assert(inst->Instruction.Saturate != TGSI_SAT_MINUS_PLUS_ONE);
            int sat = (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE);
            /* Use a naive switch statement to get up and running. Later on, when we
             * have more experience with Vivante instruction generation, this may be
             * shortened greatly by using lookup in a table with patterns. */
            switch(inst->Instruction.Opcode)
            {
            case TGSI_OPCODE_MOV:
                emit_inst(cd, &(struct etna_inst) {
                        .opcode = INST_OPCODE_MOV,
                        .sat = sat,
                        .dst = convert_dst(cd, &inst->Dst[0]),
                        .src[2] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY),
                        });
                break;
            case TGSI_OPCODE_LIT: {
                /* LOG  tmp.x, void, void, src.yyyy
                 * MUL  tmp.x, tmp.xxxx, src.wwww, void
                 * LITP dst, src.xxyy, src.xxxx, tmp.xxxx
                 */
                struct etna_native_reg inner_temp = etna_compile_get_inner_temp(cd);
                emit_inst(cd, &(struct etna_inst) {
                        .opcode = INST_OPCODE_LOG,
                        .sat = 0,
                        .dst.use = 1,
                        .dst.comps = INST_COMPS_X, /* tmp.x */
                        .dst.reg = inner_temp.id,
                        .src[2] = convert_src(cd, &inst->Src[0], INST_SWIZ_BROADCAST(1)), /* src.yyyy */
                        });
                emit_inst(cd, &(struct etna_inst) {
                        .opcode = INST_OPCODE_MUL,
                        .sat = 0,
                        .dst.use = 1,
                        .dst.comps = INST_COMPS_X,
                        .dst.reg = inner_temp.id,
                        .src[0].use = 1,
                        .src[0].swiz = INST_SWIZ_BROADCAST(0), /* tmp.xxxx */
                        .src[0].neg = 0,
                        .src[0].abs = 0,
                        .src[0].rgroup = inner_temp.rgroup,
                        .src[0].reg = inner_temp.id,
                        .src[1] = convert_src(cd, &inst->Src[0], INST_SWIZ_BROADCAST(3)), /* src.wwww */
                        });
                emit_inst(cd, &(struct etna_inst) {
                        .opcode = INST_OPCODE_LITP,
                        .sat = 0,
                        .dst = convert_dst(cd, &inst->Dst[0]),
                        .src[0] = convert_src(cd, &inst->Src[0], INST_SWIZ(0,0,1,1)), /* src.xxyy */
                        .src[1] = convert_src(cd, &inst->Src[0], INST_SWIZ_BROADCAST(0)), /* src.xxxx */
                        .src[2].use = 1,
                        .src[2].swiz = INST_SWIZ_BROADCAST(0), /* tmp.xxxx */
                        .src[2].neg = 0,
                        .src[2].abs = 0,
                        .src[2].rgroup = inner_temp.rgroup,
                        .src[2].reg = inner_temp.id,
                        });
                } break;
            case TGSI_OPCODE_RCP:
                emit_inst(cd, &(struct etna_inst) {
                        .opcode = INST_OPCODE_RCP,
                        .sat = sat,
                        .dst = convert_dst(cd, &inst->Dst[0]),
                        .src[2] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY),
                        });
                break;
            case TGSI_OPCODE_RSQ:
                emit_inst(cd, &(struct etna_inst) {
                        .opcode = INST_OPCODE_RSQ,
                        .sat = sat,
                        .dst = convert_dst(cd, &inst->Dst[0]),
                        .src[2] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY),
                        });
                break;
            case TGSI_OPCODE_MUL:
                emit_inst(cd, &(struct etna_inst) {
                        .opcode = INST_OPCODE_MUL,
                        .sat = sat,
                        .dst = convert_dst(cd, &inst->Dst[0]),
                        .src[0] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY),
                        .src[1] = convert_src(cd, &inst->Src[1], INST_SWIZ_IDENTITY),
                        });
                break;
            case TGSI_OPCODE_ADD:
                emit_inst(cd, &(struct etna_inst) {
                        .opcode = INST_OPCODE_ADD,
                        .sat = sat,
                        .dst = convert_dst(cd, &inst->Dst[0]),
                        .src[0] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY),
                        .src[2] = convert_src(cd, &inst->Src[1], INST_SWIZ_IDENTITY),
                        });
                break;
            case TGSI_OPCODE_DP3:
                emit_inst(cd, &(struct etna_inst) {
                        .opcode = INST_OPCODE_DP3,
                        .sat = sat,
                        .dst = convert_dst(cd, &inst->Dst[0]),
                        .src[0] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY),
                        .src[1] = convert_src(cd, &inst->Src[1], INST_SWIZ_IDENTITY),
                        });
                break;
            case TGSI_OPCODE_DP4:
                emit_inst(cd, &(struct etna_inst) {
                        .opcode = INST_OPCODE_DP4,
                        .sat = sat,
                        .dst = convert_dst(cd, &inst->Dst[0]),
                        .src[0] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY),
                        .src[1] = convert_src(cd, &inst->Src[1], INST_SWIZ_IDENTITY),
                        });
                break;
            case TGSI_OPCODE_MIN:
                emit_inst(cd, &(struct etna_inst) {
                        .opcode = INST_OPCODE_SELECT,
                        .cond = INST_CONDITION_GT,
                        .sat = sat,
                        .dst = convert_dst(cd, &inst->Dst[0]),
                        .src[0] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY),
                        .src[1] = convert_src(cd, &inst->Src[1], INST_SWIZ_IDENTITY),
                        .src[2] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY),
                        });
                break;
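            /* MIN and MAX are built from SELECT. With the operand pattern used
             * here (src0=a, src1=b, src2=a), SELECT appears to compute
             *   dst = COND(src0, src1) ? src1 : src2
             * so condition GT yields MIN(a,b) = (a > b) ? b : a, and condition
             * LT (in the MAX case below) yields (a < b) ? b : a. (Inferred from
             * the operand pattern, not from ISA documentation.)
             */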
            case TGSI_OPCODE_MAX:
                emit_inst(cd, &(struct etna_inst) {
                        .opcode = INST_OPCODE_SELECT,
                        .cond = INST_CONDITION_LT,
                        .sat = sat,
                        .dst = convert_dst(cd, &inst->Dst[0]),
                        .src[0] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY),
                        .src[1] = convert_src(cd, &inst->Src[1], INST_SWIZ_IDENTITY),
                        .src[2] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY),
                        });
                break;
            case TGSI_OPCODE_SLT:
            case TGSI_OPCODE_SGE:
            case TGSI_OPCODE_SEQ:
            case TGSI_OPCODE_SGT:
            case TGSI_OPCODE_SLE:
            case TGSI_OPCODE_SNE:
            case TGSI_OPCODE_STR: {
                uint cond = 0;
                switch(inst->Instruction.Opcode)
                {
                case TGSI_OPCODE_SLT: cond = INST_CONDITION_LT; break;
                case TGSI_OPCODE_SGE: cond = INST_CONDITION_GE; break;
                case TGSI_OPCODE_SEQ: cond = INST_CONDITION_EQ; break;
                case TGSI_OPCODE_SGT: cond = INST_CONDITION_GT; break;
                case TGSI_OPCODE_SLE: cond = INST_CONDITION_LE; break;
                case TGSI_OPCODE_SNE: cond = INST_CONDITION_NE; break;
                case TGSI_OPCODE_STR: cond = INST_CONDITION_TRUE; break;
                }
                emit_inst(cd, &(struct etna_inst) {
                        .opcode = INST_OPCODE_SET,
                        .cond = cond,
                        .sat = sat,
                        .dst = convert_dst(cd, &inst->Dst[0]),
                        .src[0] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY),
                        .src[1] = convert_src(cd, &inst->Src[1], INST_SWIZ_IDENTITY),
                        });
                } break;
            case TGSI_OPCODE_MAD:
                emit_inst(cd, &(struct etna_inst) {
                        .opcode = INST_OPCODE_MAD,
                        .sat = sat,
                        .dst = convert_dst(cd, &inst->Dst[0]),
                        .src[0] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY),
                        .src[1] = convert_src(cd, &inst->Src[1], INST_SWIZ_IDENTITY),
                        .src[2] = convert_src(cd, &inst->Src[2], INST_SWIZ_IDENTITY),
                        });
                break;
            case TGSI_OPCODE_SUB: { /* ADD with negated SRC1 */
                struct etna_inst inst_out = {
                    .opcode = INST_OPCODE_ADD,
                    .sat = sat,
                    .dst = convert_dst(cd, &inst->Dst[0]),
                    .src[0] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY),
                    .src[2] = convert_src(cd, &inst->Src[1], INST_SWIZ_IDENTITY),
                };
                inst_out.src[2].neg = !inst_out.src[2].neg;
                emit_inst(cd, &inst_out);
                } break;
            case TGSI_OPCODE_SQRT: /* only generated if HAS_SQRT_TRIG */
                emit_inst(cd, &(struct etna_inst) {
                        .opcode = INST_OPCODE_SQRT,
                        .sat = sat,
                        .dst = convert_dst(cd, &inst->Dst[0]),
                        .src[2] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY),
                        });
                break;
            case TGSI_OPCODE_FRC:
                emit_inst(cd, &(struct etna_inst) {
                        .opcode = INST_OPCODE_FRC,
                        .sat = sat,
                        .dst = convert_dst(cd, &inst->Dst[0]),
                        .src[2] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY),
                        });
                break;
            case TGSI_OPCODE_FLR: /* XXX HAS_SIGN_FLOOR_CEIL */
                emit_inst(cd, &(struct etna_inst) {
                        .opcode = INST_OPCODE_FLOOR,
                        .sat = sat,
                        .dst = convert_dst(cd, &inst->Dst[0]),
                        .src[2] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY),
                        });
                break;
            case TGSI_OPCODE_CEIL: /* XXX HAS_SIGN_FLOOR_CEIL */
                emit_inst(cd, &(struct etna_inst) {
                        .opcode = INST_OPCODE_CEIL,
                        .sat = sat,
                        .dst = convert_dst(cd, &inst->Dst[0]),
                        .src[2] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY),
                        });
                break;
            case TGSI_OPCODE_SSG: /* XXX HAS_SIGN_FLOOR_CEIL */
                emit_inst(cd, &(struct etna_inst) {
                        .opcode = INST_OPCODE_SIGN,
                        .sat = sat,
                        .dst = convert_dst(cd, &inst->Dst[0]),
                        .src[2] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY),
                        });
                break;
            case TGSI_OPCODE_EX2:
                emit_inst(cd, &(struct etna_inst) {
                        .opcode = INST_OPCODE_EXP,
                        .sat = sat,
                        .dst = convert_dst(cd, &inst->Dst[0]),
                        .src[2] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY),
                        });
                break;
            case TGSI_OPCODE_LG2:
                emit_inst(cd, &(struct etna_inst) {
                        .opcode = INST_OPCODE_LOG,
                        .sat = sat,
                        .dst = convert_dst(cd, &inst->Dst[0]),
                        .src[2] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY),
                        });
                break;
            case TGSI_OPCODE_ABS: /* XXX can be propagated into uses of destination operand */
                emit_inst(cd, &(struct etna_inst) {
                        .opcode = INST_OPCODE_MOV,
                        .sat = sat,
                        .dst = convert_dst(cd, &inst->Dst[0]),
                        .src[2] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY),
                        .src[2].abs = 1
                        });
                break;
            case TGSI_OPCODE_COS: /* fall through */
            case TGSI_OPCODE_SIN:
                if(cd->specs->has_sin_cos_sqrt)
                {
                    /* add divide by PI/2, re-using the dest register; this works even
                     * in the src=dst case because the second instruction only uses
                     * the output of the first. */
                    emit_inst(cd, &(struct etna_inst) {
                            .opcode = INST_OPCODE_MUL,
                            .sat = 0,
                            .dst = convert_dst(cd, &inst->Dst[0]),
                            .src[0] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY), /* any swizzling happens here */
                            .src[1] = alloc_imm_f32(cd, 2.0f/M_PI),
                            });
                    emit_inst(cd, &(struct etna_inst) {
                            .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS ? INST_OPCODE_COS : INST_OPCODE_SIN,
                            .sat = sat,
                            .dst = convert_dst(cd, &inst->Dst[0]),
                            .src[2] = convert_dst_to_src(cd, &inst->Dst[0]),
                            });
                } else {
                    /* XXX fall back to Taylor series if not HAS_SQRT_TRIG,
                     * see i915_fragprog.c for a good example.
                     */
                    assert(0);
                }
                break;
            case TGSI_OPCODE_DDX:
            case TGSI_OPCODE_DDY:
                emit_inst(cd, &(struct etna_inst) {
                        .opcode = inst->Instruction.Opcode == TGSI_OPCODE_DDX ? INST_OPCODE_DSX : INST_OPCODE_DSY,
                        .sat = sat,
                        .dst = convert_dst(cd, &inst->Dst[0]),
                        .src[0] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY),
                        .src[2] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY),
                        });
                break;
            case TGSI_OPCODE_KILL_IF: /* discard if (src.x < 0 || src.y < 0 || src.z < 0 || src.w < 0) */
                emit_inst(cd, &(struct etna_inst) {
                        .opcode = INST_OPCODE_TEXKILL,
                        .cond = INST_CONDITION_LZ,
                        .src[0] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY)
                        });
                break;
            case TGSI_OPCODE_KILL: /* discard always */
                emit_inst(cd, &(struct etna_inst) {
                        .opcode = INST_OPCODE_TEXKILL,
                        .cond = INST_CONDITION_TRUE
                        });
                break;
            case TGSI_OPCODE_TEX:
                emit_inst(cd, &(struct etna_inst) {
                        .opcode = INST_OPCODE_TEXLD,
                        .sat = (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE),
                        .dst = convert_dst(cd, &inst->Dst[0]),
                        .tex = convert_tex(cd, &inst->Src[1], &inst->Texture),
                        .src[0] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY),
                        });
                break;
            case TGSI_OPCODE_TXP: { /* divide src.xyz by src.w */
                struct etna_native_reg temp = etna_compile_get_inner_temp(cd);
                emit_inst(cd, &(struct etna_inst) {
                        .opcode = INST_OPCODE_RCP,
                        .sat = 0,
                        .dst.use = 1,
                        .dst.comps = INST_COMPS_W, /* tmp.w */
                        .dst.reg = temp.id,
                        .src[2] = convert_src(cd, &inst->Src[0], INST_SWIZ_BROADCAST(3)),
                        });
                emit_inst(cd, &(struct etna_inst) {
                        .opcode = INST_OPCODE_MUL,
                        .sat = 0,
                        .dst.use = 1,
                        .dst.comps = INST_COMPS_X | INST_COMPS_Y | INST_COMPS_Z, /* tmp.xyz */
                        .dst.reg = temp.id,
                        .src[0].use = 1, /* tmp.wwww */
                        .src[0].swiz = INST_SWIZ_BROADCAST(3),
                        .src[0].neg = 0,
                        .src[0].abs = 0,
                        .src[0].rgroup = temp.rgroup,
                        .src[0].reg = temp.id,
                        .src[1] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY), /* src.xyzw */
                        });
                emit_inst(cd, &(struct etna_inst) {
                        .opcode = INST_OPCODE_TEXLD,
                        .sat = (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE),
                        .dst = convert_dst(cd, &inst->Dst[0]),
                        .tex = convert_tex(cd, &inst->Src[1], &inst->Texture),
                        .src[0].use = 1, /* tmp.xyzw */
                        .src[0].swiz = INST_SWIZ_IDENTITY,
                        .src[0].neg = 0,
                        .src[0].abs = 0,
                        .src[0].rgroup = temp.rgroup,
                        .src[0].reg = temp.id,
                        });
                } break;
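            /* Recap of the TXP expansion above as a worked example:
             *   tmp.w   = 1.0 / src.w          (RCP)
             *   tmp.xyz = src.xyz * tmp.www    (MUL)
             *   dst     = texld(sampler, tmp)  (TEXLD)
             * i.e. the coordinate is divided by its own w before sampling.
             */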
            case TGSI_OPCODE_CMP: /* componentwise dst = (src0 < 0) ? src1 : src2 */
                emit_inst(cd, &(struct etna_inst) {
                        .opcode = INST_OPCODE_SELECT,
                        .cond = INST_CONDITION_LZ,
                        .sat = sat,
                        .dst = convert_dst(cd, &inst->Dst[0]),
                        .src[0] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY),
                        .src[1] = convert_src(cd, &inst->Src[1], INST_SWIZ_IDENTITY),
                        .src[2] = convert_src(cd, &inst->Src[2], INST_SWIZ_IDENTITY),
                        });
                break;
            case TGSI_OPCODE_IF: {
                struct etna_compile_frame *f = &cd->frame_stack[cd->frame_sp++]; /* push IF to stack */
                f->type = ETNA_COMPILE_FRAME_IF;
                /* create "else" label */
                f->lbl_else = alloc_new_label(cd);
                f->lbl_endif = NULL;
                /* mark position in instruction stream of label reference so that it
                 * can be filled in in the next pass */
                label_mark_use(cd, f->lbl_else);
                /* create conditional branch to label if src0 EQ 0 */
                emit_inst(cd, &(struct etna_inst) {
                        .opcode = INST_OPCODE_BRANCH,
                        .cond = INST_CONDITION_EQ,
                        .src[0] = convert_src(cd, &inst->Src[0], INST_SWIZ_IDENTITY),
                        .src[1] = alloc_imm_f32(cd, 0.0f),
                        /* imm is filled in later */
                        });
                } break;
            case TGSI_OPCODE_ELSE: {
                assert(cd->frame_sp>0);
                struct etna_compile_frame *f = &cd->frame_stack[cd->frame_sp-1];
                assert(f->type == ETNA_COMPILE_FRAME_IF);
                /* create "endif" label, and branch to endif label */
                f->lbl_endif = alloc_new_label(cd);
                label_mark_use(cd, f->lbl_endif);
                emit_inst(cd, &(struct etna_inst) {
                        .opcode = INST_OPCODE_BRANCH,
                        .cond = INST_CONDITION_TRUE,
                        /* imm is filled in later */
                        });
                /* mark "else" label at this position in instruction stream */
                label_place(cd, f->lbl_else);
                } break;
            case TGSI_OPCODE_ENDIF: {
                assert(cd->frame_sp>0);
                struct etna_compile_frame *f = &cd->frame_stack[--cd->frame_sp];
                assert(f->type == ETNA_COMPILE_FRAME_IF);
                /* assign "endif" or "else" (if no ELSE) label to current position in
                 * instruction stream, pop IF */
                if(f->lbl_endif != NULL)
                    label_place(cd, f->lbl_endif);
                else
                    label_place(cd, f->lbl_else);
                } break;
            case TGSI_OPCODE_NOP:
                break;
            case TGSI_OPCODE_END: /* Nothing to do */
                break;
            case TGSI_OPCODE_PK2H:
            case TGSI_OPCODE_PK2US:
            case TGSI_OPCODE_PK4B:
            case TGSI_OPCODE_PK4UB:
            case TGSI_OPCODE_RFL:
            case TGSI_OPCODE_RCC:
            case TGSI_OPCODE_DPH: /* src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w */
            case TGSI_OPCODE_POW: /* lowered by mesa to ex2(y*lg2(x)) */
            case TGSI_OPCODE_XPD:
            case TGSI_OPCODE_ROUND:
            case TGSI_OPCODE_CLAMP:
            case TGSI_OPCODE_DP2A:
            case TGSI_OPCODE_LRP: /* lowered by mesa to (op2 * (1.0f - op0)) + (op1 * op0) */
            case TGSI_OPCODE_CND:
            case TGSI_OPCODE_SFL: /* SET to 0 */
            case TGSI_OPCODE_DST: /* XXX INST_OPCODE_DST */
            case TGSI_OPCODE_DP2: /* Either MUL+MAD or DP3 with a zeroed channel, but we don't have a 'zero' swizzle */
            case TGSI_OPCODE_EXP:
            case TGSI_OPCODE_LOG:
            case TGSI_OPCODE_TXB: /* XXX INST_OPCODE_TEXLDB */
            case TGSI_OPCODE_TXL: /* XXX INST_OPCODE_TEXLDL */
            case TGSI_OPCODE_UP2H:
            case TGSI_OPCODE_UP2US:
            case TGSI_OPCODE_UP4B:
            case TGSI_OPCODE_UP4UB:
            case TGSI_OPCODE_X2D:
            case TGSI_OPCODE_ARL: /* floor */
            case TGSI_OPCODE_ARR: /* round */
            case TGSI_OPCODE_ARA: /* to be removed according to doc */
            case TGSI_OPCODE_BRA: /* to be removed according to doc */
            case TGSI_OPCODE_CAL: /* XXX INST_OPCODE_CALL */
            case TGSI_OPCODE_RET: /* XXX INST_OPCODE_RET */
            case TGSI_OPCODE_BRK: /* break from loop */
            case TGSI_OPCODE_BGNLOOP:
            case TGSI_OPCODE_ENDLOOP:
            case TGSI_OPCODE_BGNSUB:
            case TGSI_OPCODE_ENDSUB:
            default:
                BUG("Unhandled instruction %s", tgsi_get_opcode_name(inst->Instruction.Opcode));
                assert(0);
            }
            inst_idx += 1;
            break;
        }
    }
    tgsi_parse_free(&ctx);
}
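/* Sketch of the control-flow lowering performed above: a TGSI IF/ELSE/ENDIF
 * sequence becomes
 *   BRANCH.EQ src0, 0.0  -> lbl_else     (from TGSI_OPCODE_IF)
 *     ...then block...
 *   BRANCH.TRUE          -> lbl_endif    (from TGSI_OPCODE_ELSE)
 *   lbl_else:
 *     ...else block...
 *   lbl_endif:
 * Branch targets are patched afterwards by etna_compile_fill_in_labels().
 */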
/* Look up register by semantic */
static struct etna_reg_desc *find_decl_by_semantic(struct etna_compile_data *cd, uint file, uint name, uint index)
{
    for(int idx=0; idx<cd->file_size[file]; ++idx)
    {
        struct etna_reg_desc *reg = &cd->file[file][idx];
        if(reg->semantic.Name == name && reg->semantic.Index == index)
        {
            return reg;
        }
    }
    return NULL; /* not found */
}

/** Add an ADD and a MUL instruction to compute z = (z + w) / 2, bringing Z/W
 * from the -1..1 range to the 0..1 range, when:
 * - this is a vertex shader
 * - and this is an older GPU
 */
static void etna_compile_add_z_div_if_needed(struct etna_compile_data *cd)
{
    if(cd->processor == TGSI_PROCESSOR_VERTEX && cd->specs->vs_need_z_div)
    {
        /* find position output */
        struct etna_reg_desc *pos_reg = find_decl_by_semantic(cd, TGSI_FILE_OUTPUT, TGSI_SEMANTIC_POSITION, 0);
        if(pos_reg != NULL)
        {
            /*
             * ADD tX.__z_, tX.zzzz, void, tX.wwww
             * MUL tX.__z_, tX.zzzz, 0.5, void
             */
            emit_inst(cd, &(struct etna_inst) {
                    .opcode = INST_OPCODE_ADD,
                    .dst.use = 1,
                    .dst.reg = pos_reg->native.id,
                    .dst.comps = INST_COMPS_Z,
                    .src[0].use = 1,
                    .src[0].reg = pos_reg->native.id,
                    .src[0].swiz = INST_SWIZ_BROADCAST(INST_SWIZ_COMP_Z),
                    .src[2].use = 1,
                    .src[2].reg = pos_reg->native.id,
                    .src[2].swiz = INST_SWIZ_BROADCAST(INST_SWIZ_COMP_W),
                    });
            emit_inst(cd, &(struct etna_inst) {
                    .opcode = INST_OPCODE_MUL,
                    .dst.use = 1,
                    .dst.reg = pos_reg->native.id,
                    .dst.comps = INST_COMPS_Z,
                    .src[0].use = 1,
                    .src[0].reg = pos_reg->native.id,
                    .src[0].swiz = INST_SWIZ_BROADCAST(INST_SWIZ_COMP_Z),
                    .src[1] = alloc_imm_f32(cd, 0.5f),
                    });
        }
    }
}

/** add a NOP to the shader if
 * a) the shader is empty, or
 * b) there is a label at the end of the shader
 */
static void etna_compile_add_nop_if_needed(struct etna_compile_data *cd)
{
    bool label_at_last_inst = false;
    for(int idx=0; idx<cd->num_labels; ++idx)
    {
        if(cd->labels[idx].inst_idx == (cd->inst_ptr-1))
        {
            label_at_last_inst = true;
        }
    }
    if(cd->inst_ptr == 0 || label_at_last_inst)
    {
        emit_inst(cd, &(struct etna_inst) { .opcode = INST_OPCODE_NOP });
    }
}

/* Allocate CONST and IMM to native ETNA_RGROUP_UNIFORM(x).
 * CONST must be consecutive, as const buffers are supposed to be consecutive, and
 * come before IMM; this is more convenient because it is possible for the
 * compilation process itself to generate extra immediates for constants such as
 * pi, one, zero.
 */
static void assign_constants_and_immediates(struct etna_compile_data *cd)
{
    for(int idx=0; idx<cd->file_size[TGSI_FILE_CONSTANT]; ++idx)
    {
        cd->file[TGSI_FILE_CONSTANT][idx].native.valid = 1;
        cd->file[TGSI_FILE_CONSTANT][idx].native.rgroup = INST_RGROUP_UNIFORM_0;
        cd->file[TGSI_FILE_CONSTANT][idx].native.id = idx;
    }
    /* immediates start after the constants */
    cd->imm_base = cd->file_size[TGSI_FILE_CONSTANT] * 4;
    for(int idx=0; idx<cd->file_size[TGSI_FILE_IMMEDIATE]; ++idx)
    {
        cd->file[TGSI_FILE_IMMEDIATE][idx].native.valid = 1;
        cd->file[TGSI_FILE_IMMEDIATE][idx].native.rgroup = INST_RGROUP_UNIFORM_0;
        cd->file[TGSI_FILE_IMMEDIATE][idx].native.id = cd->imm_base/4 + idx;
    }
    DBG_F(ETNA_DBG_COMPILER_MSGS, "imm base: %i size: %i", cd->imm_base, cd->imm_size);
}

/* Assign declared samplers to native texture units */
static void assign_texture_units(struct etna_compile_data *cd)
{
    uint tex_base = 0;
    if(cd->processor == TGSI_PROCESSOR_VERTEX)
    {
        tex_base = cd->specs->vertex_sampler_offset;
    }
    for(int idx=0; idx<cd->file_size[TGSI_FILE_SAMPLER]; ++idx)
    {
        cd->file[TGSI_FILE_SAMPLER][idx].native.valid = 1;
        cd->file[TGSI_FILE_SAMPLER][idx].native.is_tex = 1; // overrides rgroup
        cd->file[TGSI_FILE_SAMPLER][idx].native.id = tex_base + idx;
    }
}
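/* Resulting uniform bank layout (hypothetical numbers): with 3 declared CONST
 * vec4s, imm_base == 12, so constants occupy uniform registers 0..2 and
 * immediates start at register 3. An immediate at 32-bit index 5 then lives at
 * uniform register (12+5)/4 == 4, component (12+5)&3 == 1, i.e. u4.y.
 */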
/* Additional pass to fill in branch targets. This pass should be last,
 * as no instruction reordering, removal or addition can be done anymore
 * once the branch targets are computed.
 */
static void etna_compile_fill_in_labels(struct etna_compile_data *cd)
{
    for(int idx=0; idx<cd->inst_ptr; ++idx)
    {
        if(cd->lbl_usage[idx])
        {
            etna_assemble_set_imm(&cd->code[idx * 4], cd->lbl_usage[idx]->inst_idx);
        }
    }
}

/* compare two etna_native_reg structures, return true if equal */
static bool cmp_etna_native_reg(const struct etna_native_reg to, const struct etna_native_reg from)
{
    return to.valid == from.valid && to.is_tex == from.is_tex &&
           to.rgroup == from.rgroup && to.id == from.id;
}

/* go through all declarations and swap native registers *to* and *from* */
static void swap_native_registers(struct etna_compile_data *cd, const struct etna_native_reg to, const struct etna_native_reg from)
{
    if(cmp_etna_native_reg(from, to))
        return; /* Nothing to do */
    for(int idx=0; idx<cd->total_decls; ++idx)
    {
        if(cmp_etna_native_reg(cd->decl[idx].native, from))
        {
            cd->decl[idx].native = to;
        } else if(cmp_etna_native_reg(cd->decl[idx].native, to)) {
            cd->decl[idx].native = from;
        }
    }
}

/* For PS we need to permute so that inputs are always in temporary 0..N-1.
 * Semantic POS is always t0. If that semantic is not used, avoid t0.
 */
static void permute_ps_inputs(struct etna_compile_data *cd)
{
    /* Special inputs:
     * gl_FragCoord   VARYING_SLOT_POS   TGSI_SEMANTIC_POSITION
     * gl_PointCoord  VARYING_SLOT_PNTC  TGSI_SEMANTIC_PCOORD
     */
    uint native_idx = 1;
    for(int idx=0; idx<cd->file_size[TGSI_FILE_INPUT]; ++idx)
    {
        struct etna_reg_desc *reg = &cd->file[TGSI_FILE_INPUT][idx];
        uint input_id;
        assert(reg->has_semantic);
        if(!reg->active || reg->semantic.Name == TGSI_SEMANTIC_POSITION)
            continue;
        input_id = native_idx++;
        swap_native_registers(cd, (struct etna_native_reg) {
                .valid = 1,
                .rgroup = INST_RGROUP_TEMP,
                .id = input_id
            }, cd->file[TGSI_FILE_INPUT][idx].native);
    }
    cd->num_varyings = native_idx-1;
    if(native_idx > cd->next_free_native)
        cd->next_free_native = native_idx;
}

/* fill in ps inputs into shader object */
static void fill_in_ps_inputs(struct etna_shader_object *sobj, struct etna_compile_data *cd)
{
    sobj->num_inputs = cd->num_varyings;
    assert(sobj->num_inputs < ETNA_NUM_INPUTS);
    for(int idx=0; idx<cd->file_size[TGSI_FILE_INPUT]; ++idx)
    {
        struct etna_reg_desc *reg = &cd->file[TGSI_FILE_INPUT][idx];
        if(reg->native.id > 0)
        {
            int input_id = reg->native.id - 1;
            sobj->inputs[input_id].reg = reg->native.id;
            sobj->inputs[input_id].semantic = reg->semantic;
            if(reg->semantic.Name == TGSI_SEMANTIC_COLOR) /* colors affected by flat shading */
                sobj->inputs[input_id].pa_attributes = 0x200;
            else /* texture coord or other bypasses flat shading */
                sobj->inputs[input_id].pa_attributes = 0x2f1;
            /* convert usage mask to number of components (*=wildcard)
             * .r    (0..1)  -> 1 component
             * .*g   (2..3)  -> 2 components
             * .**b  (4..7)  -> 3 components
             * .***a (8..15) -> 4 components
             */
            sobj->inputs[input_id].num_components = util_last_bit(reg->usage_mask);
        }
    }
    sobj->input_count_unk8 = 31; /* XXX what is this */
}
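/* Example of the usage-mask conversion above (hypothetical masks): a varying
 * read only as .xy has usage_mask 0b0011 and util_last_bit(0b0011) == 2, so two
 * components are reserved for it; a mask of 0b1111 (.xyzw) yields four.
 */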
/* fill in output mapping for ps into shader object */
static void fill_in_ps_outputs(struct etna_shader_object *sobj, struct etna_compile_data *cd)
{
    sobj->num_outputs = 0;
    for(int idx=0; idx<cd->file_size[TGSI_FILE_OUTPUT]; ++idx)
    {
        struct etna_reg_desc *reg = &cd->file[TGSI_FILE_OUTPUT][idx];
        switch(reg->semantic.Name)
        {
        case TGSI_SEMANTIC_COLOR: /* FRAG_RESULT_COLOR */
            sobj->ps_color_out_reg = reg->native.id;
            break;
        case TGSI_SEMANTIC_POSITION: /* FRAG_RESULT_DEPTH */
            sobj->ps_depth_out_reg = reg->native.id; /* always native reg 0, only z component should be assigned */
            break;
        default:
            assert(0); /* only outputs supported are COLOR and POSITION at the moment */
        }
    }
}

/* fill in inputs for vs into shader object */
static void fill_in_vs_inputs(struct etna_shader_object *sobj, struct etna_compile_data *cd)
{
    sobj->num_inputs = 0;
    for(int idx=0; idx<cd->file_size[TGSI_FILE_INPUT]; ++idx)
    {
        struct etna_reg_desc *reg = &cd->file[TGSI_FILE_INPUT][idx];
        assert(sobj->num_inputs < ETNA_NUM_INPUTS);
        /* XXX exclude inputs with special semantics such as gl_frontFacing */
        sobj->inputs[sobj->num_inputs].reg = reg->native.id;
        sobj->inputs[sobj->num_inputs].semantic = reg->semantic;
        sobj->inputs[sobj->num_inputs].num_components = util_last_bit(reg->usage_mask);
        sobj->num_inputs++;
    }
    sobj->input_count_unk8 = (sobj->num_inputs + 19)/16; /* XXX what is this */
}

/* build two-level output index [Semantic][Index] for fast linking */
static void build_output_index(struct etna_shader_object *sobj)
{
    int total = 0;
    int offset = 0;
    for(int name=0; name<TGSI_SEMANTIC_COUNT; ++name)
    {
        total += sobj->output_count_per_semantic[name];
    }
    sobj->output_per_semantic_list = CALLOC(total, sizeof(struct etna_shader_inout *));
    for(int name=0; name<TGSI_SEMANTIC_COUNT; ++name)
    {
        sobj->output_per_semantic[name] = &sobj->output_per_semantic_list[offset];
        offset += sobj->output_count_per_semantic[name];
    }
    for(int idx=0; idx<sobj->num_outputs; ++idx)
    {
        sobj->output_per_semantic[sobj->outputs[idx].semantic.Name]
                                 [sobj->outputs[idx].semantic.Index] = &sobj->outputs[idx];
    }
}
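/* Shape of the index built above (sketch): output_per_semantic[name] points
 * into one flat CALLOC'ed array sized from output_count_per_semantic, so a
 * linker lookup is two array indexations, e.g.
 *   struct etna_shader_inout *out =
 *       sobj->output_per_semantic[TGSI_SEMANTIC_TEXCOORD][1];
 * finds the output declared as TEXCOORD[1]; unfilled slots remain NULL.
 */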
/* fill in outputs for vs into shader object */
static void fill_in_vs_outputs(struct etna_shader_object *sobj, struct etna_compile_data *cd)
{
    sobj->num_outputs = 0;
    for(int idx=0; idx<cd->file_size[TGSI_FILE_OUTPUT]; ++idx)
    {
        struct etna_reg_desc *reg = &cd->file[TGSI_FILE_OUTPUT][idx];
        assert(sobj->num_inputs < ETNA_NUM_INPUTS);
        switch(reg->semantic.Name)
        {
        case TGSI_SEMANTIC_POSITION:
            sobj->vs_pos_out_reg = reg->native.id;
            break;
        case TGSI_SEMANTIC_PSIZE:
            sobj->vs_pointsize_out_reg = reg->native.id;
            break;
        default:
            sobj->outputs[sobj->num_outputs].reg = reg->native.id;
            sobj->outputs[sobj->num_outputs].semantic = reg->semantic;
            sobj->outputs[sobj->num_outputs].num_components = 4; // XXX reg->num_components;
            sobj->num_outputs++;
            sobj->output_count_per_semantic[reg->semantic.Name] = MAX2(
                    reg->semantic.Index + 1,
                    sobj->output_count_per_semantic[reg->semantic.Name]);
        }
    }
    /* build two-level index for linking */
    build_output_index(sobj);

    /* fill in "mystery meat" load balancing value. This value determines how work
     * is scheduled between VS and PS in the unified shader architecture. More
     * precisely, it is determined from the number of VS outputs, as well as
     * chip-specific vertex output buffer size, vertex cache size, and the number
     * of shader cores.
     *
     * XXX this is a conservative estimate; the "optimal" value is only known for
     * sure at link time because some outputs may be unused and thus unmapped.
     * Then again, in the general use case with GLSL the vertex and fragment
     * shaders are linked already before submitting to Gallium, thus all outputs
     * are used.
     */
    int half_out = (cd->file_size[TGSI_FILE_OUTPUT] + 1) / 2;
    assert(half_out);
    uint32_t b = ((20480/(cd->specs->vertex_output_buffer_size-2*half_out*cd->specs->vertex_cache_size))+9)/10;
    uint32_t a = (b+256/(cd->specs->shader_core_count*half_out))/2;
    sobj->vs_load_balancing = VIVS_VS_LOAD_BALANCING_A(MIN2(a,255)) |
                              VIVS_VS_LOAD_BALANCING_B(MIN2(b,255)) |
                              VIVS_VS_LOAD_BALANCING_C(0x3f) |
                              VIVS_VS_LOAD_BALANCING_D(0x0f);
}
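/* Worked example of the load-balancing formula above (hypothetical specs):
 * with 4 TGSI outputs (half_out = 2), vertex_output_buffer_size = 512,
 * vertex_cache_size = 16 and shader_core_count = 2:
 *   b = ((20480 / (512 - 2*2*16)) + 9) / 10 = (45 + 9) / 10 = 5
 *   a = (5 + 256 / (2*2)) / 2 = (5 + 64) / 2 = 34
 * resulting in VIVS_VS_LOAD_BALANCING_A(34) | VIVS_VS_LOAD_BALANCING_B(5).
 */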
static bool etna_compile_check_limits(struct etna_compile_data *cd)
{
    int max_uniforms = (cd->processor == TGSI_PROCESSOR_VERTEX) ?
                        cd->specs->max_vs_uniforms :
                        cd->specs->max_ps_uniforms;
    /* round up number of uniforms, including immediates, in units of four */
    int num_uniforms = cd->imm_base/4 + (cd->imm_size+3)/4;
    if(cd->inst_ptr > cd->specs->max_instructions)
    {
        DBG("Number of instructions (%d) exceeds maximum %d", cd->inst_ptr, cd->specs->max_instructions);
        return false;
    }
    if(cd->next_free_native > cd->specs->max_registers)
    {
        DBG("Number of registers (%d) exceeds maximum %d", cd->next_free_native, cd->specs->max_registers);
        return false;
    }
    if(num_uniforms > max_uniforms)
    {
        DBG("Number of uniforms (%d) exceeds maximum %d", num_uniforms, max_uniforms);
        return false;
    }
    if(cd->num_varyings > cd->specs->max_varyings)
    {
        DBG("Number of varyings (%d) exceeds maximum %d", cd->num_varyings, cd->specs->max_varyings);
        return false;
    }
    return true;
}

int etna_compile_shader_object(const struct etna_pipe_specs *specs, const struct tgsi_token *tokens,
        struct etna_shader_object **out)
{
    /* Create scratch space that may be too large to fit on stack.
     * XXX don't forget to free this on all exit paths.
     */
    struct etna_compile_data *cd = CALLOC_STRUCT(etna_compile_data);
    cd->specs = specs;

    /* Build a map from gallium register to native registers for files
     * CONST, SAMP, IMM, OUT, IN, TEMP.
     * SAMP will map as-is for fragment shaders; there will be a +8 offset for
     * vertex shaders.
     */
    /* Pass one -- check register file declarations and immediates */
    etna_compile_parse_declarations(cd, tokens);
    etna_allocate_decls(cd);

    /* Pass two -- check usage of temporaries, inputs, outputs */
    etna_compile_pass_check_usage(cd, tokens);
    assign_special_inputs(cd);

    /* Assign native temp register to TEMPs */
    assign_temporaries_to_native(cd, cd->file[TGSI_FILE_TEMPORARY], cd->file_size[TGSI_FILE_TEMPORARY]);

    /* optimize outputs */
    etna_compile_pass_optimize_outputs(cd, tokens);

    /* XXX assign special inputs: gl_FrontFacing (VARYING_SLOT_FACE)
     * this is part of RGROUP_INTERNAL
     */

    /* assign inputs: last usage of input should be <= first usage of temp */
    /* potential optimization case:
     * if a single MOV TEMP[y], IN[x] exists before which temp y is not used, and
     * after which IN[x] is not read, temp[y] can be used as input register as-is
     */
    /* sort temporaries by first use
     * sort inputs by last usage
     * iterate over inputs, temporaries
     *   if last usage of input <= first usage of temp:
     *     assign input to temp
     *     advance input, temporary pointer
     *   else
     *     advance temporary pointer
     *
     * potential problem: an instruction with multiple inputs of which one is the
     * temp and the other is the input; however, as the temp is not used before
     * this, how would this make sense? uninitialized temporaries have an
     * undefined value, so this would be ok
     */
    assign_inouts_to_temporaries(cd, TGSI_FILE_INPUT);

    /* assign outputs: first usage of output should be >= last usage of temp */
    /* potential optimization case:
     * if a single MOV OUT[x], TEMP[y] exists (with full write mask, or at least
     * writing all components that are used in the shader) after which temp y is
     * no longer used, temp[y] can be used as output register as-is
     *
     * potential problem: an instruction with multiple outputs of which one is the
     * temp and the other is the output; however, as the temp is not used after
     * this, how would this make sense? could just discard the output value
     */
    /* sort temporaries by last use
     * sort outputs by first usage
     * iterate over outputs, temporaries
     *   if first usage of output >= last usage of temp:
     *     assign output to temp
     *     advance output, temporary pointer
     *   else
     *     advance temporary pointer
     */
    assign_inouts_to_temporaries(cd, TGSI_FILE_OUTPUT);

    assign_constants_and_immediates(cd);
    assign_texture_units(cd);

    /* list declarations */
    for(int x=0; x<cd->total_decls; ++x)
    {
        DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i last_use=%i native=%i usage_mask=%x has_semantic=%i",
                x, tgsi_file_name(cd->decl[x].file), cd->decl[x].idx,
                cd->decl[x].active,
                cd->decl[x].first_use, cd->decl[x].last_use,
                cd->decl[x].native.valid ? cd->decl[x].native.id : -1,
                cd->decl[x].usage_mask,
                cd->decl[x].has_semantic);
        if(cd->decl[x].has_semantic)
            DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i",
                    tgsi_semantic_names[cd->decl[x].semantic.Name], cd->decl[x].semantic.Index);
    }

    /* XXX for PS we need to permute so that inputs are always in temporary 0..N-1.
     * There is no "switchboard" for varyings (AFAIK!). The output color, however,
     * can be routed from an arbitrary temporary.
     */
    if(cd->processor == TGSI_PROCESSOR_FRAGMENT)
    {
        permute_ps_inputs(cd);
    }

    /* list declarations */
    for(int x=0; x<cd->total_decls; ++x)
    {
        DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i last_use=%i native=%i usage_mask=%x has_semantic=%i",
                x, tgsi_file_name(cd->decl[x].file), cd->decl[x].idx,
                cd->decl[x].active,
                cd->decl[x].first_use, cd->decl[x].last_use,
                cd->decl[x].native.valid ? cd->decl[x].native.id : -1,
                cd->decl[x].usage_mask,
                cd->decl[x].has_semantic);
        if(cd->decl[x].has_semantic)
            DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i",
                    tgsi_semantic_names[cd->decl[x].semantic.Name], cd->decl[x].semantic.Index);
    }

    /* pass 3: generate instructions */
    etna_compile_pass_generate_code(cd, tokens);
    etna_compile_add_z_div_if_needed(cd);
    etna_compile_add_nop_if_needed(cd);
    etna_compile_fill_in_labels(cd);

    if(!etna_compile_check_limits(cd))
    {
        FREE(cd);
        *out = NULL;
        return -1;
    }

    /* fill in output structure */
    struct etna_shader_object *sobj = CALLOC_STRUCT(etna_shader_object);
    sobj->processor = cd->processor;
    sobj->code_size = cd->inst_ptr * 4;
    sobj->code = mem_dup(cd->code, cd->inst_ptr * 16);
    sobj->num_temps = cd->next_free_native;
    sobj->const_base = 0;
    sobj->const_size = cd->imm_base;
    sobj->imm_base = cd->imm_base;
    sobj->imm_size = cd->imm_size;
    sobj->imm_data = mem_dup(cd->imm_data, cd->imm_size * 4);
    sobj->vs_pos_out_reg = -1;
    sobj->vs_pointsize_out_reg = -1;
    sobj->ps_color_out_reg = -1;
    sobj->ps_depth_out_reg = -1;
    if(cd->processor == TGSI_PROCESSOR_VERTEX)
    {
        fill_in_vs_inputs(sobj, cd);
        fill_in_vs_outputs(sobj, cd);
    } else if(cd->processor == TGSI_PROCESSOR_FRAGMENT) {
        fill_in_ps_inputs(sobj, cd);
        fill_in_ps_outputs(sobj, cd);
    }
    *out = sobj;
    FREE(cd);
    return 0;
}

extern const char *tgsi_swizzle_names[];
void etna_dump_shader_object(const struct etna_shader_object *sobj)
{
    if(sobj->processor == TGSI_PROCESSOR_VERTEX)
    {
        printf("VERT\n");
    } else {
        printf("FRAG\n");
    }
    for(int x=0; x<sobj->code_size/4; ++x)
    {
        printf("| %08x %08x %08x %08x\n", sobj->code[x*4+0], sobj->code[x*4+1], sobj->code[x*4+2], sobj->code[x*4+3]);
    }
    printf("num temps: %i\n", sobj->num_temps);
    printf("num const: %i\n", sobj->const_size);
    printf("immediates:\n");
    for(int idx=0; idx<sobj->imm_size; ++idx)
    {
        printf(" [%i].%s = %f (0x%08x)\n", (idx+sobj->imm_base)/4, tgsi_swizzle_names[idx%4],
                *((float*)&sobj->imm_data[idx]), sobj->imm_data[idx]);
    }
    printf("inputs:\n");
    for(int idx=0; idx<sobj->num_inputs; ++idx)
    {
        printf(" [%i] name=%s index=%i pa=%08x comps=%i\n", sobj->inputs[idx].reg,
                tgsi_semantic_names[sobj->inputs[idx].semantic.Name], sobj->inputs[idx].semantic.Index,
                sobj->inputs[idx].pa_attributes, sobj->inputs[idx].num_components);
    }
    printf("outputs:\n");
    for(int idx=0; idx<sobj->num_outputs; ++idx)
    {
        printf(" [%i] name=%s index=%i pa=%08x comps=%i\n", sobj->outputs[idx].reg,
                tgsi_semantic_names[sobj->outputs[idx].semantic.Name], sobj->outputs[idx].semantic.Index,
                sobj->outputs[idx].pa_attributes, sobj->outputs[idx].num_components);
    }
    printf("special:\n");
    if(sobj->processor == TGSI_PROCESSOR_VERTEX)
    {
        printf("  vs_pos_out_reg=%i\n", sobj->vs_pos_out_reg);
        printf("  vs_pointsize_out_reg=%i\n", sobj->vs_pointsize_out_reg);
        printf("  vs_load_balancing=0x%08x\n", sobj->vs_load_balancing);
    } else {
        printf("  ps_color_out_reg=%i\n", sobj->ps_color_out_reg);
        printf("  ps_depth_out_reg=%i\n", sobj->ps_depth_out_reg);
    }
    printf("  input_count_unk8=0x%08x\n", sobj->input_count_unk8);
}
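/* Typical usage of the API in this file (sketch; error handling elided):
 *
 *   struct etna_shader_object *sobj = NULL;
 *   if(etna_compile_shader_object(specs, tokens, &sobj) == 0)
 *   {
 *       etna_dump_shader_object(sobj);
 *       ...upload sobj->code and sobj->imm_data to the GPU...
 *       etna_destroy_shader_object(sobj);
 *   }
 */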
void etna_destroy_shader_object(struct etna_shader_object *sobj)
{
    if(sobj != NULL)
    {
        FREE(sobj->code);
        FREE(sobj->imm_data);
        FREE(sobj->output_per_semantic_list);
        FREE(sobj);
    }
}

int etna_link_shader_objects(struct etna_shader_link_info *info,
        const struct etna_shader_object *vs, const struct etna_shader_object *fs)
{
    /* For each fs input we need to find the associated vs output, which can be
     * found by matching on semantic name and index. The two-level index built in
     * build_output_index (output_per_semantic) makes this a direct lookup.
     */
    assert(fs->num_inputs < ETNA_NUM_INPUTS);
    for(int idx=0; idx<fs->num_inputs; ++idx)
    {
        struct tgsi_declaration_semantic semantic = fs->inputs[idx].semantic;
        if(semantic.Name == TGSI_SEMANTIC_PCOORD)
        {
            info->varyings_vs_reg[idx] = 0; /* replaced by point coord -- doesn't matter */
            continue;
        }
        struct etna_shader_inout *match = NULL;
        if(semantic.Index < vs->output_count_per_semantic[semantic.Name])
        {
            match = vs->output_per_semantic[semantic.Name][semantic.Index];
        }
        if(match == NULL)
            return 1; /* not found -- link error */
        info->varyings_vs_reg[idx] = match->reg;
    }
    return 0;
}
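/* Linking sketch (hypothetical shaders): an FS input declared as TEXCOORD[0] is
 * resolved via vs->output_per_semantic[TGSI_SEMANTIC_TEXCOORD][0]; on a match,
 * varyings_vs_reg[idx] records the VS temporary feeding that varying. PCOORD
 * inputs are skipped because they are replaced by the point coordinate rather
 * than sourced from a VS output.
 */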