view leo-obj/tool/disasm.c @ 401:4b6b595ae0a0

compal/boot/code-deriv: new analysis
author Mychaela Falconia <falcon@freecalypso.org>
date Sat, 14 Jan 2023 23:59:23 +0000
parents 71e25510f5af
children
line wrap: on
line source

/*
 * Putting it all together: section-, symbol- and reloc-aware disassembly
 */

#include <sys/types.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include "intstruct.h"
#include "coffconst.h"
#include "globals.h"

/*
 * Helpers defined elsewhere.  In this file get_u16/get_u32 are called
 * with a pointer into the mapped object file and their results are
 * printed as 16- and 32-bit hex values.
 */
extern unsigned get_u16(), get_u32();
extern char *storage_class_to_string();

/*
 * When nonzero, disasm_reloc_target() tries to replace a target given
 * relative to a section with a named symbol in that section.
 * Cleared by the -s option of cmd_disasm().
 */
int auto_xlat_section_relocs = 1;
/* Counters bumped by the -g and -l options of cmd_disasm(), respectively */
int disasm_richsym, disasm_lineno;
/* Defined elsewhere; bumped by the -b option of cmd_disasm() */
extern int richsym_print_bitsize;

/*
 * Scan the sorted symbols of sec for an extern or static symbol that
 * describes the address in *addp better than a bare section offset.
 *
 * A symbol qualifies when its name starts with '_' and it sits exactly
 * at the address, or when its name starts with '$' and it sits at the
 * address or one byte below it.  Each qualifying symbol overwrites
 * *symp and *addp (the latter with the remaining offset), so the last
 * qualifying symbol in sort order wins.  If nothing qualifies, both
 * outputs are left untouched.
 */
static void
find_better_symbol(sec, symp, addp)
	struct internal_scnhdr *sec;
	struct internal_syment **symp;
	unsigned *addp;
{
	struct internal_syment *cand;
	unsigned target = *addp;
	unsigned idx, off;
	int match;

	for (idx = 0; idx < sec->nsymbols; idx++) {
		cand = sec->sorted_symbols[idx];
		/* the scan stops at the first symbol above the target */
		if (cand->value > target)
			break;
		if (cand->class == C_EXT || cand->class == C_STAT) {
			off = target - cand->value;
			if (cand->name[0] == '_')
				match = (off == 0);
			else if (cand->name[0] == '$')
				match = (off <= 1);
			else
				match = 0;
			if (match) {
				*symp = cand;
				*addp = off;
			}
		}
	}
}

/*
 * Print the target of relocation rel in "name", "name+N" or
 * "name+0xN" form on stdout (no trailing newline).
 *
 * addend is the value taken from the relocated location; when the reloc
 * refers to a symbol, that symbol's value is subtracted first.  If
 * auto_xlat_section_relocs is set and the reloc has no symbol, or its
 * symbol carries the same name as its own section, find_better_symbol()
 * gets a chance to substitute a named symbol.  With no symbol at all,
 * the name of sec is printed.
 */
void
disasm_reloc_target(sec, rel, addend)
	struct internal_scnhdr *sec;
	struct internal_reloc *rel;
	unsigned addend;
{
	struct internal_syment *target = rel->sym;

	if (target)
		addend -= target->value;

	if (auto_xlat_section_relocs) {
		if (!target)
			find_better_symbol(sec, &target, &addend);
		else if (target->section &&
			 !strcmp(target->name, target->section->name))
			find_better_symbol(target->section, &target, &addend);
	}

	if (target)
		fputs(target->name, stdout);
	else
		fputs(sec->name, stdout);

	if (!addend)
		return;
	/* small offsets are shown in decimal, larger ones in hex */
	if (addend < 10)
		printf("+%u", addend);
	else
		printf("+0x%x", addend);
}

/*
 * Emit one ".word" line for a 32-bit relocated location: the raw word
 * read from the section data at rel->location, an "R" marker, and the
 * symbolic target as printed by disasm_reloc_target().
 */
void
disasm_word32_reloc(sec, rel)
	struct internal_scnhdr *sec;
	struct internal_reloc *rel;
{
	unsigned raw = get_u32(filemap + sec->data_offset + rel->location);

	printf("%08x R\t.word\t", raw);
	/* the stored word doubles as the addend for the target */
	disasm_reloc_target(sec, rel, raw);
	fputc('\n', stdout);
}

/*
 * Print the output line(s) for one symbol reached during disassembly.
 *
 * C_FCN ".ef" and ".bf" symbols yield "End function" / "Begin function"
 * comment lines; with disasm_richsym set, ".bf" also invokes
 * richsym_function_locals().  Any other symbol is printed as a label
 * with a comment naming its storage class.  The labels $CODE16 and
 * $CODE32 set *statep to 1 and 0 respectively.
 *
 * *linebrkp records that a blank separator line has already been
 * printed; one is emitted here if it has not (never for ".ef").
 */
static void
handle_symbol(sym, statep, linebrkp)
	struct internal_syment *sym;
	int *statep, *linebrkp;
{
	char *kind_text;
	char *c_prefix = 0;	/* stays null unless class is extern/static */
	int is_fcn = (sym->class == C_FCN);

	if (is_fcn && !strcmp(sym->name, ".ef")) {
		printf("; End function\n");
		return;
	}
	if (!*linebrkp) {
		putchar('\n');
		*linebrkp = 1;
	}
	if (is_fcn && !strcmp(sym->name, ".bf")) {
		printf("; Begin function\n");
		if (disasm_richsym && richsym_function_locals(sym))
			putchar('\n');
		return;
	}

	if (sym->class == C_EXT) {
		kind_text = "Global";
		c_prefix = "; ";
	} else if (sym->class == C_STAT) {
		kind_text = "static";
		c_prefix = "; static ";
	} else if (sym->class == C_LABEL) {
		kind_text = "label";
		if (!strcmp(sym->name, "$CODE16"))
			*statep = 1;
		else if (!strcmp(sym->name, "$CODE32"))
			*statep = 0;
	} else
		kind_text = "unexpected class!";

	printf("%s:\t; %s\n", sym->name, kind_text);

	/* rich symbolic info: extern/static symbols with aux data only */
	if (!c_prefix || !disasm_richsym || !sym->aux)
		return;
	if (sym->name[0] == '_' || sym->name[0] == '$')
		richsym_print_in_c(c_prefix, sym, 0);
}

/*
 * Print len bytes of section data starting at offset pos as a quoted
 * ".asciz" directive followed by a newline.
 *
 * Backspace, tab, newline, carriage return, double quote and backslash
 * get their two-character escapes; other bytes in the range ' '..'~'
 * are printed as-is and everything else as a 3-digit octal escape.
 */
void
disasm_emit_asciz(sec, pos, len)
	struct internal_scnhdr *sec;
	unsigned pos, len;
{
	unsigned stop = pos + len;
	char *esc;
	int ch;

	fputs("\t.asciz\t\"", stdout);
	while (pos < stop) {
		ch = filemap[sec->data_offset + pos];
		pos++;
		switch (ch) {
		case '\b':
			esc = "\\b";
			break;
		case '\t':
			esc = "\\t";
			break;
		case '\n':
			esc = "\\n";
			break;
		case '\r':
			esc = "\\r";
			break;
		case '"':
			esc = "\\\"";
			break;
		case '\\':
			esc = "\\\\";
			break;
		default:
			esc = 0;
		}
		if (esc)
			fputs(esc, stdout);
		else if (ch < ' ' || ch > '~')
			printf("\\%03o", ch);
		else
			putchar(ch);
	}
	fputs("\"\n", stdout);
}

/*
 * Disassemble one code or data section to stdout.
 *
 * The section is walked from offset 0 up to sec->size.  At each position
 * the symbols, relocation, hint and line number records that apply are
 * consumed, and "headroom" is narrowed to the distance to the nearest
 * upcoming boundary (end of section, next symbol, next reloc, next hint
 * start, next line number record).  One item is then emitted -- a hinted
 * data item, a relocated word, a raw byte/halfword/word, or an ARM or
 * Thumb instruction -- and its size becomes the increment.  An item is
 * never allowed to be larger than the headroom.
 *
 * On any inconsistency a diagnostic is printed to stdout and the rest of
 * the section is abandoned; the line number array is released on every
 * exit path.
 */
void
disasm_codedata_section(sec)
	struct internal_scnhdr *sec;
{
	unsigned symnum, relnum;
	unsigned pos, incr, headroom;
	/*
	 * state: -1 = not yet known, 0 = ARM, 1 = Thumb; it is updated by
	 * handle_symbol().  gothint has to start out as 0: when the first
	 * hint begins at offset 0 it is incremented before it has ever
	 * been assigned.
	 */
	int state = -1, linebrk = 0, gothint = 0;
	struct internal_syment *sym;
	struct internal_reloc *rel;
	struct hint *hint = sec->hints;
	u_char *asciz_end;
	unsigned asciz_len;
	struct internal_lineno *lineno_arr;
	unsigned lineno_cur, lineno_total;

	if (sec->nreloc)
		get_relocs_of_sec(sec);
	if (disasm_lineno && sec->nlineent)
		get_lineno_array(sec, &lineno_arr, &lineno_total,
				 disasm_lineno >= 2);
	else {
		lineno_arr = 0;
		lineno_total = 0;
	}
	symnum = relnum = 0;
	lineno_cur = 0;
	for (pos = 0; pos < sec->size; pos += incr) {
		headroom = sec->size - pos;
		/* print every symbol located at or before pos */
		while (symnum < sec->nsymbols) {
			sym = sec->sorted_symbols[symnum];
			if (sym->value > pos) {
				if (sym->value - pos < headroom)
					headroom = sym->value - pos;
				break;
			}
			/* hit symbol */
			handle_symbol(sym, &state, &linebrk);
			symnum++;
		}
		/* is the next pending reloc located exactly here? */
		if (relnum < sec->nreloc) {
			rel = sec->int_relocs + relnum;
			if (rel->location == pos)
				relnum++;	/* it's ours */
			else {
				if (rel->location - pos < headroom)
					headroom = rel->location - pos;
				rel = 0;	/* no reloc for current pos */
			}
		} else
			rel = 0;
		/* gothint is nonzero while pos lies inside the current hint */
		if (hint) {
			if (pos >= hint->pos)
				gothint++;
			else {
				gothint = 0;
				if (hint->pos - pos < headroom)
					headroom = hint->pos - pos;
			}
		} else
			gothint = 0;
		if (gothint && pos == hint->pos && hint->linebrk)
			putchar('\n');
		/* print the line number records that have been reached */
		while (lineno_cur < lineno_total) {
			if (pos >= lineno_arr[lineno_cur].location) {
				if (disasm_lineno >= 2)
					printf("; line %u\n",
						lineno_arr[lineno_cur].lineno);
				else
					puts("; line");
				lineno_cur++;
			} else {
				if (lineno_arr[lineno_cur].location - pos <
				    headroom)
					headroom =
					  lineno_arr[lineno_cur].location - pos;
				break;
			}
		}
		printf("%8x:\t", pos);
		/* a typed hint overrides the normal decoding of this item */
		if (gothint && hint->type) {
			if (rel) {
				printf("error: hint/reloc conflict\n");
				goto out;
			}
			switch (hint->type) {
			case HINT_D8:
				printf("%02x\n",
					filemap[sec->data_offset + pos]);
				incr = 1;
				break;
			case HINT_D16:
				printf("%04x\n",
					get_u16(filemap+sec->data_offset+pos));
				incr = 2;
				break;
			case HINT_D32:
				printf("%08x\n",
					get_u32(filemap+sec->data_offset+pos));
				incr = 4;
				break;
			case HINT_ASCIZ:
				/* the terminating 0 must lie within headroom */
				asciz_end = memchr(filemap+sec->data_offset+pos,
							0, headroom);
				if (!asciz_end) {
					printf("bad asciz hint: no 0 found\n");
					goto out;
				}
				asciz_len = asciz_end - filemap -
						sec->data_offset - pos;
				disasm_emit_asciz(sec, pos, asciz_len);
				incr = asciz_len + 1;
				break;
			default:
				/* without this, incr would be left unset */
				printf("error: unknown hint type, aborting\n");
				goto out;
			}
			goto next;
		}
		if (rel) {
			if (rel->type == RTYPE_LONG) {
				if (pos & 3) {
			printf("MISALIGNED pos for word32 reloc, aborting\n");
					goto out;
				}
				disasm_word32_reloc(sec, rel);
				incr = 4;
				goto next;
			} else if (sec->disasm_mode == DISASM_MODE_DATA) {
		printf("error: reloc other than word32 in data section\n");
				goto out;
			}
		}
		/* odd position or a single byte left: emit one raw byte */
		if (pos & 1 || headroom < 2) {
			if (rel) {
				printf("error: reloc at byte pos, aborting\n");
				goto out;
			}
			printf("%02x\n", filemap[sec->data_offset + pos]);
			incr = 1;
			goto next;
		}
		if (sec->disasm_mode == DISASM_MODE_DATA) {
			/* a full word only if word-aligned and 4 bytes fit */
			if (pos & 2 || headroom < 4) {
				printf("%04x\n",
					get_u16(filemap+sec->data_offset+pos));
				incr = 2;
			} else {
				printf("%08x\n",
					get_u32(filemap+sec->data_offset+pos));
				incr = 4;
			}
			goto next;
		}
		switch (state) {
		case 0:		/* ARM */
			if (pos & 3) {
			   printf("MISALIGNED pos in CODE32 state, aborting\n");
				goto out;
			}
			if (rel) {
				if (rel->type != RTYPE_ARM_B) {
			printf("Wrong reloc type in CODE32 state, aborting\n");
					goto out;
				}
				arm_branch_reloc(sec, rel);
			} else
				arm_disasm_line(sec, pos);
			incr = 4;
			break;
		case 1:		/* Thumb */
			if (pos & 1) {
			   printf("MISALIGNED pos in CODE16 state, aborting\n");
				goto out;
			}
			if (rel) {
				if (rel->type != RTYPE_THUMB_BL) {
			printf("Wrong reloc type in CODE16 state, aborting\n");
					goto out;
				}
				thumb_bl_reloc(sec, rel);
				incr = 4;
			} else if (headroom >= 4 && thumb_check_bl(sec, pos))
				/*
				 * a nonzero return is treated as a 4-byte
				 * item; presumably thumb_check_bl() has
				 * printed it -- confirm against its definition
				 */
				incr = 4;
			else {
				thumb_disasm_line(sec, pos);
				incr = 2;
			}
			break;
		default:
			printf("UNKNOWN T state, aborting\n");
			goto out;
		}
next:		linebrk = 0;
		if (incr > headroom) {
			printf("error: increment %u > headroom %u, aborting\n",
				incr, headroom);
			goto out;
		}
		/* move on to the next hint once the current one is passed */
		if (hint && pos >= hint->endpos)
			hint = hint->next;
	}
	/* any symbols left over must sit exactly at the end of the section */
	while (symnum < sec->nsymbols) {
		sym = sec->sorted_symbols[symnum];
		if (sym->value != sec->size) {
			printf("error: expecting symbol at end of section\n");
			goto out;
		}
		handle_symbol(sym, &state, &linebrk);
		symnum++;
	}
	if (linebrk)
		printf("%8x:\t<end of section>\n", sec->size);
out:
	if (lineno_arr)
		free(lineno_arr);
}

/*
 * List the symbols of a bss section: one line per symbol giving its
 * value, storage class and name, followed by an end-of-section line
 * showing sec->size.
 *
 * With disasm_richsym set, extern and static symbols that have aux data
 * and a name starting with '_' are additionally passed to
 * richsym_print_in_c().
 */
void
disasm_bss(sec)
	struct internal_scnhdr *sec;
{
	struct internal_syment *cur;
	char classbuf[8];
	unsigned idx;

	putchar('\n');
	for (idx = 0; idx < sec->nsymbols; idx++) {
		cur = sec->sorted_symbols[idx];
		printf("%08X %-7s %s\n", cur->value,
			storage_class_to_string(cur->class, classbuf),
			cur->name);
		if (disasm_richsym && cur->aux && cur->name[0] == '_') {
			if (cur->class == C_EXT)
				richsym_print_in_c("; ", cur, 0);
			else if (cur->class == C_STAT)
				richsym_print_in_c("; static ", cur, 0);
		}
	}
	printf("%08X <end of section>\n", sec->size);
}

/*
 * Dispatch one section to the routine matching its disasm_mode: code
 * and data sections share disasm_codedata_section() (after a banner
 * line naming the kind), bss sections go to disasm_bss(), and anything
 * else is reported as unrecognized and skipped.
 */
void
disasm_sec_by_type(sec)
	struct internal_scnhdr *sec;
{
	char *banner;

	switch (sec->disasm_mode) {
	case DISASM_MODE_CODE:
		banner = "Disassembling code section:\n";
		break;
	case DISASM_MODE_DATA:
		banner = "Disassembling data section:\n";
		break;
	case DISASM_MODE_BSS:
		disasm_bss(sec);
		return;
	default:
		printf("Unrecognized section type, skipped\n");
		return;
	}
	/* only the code and data cases reach this point */
	fputs(banner, stdout);
	disasm_codedata_section(sec);
}

/*
 * Entry point of the disasm command.  Parses the options, prints the
 * file header info, loads the section and symbol tables (and the hints
 * file, if one was given), then disassembles every section in turn.
 *
 * Options:
 *	-b	increment richsym_print_bitsize
 *	-g	increment disasm_richsym (rich symbolic info output)
 *	-h file	read disassembly hints from file
 *	-l	increment disasm_lineno; given twice, the line numbers
 *		themselves are printed instead of a bare "; line" marker
 *	-s	clear auto_xlat_section_relocs
 *
 * This function does not return: it exits with status 0 when done, or
 * with status 1 on an invalid option.
 */
int
cmd_disasm(argc, argv)
	int argc;
	char **argv;
{
	extern int getopt();
	extern char *optarg;
	char *hintsfile = 0;
	struct internal_scnhdr *sec;
	unsigned secnum;
	int c;

	/* POSIX specifies -1, not EOF, as getopt's end-of-options value */
	while ((c = getopt(argc, argv, "bgh:ls")) != -1)
		switch (c) {
		case 'b':
			richsym_print_bitsize++;
			continue;
		case 'g':
			disasm_richsym++;
			continue;
		case 'h':
			hintsfile = optarg;
			continue;
		case 'l':
			disasm_lineno++;
			continue;
		case 's':
			auto_xlat_section_relocs = 0;
			continue;
		default:
			/* error msg already printed */
			exit(1);
		}

	printf("%s:\n", objfilename);
	dump_filehdr_info();
	putchar('\n');
	get_int_section_table();
	get_int_symbol_table();
	if (hintsfile)
		read_hints_file(hintsfile);
	extern_profile_report("Module");
	sort_symbols_of_all_sec();
	if (disasm_richsym)
		richsym_initial_preen();
	for (secnum = 0; secnum < nsections; secnum++) {
		sec = sections + secnum;
		printf("=== %s ===\n", sec->name);
		disasm_sec_by_type(sec);
		putchar('\n');
	}
	exit(0);
}