Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/ppcvletoolchain #338

Open
wants to merge 14 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions .github/workflows/test-all.yml
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,12 @@ jobs:
- name: Build base image
# Always rebuild the base image when the scheduled workflow runs
if: inputs.flush_cache || steps.cache-image.outputs.cache-hit != 'true' || github.event_name == 'schedule'
env:
NXP_EMAIL: ${{ secrets.NXP_EMAIL }}
NXP_PASSWORD: ${{ secrets.NXP_PASSWORD }}
run: |
python3 -m pip install PyYAML
DOCKER_BUILDKIT=1 python3 build_image.py --config ofrak-core-dev.yml --base
DOCKER_BUILDKIT=1 python3 build_image.py --config ofrak-core-dev.yml --base --nxp-email "${NXP_EMAIL}" --nxp-password "${NXP_PASSWORD}"
- name: Export base image
if: inputs.flush_cache || steps.cache-image.outputs.cache-hit != 'true' || github.event_name == 'schedule'
run: |
Expand Down Expand Up @@ -113,14 +116,19 @@ jobs:
| docker load
docker images
- name: Build Ghidra image
env:
NXP_EMAIL: ${{ secrets.NXP_EMAIL }}
NXP_PASSWORD: ${{ secrets.NXP_PASSWORD }}
run: |
python3 -m pip install PyYAML
DOCKER_BUILDKIT=1 \
python3 build_image.py \
--config ofrak-ghidra.yml \
--base \
--finish \
--cache-from redballoonsecurity/ofrak/core-dev-base:latest
--cache-from redballoonsecurity/ofrak/core-dev-base:latest \
--nxp-email "${NXP_EMAIL}" \
--nxp-password "${NXP_PASSWORD}"
- name: Test documentation
run: |
docker run \
Expand Down
29 changes: 29 additions & 0 deletions build_image.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import tempfile
from dataclasses import dataclass
from enum import Enum
from typing import List, Optional
Expand Down Expand Up @@ -35,6 +36,8 @@ class OfrakImageConfig:
install_target: InstallTarget
cache_from: List[str]
entrypoint: Optional[str]
nxp_email: Optional[str]
nxp_password: Optional[str]

def validate_serial_txt_existence(self):
"""
Expand Down Expand Up @@ -81,11 +84,27 @@ def main():
for cache in config.cache_from:
cache_args.append("--cache-from")
cache_args.append(cache)
nxp_args = []
email_file = password_file = None
if config.nxp_email and config.nxp_password:
email_file = tempfile.NamedTemporaryFile(suffix=".txt", mode="w+")
email_file.write(config.nxp_email)
email_file.flush()
password_file = tempfile.NamedTemporaryFile(suffix=".txt", mode="w+")
password_file.write(config.nxp_password)
password_file.flush()
nxp_args = [
"--secret",
f"id=nxp_email,src={email_file.name}",
"--secret",
f"id=nxp_password,src={password_file.name}",
]
base_command = [
"docker",
"build",
"--build-arg",
"BUILDKIT_INLINE_CACHE=1",
*nxp_args,
"--cache-from",
f"{full_base_image_name}:master",
*cache_args,
Expand All @@ -108,6 +127,9 @@ def main():
print(f"Error running command: '{' '.join(error.cmd)}'")
print(f"Exit status: {error.returncode}")
sys.exit(error.returncode)
if email_file and password_file:
email_file.close()
password_file.close()

if config.build_finish:
full_image_name = "/".join((config.registry, config.image_name))
Expand Down Expand Up @@ -146,7 +168,12 @@ def parse_args() -> OfrakImageConfig:
default=InstallTarget.DEVELOP.value,
)
parser.add_argument("--cache-from", action="append")
parser.add_argument("--nxp-email")
parser.add_argument("--nxp-password")
args = parser.parse_args()
if (not not args.nxp_email) ^ (not not args.nxp_password):
raise RuntimeError("Must include the NXP email and password!")

with open(args.config) as file_handle:
config_dict = yaml.safe_load(file_handle)
image_config = OfrakImageConfig(
Expand All @@ -161,6 +188,8 @@ def parse_args() -> OfrakImageConfig:
InstallTarget(args.target),
args.cache_from,
config_dict.get("entrypoint"),
args.nxp_email,
args.nxp_password,
)
image_config.validate_serial_txt_existence()
return image_config
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from typing import Tuple, Dict, Union, List, Iterable

from ofrak.core.architecture import ProgramAttributes
from ofrak_type.architecture import InstructionSet, InstructionSetMode
from ofrak_type.architecture import InstructionSet, InstructionSetMode, SubInstructionSet
from ofrak.core.basic_block import BasicBlockUnpacker, BasicBlock
from ofrak.core.instruction import Instruction
from ofrak.resource import ResourceFactory, Resource
Expand Down Expand Up @@ -174,6 +174,18 @@ def _asm_fixups(
operand = re.sub(r"a([0-7])", r"%A\1", operand)
operand = re.sub(r"d([0-7])[bw]?", r"%D\1", operand)
operands += operand
elif program_attrs.sub_isa is SubInstructionSet.PPCVLE:
# in Ghidra, offsets from a register like in `se_stw r0,0x9(r1)` are expressed in words.
# so in this example r0 is stored at r1+0x9*4=r1+0x24
# But it is more natural to express it in bytes, to get the instruction `se_stw r0,0x24(r1)`
# (this is also the convention used by the VLE assembler)

mnemonic = base_mnemonic
operands = re.sub(
r"(.*, )(0x[0-9]+)(\(r[0-9]+\))",
lambda match: match.group(1) + f"0x{int(match.group(2), 0)*4:x}" + match.group(3),
operands,
)
else:
mnemonic = base_mnemonic
return mnemonic, operands
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from typing import Optional, List

from ofrak import ResourceFilter
from ofrak.core import CodeRegion
from ofrak.core import CodeRegion, ProgramAttributes, Elf, ElfUnpacker
from ofrak.component.analyzer import Analyzer
from ofrak.component.modifier import Modifier
from ofrak.model.component_model import ComponentConfig
Expand All @@ -33,6 +33,7 @@
OfrakGhidraMixin,
GhidraComponentException,
)
from ofrak_type import InstructionSet, SubInstructionSet

LOGGER = logging.getLogger(__name__)

Expand Down Expand Up @@ -106,7 +107,16 @@ async def analyze(

ghidra_project = f"{GHIDRA_REPOSITORY_HOST}:{GHIDRA_REPOSITORY_PORT}/ofrak"

program_name = await self._do_ghidra_import(ghidra_project, full_fname)
program_attributes = None
if resource.has_tag(Elf):
await resource.run(ElfUnpacker)
program_attributes = await resource.analyze(ProgramAttributes)
else:
logging.warning(
f"Could not get ProgramAttributes for resource {resource.get_id()} because it doesn't have the Elf tag."
)

program_name = await self._do_ghidra_import(ghidra_project, full_fname, program_attributes)
await self._do_ghidra_analyze_and_serve(
ghidra_project, program_name, skip_analysis=config is not None
)
Expand All @@ -116,7 +126,7 @@ async def analyze(

return GhidraProject(ghidra_project, f"{GHIDRA_SERVER_HOST}:{GHIDRA_SERVER_PORT}")

async def _do_ghidra_import(self, ghidra_project: str, full_fname: str):
async def _do_ghidra_import(self, ghidra_project: str, full_fname: str, program_attributes):
args = [
ghidra_project,
"-connect",
Expand All @@ -127,6 +137,14 @@ async def _do_ghidra_import(self, ghidra_project: str, full_fname: str):
"-overwrite",
]

if (
program_attributes is not None
and program_attributes.isa == InstructionSet.PPC
and program_attributes.sub_isa == SubInstructionSet.PPCVLE
):
args.extend(["-scriptPath", "'" + (";".join(self._script_directories)) + "'"])
args.extend(["-preScript", "PreAnalyzePPCVLE.java"])

cmd_str = " ".join([GHIDRA_HEADLESS_EXEC] + args)
LOGGER.debug(f"Running command: {cmd_str}")
ghidra_proc = await asyncio.create_subprocess_exec(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import ghidra.app.script.GhidraScript;
import ghidra.program.model.mem.*;
import ghidra.program.model.lang.*;
import ghidra.program.model.pcode.*;
import ghidra.program.model.util.*;
import ghidra.program.model.reloc.*;
import ghidra.program.model.data.*;
import ghidra.program.model.block.*;
import ghidra.program.model.symbol.*;
import ghidra.program.model.scalar.*;
import ghidra.program.model.listing.*;
import ghidra.program.model.address.*;

import java.math.BigInteger;

public class PreAnalyzePPCVLE extends GhidraScript {

@Override
public void run() throws Exception {
try {
// Set the language to PPC VLE
Language language = (Language) getLanguage(new LanguageID("PowerPC:BE:64:VLE-32addr"));
Program p = currentProgram;
p.setLanguage(language, language.getDefaultCompilerSpec().getCompilerSpecID(), false, monitor);
ProgramContext programContext = p.getProgramContext();
// Set the vle bit (Ghidra has a "vle" register for that, but on real devices, the VLE
EdwardLarson marked this conversation as resolved.
Show resolved Hide resolved
// bit is defined per memory page, as a page attribute bit) so that instructions are
// decoded correctly.
for (Register register : programContext.getContextRegisters()) {
if (register.getName().equals("vle")){
RegisterValue newValue = new RegisterValue(programContext.getBaseContextRegister());
BigInteger value = BigInteger.ONE;
newValue = setRegisterValue(newValue, register, value);
programContext.setDefaultDisassemblyContext(newValue);
println("Set the vle bit.");
}
}
} catch(Exception e) {
println(e.toString());
e.printStackTrace(System.out);
throw e;
}
}

private RegisterValue setRegisterValue(RegisterValue registerValue, Register register,
BigInteger value) {
RegisterValue newValue = new RegisterValue(register, value);
return registerValue.combineValues(newValue);
}

}
3 changes: 2 additions & 1 deletion ofrak_core/ofrak/core/elf/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,7 @@ class ElfProgramAttributesAnalyzer(Analyzer[None, ProgramAttributes]):
async def analyze(
self, resource: Resource, config: Optional[ComponentConfig] = None
) -> ProgramAttributes:
elf_resource = await resource.view_as(Elf)
elf_header = await resource.get_only_descendant_as_view(
ElfHeader, r_filter=ResourceFilter.with_tags(ElfHeader)
)
Expand All @@ -379,7 +380,7 @@ async def analyze(

return ProgramAttributes(
elf_header.get_isa(),
None,
await elf_resource.get_sub_isa(),
elf_basic_header.get_bitwidth(),
elf_basic_header.get_endianness(),
None,
Expand Down
25 changes: 24 additions & 1 deletion ofrak_core/ofrak/core/elf/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from ofrak.model.viewable_tag_model import AttributesType

from ofrak_type.architecture import InstructionSet
from ofrak_type.architecture import InstructionSet, SubInstructionSet
from ofrak.core.program import Program
from ofrak.core.program_section import NamedProgramSection, ProgramSegment
from ofrak.model.resource_model import index
Expand Down Expand Up @@ -863,5 +863,28 @@ async def get_program_header_by_index(self, index: int) -> ElfProgramHeader:
),
)

# Determine the sub-instruction set of this ELF, if one can be detected.
# Only PowerPC ELFs are inspected: the ELF is reported as PPC VLE as soon as one program
# header carries the VLE flag or, failing that, one section header does. In every other
# case the method returns None.
async def get_sub_isa(self) -> Optional[SubInstructionSet]:
elf_header = await self.get_header()
isa = elf_header.get_isa()
if isa == InstructionSet.PPC:
# We can detect whether the elf is PPC VLE by looking at the section header flags, as described here: https://reverseengineering.stackexchange.com/questions/20863/powerpc-elf32-detecting-vle
# PF_PPC_VLE is tested against program header p_flags, SHF_PPC_VLE against section header sh_flags.
PF_PPC_VLE = 0x10000000
SHF_PPC_VLE = 0x10000000
ppc_vle = False
program_headers = await self.get_program_headers()
for program_header in program_headers:
if program_header.p_flags & PF_PPC_VLE != 0:
ppc_vle = True
break
# Section headers are only consulted when no program header had the flag.
if not ppc_vle:
section_headers = await self.get_section_headers()
for section_header in section_headers:
if section_header.sh_flags & SHF_PPC_VLE != 0:
ppc_vle = True
break
if ppc_vle:
return SubInstructionSet.PPCVLE
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not 100% sold on this representation of VLE. We already have it as a "mode" similar to Thumb. Similar to Thumb, the whole binary is not necessarily VLE. More info here: https://www.nxp.com/docs/en/engineering-bulletin/EB687.pdf

The VLE extension may be used globally within an application, or applied only to specific sections of the application. The e200z0 core is an exception, it supports only the VLE instruction set.

We could consider it as both a "mode" and a sub-isa because it sort of acts like both. But maybe we should only consider the binary to have the sub-isa of "PPCVLE" if ALL of the (executable) sections/segments are VLE. Otherwise, it's normal PPC, with some areas that are PPC VLE.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I chose to use a SubInstructionSet for several reasons:

  • I've never seen an actual target running both VLE and normal PPC instructions. Even though, indeed, the mode can be set per page of memory (https://www.nxp.com/docs/en/supporting-information/VLEPIM.pdf):

    This alternate encoding set is selected on an instruction page basis. A single page attribute bit selects between standard PowerPC Book E instruction encodings and the VLE instructions for the particular page of memory. This page attribute is an extension to the existing PowerPC Book E page attributes. Pages can be freely intermixed, allowing for a mixture of code with both types of encodings

  • tagging a whole executable as PPC VLE, we can force Ghidra to analyze everything as VLE instructions (it has a tendency to prefer decoding normal PPC instructions when it can)
  • even on a target where all instructions are VLE, not all executable sections and segments would have this VLE flag

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are there binaries with executable code in a section which is not marked with that VLE flag?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've only seen ELF files where all code sections are PPC VLE. But I think that in theory an ELF file could contain both PPC and PPC VLE code.

return None


MagicDescriptionIdentifier.register(Elf, lambda s: s.startswith("ELF "))
20 changes: 20 additions & 0 deletions ofrak_patch_maker/Dockerstub
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,23 @@ RUN apt-get update && apt-get install -y gcc-avr binutils-avr avr-libc
RUN if [ "$TARGETARCH" = "amd64" ]; then \
apt-get update && apt-get install -y gcc-10-powerpc-linux-gnu; \
fi;

#PPCVLE 4 NXP GCC Fork
# Only runs if the NXP email and password to log in and download the toolchain are passed to build_image.py via the CLI flags
ARG OFRAK_DIR=.
COPY $OFRAK_DIR/ofrak_patch_maker/download_ppcvle.py /tmp/
RUN --mount=type=secret,id=nxp_email,dst=/tmp/nxp_email.txt \
--mount=type=secret,id=nxp_password,dst=/tmp/nxp_password.txt \
test -f /tmp/nxp_email.txt && \
test -f /tmp/nxp_password.txt && \
python3 -m pip install playwright && \
playwright install --with-deps chromium && \
python3 /tmp/download_ppcvle.py "$(cat /tmp/nxp_email.txt)" "$(cat /tmp/nxp_password.txt)" && \
cd /tmp && \
unzip -q gcc-4.9.4-Ee200-eabivle-x86_64-linux-g2724867.zip && \
mv powerpc-eabivle-4_9 /opt/rbs/toolchain/ && \
rm gcc-4.9.4-Ee200-eabivle-x86_64-linux-g2724867.zip && \
dpkg --add-architecture i386 && \
apt-get update && \
apt-get install -y libc6:i386 libncurses5:i386 libstdc++6:i386 lib32z1 \
|| true
53 changes: 53 additions & 0 deletions ofrak_patch_maker/download_ppcvle.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import argparse
import os

from playwright.sync_api import sync_playwright


# Page that run() opens first; the toolchain's "Download" link is looked up on this page.
START_URL = "https://www.nxp.com/design/software/development-software/s32-design-studio-ide/s32-design-studio-for-power-architecture:S32DS-PA"


def run(page, email: str, password: str, outfile: str) -> None:
    """
    Drive the NXP website to download the Linux build of the NXP Embedded GCC toolchain.

    The page at ``START_URL`` is opened, the "Download" link of the matching list item is
    clicked, the sign-in form is filled in, the terms and conditions are accepted, and the
    resulting download is saved to ``outfile``.

    :param page: Playwright page used to browse the site
    :param email: value typed into the ``#username`` field
    :param password: value typed into the ``#password`` field
    :param outfile: path the downloaded file is saved to
    """
    print(f"Going to page {START_URL}", flush=True)
    page.goto(START_URL)
    download_link = (
        page.get_by_role("listitem")
        .filter(has_text="Build Tools NXP Embedded GCC for Power Architecture")
        .filter(has_text="Linux")
        .get_by_role("link", name="Download", exact=True)
    )
    download_link.click()

    print("Signing in", flush=True)
    page.locator("#username").click()
    page.keyboard.type(email)
    page.locator("#password").click()
    page.keyboard.type(password)
    page.get_by_role("button", name="SIGN IN").click()

    print("Accepting terms and conditions", flush=True)
    # Register the download listener *before* the click that leads to the download, so the
    # download event cannot fire before anything is listening for it.
    with page.expect_download() as download_info:
        page.get_by_role("button", name="I Accept").click()
        # Download begins when the page is loaded
        print("Waiting for download", flush=True)

    # save_as copies the finished download to the target path; unlike os.rename it does not
    # require the temporary download location and outfile to be on the same filesystem.
    download_info.value.save_as(outfile)

    print(f"Complete! Saved to {outfile}", flush=True)


def main(args):
    """
    Launch headless Chromium, run the download flow, and always release the browser.

    :param args: parsed command-line arguments providing ``email``, ``password`` and
        ``outfile``
    """
    with sync_playwright() as playwright:
        browser = playwright.chromium.launch(headless=True)
        try:
            context = browser.new_context()
            try:
                page = context.new_page()
                run(page, args.email, args.password, args.outfile)
            finally:
                # Close the context even when the download flow raises.
                context.close()
        finally:
            browser.close()


if __name__ == "__main__":
    # Command-line entry point: two positional credentials and an optional output path.
    cli = argparse.ArgumentParser()
    for positional_name in ("email", "password"):
        cli.add_argument(positional_name)
    cli.add_argument(
        "-o",
        "--outfile",
        default="/tmp/gcc-4.9.4-Ee200-eabivle-x86_64-linux-g2724867.zip",
    )
    parsed_args = cli.parse_args()
    main(parsed_args)
7 changes: 7 additions & 0 deletions ofrak_patch_maker/ofrak_patch_maker/toolchain.conf
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,17 @@ COMPILER = /usr/bin/powerpc-linux-gnu-gcc-10
LINKER = /usr/bin/powerpc-linux-gnu-ld
BIN_PARSER = /usr/bin/powerpc-linux-gnu-objdump

[GNU_PPCVLE_4]
PREPROCESSOR = /opt/rbs/toolchain/powerpc-eabivle-4_9/bin/powerpc-eabivle-gcc
COMPILER = /opt/rbs/toolchain/powerpc-eabivle-4_9/bin/powerpc-eabivle-gcc
LINKER = /opt/rbs/toolchain/powerpc-eabivle-4_9/bin/powerpc-eabivle-ld
BIN_PARSER = /opt/rbs/toolchain/powerpc-eabivle-4_9/bin/powerpc-eabivle-objdump

[ASM]
ARM_ASM_PATH = /opt/rbs/toolchain/gcc-arm-none-eabi-10-2020-q4-major/bin/arm-none-eabi-as
X86_64_ASM_PATH = /opt/rbs/toolchain/binutils-2.34/gas/as-new
M68K_ASM_PATH = /usr/bin/m68k-linux-gnu-as
AARCH64_ASM_PATH = /usr/bin/aarch64-linux-gnu-as
AVR_ASM_PATH = /usr/bin/avr-as
PPC_ASM_PATH = /usr/bin/powerpc-linux-gnu-as
PPCVLE_ASM_PATH = /opt/rbs/toolchain/powerpc-eabivle-4_9/bin/powerpc-eabivle-as
Loading