From 693c32daedd583e6cb6457cd922ce35b5f5f6237 Mon Sep 17 00:00:00 2001
From: Pavel Pisa <pisa@cmp.felk.cvut.cz>
Date: Fri, 11 Mar 2022 23:05:45 +0100
Subject: [PATCH] qtrvsim/apo-sort: RISC-V version of the task to tune cache
 and program together.

Signed-off-by: Pavel Pisa <pisa@cmp.felk.cvut.cz>
---
 seminaries/qtrvsim/apo-sort/.gitignore        |   8 ++
 seminaries/qtrvsim/apo-sort/Makefile          | 114 ++++++++++++++++++
 .../qtrvsim/apo-sort/apo-sort-template.S      |  58 ++++++++++
 seminaries/qtrvsim/apo-sort/array_data.in     |  15 +++
 .../qtrvsim/apo-sort/d-cache-template.par     |   1 +
 seminaries/qtrvsim/buble-sort/Makefile        |   2 +-
 .../qtrvsim/buble-sort/bubble-sort-template.S |   2 +-
 7 files changed, 198 insertions(+), 2 deletions(-)
 create mode 100644 seminaries/qtrvsim/apo-sort/.gitignore
 create mode 100644 seminaries/qtrvsim/apo-sort/Makefile
 create mode 100644 seminaries/qtrvsim/apo-sort/apo-sort-template.S
 create mode 100644 seminaries/qtrvsim/apo-sort/array_data.in
 create mode 100644 seminaries/qtrvsim/apo-sort/d-cache-template.par

diff --git a/seminaries/qtrvsim/apo-sort/.gitignore b/seminaries/qtrvsim/apo-sort/.gitignore
new file mode 100644
index 0000000..a32b5ef
--- /dev/null
+++ b/seminaries/qtrvsim/apo-sort/.gitignore
@@ -0,0 +1,8 @@
+*.o
+depend
+apo-sort
+array_data.out
+array_size.in
+array_data.ref
+
+
diff --git a/seminaries/qtrvsim/apo-sort/Makefile b/seminaries/qtrvsim/apo-sort/Makefile
new file mode 100644
index 0000000..996107d
--- /dev/null
+++ b/seminaries/qtrvsim/apo-sort/Makefile
@@ -0,0 +1,114 @@
+ARCH=riscv64-unknown-elf
+
+SOURCES = apo-sort.S
+TARGET_EXE = apo-sort
+
+CC=$(ARCH)-gcc
+CXX=$(ARCH)-g++
+AS=$(ARCH)-as
+LD=$(ARCH)-ld
+OBJCOPY=$(ARCH)-objcopy
+
+ARCHFLAGS += -mabi=ilp32
+ARCHFLAGS += -march=rv32i
+ARCHFLAGS += -fno-lto
+
+CFLAGS  += -ggdb -Os -Wall
+CXXFLAGS+= -ggdb -Os -Wall
+AFLAGS  += -ggdb
+LDFLAGS += -ggdb
+LDFLAGS += -nostartfiles
+LDFLAGS += -nostdlib
+LDFLAGS += -static
+#LDFLAGS += -specs=/opt/musl/riscv64-linux-gnu/lib/musl-gcc.specs
+
+CFLAGS  += $(ARCHFLAGS)
+CXXFLAGS+= $(ARCHFLAGS)
+AFLAGS  += $(ARCHFLAGS)
+LDFLAGS += $(ARCHFLAGS)
+
+OBJECTS += $(filter %.o,$(SOURCES:%.S=%.o))
+OBJECTS += $(filter %.o,$(SOURCES:%.c=%.o))
+OBJECTS += $(filter %.o,$(SOURCES:%.cpp=%.o))
+
+all : default
+
+.PHONY : default clean dep all run_test
+
+%.o:%.S
+	$(CC) -D__ASSEMBLY__ $(AFLAGS) -c $< -o $@
+
+%.o:%.c
+	$(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@
+
+%.o:%.cpp
+	$(CXX) $(CXXFLAGS) $(CPPFLAGS) -c $< -o $@
+
+%.s:%.c
+	$(CC) $(CFLAGS) $(CPPFLAGS) -S $< -o $@
+
+#default : $(TARGET_EXE)
+default : run_test
+
+$(TARGET_EXE) : $(OBJECTS)
+	$(CC) $(LDFLAGS) $^ -o $@
+
+dep: depend
+
+depend: $(SOURCES) $(wildcard *.h)
+	echo '# autogenerated dependencies' > depend
+ifneq ($(filter %.S,$(SOURCES)),)
+	$(CC) -D__ASSEMBLY__ $(AFLAGS) -w -E -M $(filter %.S,$(SOURCES)) \
+	  >> depend
+endif
+ifneq ($(filter %.c,$(SOURCES)),)
+	$(CC) $(CFLAGS) $(CPPFLAGS) -w -E -M $(filter %.c,$(SOURCES)) \
+	  >> depend
+endif
+ifneq ($(filter %.cpp,$(SOURCES)),)
+	$(CXX) $(CXXFLAGS) $(CPPFLAGS) -w -E -M $(filter %.cpp,$(SOURCES)) \
+	  >> depend
+endif
+
+clean:
+	rm -f *.o *.a $(OBJECTS) $(TARGET_EXE) depend array_data.out array_data.ref array_size.in
+
+#riscv64-unknown-elf-objdump --source apo-sort
+
+ARRAY_DATA_IN_FILE=array_data.in
+ARRAY_SIZE:=$(words $(shell cat $(ARRAY_DATA_IN_FILE)))
+# make has no arithmetic: the word count of four 1..ARRAY_SIZE sequences is 4*ARRAY_SIZE
+ARRAY_BYTES:=$(words $(shell seq 1 $(ARRAY_SIZE)) $(shell seq 1 $(ARRAY_SIZE)) $(shell seq 1 $(ARRAY_SIZE)) $(shell seq 1 $(ARRAY_SIZE)))
+
+# Validate d-cache.par unless only cleanup or dependency goals were requested,
+# so that "make clean" works even when d-cache.par is missing or invalid
+ifneq ($(filter-out clean dep depend,$(or $(MAKECMDGOALS),all)),)
+# Keep only lines starting with <policy>,<#sets>,<#words in block>,<#ways>,<write method>
+D_CACHE_PAR:=$(shell sed -n -e 's/^[a-z]*,[0-9]*,[0-9]*,[0-9]*,[a-z]*/&/p' d-cache.par)
+ifeq ($(D_CACHE_PAR),)
+$(error cache parameters cannot be parsed from d-cache.par)
+endif
+# sets * words in block * ways must not exceed 16; the check expects bc to print 1
+D_CACHE_SIZE_OK:=$(shell echo $(D_CACHE_PAR) | \
+  sed -n -e 's/^[a-z]*,\([0-9]*\),\([0-9]*\),\([0-9]*\),[a-z]*/\1*\2*\3<=16/p' | bc)
+ifneq ($(D_CACHE_SIZE_OK),1)
+$(error cache parameters probably request too many words $(D_CACHE_PAR))
+endif
+endif
+
+
+run_test: $(TARGET_EXE) $(ARRAY_DATA_IN_FILE)
+	echo $(ARRAY_SIZE) >array_size.in
+	sort <$(ARRAY_DATA_IN_FILE) >array_data.ref
+	qtrvsim_cli --dump-cycles $< \
+	 --dump-cache-stats \
+	 --load-range array_size,array_size.in \
+	 --load-range array_start,$(ARRAY_DATA_IN_FILE) \
+	 --dump-range array_start,$(ARRAY_BYTES),array_data.out \
+	 --d-cache "$(D_CACHE_PAR)" \
+	 --read-time 10 \
+	 --write-time 10 \
+	 --burst-time 2
+	diff -u -B -b array_data.out array_data.ref
+
+-include depend
diff --git a/seminaries/qtrvsim/apo-sort/apo-sort-template.S b/seminaries/qtrvsim/apo-sort/apo-sort-template.S
new file mode 100644
index 0000000..eed2974
--- /dev/null
+++ b/seminaries/qtrvsim/apo-sort/apo-sort-template.S
@@ -0,0 +1,58 @@
+// apo-sort.S file template, rename and implement the algorithm
+// Test algorithm in qtrvsim_gui program
+// Select the CPU core configuration with delay-slot
+// This setup requires (for simplicity) one NOP instruction after
+// each branch and jump instruction (more during lecture about pipelining)
+// The code will be compiled and tested by external riscv64-unknown-elf-gcc
+// compiler by teachers, you can try make in addition, but testing
+// by internal assembler should be enough
+
+// copy directory with the project to your repository to
+// the directory work/apo-sort
+// critical is location of the file work/apo-sort/apo-sort.S
+// and cache parameters work/apo-sort/d-cache.par
+// which is checked by the scripts
+
+// The file d-cache.par specifies D cache parameters in the form
+//   <policy>,<#sets>,<#words in block>,<#ways>,<write method>
+// The example is
+//   lru,1,1,1,wb
+// The cache size is limited to 16 words maximum.
+
+// Directives to make interesting windows visible
+#pragma qtrvsim show registers
+#pragma qtrvsim show memory
+
+.option norelax
+
+.globl    array_size
+.globl    array_start
+
+.text
+.globl _start
+
+_start:
+
+	la   a0, array_start
+	la   a1, array_size
+	lw   a1, 0(a1) // number of elements in the array
+
+//Insert your code here
+
+//Final infinite loop
+end_loop:
+	fence           // flush cache memory
+	ebreak          // stop the simulator
+	j     end_loop
+
+
+.data
+// .align    2 // not supported by qtrvsim yet
+
+array_size:
+.word	15
+array_start:
+.word	5, 3, 4, 1, 15, 8, 9, 2, 10, 6, 11, 1, 6, 9, 12
+
+// Specify location to show in memory window
+#pragma qtrvsim focus memory array_size
diff --git a/seminaries/qtrvsim/apo-sort/array_data.in b/seminaries/qtrvsim/apo-sort/array_data.in
new file mode 100644
index 0000000..d8b2010
--- /dev/null
+++ b/seminaries/qtrvsim/apo-sort/array_data.in
@@ -0,0 +1,15 @@
+0x00000022
+0x00000055
+0x00000060
+0x12345678
+0x12345676
+0x00000012
+0x00000008
+0x000000ac
+0x33333333
+0x02000010
+0x00008382
+0x12375310
+0x00012340
+0x00020202
+0x00028288
diff --git a/seminaries/qtrvsim/apo-sort/d-cache-template.par b/seminaries/qtrvsim/apo-sort/d-cache-template.par
new file mode 100644
index 0000000..89c0fd2
--- /dev/null
+++ b/seminaries/qtrvsim/apo-sort/d-cache-template.par
@@ -0,0 +1 @@
+lru,1,1,1,wb
diff --git a/seminaries/qtrvsim/buble-sort/Makefile b/seminaries/qtrvsim/buble-sort/Makefile
index 17d987c..50337ba 100644
--- a/seminaries/qtrvsim/buble-sort/Makefile
+++ b/seminaries/qtrvsim/buble-sort/Makefile
@@ -79,7 +79,7 @@ ARRAY_DATA_IN_FILE=array_data.in
 ARRAY_SIZE:=$(words $(shell cat $(ARRAY_DATA_IN_FILE)))
 ARRAY_BYTES:=$(words $(shell seq 1 $(ARRAY_SIZE)) $(shell seq 1 $(ARRAY_SIZE)) $(shell seq 1 $(ARRAY_SIZE)) $(shell seq 1 $(ARRAY_SIZE)))
 
-run_test: $(TARGET_EXE)
+run_test: $(TARGET_EXE) $(ARRAY_DATA_IN_FILE)
 	echo $(ARRAY_SIZE) >array_size.in
 	sort <array_data.in >array_data.ref
 	qtrvsim_cli --dump-cycles $< \
diff --git a/seminaries/qtrvsim/buble-sort/bubble-sort-template.S b/seminaries/qtrvsim/buble-sort/bubble-sort-template.S
index b0a9e3d..1e71ed5 100644
--- a/seminaries/qtrvsim/buble-sort/bubble-sort-template.S
+++ b/seminaries/qtrvsim/buble-sort/bubble-sort-template.S
@@ -3,7 +3,7 @@
 // Select the CPU core configuration with delay-slot
 // This setups requires (for simplicity) one NOP instruction after
 // each branch and jump instruction (more during lecture about pipelining)
-// The code will be compiled and tested by external mips-elf-gcc
+// The code will be compiled and tested by external riscv64-unknown-elf-gcc
 // compiler by teachers, you can try make in addition, but testing
 // by internal assembler should be enough
 
-- 
GitLab