ab_contracts_tooling/
convert.rs

1//! Convert RISC-V ELF `cdylib` into Abundance contract file format
2
3use ab_aligned_buffer::SharedAlignedBuffer;
4use ab_contract_file::{CONTRACT_FILE_MAGIC, ContractFileHeader, ContractFileMethodMetadata};
5use ab_contracts_common::metadata::decode::MetadataDecoder;
6use ab_contracts_common::{HOST_CALL_FN, HOST_CALL_FN_IMPORT, METADATA_STATIC_NAME_PREFIX};
7use ab_io_type::trivial_type::TrivialType;
8use anyhow::Context;
9use object::elf::{
10    EF_RISCV_RVE, ELFCLASS64, ELFDATA2LSB, ELFMAG, ELFOSABI_GNU, EM_RISCV, ET_DYN, FileHeader64,
11    Ident, R_RISCV_JUMP_SLOT, SHN_LORESERVE, STB_GLOBAL, STV_DEFAULT,
12};
13use object::read::elf::{ElfFile, ElfFile64};
14use object::{
15    CompressedData, CompressionFormat, LittleEndian, Object, ObjectSection, ObjectSymbol,
16    ObjectSymbolTable, RelocationFlags, RelocationTarget, SymbolKind, SymbolSection, U16, U32, U64,
17};
18use std::collections::HashMap;
19use std::iter;
20use tracing::{debug, trace};
21
22fn is_correct_header(header: &FileHeader64<LittleEndian>) -> bool {
23    let expected_header = FileHeader64 {
24        e_ident: Ident {
25            magic: ELFMAG,
26            class: ELFCLASS64,
27            data: ELFDATA2LSB,
28            version: 1,
29            os_abi: ELFOSABI_GNU,
30            abi_version: 0,
31            padding: [0; _],
32        },
33        e_type: U16::new(LittleEndian, ET_DYN),
34        e_machine: U16::new(LittleEndian, EM_RISCV),
35        e_version: U32::new(LittleEndian, 1),
36        e_entry: U64::new(LittleEndian, 0),
37        e_phoff: header.e_phoff,
38        e_shoff: header.e_shoff,
39        e_flags: U32::new(LittleEndian, EF_RISCV_RVE),
40        e_ehsize: U16::new(LittleEndian, 64),
41        e_phentsize: header.e_phentsize,
42        e_phnum: header.e_phnum,
43        e_shentsize: header.e_shentsize,
44        e_shnum: header.e_shnum,
45        e_shstrndx: header.e_shstrndx,
46    };
47
48    // Should have been just `==`, but https://github.com/gimli-rs/object/issues/830
49    object::pod::bytes_of(header) == object::pod::bytes_of(&expected_header)
50}
51
52fn check_relocations(elf: &ElfFile<'_, FileHeader64<LittleEndian>>) -> anyhow::Result<()> {
53    let mut dynamic_relocations = elf.dynamic_relocations().into_iter().flatten();
54    let maybe_first_relocation = dynamic_relocations.next();
55
56    if dynamic_relocations.next().is_some() {
57        return Err(anyhow::anyhow!(
58            "Only a single PLT relocation for host function call import is allowed, make sure to \
59            build an optimized cdylib"
60        ));
61    }
62
63    let Some((address, relocation)) = maybe_first_relocation else {
64        return Ok(());
65    };
66
67    debug!(
68        %address,
69        ?relocation,
70        "Found a single relocation"
71    );
72
73    // TODO: There is no such relocation in `object` crate yet:
74    //  https://github.com/gimli-rs/object/issues/833
75    if relocation.flags()
76        != (RelocationFlags::Elf {
77            r_type: R_RISCV_JUMP_SLOT,
78        })
79    {
80        return Err(anyhow::anyhow!("Unexpected relocation: {relocation:?}"));
81    }
82
83    let RelocationTarget::Symbol(symbol_index) = relocation.target() else {
84        return Err(anyhow::anyhow!(
85            "Only a single PLT relocation for host function call import is allowed, make sure to \
86            build an optimized cdylib"
87        ));
88    };
89
90    let sym = elf
91        .dynamic_symbol_table()
92        .context("Failed to get dynamic symbol table")?
93        .symbol_by_index(symbol_index)
94        .context("Failed to get relocation symbol by its index")?;
95
96    let name = sym
97        .name()
98        .with_context(|| format!("Failed to get relocation symbol name: {relocation:?} {sym:?}"))?;
99    debug!(
100        %name,
101        "PLT relocation name"
102    );
103
104    if name != HOST_CALL_FN_IMPORT {
105        return Err(anyhow::anyhow!(
106            "Unexpected PLT relocation name {name}: {relocation:?} {sym:?}"
107        ));
108    }
109
110    if relocation.addend() != 0 || relocation.has_implicit_addend() {
111        return Err(anyhow::anyhow!(
112            "Unexpected PLT relocation {name}: {relocation:?} {sym:?}"
113        ));
114    }
115
116    Ok(())
117}
118
/// Location and size information about the sections of the input file that end up in the
/// contract file
#[derive(Clone, Copy, Debug)]
struct ParsedSections {
    /// File offset (counted from the start of the input file) at which the
    /// `ab-contract-metadata` section begins
    metadata_section_offset: u64,
    /// Number of bytes the `ab-contract-metadata` section occupies in the input file
    metadata_section_size: u64,
    /// File offset (counted from the start of the input file) at which the `.rodata` section
    /// begins
    rodata_section_offset: u64,
    /// Number of bytes the `.rodata` section occupies in the input file
    rodata_section_size: u64,
    /// Number of padding bytes (possibly zero) separating the `ab-contract-metadata` and
    /// `.rodata` sections
    metadata_rodata_padding: u64,
    /// Size of the in-memory read-only region made of the `ab-contract-metadata` and `.rodata`
    /// sections, including any padding that precedes the `.text` section
    ro_memory_size: u64,
    /// File offset (counted from the start of the input file) at which the `.text` section
    /// begins
    code_section_offset: u64,
    /// Number of bytes the `.text` section occupies in the input file
    code_section_size: u64,
}
140
141fn parse_sections(elf: &ElfFile<'_, FileHeader64<LittleEndian>>) -> anyhow::Result<ParsedSections> {
142    let mut maybe_metadata_section = None;
143    let mut maybe_rodata_section = None;
144    let mut maybe_code_section = None;
145
146    for section in elf.sections() {
147        // TODO: This log is not very usable right now:
148        //  https://github.com/gimli-rs/object/issues/834
149        // trace!(?section, "Processing section");
150        trace!(name = %section.name().unwrap_or_default(), "Processing section");
151
152        match section.name().context("Failed to get section name")? {
153            "ab-contract-metadata" => {
154                let CompressedData {
155                    format,
156                    data: _,
157                    uncompressed_size,
158                } = section
159                    .compressed_data()
160                    .context("Failed to get section data")?;
161                if !matches!(format, CompressionFormat::None) {
162                    return Err(anyhow::anyhow!(
163                        "Section `ab-contract-metadata` is compressed with {format:?}, but shouldn't be"
164                    ));
165                }
166                if uncompressed_size != section.size() {
167                    return Err(anyhow::anyhow!(
168                        "Section `ab-contract-metadata` has unexpected paddings: file size \
169                        {uncompressed_size} != in-memory size {}",
170                        section.size()
171                    ));
172                }
173                maybe_metadata_section.replace(section);
174            }
175            ".rodata" => {
176                let CompressedData {
177                    format,
178                    data: _,
179                    uncompressed_size,
180                } = section
181                    .compressed_data()
182                    .context("Failed to get section data")?;
183                if !matches!(format, CompressionFormat::None) {
184                    return Err(anyhow::anyhow!(
185                        "Section `.rodata` is compressed with {format:?}, but shouldn't be"
186                    ));
187                }
188                if uncompressed_size != section.size() {
189                    return Err(anyhow::anyhow!(
190                        "Section `.rodata` has unexpected paddings: file size \
191                        {uncompressed_size} != in-memory size {}",
192                        section.size()
193                    ));
194                }
195                maybe_rodata_section.replace(section);
196            }
197            ".text" => {
198                let CompressedData {
199                    format,
200                    data: _,
201                    uncompressed_size,
202                } = section
203                    .compressed_data()
204                    .context("Failed to get section data")?;
205                if !matches!(format, CompressionFormat::None) {
206                    return Err(anyhow::anyhow!(
207                        "Section `.text` is compressed with {format:?}, but shouldn't be"
208                    ));
209                }
210                if uncompressed_size != section.size() {
211                    return Err(anyhow::anyhow!(
212                        "Section `.text` has unexpected paddings: file size \
213                        {uncompressed_size} != in-memory size {}",
214                        section.size()
215                    ));
216                }
217                maybe_code_section.replace(section);
218            }
219            _ => {
220                // Ignore everything else
221            }
222        }
223    }
224
225    let Some(metadata_section) = maybe_metadata_section else {
226        return Err(anyhow::anyhow!("Section `ab-contract-metadata` not found"));
227    };
228    let Some(code_section) = maybe_code_section else {
229        return Err(anyhow::anyhow!("Section `.text` not found"));
230    };
231
232    let metadata_section_address = metadata_section.address();
233    let (metadata_section_offset, metadata_section_size) = metadata_section
234        .file_range()
235        .context("Failed to get `ab-contract-metadata` section range")?;
236    let code_section_address = code_section.address();
237    let (code_section_offset, code_section_size) = code_section
238        .file_range()
239        .context("Failed to get `.text` section range")?;
240
241    let (rodata_section_address, (rodata_section_offset, rodata_section_size)) =
242        match maybe_rodata_section {
243            Some(rodata_section) => (
244                rodata_section.address(),
245                rodata_section
246                    .file_range()
247                    .context("Failed to get `.rodata` section range")?,
248            ),
249            None => (metadata_section_address, (metadata_section_offset, 0)),
250        };
251
252    if metadata_section_offset.max(rodata_section_offset) > code_section_offset {
253        return Err(anyhow::anyhow!(
254            "`.text` section must be after `.rodata` and `ab-contract-metadata` sections: \
255            metadata_section_offset={metadata_section_offset}, \
256            rodata_section_offset={rodata_section_offset}, \
257            code_section_offset={code_section_offset}"
258        ));
259    }
260
261    // Calculate in-memory read-only data size from addresses, such that after loading everything is
262    // correct relatively to each other, even though some bytes may, technically, not belong to the
263    // original read-only memory as such
264    let Some(ro_memory_size) =
265        code_section_address.checked_sub(metadata_section_address.min(rodata_section_address))
266    else {
267        return Err(anyhow::anyhow!(
268            "`.text` section must be after `.rodata` and `ab-contract-metadata` sections: \
269            metadata_section_address={metadata_section_address}, \
270            rodata_section_address={rodata_section_address}, \
271            code_section_address={code_section_address}"
272        ));
273    };
274
275    let metadata_rodata_padding = if metadata_section_address < rodata_section_address {
276        (rodata_section_address - metadata_section_address) - metadata_section_size
277    } else {
278        (metadata_section_address - rodata_section_address) - rodata_section_size
279    };
280
281    Ok(ParsedSections {
282        metadata_section_offset,
283        metadata_section_size,
284        rodata_section_offset,
285        rodata_section_size,
286        metadata_rodata_padding,
287        ro_memory_size,
288        code_section_offset,
289        code_section_size,
290    })
291}
292
293fn check_imports(elf: &ElfFile<'_, FileHeader64<LittleEndian>>) -> anyhow::Result<()> {
294    let imports = elf.imports().context("Failed to get imports")?;
295
296    if imports.len() > 1 {
297        return Err(anyhow::anyhow!(
298            "Expected at most one import, got {}",
299            imports.len()
300        ));
301    }
302
303    if let Some(import) = imports.into_iter().next()
304        && import.name() != HOST_CALL_FN_IMPORT.as_bytes()
305    {
306        return Err(anyhow::anyhow!(
307            "Expected import `{HOST_CALL_FN_IMPORT}`, got `{}`",
308            String::from_utf8_lossy(import.name())
309        ));
310    }
311
312    Ok(())
313}
314
/// Exported function as found in the input file
#[derive(Clone, Copy, Debug)]
struct ParsedExport {
    /// Offset of the function in the input file, relative to the beginning of the file
    offset: u64,
    /// Size of the function as reported by its symbol
    size: u64,
}
320
321fn parse_exports<'a>(
322    elf: &'a ElfFile<'a, FileHeader64<LittleEndian>>,
323) -> anyhow::Result<HashMap<&'a str, ParsedExport>> {
324    elf.dynamic_symbols()
325        .enumerate()
326        .filter_map(|(index, symbol)| {
327            // TODO: This log is not very usable right now:
328            //  https://github.com/gimli-rs/object/issues/834
329            // trace!(
330            //     %index,
331            //     ?symbol,
332            //     "Processing symbol"
333            // );
334
335            let name = match symbol.name() {
336                Ok(name) => name,
337                Err(error) => return Some(Err(error).context("Failed to get symbol name")),
338            };
339            let elf_symbol = symbol.elf_symbol();
340
341            if elf_symbol.st_bind() != STB_GLOBAL {
342                return Some(Err(anyhow::anyhow!(
343                    "Non-STB_GLOBAL symbol {name}: {symbol:?}"
344                )));
345            }
346            if elf_symbol.st_other != STV_DEFAULT {
347                return Some(Err(anyhow::anyhow!(
348                    "Non-STV_DEFAULT symbol {name}: {symbol:?}"
349                )));
350            }
351            if elf_symbol.st_shndx.get(LittleEndian) >= SHN_LORESERVE {
352                return Some(Err(anyhow::anyhow!(
353                    "Unexpected reserved section index for symbol {name}: {symbol:?}"
354                )));
355            }
356
357            match symbol.kind() {
358                SymbolKind::Unknown => {
359                    if !(symbol.size() == 0 && name == HOST_CALL_FN_IMPORT) {
360                        return Some(Err(anyhow::anyhow!(
361                            "Unexpected unknown symbol {name}: {symbol:?}"
362                        )));
363                    }
364
365                    None
366                }
367                SymbolKind::Text => {
368                    let SymbolSection::Section(section_index) = symbol.section() else {
369                        return Some(Err(anyhow::anyhow!(
370                            "Unexpected section type for symbol {name}: {symbol:?}"
371                        )));
372                    };
373                    let section = match elf.section_by_index(section_index) {
374                        Ok(section) => section,
375                        Err(error) => {
376                            return Some(Err(error).context(format!(
377                                "Failed to get section {section_index} for symbol {name}"
378                            )));
379                        }
380                    };
381                    let Some(offset_within_section) =
382                        symbol.address().checked_sub(section.address())
383                    else {
384                        return Some(Err(anyhow::anyhow!(
385                            "Invalid offset calculation for symbol {name}: \
386                            address {} < section address {}",
387                            symbol.address(),
388                            section.address()
389                        )));
390                    };
391
392                    let Some((section_offset, _section_size)) = section.file_range() else {
393                        return Some(Err(anyhow::anyhow!(
394                            "Failed to get file range for section {section_index} for symbol {name}"
395                        )));
396                    };
397                    let offset = section_offset + offset_within_section;
398                    let size = symbol.size();
399                    debug!(
400                        %index,
401                        %name,
402                        %offset,
403                        %size,
404                        "Found export function"
405                    );
406
407                    Some(Ok((name, ParsedExport { offset, size })))
408                }
409                SymbolKind::Data => {
410                    if !name.starts_with(METADATA_STATIC_NAME_PREFIX) {
411                        return Some(Err(anyhow::anyhow!(
412                            "Unexpected STT_OBJECT {name}: {symbol:?}"
413                        )));
414                    }
415
416                    None
417                }
418                _ => Some(Err(anyhow::anyhow!("Unexpected symbol {name}: {symbol:?}"))),
419            }
420        })
421        .collect()
422}
423
424fn extract_host_call_fn_offset(
425    input_file: &[u8],
426    parsed_exports: &mut HashMap<&str, ParsedExport>,
427) -> anyhow::Result<u64> {
428    let Some(host_call_fn) = parsed_exports.remove(HOST_CALL_FN) else {
429        return Ok(0);
430    };
431
432    if host_call_fn.size != size_of::<[u32; 2]>() as u64 {
433        return Err(anyhow::anyhow!(
434            "Host call function {HOST_CALL_FN} has invalid size {}",
435            host_call_fn.size
436        ));
437    }
438    let host_call_fn_offset = host_call_fn.offset;
439    input_file
440        .get(host_call_fn_offset as usize..)
441        .with_context(|| {
442            format!(
443                "Host call address {host_call_fn_offset} out of range of input file ({} bytes)",
444                input_file.len()
445            )
446        })?
447        .get(..size_of::<[u32; 2]>())
448        .context("Not enough bytes to get instructions of host call function")?;
449
450    Ok(host_call_fn_offset)
451}
452
453fn parse_metadata_methods(
454    parsed_exports: &mut HashMap<&str, ParsedExport>,
455    metadata_bytes: &[u8],
456) -> anyhow::Result<Vec<ParsedExport>> {
457    let mut metadata_methods = Vec::new();
458
459    let mut metadata_decoder = MetadataDecoder::new(metadata_bytes);
460
461    while let Some(maybe_metadata_item) = metadata_decoder.decode_next() {
462        let metadata_item = maybe_metadata_item.map_err(|error| {
463            anyhow::Error::msg(error.to_string()).context("Failed to decode metadata item")
464        })?;
465        debug!(?metadata_item, "Decoded metadata item");
466
467        let mut methods_metadata_decoder = metadata_item.into_decoder();
468        while let Some(method_metadata_decoder) = methods_metadata_decoder.decode_next() {
469            let (_, method_metadata_item) =
470                method_metadata_decoder.decode_next().map_err(|error| {
471                    anyhow::Error::msg(error.to_string())
472                        .context("Failed to decode method metadata")
473                })?;
474
475            trace!(?method_metadata_item, "Decoded method metadata item");
476
477            let method_name =
478                str::from_utf8(method_metadata_item.method_name).with_context(|| {
479                    format!(
480                        "Non-UTF-8 method name: {:?}",
481                        method_metadata_item.method_name
482                    )
483                })?;
484            let symbol = parsed_exports
485                .remove(method_name)
486                .with_context(|| anyhow::anyhow!("Method {method_name} not found in symbols"))?;
487
488            metadata_methods.push(symbol);
489        }
490    }
491
492    Ok(metadata_methods)
493}
494
495/// Convert RISC-V ELF `cdylib` into Abundance contract file format
496pub fn convert(input_file: &[u8]) -> anyhow::Result<Vec<u8>> {
497    let buffer = SharedAlignedBuffer::from_bytes(input_file);
498    let elf =
499        ElfFile64::<LittleEndian>::parse(buffer.as_slice()).context("Failed to parse ELF file")?;
500
501    if !is_correct_header(elf.elf_header()) {
502        return Err(anyhow::anyhow!(
503            "Invalid ELF header: {:?}",
504            elf.elf_header()
505        ));
506    }
507
508    check_relocations(&elf)?;
509    let ParsedSections {
510        metadata_section_offset,
511        metadata_section_size,
512        rodata_section_offset,
513        rodata_section_size,
514        metadata_rodata_padding,
515        ro_memory_size,
516        code_section_offset,
517        code_section_size,
518    } = parse_sections(&elf)?;
519
520    if metadata_section_size == 0 {
521        return Err(anyhow::anyhow!("Metadata not found"));
522    }
523
524    check_imports(&elf)?;
525
526    let mut parsed_exports = parse_exports(&elf)?;
527
528    let host_call_fn_offset = extract_host_call_fn_offset(input_file, &mut parsed_exports)?;
529
530    if host_call_fn_offset != 0 && host_call_fn_offset < code_section_offset {
531        return Err(anyhow::anyhow!(
532            "Host call function offset {host_call_fn_offset} is before `.text` section offset \
533            {code_section_offset}"
534        ));
535    }
536
537    let metadata_bytes = input_file
538        .get(metadata_section_offset as usize..)
539        .with_context(|| {
540            format!(
541                "Metadata offset {metadata_section_offset} out of range of input file ({} bytes)",
542                input_file.len()
543            )
544        })?
545        .get(..metadata_section_size as usize)
546        .with_context(|| format!("Metadata size {metadata_section_size} is invalid"))?;
547
548    let metadata_methods = parse_metadata_methods(&mut parsed_exports, metadata_bytes)?;
549
550    if !parsed_exports.is_empty() {
551        return Err(anyhow::anyhow!("Found unused exports: {parsed_exports:?}"));
552    }
553
554    let header_size = size_of::<ContractFileHeader>();
555    let methods_metadata_size = size_of::<ContractFileMethodMetadata>() * metadata_methods.len();
556    let header_with_methods_metadata_size = (header_size + methods_metadata_size) as u64;
557
558    let mut output_file = Vec::new();
559
560    // Write file header
561    let contract_file_header = ContractFileHeader {
562        magic: CONTRACT_FILE_MAGIC,
563        read_only_section_file_size: (metadata_section_size
564            + rodata_section_size
565            + metadata_rodata_padding)
566            .try_into()
567            .context("Read-only section size is over 32-bit")?,
568        read_only_section_memory_size: ro_memory_size
569            .try_into()
570            .context("Read-only section size is over 32-bit")?,
571        metadata_offset: {
572            let metadata_offset = if metadata_section_offset < rodata_section_offset {
573                header_with_methods_metadata_size
574            } else {
575                header_with_methods_metadata_size + rodata_section_size + metadata_rodata_padding
576            };
577
578            metadata_offset
579                .try_into()
580                .context("Metadata offset is over 32-bit")?
581        },
582        metadata_size: metadata_section_size
583            .try_into()
584            .context("Metadata size is over 16-bit")?,
585        num_methods: metadata_methods
586            .len()
587            .try_into()
588            .context("Number of methods is over 16-bit")?,
589        host_call_fn_offset: {
590            let host_call_fn_offset = if host_call_fn_offset == 0 {
591                0
592            } else {
593                header_with_methods_metadata_size
594                    + (metadata_section_size + rodata_section_size + metadata_rodata_padding)
595                    + (host_call_fn_offset - code_section_offset)
596            };
597
598            host_call_fn_offset
599                .try_into()
600                .context("Host call offset is over 32-bit")?
601        },
602    };
603    output_file.extend_from_slice(contract_file_header.as_bytes());
604
605    // Write metadata of each method
606    for metadata_method in metadata_methods {
607        let offset = header_with_methods_metadata_size
608            + (metadata_section_size + rodata_section_size + metadata_rodata_padding)
609            + (metadata_method.offset - code_section_offset);
610        let contract_file_function_metadata = ContractFileMethodMetadata {
611            offset: offset.try_into().context("Method offset is over 32-bit")?,
612            size: metadata_method
613                .size
614                .try_into()
615                .context("Method size is over 32-bit")?,
616        };
617        output_file.extend_from_slice(contract_file_function_metadata.as_bytes());
618    }
619
620    // Write `ab-contract-metadata` and `.rodata` sections with possible padding between them
621    if metadata_section_offset < rodata_section_offset {
622        output_file.extend_from_slice(
623            &input_file[metadata_section_offset as usize..][..metadata_section_size as usize],
624        );
625        output_file.extend(iter::repeat_n(0, metadata_rodata_padding as usize));
626        output_file.extend_from_slice(
627            &input_file[rodata_section_offset as usize..][..rodata_section_size as usize],
628        );
629    } else {
630        output_file.extend_from_slice(
631            &input_file[rodata_section_offset as usize..][..rodata_section_size as usize],
632        );
633        output_file.extend(iter::repeat_n(0, metadata_rodata_padding as usize));
634        output_file.extend_from_slice(
635            &input_file[metadata_section_offset as usize..][..metadata_section_size as usize],
636        );
637    }
638
639    // Write `.text` section
640    output_file.extend_from_slice(
641        &input_file[code_section_offset as usize..][..code_section_size as usize],
642    );
643
644    // TODO: Compress with zstd? If so, then read-only data can be expanded to the real size from
645    //  the very beginning, such that after decompression it'll already have correct layout.
646    Ok(output_file)
647}