2026-01-22 00:07:29 +07:00

181 lines
5.6 KiB
C++

#pragma once
#include <SdFat.h>
#include <cstring>
#include <iostream>
// Debug logging for serialization corruption investigation
// Set to 1 to enable validation of Thai UTF-8 strings during serialize/deserialize
#define SERIALIZATION_THAI_VALIDATION 0
#if SERIALIZATION_THAI_VALIDATION
#include <Arduino.h>
#endif
namespace serialization {
#if SERIALIZATION_THAI_VALIDATION
// Heuristic scan of a UTF-8 byte string for damaged Thai text.
//
// Thai code points (U+0E00-U+0E7F) encode as 0xE0 0xB8/0xB9 XX, so a NUL in either of the
// two bytes that follow a 0xE0 lead byte is treated as corruption. Only the lead byte is
// matched: the second byte is not required to be 0xB8/0xB9, and a 0xE0 that sits in the
// last two bytes of the string is skipped because the full triple is not available.
//
// On the first hit the byte offset and the three bytes are logged to Serial, tagged with
// `context`, and the scan stops.
//
// Returns true if a suspicious sequence was found, false otherwise.
static bool checkThaiCorruption(const std::string& s, const char* context) {
  const uint8_t* const bytes = reinterpret_cast<const uint8_t*>(s.data());
  const size_t total = s.size();
  for (size_t pos = 0; pos < total; ++pos) {
    // Not a candidate lead byte, or too close to the end to hold a 3-byte sequence.
    if (bytes[pos] != 0xE0 || pos + 3 > total) {
      continue;
    }
    const uint8_t* const seq = bytes + pos;
    // Both continuation slots are non-NUL: nothing to report for this position.
    if (seq[1] != 0x00 && seq[2] != 0x00) {
      continue;
    }
    Serial.printf("[SER] %s CORRUPT Thai @ offset %u: %02X %02X %02X\n",
                  context, (uint32_t)pos,
                  seq[0], seq[1], seq[2]);
    return true;
  }
  return false;
}
#endif
// Writes the raw in-memory bytes of `value` (sizeof(T) of them) to `os`.
// No byte-order or padding normalisation is performed, and the stream state is not checked.
template <typename T>
static void writePod(std::ostream& os, const T& value) {
  const char* const rawBytes = reinterpret_cast<const char*>(&value);
  os.write(rawBytes, sizeof(value));
}
// Writes the raw in-memory bytes of `value` (sizeof(T) of them) to `file`.
// The byte count returned by FsFile::write is not inspected.
template <typename T>
static void writePod(FsFile& file, const T& value) {
  const uint8_t* const rawBytes = reinterpret_cast<const uint8_t*>(&value);
  file.write(rawBytes, sizeof(value));
}
// Fills `value` with the next sizeof(T) raw bytes from `is`.
// The stream state is not checked here; on a short read `value` is only partially overwritten.
template <typename T>
static void readPod(std::istream& is, T& value) {
  char* const rawBytes = reinterpret_cast<char*>(&value);
  is.read(rawBytes, sizeof(value));
}
// Fills `value` with the next sizeof(T) raw bytes from `file`.
// The result of FsFile::read is not inspected, so a short or failed read leaves `value`
// partially or wholly unchanged.
template <typename T>
static void readPod(FsFile& file, T& value) {
  uint8_t* const rawBytes = reinterpret_cast<uint8_t*>(&value);
  file.read(rawBytes, sizeof(value));
}
// Serialises `s` to `os` as a uint32_t byte count (written via writePod, i.e. in the host's
// native representation) followed by the string's raw bytes. No terminator is written.
// The size is narrowed to 32 bits before being written.
static void writeString(std::ostream& os, const std::string& s) {
  const uint32_t byteCount = static_cast<uint32_t>(s.size());
  writePod(os, byteCount);
  os.write(s.data(), byteCount);
}
// Serialises `s` to `file` as a uint32_t byte count (written via writePod) followed by the
// string's raw bytes. No terminator is written.
//
// A short payload write is reported on Serial; the function still returns normally because
// it has no way to signal failure to the caller.
//
// When SERIALIZATION_THAI_VALIDATION is enabled, the string is scanned for corrupt Thai
// sequences before writing, and payloads of 1..64 bytes are read back from the file and
// compared byte-by-byte with what was written. The read-back seeks within `file` and then
// restores the position that followed the write.
static void writeString(FsFile& file, const std::string& s) {
  const uint32_t len = s.size();
#if SERIALIZATION_THAI_VALIDATION
  // Check for corruption BEFORE writing to file
  checkThaiCorruption(s, "WRITE_BEFORE");
  // Start of this record; only needed for the read-back verification below.
  const uint64_t writePos = file.curPosition();
#endif
  writePod(file, len);
  const size_t written = file.write(reinterpret_cast<const uint8_t*>(s.data()), len);
  if (written != len) {
    // Mirror readString's partial-read report so failed writes do not go unnoticed.
    Serial.printf("[SER] writeString: partial write %u/%u bytes\n",
                  static_cast<uint32_t>(written), len);
  }
#if SERIALIZATION_THAI_VALIDATION
  // Read-back verification: immediately read what we just wrote to detect SD card issues
  uint8_t verifyBuf[64];
  if (len > 0 && len <= sizeof(verifyBuf)) {
    const uint64_t afterWritePos = file.curPosition();
    file.seek(writePos + sizeof(uint32_t));  // Skip the length prefix
    // FsFile::read returns int and uses -1 for errors; treat that as "nothing read" so the
    // count reported below is meaningful.
    const int rc = file.read(verifyBuf, len);
    const size_t bytesRead = rc > 0 ? static_cast<size_t>(rc) : 0;
    if (bytesRead == len) {
      for (size_t i = 0; i < len; i++) {
        if (verifyBuf[i] != static_cast<uint8_t>(s[i])) {
          Serial.printf("[SER] READBACK MISMATCH @ %u: wrote %02X, read %02X\n",
                        (uint32_t)i, static_cast<uint8_t>(s[i]), verifyBuf[i]);
        }
      }
    } else {
      Serial.printf("[SER] READBACK: partial read %u/%u\n", (uint32_t)bytesRead, len);
    }
    // Restore file position
    file.seek(afterWritePos);
  }
#endif
}
// Reads a string in the layout produced by writeString(std::ostream&): a uint32_t byte
// count in the host's native representation, followed by that many raw bytes.
//
// Failure handling:
//  - If the length prefix cannot be read in full, `s` is cleared and the function returns;
//    the length is never used while indeterminate.
//  - If the payload is cut short, `s` still has the declared length, with the bytes that
//    were read followed by '\0' padding (no stale content from the previous value of `s`).
// In both cases the stream's failbit/eofbit remain set so the caller can detect the problem.
//
// No upper bound is applied to the declared length here.
static void readString(std::istream& is, std::string& s) {
  uint32_t len = 0;
  // The prefix is read inline (the same bytes readPod would read) so the stream state can be
  // checked before `len` is trusted.
  is.read(reinterpret_cast<char*>(&len), sizeof(len));
  if (!is) {
    s.clear();
    return;
  }
  // assign() rather than resize(): every byte starts as '\0', so a short payload read below
  // cannot leave earlier contents of `s` in the tail.
  s.assign(len, '\0');
  if (len == 0) {
    return;
  }
  is.read(&s[0], len);
}
// Reads a string in the layout produced by writeString(FsFile&): a uint32_t byte count
// (read via readPod) followed by that many raw bytes.
//
// Behaviour on bad input:
//  - The length starts at 0, so if nothing can be read for the prefix the result is an
//    empty string rather than a string of indeterminate size.
//  - A declared length above 65536 is logged and treated as 0: `s` becomes empty and the
//    payload bytes are NOT consumed from the file.
//  - A short or failed payload read is logged; `s` keeps the declared length and every byte
//    that was not read is set to 0. FsFile::read reports errors as -1, which is handled as
//    "0 bytes read" so the zero-fill also runs in that case.
//
// When SERIALIZATION_THAI_VALIDATION is enabled, payloads of up to 64 bytes are first read
// into a stack buffer and checked there, then copied into `s` and checked again, to help
// tell file-read corruption apart from corruption introduced in the string's own storage.
// Larger payloads are read directly into `s` and checked once.
static void readString(FsFile& file, std::string& s) {
  uint32_t len = 0;
  readPod(file, len);
  // Sanity check: prevent unreasonably large allocations (max 64KB per string)
  if (len > 65536) {
    Serial.printf("[SER] readString: length %u exceeds maximum, truncating\n", len);
    len = 0;
  }
  s.resize(len);
  if (len == 0) {
    return;
  }
#if SERIALIZATION_THAI_VALIDATION
  // Use a temporary stack buffer to isolate file.read() from std::string memory
  // This helps determine if corruption is in file.read() or in memory management
  uint8_t tempBuf[64];
  if (len <= sizeof(tempBuf)) {
    // Read into temporary buffer first
    const int rc = file.read(tempBuf, len);
    const size_t bytesRead = rc > 0 ? static_cast<size_t>(rc) : 0;
    if (bytesRead != len) {
      Serial.printf("[SER] readString: partial read %u/%u bytes\n",
                    static_cast<uint32_t>(bytesRead), len);
      // Zero the unread tail so uninitialised stack bytes are never copied into `s`.
      memset(tempBuf + bytesRead, 0, len - bytesRead);
    }
    // Check temp buffer IMMEDIATELY after read
    bool corruptInTemp = false;
    for (size_t i = 0; i + 2 < len; i++) {
      if (tempBuf[i] == 0xE0 && (tempBuf[i + 1] == 0x00 || tempBuf[i + 2] == 0x00)) {
        Serial.printf("[SER] READ_TEMPBUF CORRUPT @ %u: %02X %02X %02X\n",
                      (uint32_t)i, tempBuf[i], tempBuf[i + 1], tempBuf[i + 2]);
        corruptInTemp = true;
      }
    }
    // Copy to string
    memcpy(&s[0], tempBuf, len);
    // Check string after copy (skipped when the temp buffer was already flagged)
    if (!corruptInTemp) {
      checkThaiCorruption(s, "READ_AFTER_COPY");
    }
  } else {
    // Large string - read directly
    const int rc = file.read(&s[0], len);
    const size_t bytesRead = rc > 0 ? static_cast<size_t>(rc) : 0;
    if (bytesRead != len) {
      Serial.printf("[SER] readString: partial read %u/%u bytes\n",
                    static_cast<uint32_t>(bytesRead), len);
      // Zero-fill any unread portion to avoid garbage data
      memset(&s[bytesRead], 0, len - bytesRead);
    }
    checkThaiCorruption(s, "READ_AFTER");
  }
#else
  // FsFile::read returns int with -1 on error; keeping it signed until it has been clamped
  // prevents -1 from turning into a huge size_t that would skip the zero-fill below.
  const int rc = file.read(&s[0], len);
  const size_t bytesRead = rc > 0 ? static_cast<size_t>(rc) : 0;
  if (bytesRead != len) {
    Serial.printf("[SER] readString: partial read %u/%u bytes\n",
                  static_cast<uint32_t>(bytesRead), len);
    // Zero-fill any unread portion to avoid garbage data
    memset(&s[bytesRead], 0, len - bytesRead);
  }
#endif
}
} // namespace serialization