使用string_view而不是手动管理内存

近期在调试代码的时候发现了个有意思的事情

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13


#include<cstdio>
#include<iostream>
#include<fstream>

int main(){
    //test.txt内容为： 啊这可海星
    std::ifstream test("test.txt",std::ifstream::binary);
    int length = 10;
    char* buffer = new char[length];
    test.read(buffer, length);
    std::cout << buffer;
    return 0;
}

编译并运行，可以看到，输出结果末尾多了一些字符…

这很奇怪，于是我拿着这些代码去问GPT，它给出的修改意见是

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13


#include<cstdio>
#include<iostream>
#include<fstream>

int main(){
    std::ifstream test("test.txt",std::ifstream::binary);
    int length = 10;
    char* buffer = new char[length + 1]; // 分配11字节的空间
    buffer[length] = '\0'; // 在第11个字节手动加上字符串结束符
    test.read(buffer, length);
    std::cout << buffer;
    return 0;
}

buffer 仅分配了10字节的空间，且没有添加字符串结束符 '\0'。当你将 buffer 输出到控制台时，cout 会继续输出缓冲区后面的内存，直到遇到一个偶然的 '\0' 为止，这会导致输出内容可能包含随机字符或超出预期的内容。

据GPT所说要使我们的结果输出正确，我们需要在代码的结尾添加'\0'，使得cout可以正常识别到字符串的结尾。

后来我又去询问了湊妈，他问我，为什么不用 std::array 呢？

于是我改用以下代码

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12


#include <array>
#include <iostream>
#include <fstream>

int main() {
    int length = 10;
    std::array<char, 10> buffer = {}; 
    std::ifstream test("test.txt", std::ios::binary); 
    test.read(buffer.data(), length); 
    std::cout << buffer.data(); 
    return 0;
}

编译运行

非常神奇，我们并没有在给字符串的结尾指定'\0'，但是代码却可以正常输出结果

询问GPT得知

未初始化的内存：在第一段代码中，buffer 使用 new char[length] 创建，但并未初始化。如果 test.txt 文件内容不足 length个字节，test.read(buffer, length) 读取后会留下部分未定义的内容在 buffer 中。这些未定义的内存内容会导致输出不一致。 std::array 的默认初始化：在第二段代码中，std::array<char, 10> buffer = {} 用 {} 初始化了所有元素为空字符 '\0'，因此未读满的部分会自动填充为 '\0'，输出更为稳定。

其实原本事情到这里应该就结束了，直到我偶然间尝试了

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13


#include<cstdio>
#include<iostream>
#include<fstream>

int main(){
    //此处删除了 std::ios::binary
    std::ifstream test("test.txt");
    int length = 10;
    char* buffer = new char[length];
    test.read(buffer, length);
    std::cout << buffer;
    return 0;
}

编译运行可以发现，输出结果居然是正确的，我们并没有指定结束字符

这太奇怪了，为什么我们第一次的代码会输出异常，而这次不会

继续询问GPT

在 std::ifstream 中，如果没有显式指定 std::ifstream::binary，则默认情况下会使用文本模式来打开文件。而文本模式会自动在读取的数据末尾添加 '\0'

GPT总结：

在C++中，更推荐使用像 std::array、std::string 这样的容器来避免手动管理内存。这些容器可以自动处理大小和内存边界问题，从而减少了手动管理内存带来的风险。而直接操作原始指针（如 char*）更符合C语言的编程风格，但需要手动维护内存边界，容易出现错误。因此，内存管理哲学的不同也解释了为何现代C++更推荐使用 std::array 和 std::string。

其实原本到这里应该就结束了，但是过了几天，我写了这样的代码。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63


#include <opencv2/opencv.hpp>
#include <filesystem>
#include <vector>
#include <cstdio>
#include <thread>

void getFileNames(const std::string& path, std::vector<std::string>& files) {
    for (const auto& entry : std::filesystem::recursive_directory_iterator(path)) {
        if (entry.is_regular_file()) {
            files.push_back(entry.path().string());
        }
    }
}

cv::Mat blendImagesWithAlpha(const cv::Mat& foreground, const cv::Mat& background) {
    cv::Mat result = background.clone(); 
    for (int y = 0; y < foreground.rows; ++y) {
        for (int x = 0; x < foreground.cols; ++x) {
            cv::Vec4b fgPixel = foreground.at<cv::Vec4b>(y, x);
            cv::Vec4b bgPixel = background.at<cv::Vec4b>(y, x);
            float alpha = fgPixel[3] / 255.0f;
            if (alpha > 0.0f) {
                float invAlpha = 1.0f - alpha;
                cv::Vec4b& resultPixel = result.at<cv::Vec4b>(y, x);
                resultPixel[0] = static_cast<uchar>(fgPixel[0] * alpha + bgPixel[0] * invAlpha); 
                resultPixel[1] = static_cast<uchar>(fgPixel[1] * alpha + bgPixel[1] * invAlpha); 
                resultPixel[2] = static_cast<uchar>(fgPixel[2] * alpha + bgPixel[2] * invAlpha); 
                resultPixel[3] = static_cast<uchar>(std::min(255.0f, fgPixel[3] + bgPixel[3] * invAlpha));
            }
        }
    }
    return result;
}


void processImage(const std::string& file, const cv::Mat& background, const std::string& output_path) {
    cv::Mat foreground = cv::imread(file, cv::IMREAD_UNCHANGED);
    cv::Mat result = blendImagesWithAlpha(foreground, background);
    std::string output_file = output_path + "\\" + std::filesystem::path(file).filename().string();
    cv::imwrite(output_file, result);
}


void processImagesInParallel(const std::vector<std::string>& files, const cv::Mat& background, const std::string& output_path) {
    std::vector<std::thread> threads;
    for (const auto& file : files) {
        threads.emplace_back(processImage, file, std::cref(background), output_path);
    }
    for (auto& thread : threads) {
        thread.join();
    }
}

int main() {
    std::string folder_path = ".\\face";
    std::vector<std::string> files;
    getFileNames(folder_path, files);
    cv::Mat background = cv::imread("base.png", cv::IMREAD_UNCHANGED);
    std::string output_path = ".\\output";
    std::filesystem::create_directory(output_path);
    processImagesInParallel(files, background, output_path);
    return 0;
}

湊妈告诉我，应该使用string_view和std::filesystem::path并且提出了把\\修改为/的建议，顺带给了我几条 prompt

1.函数声明用 auto fn -> type

2.变量声明统一用 auto var = type{};

3.int -> std::int32_t

4.#define fn auto fn xxx() -> type

然后函数参数换个行，就变成了这样（

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79


#include <opencv2/opencv.hpp>
#include <filesystem>
#include <vector>
#include <thread>
#include <cstdint>

#define fn auto

fn getFileNames(
        std::filesystem::path path, 
        std::vector<std::filesystem::path>& files
    ) {
    for (const auto& entry : std::filesystem::recursive_directory_iterator{path}) {
        if (entry.is_regular_file()) {
            files.push_back(entry.path());
        }
    }
}

fn blendImagesWithAlpha(
        const cv::Mat& foreground, 
        const cv::Mat& background
    ) -> cv::Mat {
    auto result = background.clone();
    for (auto y = 0; y < foreground.rows; ++y) {
        for (auto x = 0; x < foreground.cols; ++x) {
            auto fgPixel = foreground.at<cv::Vec4b>(y, x);
            auto bgPixel = background.at<cv::Vec4b>(y, x);
            auto alpha = fgPixel[3] / 255.0f;
            if (alpha > 0.0f) {
                auto invAlpha = 1.0f - alpha;
                auto& resultPixel = result.at<cv::Vec4b>(y, x);
                resultPixel[0] = static_cast<uchar>(fgPixel[0] * alpha + bgPixel[0] * invAlpha);
                resultPixel[1] = static_cast<uchar>(fgPixel[1] * alpha + bgPixel[1] * invAlpha);
                resultPixel[2] = static_cast<uchar>(fgPixel[2] * alpha + bgPixel[2] * invAlpha);
                resultPixel[3] = static_cast<uchar>(std::min(255.0f, fgPixel[3] + bgPixel[3] * invAlpha));
            }
        }
    }
    return result;
}

fn processImage(
        const std::filesystem::path& file, 
        const cv::Mat& background, 
        const std::filesystem::path& output_path
    )  {
    auto foreground = cv::imread(file.string(), cv::IMREAD_UNCHANGED);
    auto result = blendImagesWithAlpha(foreground, background);
    auto output_file = output_path / file.filename();
    cv::imwrite(output_file.string(), result);
}

fn processImagesInParallel(
        const std::vector<std::filesystem::path>& files, 
        const cv::Mat& background, 
        const std::filesystem::path& output_path
        )  {
    auto threads = std::vector<std::thread>{};
    for (const auto& file : files) {
        threads.emplace_back(processImage, file, std::cref(background), output_path);
    }
    for (auto& thread : threads) {
        thread.join();
    }
}

fn main() -> std::int32_t {
    auto path = std::filesystem::path{"./face"};
    auto files = std::vector<std::filesystem::path>{};
    getFileNames(path, files);

    auto background = cv::imread("base.png", cv::IMREAD_UNCHANGED);
    auto output_path = std::filesystem::path{"./output"};
    std::filesystem::create_directory(output_path);

    processImagesInParallel(files, background, output_path);
    return 0;
}

先暂且不谈论为什么这个代码的风格这么像Rust，至少比我之前的屎山相比进步了不少。

PS: 经过修改又没有用到string_view了（

用GPT的话来说就是：

现代C++引入了 std::string_view、std::filesystem::path、std::array、std::vector 等特性，帮助开发者写出更简洁、类型安全、效率更高的代码。通过使用这些工具，可以避免很多传统C++代码中的陷阱，比如手动管理内存或路径拼接出错等。

不过我注意到了这个东西，std::string_view（于是我们最开始的那坨屎山可以改成这样

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13


#include <iostream>
#include <fstream>
#include <string_view>
#include <vector>

int main() {
    auto file = std::ifstream("test.txt", std::ios::binary); 
    std::vector<char> buffer(10);
    file.read(buffer.data(), buffer.size());
    std::string_view data_view(buffer.data(), file.gcount());
    std::cout<<data_view;
    return 0;
}

询问湊妈可以知道，这么修改有以下好处

内存管理更安全：
- 原始版本使用 new char[] 需要手动管理内存，容易忘记 delete[] 导致内存泄漏
- 使用 std::vector 会自动管理内存，当 vector 离开作用域时会自动释放
更准确的数据长度处理：
- 原始版本直接输出 buffer 可能会输出超出实际读取长度的数据
- 使用 string_view 配合 gcount() 可以精确控制只查看实际读取到的数据长度
异常安全：
- vector 提供了异常安全保证
- 如果分配内存失败，不会有内存泄漏的风险
更现代的 C++ 风格：
- 使用 RAII 原则（通过 vector）
- 使用轻量级的视图类型（string_view）
- 避免了裸指针的使用
性能方面：
- string_view 是一个轻量级的查看，不会发生数据拷贝
- 对于需要传递这个数据的场景，string_view 可以高效传递，而不需要拷贝数据

PS: 最终改版

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13


#include <iostream>
#include <fstream>
#include <string>
#include <string_view>

int main() {
    auto file = std::ifstream("test.txt", std::ios::binary);
    auto buffer = std::string(10, '\0');
    file.read(buffer.data(), buffer.size());
    const auto data_view = std::string_view(buffer.data(), file.gcount());
    std::cout << data_view;
    return 0;
}

总结，我应该多注意现代C++里面类似string_view,array这样的工具，而不是盲目的手动使用内存进行管理，这样很容易引起很多问题