C++实现基于KNN的手写体识别

2023年8月7日08:08:47

一、系统结构

在这里插入图片描述
在这里插入图片描述
在这里插入图片描述

二、数据获取与预处理

在这里插入图片描述
在这里插入图片描述

三、KNN算法与K折交叉验证

在这里插入图片描述
在这里插入图片描述
源码:

#include "pch.h"
#include <iostream>
#include <fstream>
#include <string>
#include <math.h>
using namespace std;
//#define k 10 // key KNN parameter (disabled; k is the runtime variable declared below)
#define mn 10
#define K_flod 15 // number of folds used by the K-fold cross-validation
int k; // number of nearest neighbours that K_select lets vote
double train_weigh[8] = {1,1,1,1,1,1,1,1};// per-character weights applied to the training-set votes
// One sample of the data set plus the bookkeeping the classifier attaches to it.
struct node {
    std::string nn;      // the 0/1 matrix read from one txt file, stored as a single string
    std::string name;    // class name of the sample as labelled in the data set
    std::string shuxing; // class name finally decided for a test-set sample
    std::string sbname;  // recognition result produced for a test character
    double dis;          // Euclidean distance between a test character and a training character
    double x1;           // share of the k distances that voted for the current test character
    double x2;           // share of the k distances that voted for other characters
    double diss;         // Manhattan distance between a test character and a training character
    bool s;              // flag cleared for every sample when cross-validation starts
                         // (presumably marks fold membership - TODO confirm against k_K_flod)

    // Every member gets a defined starting value. The text fields start as "000";
    // diss is now zeroed too (it used to be left indeterminate).
    node()
        : nn("000"), name("000"), shuxing("000"), sbname("000"),
          dis(0.0), x1(0.0), x2(0.0), diss(0.0),
          s(false) {}
};
node dis_o[720];// distance records (Euclidean or Manhattan) that K_select sorts and votes on
node ceshi[720];// the test set
double tru[720];// accuracy of each round of the K-fold cross-validation
double err[720];// error rate of each round of the K-fold cross-validation
double K_tru[8];// final accuracy of each character under k-nearest-neighbour
//double K_err[8];// final error rate of each character under k-nearest-neighbour (disabled)
string typp[8] = { "bei","jing","xin","xi","ke","ji","da","xue" };// the character classes that can be recognised
node a[720];// the complete data set
//将全部数据(720个128*128的二值化矩阵读入存储在a这个结构体数组中)
void read() {
int i, j;
string nn;
string mi[90] = { "01","02","03","04","05","06","07","08","09","10","11","12","13","14","15","16","17","18","19","20","21","22","23","24","25","26" ,"27","28","29","30",
"31","32","33","34","35","36","37","38","39","40","41","42","43","44","45","46","47","48","49","50","51","52","53","54","55","56" ,"57","58","59","60" ,
"61","62","63","64","65","66","67","68","69","70","71","72","73","74","75","76","77","78","79","80","81","82","83","84","85","86" ,"87","88","89","90" };
for (i = 0, j = 0; i < 90; i++, j++) {
string ader = "F:\\计算机综合实践\\手写体识别数据集\\训练集二值化数据集\\bei_" + mi[i] + ".txt";
ifstream rf(ader);
while (rf >> nn) {
a[j].nn = a[j].nn + nn;
}
a[j].name = "bei";
rf.close();
}
for (i = 0; i < 90; i++, j++) {
string ader = "F:\\计算机综合实践\\手写体识别数据集\\训练集二值化数据集\\jing_" + mi[i] + ".txt";
ifstream rf(ader);
while (rf >> nn) {
a[j].nn = a[j].nn + nn;
}
a[j].name = "jing";
rf.close();
}
for (i = 0; i < 90; i++, j++) {
string ader = "F:\\计算机综合实践\\手写体识别数据集\\训练集二值化数据集\\xin_" + mi[i] + ".txt";
ifstream rf(ader);
while (rf >> nn) {
a[j].nn = a[j].nn + nn;
}
a[j].name = "xin";
rf.close();
}
for (i = 0; i < 90; i++, j++) {
string ader = "F:\\计算机综合实践\\手写体识别数据集\\训练集二值化数据集\\xi_" + mi[i] + ".txt";
ifstream rf(ader);
while (rf >> nn) {
a[j].nn = a[j].nn + nn;
}
a[j].name = "xi";
rf.close();
}
for (i = 0; i < 90; i++, j++) {
string ader = "F:\\计算机综合实践\\手写体识别数据集\\训练集二值化数据集\\ke_" + mi[i] + ".txt";
ifstream rf(ader);
while (rf >> nn) {
a[j].nn = a[j].nn + nn;
}
a[j].name = "ke";
rf.close();
}
for (i = 0; i < 90; i++, j++) {
string ader = "F:\\计算机综合实践\\手写体识别数据集\\训练集二值化数据集\\ji_" + mi[i] + ".txt";
ifstream rf(ader);
while (rf >> nn) {
a[j].nn = a[j].nn + nn;
}
a[j].name = "ji";
rf.close();
}
for (i = 0; i < 90; i++, j++) {
string ader = "F:\\计算机综合实践\\手写体识别数据集\\训练集二值化数据集\\da_" + mi[i] + ".txt";
ifstream rf(ader);
while (rf >> nn) {
a[j].nn = a[j].nn + nn;
}
a[j].name = "da";
rf.close();
}
for (i = 0; i < 90; i++, j++) {
string ader = "F:\\计算机综合实践\\手写体识别数据集\\训练集二值化数据集\\xue_" + mi[i] + ".txt";
ifstream rf(ader);
while (rf >> nn) {
a[j].nn = a[j].nn + nn;
}
a[j].name = "xue";
rf.close();
}
}
// Prompts for the weights of the eight characters and reads them, in order,
// from standard input into train_weigh[0..7].
void InitTrWei() {
    cout << "请分别输入8个汉字的权重值: ";
    for (int slot = 0; slot < 8; ++slot) {
        cin >> train_weigh[slot];
    }
}
//计算欧式距离
double dis (node x1,node x2) {
const char *ar1,*ar2;
double sum = 0;
int j;
ar1 = x1.nn.c_str();
ar2 = x2.nn.c_str();
for (j = 0; j < 1024; j++) {
sum += (ar1[j] - ar2[j])*(ar1[j] - ar2[j]);
}
return sqrt(sum);
}
//计算曼哈顿距离
double diss(node x1, node x2) {
const char *ar1, *ar2;
double sum = 0;
int j;
ar1 = x1.nn.c_str();
ar2 = x2.nn.c_str();
for (j = 0; j < 1024; j++) {
sum += fabs(ar1[j] - ar2[j]);
}
return sum;
}
// Quicksort partition step over a[low..high], ordered by ascending dis.
// The first element is the pivot. Only the dis and name fields are moved
// between slots; every other field of a node stays where it was.
// Returns the index at which the pivot's dis/name finally land.
int Par(node *a,int low,int high) {
    const double pivotDis = a[low].dis;
    const string pivotName = a[low].name;
    int left = low;
    int right = high;

    while (left < right) {
        // Scan from the right for an entry smaller than the pivot ...
        while (left < right && a[right].dis >= pivotDis) {
            --right;
        }
        a[left].name = a[right].name;
        a[left].dis = a[right].dis;

        // ... then from the left for an entry larger than the pivot.
        while (left < right && a[left].dis <= pivotDis) {
            ++left;
        }
        a[right].dis = a[left].dis;
        a[right].name = a[left].name;
    }

    a[left].dis = pivotDis;
    a[left].name = pivotName;
    return left;
}
// Recursive quicksort of a[low..high] by ascending dis, built on Par.
// Ranges with fewer than two elements are left untouched.
void Qsort(node *a,int low ,int high) {
    if (low >= high) {
        return;
    }
    const int pivotIndex = Par(a, low, high);
    Qsort(a, low, pivotIndex - 1);
    Qsort(a, pivotIndex + 1, high);
}
// Decides the k-nearest-neighbour result for one tested character.
//
// Sorts the first (90 - 90 / K_flod) * 8 entries of dis_o by ascending dis,
// lets the k nearest ones vote for their class (the classes listed in typp),
// multiplies each class's vote count by its train_weigh entry and stores the
// name of the highest-scoring class in p.sbname.
//
// - The number of voters is capped at the size of the sorted range, so a k
//   larger than that cannot index unsorted entries or run past dis_o.
// - Ties go to the class that comes first in typp. If no class scores above
//   zero (e.g. k <= 0), the result is typp[0].
void K_select(node& p) {
    const int classCount = 8;
    const int trainCount = (90 - 90 / K_flod) * 8;

    Qsort(dis_o, 0, trainCount - 1);

    const int voters = k < trainCount ? k : trainCount;
    double votes[classCount] = {0};
    for (int i = 0; i < voters; ++i) {
        for (int c = 0; c < classCount; ++c) {
            if (dis_o[i].name == typp[c]) {
                votes[c] += 1;
                break; // class names are distinct, so one match is all there is
            }
        }
    }

    // Apply the per-class weights.
    for (int c = 0; c < classCount; ++c) {
        votes[c] *= train_weigh[c];
    }

    double bestScore = 0;
    int best = 0;
    for (int c = 0; c < classCount; ++c) {
        if (votes[c] > bestScore) {
            bestScore = votes[c];
            best = c;
        }
    }
    p.sbname = typp[best];
}
// Evaluates the current test set: returns the fraction of the first
// (90 / K_flod) * 8 entries of ceshi whose recognised name (sbname)
// equals their labelled name.
double ceshiFenxi() {
    const int total = (90 / K_flod) * 8;
    double correct = 0;
    for (int idx = 0; idx < total; ++idx) {
        if (ceshi[idx].sbname == ceshi[idx].name) {
            correct += 1;
        }
    }
    return correct / total;
}
//K折交叉验证
void k_K_flod(){
int x1 = 0;//记录交叉验证第几折
int i,j,t,jj,e;
double b[8] = { 0 };
for (i = 0; i < 720; i++) {
a[i].s = false;
}
for (x1 = 0; x1 < K_flod;x1++) {
t = 0;
//选出第x1折交叉验证的测试集
for (e = 0; e < 8; e++) {
for (j = 90 - (K_flod - x1)*
  • 作者:没有姓的梓轩
  • 原文链接:https://blog.csdn.net/weixin_42529594/article/details/113120418
    更新时间:2023年8月7日08:08:47 ,共 4710 字。