In a previous post we saw the differences between K-means and K-NN. Here is step by step on how to compute K-nearest neighbors KNN algorithm.

1. Determine parameter K = number of nearest neighbors
2. Calculate the distance between the query-instance and all the training samples
3. Sort the distance and determine nearest neighbors based on the K-th minimum distance
4. Gather the category of the nearest neighbors
5. Use simple majority of the category of nearest neighbors as the prediction value of the query instance

Github Repository : https://github.com/devcoons/simple-knn

```[ ERROR LOADING SOURCE CODE FROM GITLAB. ]
[ First Version available from cache memory. ]

#define _CRT_SECURE_NO_WARNINGS

/*	---	---	---	---	---	---	---	---	---	---	 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include <math.h>

/*	---	---	---	---	---	---	---	---	---	---	 */

#define DATATYPE double

/*	---	---	---	---	---	---	---	---	---	---	 */

struct sample
{
DATATYPE * dim;
uint32_t group;
DATATYPE tmp_distance;
};

struct knn_data
{
uint32_t k;
struct sample ** best_voters;
struct sample * samples[2];
uint32_t samples_count[2];
uint32_t samples_dimensions[2];
};

/*	---	---	---	---	---	---	---	---	---	---	 */

void parse_string_to_sample(struct sample *, char *, uint32_t, uint8_t);

void parse_file_to_samples(struct knn_data *, char *);

void parse_samples_to_file(struct knn_data *, char *);

void knn_algorithm(struct knn_data *);

/*	---	---	---	---	---	---	---	---	---	---	 */

int main()
{
char tmp_str[256];

struct knn_data knn;

printf("Set number of voters (k=):");

scanf("%d", &knn.k);

knn.best_voters = (struct samples **) malloc(knn.k * sizeof(struct samples *));

printf("Provide categorized samples file (train data):");

scanf("%s", tmp_str);

parse_file_to_samples(&knn, tmp_str);

printf("Provide uncategorized samples file (new data):");

scanf("%s", tmp_str);

parse_file_to_samples(&knn, tmp_str);

printf("Perform k-nn algorithm\n");

knn_algorithm(&knn);

printf("Do you want to save the output (yes/no)?");

scanf("%s", tmp_str);

if (strcmp(tmp_str, "yes") == 0 || strcmp(tmp_str, "y") == 0)
{
printf("Where do you want to save the newly categorized data (filepath)?");

scanf("%s", tmp_str);

parse_samples_to_file(&knn, tmp_str);

printf("Completed.\n");
}

printf("Training Samples:\n");

for (int i = 0; i < knn.samples_count[0]; i++)
{
printf("Sample %d -",i);

for (int j = 0; j < knn.samples_dimensions[0]; j++)

printf(" %f", (knn.samples[0] + i)->dim[j]);

printf(" | %d\n",(knn.samples[0]+i)->group);
}

printf("New Categorized Samples:\n");

for (int i = 0; i < knn.samples_count[1]; i++)
{
printf("Sample %d -",i);

for (int j = 0; j < knn.samples_dimensions[1]; j++)

printf(" %f", (knn.samples[1] + i)->dim[j]);

printf(" | %d\n", (knn.samples[1] + i)->group);
}

return 0;
}

/*	---	---	---	---	---	---	---	---	---	---	 */

void parse_string_to_sample(struct sample * sample, char * string, uint32_t max_dimensions,
uint8_t has_group)
{
int tmp_cnt = has_group == 0 ? 0 : 1;

char * tmp_ptr = strtok(string, ",");

if(has_group == 0)

sample->group = atoi(tmp_ptr);

else

sample->dim[0] = atof(tmp_ptr);

while ((tmp_ptr = strtok(NULL, ",")) != NULL)

sample->dim[tmp_cnt++] = atof(tmp_ptr);
}

/*	---	---	---	---	---	---	---	---	---	---	 */

void parse_file_to_samples(struct knn_data * knn, char * filepath)
{
int tmp_cnt;

char line[256];

FILE *file_pointer;

file_pointer = fopen(filepath, "r");

fgets(line, 128, file_pointer);

tmp_cnt = strstr(line, "uncategorized") == NULL ? 0 : 1;

fgets(line, 128, file_pointer);

knn->samples_count[tmp_cnt] = atoi(line);

knn->samples[tmp_cnt] =
(struct sample *) malloc(knn->samples_count[tmp_cnt] * sizeof(struct sample));

fgets(line, 128, file_pointer);

knn->samples_dimensions[tmp_cnt] = atoi(line);

for (uint32_t i = 0; i < knn->samples_count[tmp_cnt]; i++)
{
(knn->samples[tmp_cnt]+i)->dim =
(DATATYPE *)malloc(knn->samples_dimensions[tmp_cnt] * sizeof(DATATYPE));

fgets(line, 128, file_pointer);

parse_string_to_sample(knn->samples[tmp_cnt]+i, line,
knn->samples_dimensions[tmp_cnt], tmp_cnt);
}
}

/*	---	---	---	---	---	---	---	---	---	---	 */

void parse_samples_to_file(struct knn_data * knn, char * filepath)
{
printf(":) not working yet...");
}

/*	---	---	---	---	---	---	---	---	---	---	 */

void knn_algorithms_sort_asc_voters(struct knn_data * knn)
{
struct sample * tmp_smpl = NULL;

for (int i = 0; i < knn->k; i++)
{
for (int j = 0; j < knn->k - 1; j++)
{
if (knn->best_voters[j]->tmp_distance > knn->best_voters[j + 1]->tmp_distance)
{
tmp_smpl = knn->best_voters[j];

knn->best_voters[j] = knn->best_voters[j + 1];

knn->best_voters[j + 1] = tmp_smpl;
}
}
}
}

void knn_algorithm(struct knn_data * knn)
{
double euclidean_distance;

uint32_t * most_common[2],selected_group_pos;

most_common[0] = (uint32_t *)malloc(knn->k * sizeof(uint32_t));

most_common[1] = (uint32_t *)malloc(knn->k * sizeof(uint32_t));

for (int i = 0; i < knn->samples_count[1]; i++)
{
for (int q = 0; q < knn->k; q++)

knn->best_voters[q] = NULL;

for (int j = 0; j < knn->samples_count[0]; j++)
{
euclidean_distance = 0;

for (int q = 0; q < knn->samples_dimensions[0]; q++)
euclidean_distance += pow(
(knn->samples[0] + j)->dim[q] - (knn->samples[1] + i)->dim[q]
, 2);

(knn->samples[0]+j)->tmp_distance = sqrt(euclidean_distance);

if (j < knn->k)
{
knn->best_voters[j] = (knn->samples[0] + j);
}
else
{
if(j == knn->k)

knn_algorithms_sort_asc_voters(knn);

for (int q = 0; q < knn->k; q++)

if (knn->best_voters[q]->tmp_distance > (knn->samples[0] + j)->tmp_distance)
{
for (int z = knn->k - 1; z >= q+1 ; z--)

knn->best_voters[z] = knn->best_voters[z - 1];

knn->best_voters[q] = (knn->samples[0] + j);

break;
}
}
}

memset(most_common[0], 0, knn->k * sizeof(uint32_t));

memset(most_common[1], 0, knn->k * sizeof(uint32_t));

for (int j = 0; j < knn->k; j++)
{
for(int q = 0 ; q < knn->k ; q++)

if (*(most_common[0] + q) == knn->best_voters[j]->group)
{
*(most_common[1] + q) += 1;

break;
}
else if (*(most_common[0] + q) == 0)
{
*(most_common[0] + q) = knn->best_voters[j]->group;

*(most_common[1] + q) += 1;
break;
}
}

selected_group_pos = 0;

for (int j = 1; j < knn->k; j++)
{
if (*(most_common[1] + j) == 0)
break;

if (*(most_common[0] + j) > *(most_common[0] + selected_group_pos))
selected_group_pos = j;
}

(knn->samples[1] + i)->group = *(most_common[0] + selected_group_pos);
}
}

/*	---	---	---	---	---	---	---     ---	---	---	 */```

Training Samples

```categorized
6
2
0,1,1
0,2,1
0,2,2
1,7,5
1,7,6
1,8,5```

New Samples to categorize

```uncategorized
5
2
9,6
1,2
3,1
6,6
3,3```