nemea-common 1.6.3
prefix_tree.h
Go to the documentation of this file.
1
7/*
8 * Copyright (C) 2014 CESNET
9 *
10 * LICENSE TERMS
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in
19 * the documentation and/or other materials provided with the
20 * distribution.
21 * 3. Neither the name of the Company nor the names of its contributors
22 * may be used to endorse or promote products derived from this
23 * software without specific prior written permission.
24 *
25 * ALTERNATIVELY, provided that this notice is retained in full, this
26 * product may be distributed under the terms of the GNU General Public
27 * License (GPL) version 2 or later, in which case the provisions
28 * of the GPL apply INSTEAD OF those given above.
29 *
30 * This software is provided ``as is'', and any express or implied
31 * warranties, including, but not limited to, the implied warranties of
32 * merchantability and fitness for a particular purpose are disclaimed.
33 * In no event shall the company or contributors be liable for any
34 * direct, indirect, incidental, special, exemplary, or consequential
35 * damages (including, but not limited to, procurement of substitute
36 * goods or services; loss of use, data, or profits; or business
37 * interruption) however caused and on any theory of liability, whether
38 * in contract, strict liability, or tort (including negligence or
39 * otherwise) arising in any way out of the use of this software, even
40 * if advised of the possibility of such damage.
41 *
42 */
43#ifndef _PREFIX_TREE_
44#define _PREFIX_TREE_
45
46
47
48#include <stdio.h>
49#include <stdint.h>
50#include <stdlib.h>
51#include <string.h>
52#include <getopt.h>
53#include <time.h>
54#include <inttypes.h>
55#include <sys/types.h>
56#include <sys/stat.h>
57//#include "tunnel_detection_dns_structs.h"
58
59
60
65#define COUNT_OF_LETTERS_IN_DOMAIN 256 /*< Max count of letter in domain. */
66#define MAX_SIZE_OF_DOMAIN 256 /*< Max length of domain */
67#define MAX_SIZE_OF_DEGREE 5 /*< Max size of stored information */
68#define ADD_TO_LIST_FROM_COUNT_OF_SEARCH 20 /*< Default number of histogram size. */
69#define ADD_TO_LIST_FROM_COUNT_OF_DIFFERENT_SUBDOMAINS 10 /*< Default number of histogram size. */
70#define MAX_COUNT_TO_BE_IN_JUST_ONE_SEARCHER 10 /*< Default number of histogram size. */
71#define PREFIX 1
72#define SUFFIX 0
73/* /} */
74
78typedef enum {
82
86typedef enum {
90
93
99 unsigned char length; /*< length of stored string */
100 unsigned int count_of_string; /*< count of different substrings */
101 unsigned char count_of_children;
102 char * string; /*< stored string in reverse way (end of string is on postion 0)*/
103 prefix_tree_inner_node_t * parent; /*< Pointer to parent */
104 prefix_tree_domain_t * parent_is_domain; /*< Pointer to parent, if NULL parent is inner node, else parent is domain name node*/
105 prefix_tree_inner_node_t ** child; /*< Pointer to descendants*/
106 prefix_tree_domain_t * domain; /*< if this string is domain, this variable conatin poiter to domain name structure*/
107 void * value; /*< pointer to user value */
108};
109
115 prefix_tree_domain_t * most_used_domain_less; /*< position in linked list, pointer to next domain, which has lower count of seatching*/
116 prefix_tree_domain_t * most_used_domain_more; /*< position in linked list, pointer to previous domain, which has higher count of seatching*/
117 prefix_tree_domain_t * most_subdomains_less; /*< position in linked list, pointer to next domain, which has lower count of descendants*/
118 prefix_tree_domain_t * most_subdomains_more; /*< position in linked list, pointer to previous domain, which has higher count of descendants*/
120
126 unsigned char exception; /*< 1 for exception in detection, 0 for classic domain */
127 unsigned char degree; /*< degree of domain */
128 unsigned int count_of_insert; /*< count of searching this domain name */
129 unsigned int count_of_different_subdomains; /*< count of descendants - subdomains */
130 prefix_tree_inner_node_t * parent; /*< pointer to parent (last character of domain) */
131 prefix_tree_domain_t * parent_domain; /*< pointer to parent (domain name) */
132 prefix_tree_inner_node_t *child; /*< pointer to descendats */
133 void * value; /*< pointer to user value - specified by init function */
134 node_domain_extension_t * domain_extension; /*< pointer to structure with linked list of domains*/
135};
136
143 prefix_tree_domain_t * list_of_most_used_domains; /*< list of most searched domains*/
144 prefix_tree_domain_t * list_of_most_used_domains_end; /*< list of most searched domains - pointer to end of the list*/
145 prefix_tree_domain_t * list_of_most_unused_domains; /*< list of most searched domains end - most unsearched domains*/
146 prefix_tree_domain_t ** list_of_most_subdomains; /*< list of domains, which has most subdomains, it is 2D linked list. 1.D is degree of domain name, 2. is linked list*/
147 prefix_tree_domain_t ** list_of_most_subdomains_end; /*< list of domains, which has most subdomains, it is 2D linked list. Pointers on the end of lists*/
149
155typedef struct prefix_tree_t {
156 prefix_tree_inner_node_t * root; /*< Pointer to root node. */
157 unsigned int size_of_value; /*< size of value stored for every domain node */
158 int domain_separator; /*< separator in text, which creares domain node */
159 unsigned char prefix_suffix; /*< prefix or suffix tree */
160 unsigned int count_of_domain_searched_just_ones; /*< Count of domain, witch were searcehd just ones */
161 unsigned int count_of_inserting; /*< Count of inserting and searching (all domains) */
162 unsigned int count_of_inserting_for_just_ones; /*< Count of inserting, but for percent value with variable count_of_domain_searched_just_ones*/
163 unsigned int count_of_different_domains; /*< Count of unique domains*/
164 tree_domain_extension_t * domain_extension; /*< Lists of most used domains etc.*/
167
168
175int prefix_tree_map_character_to_number(unsigned char letter);
176
186prefix_tree_t * prefix_tree_initialize(unsigned char prefix_suffix, unsigned int size_of_value, int domain_separator, int domain_extension, relaxation_after_delete relaxation);
194void prefix_tree_decrease_counters_deleted_inner_node(prefix_tree_inner_node_t * node, int deleted_strings, int deleted_domains);
195
203
212
221
228
236
246
255
263
271
281int prefix_tree_count_to_domain_separator(const char *string, int length, int domain_separator, char prefix);
282
294
295
305
314char * prefix_tree_read_string(prefix_tree_t * tree, prefix_tree_domain_t * domain, char * string);
315
316
317
327
340
341
354
363prefix_tree_domain_t * prefix_tree_insert(prefix_tree_t * tree, const char *string, int length);
364
373prefix_tree_domain_t * prefix_tree_search(prefix_tree_t * tree, const char *string, int length);
374
384
393int prefix_tree_is_string_in_exception(prefix_tree_t * tree, const char *string, int length);
394
403
404
412
413
414
415
416
417
418#endif /* _PREFIX_TREE_ */
prefix_tree_inner_node_t * prefix_tree_most_substring(prefix_tree_inner_node_t *node)
Returns inner node with most of different strings Function returns pointer to inner node,...
domain_extension
Definition prefix_tree.h:78
@ DOMAIN_EXTENSION_NO
Definition prefix_tree.h:80
@ DOMAIN_EXTENSION_YES
Definition prefix_tree.h:79
prefix_tree_inner_node_t * join_nodes(prefix_tree_inner_node_t *node)
Function joins two nodes into one. If parent has just one child, this function join them into one nod...
int prefix_tree_count_to_domain_separator(const char *string, int length, int domain_separator, char prefix)
Count length of string to dot Function counts length of string to dot.
prefix_tree_domain_t * prefix_tree_add_domain_recursive_suffix(prefix_tree_inner_node_t *node, prefix_tree_domain_t *domain_parent, const char *string, int length, prefix_tree_t *tree)
Add domain recursive (suffix tree) Function adds domain to the prefix tree. This function is called f...
prefix_tree_t * prefix_tree_initialize(unsigned char prefix_suffix, unsigned int size_of_value, int domain_separator, int domain_extension, relaxation_after_delete relaxation)
Init function for prefix tree Function that incialize prefix tree.
prefix_tree_domain_t * prefix_tree_add_domain_recursive_prefix(prefix_tree_inner_node_t *node, prefix_tree_domain_t *domain_parent, const char *string, int length, prefix_tree_t *tree)
Add domain recursive (prefix tree) Function adds domain to the prefix tree. This function is called f...
prefix_tree_domain_t * prefix_tree_insert(prefix_tree_t *tree, const char *string, int length)
Add domain to prefix tree Function adds domain to the prefix tree.
prefix_tree_inner_node_t * prefix_tree_new_node_parent_is_domain(prefix_tree_domain_t *domain)
Create descendant of domain Function creates descendant of domain, (domain has other subdomains).
prefix_tree_inner_node_t * prefix_tree_add_children_array(prefix_tree_inner_node_t *parent)
Alloc memory for descendats in inner node Function allocs memory for descendats in inner node.
double prefix_tree_most_used_domain_percent_of_subdomains(prefix_tree_t *tree, int depth)
Statistic function percent od subdomains in certain depth Function returns percent of subdomains in m...
int prefix_tree_destroy_recursive(prefix_tree_t *tree, prefix_tree_inner_node_t *node)
Destroy all items in prefix tree Function destroy recursively destroies all nodes.
char * prefix_tree_read_string(prefix_tree_t *tree, prefix_tree_domain_t *domain, char *string)
Read domain from tree Function return string with the domain name.
void prefix_tree_destroy(prefix_tree_t *tree)
Destroy function for prefix tree Function destroy prefix tree and all item inside.
prefix_tree_domain_t * prefix_tree_new_domain(prefix_tree_inner_node_t *node, prefix_tree_domain_t *domain_parent, prefix_tree_t *tree)
Create domain node structure Function Create domian and connects it to the tree.
prefix_tree_domain_t * prefix_tree_search(prefix_tree_t *tree, const char *string, int length)
Seacrh domain in prefix tree Function adds domain to the prefix tree.
void prefix_tree_decrease_counters_deleted_inner_node(prefix_tree_inner_node_t *node, int deleted_strings, int deleted_domains)
Decrease counters in prefix tree, if deleting a node. Function that goes from the node to the root an...
prefix_tree_domain_t * prefix_tree_add_new_item(prefix_tree_inner_node_t *node, prefix_tree_domain_t *domain, const char *string, int length, prefix_tree_t *tree)
Add new item to prefix tree Function add new item to prefix tree (place where to add new domain has t...
prefix_tree_inner_node_t * prefix_tree_new_node(prefix_tree_inner_node_t *parent, int map_number)
Create inner node structure Function Create inned node and connects it to the tree.
void prefix_tree_recursive_plus_domain(prefix_tree_domain_t *domain_parent, prefix_tree_t *tree)
Recursive change info about parent doimains Function actualize information in parent domains.
prefix_tree_inner_node_t * prefix_tree_split_node_into_two(prefix_tree_inner_node_t *node, int index)
Split node into two nodes Function splits node into two nodes, on the given position....
prefix_tree_domain_t * prefix_tree_add_string_exception(prefix_tree_t *tree, const char *string, int length)
Add domain to prefix tree and set it to the exception state Function adds domain to the prefix tree a...
relaxation_after_delete
Definition prefix_tree.h:86
@ RELAXATION_AFTER_DELETE_NO
Definition prefix_tree.h:88
@ RELAXATION_AFTER_DELETE_YES
Definition prefix_tree.h:87
int prefix_tree_is_string_in_exception(prefix_tree_t *tree, const char *string, int length)
Test domain if is in exception state Function tests domain if is in exception state.
char * prefix_tree_read_inner_node(prefix_tree_t *tree, prefix_tree_inner_node_t *node, char *string)
Read string from inner node Function return string stored in given inner node.
int prefix_tree_map_character_to_number(unsigned char letter)
Map character to index Function maps character to index in descendants.
void prefix_tree_delete_inner_node(prefix_tree_t *tree, prefix_tree_inner_node_t *node)
Function which removes inner node and his descendats. Function will erase specified node and all his ...
Structure - domain extension Structure keeps pointers in linked list to other domain nodes.
prefix_tree_domain_t * most_used_domain_more
prefix_tree_domain_t * most_subdomains_more
prefix_tree_domain_t * most_subdomains_less
prefix_tree_domain_t * most_used_domain_less
Structure - Prefix tree - domain name structure Structure used to keep information about domain names...
prefix_tree_inner_node_t * child
prefix_tree_inner_node_t * parent
node_domain_extension_t * domain_extension
unsigned int count_of_different_subdomains
prefix_tree_domain_t * parent_domain
unsigned char exception
unsigned char degree
unsigned int count_of_insert
Structure - Prefix tree - inner node structure Structure used to keep information about inner node of...
Definition prefix_tree.h:98
prefix_tree_domain_t * domain
prefix_tree_domain_t * parent_is_domain
unsigned char count_of_children
prefix_tree_inner_node_t ** child
prefix_tree_inner_node_t * parent
unsigned int count_of_string
Structure - Prefix tree main structure Structure used to keep information about prefix tree....
unsigned int count_of_domain_searched_just_ones
tree_domain_extension_t * domain_extension
unsigned int size_of_value
prefix_tree_inner_node_t * root
unsigned int count_of_different_domains
unsigned int count_of_inserting
unsigned char prefix_suffix
relaxation_after_delete relaxation
unsigned int count_of_inserting_for_just_ones
Structure - Domain names extension Structure used to keep lists of most searched domains,...
prefix_tree_domain_t ** list_of_most_subdomains_end
prefix_tree_domain_t * list_of_most_used_domains_end
prefix_tree_domain_t * list_of_most_used_domains
prefix_tree_domain_t ** list_of_most_subdomains
prefix_tree_domain_t * list_of_most_unused_domains