satyr 0.43
Loading...
Searching...
No Matches
distance.h
Go to the documentation of this file.
1/*
2 distance.h
3
4 Copyright (C) 2010 Red Hat, Inc.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License along
17 with this program; if not, write to the Free Software Foundation, Inc.,
18 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19*/
20#ifndef SATYR_DISTANCE_H
21#define SATYR_DISTANCE_H
22
27
28#ifdef __cplusplus
29extern "C" {
30#endif
31
32#include <stdbool.h>
33#include <stdint.h>
34#include <stdlib.h>
35
36struct sr_thread;
37
38enum sr_distance_type
39{
40 /* Jaro-Winkler distance:
41 *
42 * Gets number of matching function names(match_count) from both
43 * threads and number of transpositions in place(trans_count) if
44 * the transpositioned function names are not farther away than
45 * frame_count/2 - 1. Then computes the Jaro-Winkler distance
46 * according to the formula. NOTE: The Jaro-Winkler distance is
47 * not a metric distance as it does not satisfy the triangle
48 * inequality. Returns a number between 0 and 1: 0 = no
49 * similarity, 1 = similar threads.
50 */
51 SR_DISTANCE_JARO_WINKLER,
52
53 /* Jaccard distance:
54 *
55 * Gives a number representing the difference of the size of the
56 * intersection and union divided by the size of the union of two
57 * threads. Function names positions in the thread are not taken
58 * into account. Returns a number between 0 and 1: 0 = similar
59 * threads, 1 = no similarity.
60 */
61 SR_DISTANCE_JACCARD,
62
63 /* Levenshtein distance:
64 *
65 * Computes the distance of two threads creating a matrix of
66 * distances between all the frames in each thread. Computes a
67 * number representing how many function names need to be
68 * different in one thread to match all the function names in
69 * second thread at each respective positions or how many function
70 * names need to be different to have a match. The number is
71 * always between 0 and n, where n is the frame count of longer
72 * thread 0 = similar threads, n = no similar function names.
73 * This is normalized to a result between 0 and 1 that is
74 * returned.
75 */
76 SR_DISTANCE_LEVENSHTEIN,
77
78 /* Damerau-Levenshtein distance:
79 *
80 * Like the Levenshtein distance, but with transpositions. NOTE:
81 * The triangle inequality does not hold.
82 */
83 SR_DISTANCE_DAMERAU_LEVENSHTEIN,
84
85 /* Sentinel, keep it the last entry. */
86 SR_DISTANCE_NUM
87};
88
89float
90sr_distance(enum sr_distance_type distance_type,
91 struct sr_thread *thread1,
92 struct sr_thread *thread2);
93
101{
102 int m;
103 int n;
104 float *distances;
105};
106
117struct sr_distances *
118sr_distances_new(int m, int n);
119
128struct sr_distances *
130
136void
138
151float
152sr_distances_get_distance(struct sr_distances *distances, int i, int j);
153
165void
167 int i,
168 int j,
169 float d);
170
184struct sr_distances *
185sr_threads_compare(struct sr_thread **threads, int m, int n,
186 enum sr_distance_type dist_type);
187
195{
196 /* Same as for struct sr_distances. */
197 int m;
198 int n;
199 /* First coordinates of this part. */
200 int m_begin;
201 int n_begin;
202 /* Length of the part. */
203 size_t len;
204 /* Distance type. */
205 enum sr_distance_type dist_type;
206 /* Checksum of the threads used to compute this part to catch situations
207 * when sr_distances_part_compute is called with wrong array of threads. */
208 uint32_t checksum;
209 /* The actual result. */
210 float *distances;
211 /* Make it possible to create lists of sr_distances_part. */
212 struct sr_distances_part *next;
213};
214
219struct sr_distances_part *
220sr_distances_part_new(int m, int n, enum sr_distance_type dist_type,
221 int m_begin, int n_begin, size_t len);
222
236struct sr_distances_part *
237sr_distances_part_create(int m, int n, enum sr_distance_type dist_type,
238 unsigned nparts);
239
247void
249 struct sr_thread **threads);
250
259struct sr_distances *
261
269void
270sr_distances_part_free(struct sr_distances_part *part, bool follow_links);
271
272#ifdef __cplusplus
273}
274#endif
275
276#endif
struct sr_distances_part * sr_distances_part_create(int m, int n, enum sr_distance_type dist_type, unsigned nparts)
struct sr_distances * sr_distances_dup(struct sr_distances *distances)
void sr_distances_part_compute(struct sr_distances_part *part, struct sr_thread **threads)
struct sr_distances * sr_distances_part_merge(struct sr_distances_part *parts)
struct sr_distances * sr_distances_new(int m, int n)
struct sr_distances * sr_threads_compare(struct sr_thread **threads, int m, int n, enum sr_distance_type dist_type)
struct sr_distances_part * sr_distances_part_new(int m, int n, enum sr_distance_type dist_type, int m_begin, int n_begin, size_t len)
void sr_distances_set_distance(struct sr_distances *distances, int i, int j, float d)
void sr_distances_free(struct sr_distances *distances)
void sr_distances_part_free(struct sr_distances_part *part, bool follow_links)
float sr_distances_get_distance(struct sr_distances *distances, int i, int j)
A part of a distance matrix to be computed (possibly in different threads/processes and even differen...
Definition distance.h:195
A distance matrix of stack trace threads.
Definition distance.h:101