Arkanjo 0.1
A tool for finding duplicated functions in codebases
Loading...
Searching...
No Matches
similarity_table.hpp
Go to the documentation of this file.
1
12
13#ifndef SIMILARITY_TABLE_HPP
14#define SIMILARITY_TABLE_HPP
15
16#include <bits/stdc++.h>
17
18#include "path.hpp"
19#include "utils.hpp"
20#include "function.hpp"
21using namespace std;
22
30 private:
31 string SIMILARITY_TABLE_FILE_NAME = "tmp/output_parsed.txt";
32 double DEFAULT_SIMILARITY = 100.00;
33 double EPS_ERROR_MARGIN = 1e-6;
34 double MAXIMUM_SIMILARITY = 100.00;
35 double MINIMUM_SIMILARITY = 0.00;
36
37 double similarity_threshold;
38 vector<Path> paths;
39 map<Path,int> path_id;
40 vector<vector<pair<int,double>>> similarity_graph;
41 map<pair<int,int>,double> similarity_table;
42
48 int find_id_path(Path path);
49
54 void read_comparation(ifstream &table_file);
55
60 void read_file_table(ifstream &table_file);
61
65 void init_similarity_table();
66
72 bool is_above_threshold(double similarity);
73
79 vector<tuple<int,Path,Path>> sort_pairs_by_line_number(vector<pair<Path,Path>> similar_path_pairs);
80
81 public:
86 Similarity_Table(double _similarity_threshold);
87
92
97 void update_similarity(double new_similarity_threshold);
98
105 double get_similarity(Path path1, Path path2);
106
113 double is_similar(Path path1, Path path2);
114
119 vector<Path> get_path_list();
120
126 vector<Path> get_similar_path_to_the_reference(Path reference);
127
132 vector<tuple<double,Path,Path>> get_all_path_pairs_and_similarity_sorted_by_similarity();
133
138 vector<pair<Path,Path>> get_all_similar_path_pairs_sorted_by_similarity();
139
144 vector<pair<Path,Path>> get_all_similar_path_pairs_sorted_by_line_number();
145};
146
147#endif
Path manipulation class for tool-specific directory structure.
Definition path.hpp:24
vector< pair< Path, Path > > get_all_similar_path_pairs_sorted_by_line_number()
Gets all similar path pairs, sorted by line count.
Similarity_Table(double _similarity_threshold)
Constructs with custom similarity threshold.
void update_similarity(double new_similarity_threshold)
Updates similarity threshold.
vector< Path > get_path_list()
Gets list of all known paths.
vector< Path > get_similar_path_to_the_reference(Path reference)
Gets paths similar to reference path.
double is_similar(Path path1, Path path2)
Checks if two paths are similar.
vector< tuple< double, Path, Path > > get_all_path_pairs_and_similarity_sorted_by_similarity()
Gets all similar path pairs with scores, sorted.
Similarity_Table()
Constructs with default similarity threshold.
double get_similarity(Path path1, Path path2)
Gets similarity between two paths.
vector< pair< Path, Path > > get_all_similar_path_pairs_sorted_by_similarity()
Gets all similar path pairs, sorted by similarity.
Function abstraction for temporary codebase.
Path abstraction for temporary codebase.
Defines utility functions used across all files.