OpenCog Framework  Branch: master, revision 6f0b7fc776b08468cf1b74aa9db028f387b4f0c0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
FuzzyPatternMatchCB.cc
Go to the documentation of this file.
1 /*
2  * FuzzyPatternMatchCB.cc
3  *
4  * Copyright (C) 2015 OpenCog Foundation
5  *
6  * Author: Leung Man Hin <https://github.com/leungmanhin>
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU Affero General Public License v3 as
10  * published by the Free Software Foundation and including the exceptions
11  * at http://opencog.org/wiki/Licenses
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU Affero General Public License
19  * along with this program; if not, write to:
20  * Free Software Foundation, Inc.,
21  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22  */
23 
25 #include "FuzzyPatternMatchCB.h"
26 
27 using namespace opencog;
28 
29 //#define DEBUG
30 
33  rtn_type(rt),
34  excl_list(excl)
35 {
36 }
37 
49 void FuzzyPatternMatchCB::find_starters(const Handle& hp, const size_t& depth,
50  const size_t& clause_idx,
51  const Handle& term,
52  std::vector<Starter>& rtn)
53 {
54  // Traverse its outgoing set if it is a link
55  LinkPtr lp(LinkCast(hp));
56  if (lp) {
57  for (Handle h : lp->getOutgoingSet())
58  find_starters(h, depth + 1, clause_idx, hp, rtn);
59  }
60 
61  // Get the nodes that are not an instance nor a variable
62  else {
63  NodePtr np(NodeCast(hp));
64 
65  if (hp != Handle::UNDEFINED and np) {
66  pat_size++;
67 
68  if ((np->getType() != VARIABLE_NODE) and
69  (np->getName().find("@") == std::string::npos) and
70  (std::find(excl_list.begin(), excl_list.end(), hp) == excl_list.end())) {
71  Starter sn;
72  sn.uuid = hp.value();
73  sn.handle = hp;
74  sn.term = term;
75  sn.width = hp->getIncomingSetSize();
76  sn.depth = depth;
77 
78  rtn.push_back(sn);
79  }
80  }
81  }
82 }
83 
96 {
97  // Find starters from the clause
100 
101  // For removing duplicates, if any, form the list of starters,
102  // as we want to have a different starters for each of the searches
103  std::sort(starters.begin(), starters.end(),
104  [](const Starter& s1, const Starter& s2)
105  { return s1.uuid < s2.uuid; });
106  starters.erase(std::unique(starters.begin(), starters.end(),
107  [](const Starter& s1, const Starter& s2)
108  { return s1.uuid == s2.uuid; }),
109  starters.end());
110 
111  // Sort the starters according to their "width" and "depth"
112  auto sort_by_wd = [](const Starter& s1, const Starter& s2)
113  {
114  if (s1.width == s2.width) return s1.depth > s2.depth;
115  else return s1.width < s2.width;
116  };
117 
118  std::sort(starters.begin(), starters.end(), sort_by_wd);
119 
120  // Start the searches
121  size_t search_cnt = 0;
122  size_t num_starters = starters.size();
123  while (num_starters > search_cnt) {
124  const Handle& starter_term = starters[search_cnt].term;
125  const Handle& best_start = starters[search_cnt].handle;
126  search_cnt++;
127 
128 #ifdef DEBUG
129  std::cout << "\n========================================\n";
130  std::cout << "Initiating the fuzzy match... (" << search_cnt << "/"
131  << num_starters << ")\n";
132  std::cout << "Starter:\n" << best_start->toShortString() << "\n";
133  std::cout << "Start term:\n" << starter_term->toShortString();
134  std::cout << "========================================\n\n";
135 #endif
136 
137  IncomingSet iset = best_start->getIncomingSet();
138  size_t iset_size = iset.size();
139  for (size_t i = 0; i < iset_size; i++) {
140  Handle h(iset[i]);
141 
142 #ifdef DEBUG
143  std::cout << "Loop candidate (" << (i + 1) << "/" << iset_size << "):\n"
144  << h->toShortString() << "\n";
145 #endif
146 
147  pme->explore_neighborhood(clause, starter_term, h);
148  }
149  }
150 
151  // Let's end the search here if there are solutions, continue could be costly
152  if (solns.size() > 0) {
153  std::cout << "Fuzzy match is finished.\n";
154  return true;
155  }
156 
157  // Return false to use other methods to find matches
158  else return false;
159 }
160 
172 {
173  // Check if the potential solution is having the type that we are looking for
174  if (rtn_type and gl->getType() != rtn_type) return true;
175 
176  const Handle& gh = gl->getHandle();
177  UUID gid = gh.value();
178 
179  // Check if we have already compared the same one previously
180  if (std::find(prev_compared.begin(), prev_compared.end(), gid) == prev_compared.end()) {
181  if (pat_nodes.empty()) pat_nodes = get_all_nodes(clause);
182  check_if_accept(gh);
183  prev_compared.push_back(gid);
184  }
185 
186  return true;
187 }
188 
198 {
199  // Get all the ndoes from the potential solution
200  HandleSeq gn = get_all_nodes(gh);
201 
202  // Reject if the potential solution contains any atoms that we want to exclude
203  for (const Handle& eh : excl_list) {
204  if (std::find(gn.begin(), gn.end(), eh) != gn.end()) {
205 #ifdef DEBUG
206  std::cout << "Rejecting:\n" << gh->toShortString()
207  << "due to the existance of:\n" << eh->toShortString()
208  << "\n";
209 #endif
210  return;
211  }
212  }
213 
214  // Find out how many nodes the potential solution has in common with the pattern
215  HandleSeq common_nodes;
216  std::sort(pat_nodes.begin(), pat_nodes.end());
217  std::sort(gn.begin(), gn.end());
218  std::set_intersection(pat_nodes.begin(), pat_nodes.end(), gn.begin(), gn.end(),
219  std::back_inserter(common_nodes));
220 
221  // The size different between the pattern and the potential solution
222  size_t diff = std::abs((int)(pat_size - gn.size()));
223 
224  double similarity = 0;
225 
226  for (const Handle& common_node : common_nodes) {
227  size_t iss;
228  auto it = in_set_sizes.find(common_node);
229  if (it == in_set_sizes.end()) {
230  iss = common_node->getIncomingSetSize();
231  in_set_sizes.insert({common_node, iss});
232  }
233  else iss = it->second;
234 
235  // Roughly estimate how "rare" the node is by using 1 / incoming set size
236  // TODO: May use Truth Value instead
237  similarity += 1.0 / iss;
238  }
239 
240 #ifdef DEBUG
241  std::cout << "\n========================================\n";
242  std::cout << "Compaing:\n" << clause->toShortString() << "--- and:\n"
243  << gh->toShortString() << "\n";
244  std::cout << "Common nodes = " << common_nodes.size() << "\n";
245  std::cout << "Size diff = " << diff << "\n";
246  std::cout << "Similarity = " << similarity << "\n";
247  std::cout << "Most similar = " << max_similarity << "\n";
248  std::cout << "========================================\n\n";
249 #endif
250 
251  // Decide if we should accept the potential solutions or not
252  if ((similarity > max_similarity) or
253  (similarity == max_similarity and diff < min_size_diff)) {
254  max_similarity = similarity;
255  min_size_diff = diff;
256  solns.clear();
257  solns.push_back(gh);
258  }
259 
260  else if (similarity == max_similarity and diff == min_size_diff) {
261  solns.push_back(gh);
262  }
263 }
std::vector< UUID > prev_compared
bool explore_neighborhood(const Handle &, const Handle &, const Handle &)
IncomingSet getIncomingSet()
Definition: Atom.cc:321
std::vector< Handle > HandleSeq
a list of handles
Definition: Handle.h:246
std::vector< Starter > starters
virtual std::string toShortString(std::string indent="")=0
Handle getHandle()
Definition: Atom.h:211
std::shared_ptr< Link > LinkPtr
Definition: Atom.h:53
static NodePtr NodeCast(const Handle &h)
Definition: Node.h:113
void check_if_accept(const Handle &gh)
static const Handle UNDEFINED
Definition: Handle.h:77
HandleSeq get_all_nodes(Handle h)
Definition: AtomUtils.cc:40
unsigned long UUID
UUID == Universally Unique Identifier.
Definition: Handle.h:46
std::unordered_map< Handle, size_t > in_set_sizes
static LinkPtr LinkCast(const Handle &h)
Definition: Link.h:263
HandleSeq mandatory
Definition: Pattern.h:105
std::vector< LinkPtr > IncomingSet
Definition: Atom.h:55
std::shared_ptr< Node > NodePtr
Definition: Node.h:112
UUID value(void) const
Definition: Handle.h:85
unsigned short Type
type of Atoms, represented as short integer (16 bits)
Definition: types.h:40
size_t getIncomingSetSize()
Get the size of the incoming set.
Definition: Atom.cc:310
virtual bool link_match(const LinkPtr &pLink, const LinkPtr &gLink)
void find_starters(const Handle &hg, const size_t &depth, const size_t &clause_idx, const Handle &term, std::vector< Starter > &rtn)
FuzzyPatternMatchCB(AtomSpace *, Type, const HandleSeq &)
virtual bool initiate_search(PatternMatchEngine *pme)