OpenCog Framework  Branch: master, revision 6f0b7fc776b08468cf1b74aa9db028f387b4f0c0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
InitiateSearchCB.cc
Go to the documentation of this file.
1 /*
2  * InitiateSearchCB.cc
3  *
4  * Copyright (C) 2015 Linas Vepstas
5  *
6  * Author: Linas Vepstas <linasvepstas@gmail.com> April 2015
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU Affero General Public License v3 as
10  * published by the Free Software Foundation and including the exceptions
11  * at http://opencog.org/wiki/Licenses
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU Affero General Public License
19  * along with this program; if not, write to:
20  * Free Software Foundation, Inc.,
21  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22  */
23 
26 
27 #include "InitiateSearchCB.h"
28 #include "PatternMatchEngine.h"
29 
30 using namespace opencog;
31 
32 // Uncomment below to enable debug print
33 // #define DEBUG
34 #ifdef DEBUG
35 #define dbgprt(f, varargs...) printf(f, ##varargs)
36 #else
37 #define dbgprt(f, varargs...)
38 #endif
39 
40 /* ======================================================== */
41 
43  _classserver(classserver()),
44  _variables(NULL),
45  _pattern(NULL),
46  _type_restrictions(NULL),
47  _dynamic(NULL),
48  _as(as)
49 {
50 }
51 
53  const Pattern& pat)
54 {
55  _search_fail = false;
56  _variables = &vars;
57  _pattern = &pat;
60 }
61 
62 
63 /* ======================================================== */
64 
65 // Find a good place to start the search.
66 //
67 // The handle h points to a clause. In principle, it is enough to
68 // simply find a constant in the clause, and just start there. In
69 // practice, this can be an awful way to do things. So, for example,
70 // most "typical" clauses will be of the form
71 //
72 // EvaluationLink
73 // PredicateNode "blah"
74 // ListLink
75 // VariableNode $var
76 // ConceptNode "item"
77 //
78 // Typically, the incoming set for "blah" will be huge, so starting the
79 // search there would be a poor choice. Typically, the incoming set to
80 // "item" will be much smaller, and so makes a better choice. The code
81 // below tries to pass over "blah" and pick "item" instead. It does so
82 // by comparing the size of the incoming sets of the two constants, and
83 // picking the one with the smaller ("thinner") incoming set. Note that
84 // this is a form of "greedy" search.
85 //
86 // Atoms that are inside of dynamically-evaluatable terms are not
87 // considered. That's because groundings for such terms might not exist
88 // in the atomspace, so a search that starts there is doomed to fail.
89 //
90 // Note that the algo explores the clause to its greatest depth. That's
91 // OK, because typical clauses are never very deep.
92 //
93 // A variant of this algo could incorporate the Attentional focus
94 // into the "thinnest" calculation, so that only high-AF atoms are
95 // considered.
96 //
97 // Note that the size of the incoming set really is a better measure,
98 // and not the depth. So, for example, if "item" has a huge incoming
99 // set, but "blah" does not, then "blah" is a much better place to
100 // start.
101 //
102 // size_t& depth will be set to the depth of the thinnest constant found.
103 // Handle& start will be set to the link containing that constant.
104 // size_t& width will be set to the incoming-set size of the thinnest
105 // constant found.
106 // The returned value will be the constant at which to start the search.
107 // If no constant is found, then the returned value is the undefnied
108 // handle.
109 //
110 Handle
111 InitiateSearchCB::find_starter(const Handle& h, size_t& depth,
112  Handle& start, size_t& width)
113 {
114  // If its a node, then we are done. Don't modify either depth or
115  // start.
116  Type t = h->getType();
117  if (_classserver.isNode(t)) {
118  if (t != VARIABLE_NODE) {
119  width = h->getIncomingSetSize();
120  return h;
121  }
122  return Handle::UNDEFINED;
123  }
124 
125  // Ignore all ChoiceLink's. Picking a starter inside one of these
126  // will almost surely be disconnected from the rest of the graph.
127  if (CHOICE_LINK == t)
128  return Handle::UNDEFINED;
129 
130  // Ignore all dynamically-evaluatable links up front.
131  if (_dynamic->find(h) != _dynamic->end())
132  return Handle::UNDEFINED;
133 
134  // Iterate over all the handles in the outgoing set.
135  // Find the deepest one that contains a constant, and start
136  // the search there. If there are two at the same depth,
137  // then start with the skinnier one.
138  size_t deepest = depth;
139  start = Handle::UNDEFINED;
140  Handle hdeepest(Handle::UNDEFINED);
141  size_t thinnest = SIZE_MAX;
142 
143  LinkPtr ll(LinkCast(h));
144  for (Handle hunt : ll->getOutgoingSet())
145  {
146  size_t brdepth = depth + 1;
147  size_t brwid = SIZE_MAX;
148  Handle sbr(h);
149 
150  // Blow past the QuoteLinks, since they just screw up the search start.
151  if (QUOTE_LINK == hunt->getType())
152  hunt = LinkCast(hunt)->getOutgoingAtom(0);
153 
154  Handle s(find_starter(hunt, brdepth, sbr, brwid));
155 
156  if (s != Handle::UNDEFINED
157  and (brwid < thinnest
158  or (brwid == thinnest and deepest < brdepth)))
159  {
160  deepest = brdepth;
161  hdeepest = s;
162  start = sbr;
163  thinnest = brwid;
164  }
165 
166  }
167  depth = deepest;
168  width = thinnest;
169  return hdeepest;
170 }
171 
172 /* ======================================================== */
179  const std::set<Handle>& evl,
180  Handle& starter_term,
181  size_t& bestclause)
182 {
183  size_t thinnest = SIZE_MAX;
184  size_t deepest = 0;
185  bestclause = 0;
186  Handle best_start(Handle::UNDEFINED);
187  starter_term = Handle::UNDEFINED;
188 
189  size_t nc = clauses.size();
190  for (size_t i=0; i < nc; i++)
191  {
192  // Cannot start with an evaluatable clause!
193  if (0 < evl.count(clauses[i])) continue;
194  Handle h(clauses[i]);
195  size_t depth = 0;
196  size_t width = SIZE_MAX;
198  Handle start(find_starter(h, depth, term, width));
199  if (start != Handle::UNDEFINED
200  and (width < thinnest
201  or (width == thinnest and depth > deepest)))
202  {
203  thinnest = width;
204  deepest = depth;
205  bestclause = i;
206  best_start = start;
207  starter_term = term;
208  }
209  }
210 
211  return best_start;
212 }
213 
214 /* ======================================================== */
235 {
236  // Sometimes, the number of mandatory clauses can be zero...
237  // We still want to search, though.
238  const HandleSeq& clauses =
239  (0 < _pattern->mandatory.size()) ?
241 
242  // In principle, we could start our search at some node, any node,
243  // that is not a variable. In practice, the search begins by
244  // iterating over the incoming set of the node, and so, if it is
245  // large, a huge amount of effort might be wasted exploring
246  // dead-ends. Thus, it pays off to start the search on the
247  // node with the smallest ("narrowest" or "thinnest") incoming set
248  // possible. Thus, we look at all the clauses, to find the
249  // "thinnest" one.
250  //
251  // Note also: the user is allowed to specify patterns that have
252  // no constants in them at all. In this case, the search is
253  // performed by looping over all links of the given types.
254  size_t bestclause;
255  Handle best_start = find_thinnest(clauses, _pattern->evaluatable_holders,
256  _starter_term, bestclause);
257 
258  // Cannot find a starting point! This can happen if all of the
259  // clauses contain nothing but variables, or if all of the
260  // clauses are evaluatable(!) Somewhat unusual, but it can
261  // happen. In this case, we need some other, alternative search
262  // strategy.
263  if (Handle::UNDEFINED == best_start)
264  {
265  _search_fail = true;
266  return false;
267  }
268 
269  _root = clauses[bestclause];
270  dbgprt("Search start node: %s\n", best_start->toShortString().c_str());
271  dbgprt("Start term is: %s\n", _starter_term == Handle::UNDEFINED ?
272  "UNDEFINED" : _starter_term->toShortString().c_str());
273  dbgprt("Root clause is: %s\n", _root->toShortString().c_str());
274 
275  // This should be calling the over-loaded virtual method
276  // get_incoming_set(), so that, e.g. it gets sorted by attentional
277  // focus in the AttentionalFocusCB class...
278  IncomingSet iset = get_incoming_set(best_start);
279  size_t sz = iset.size();
280  for (size_t i = 0; i < sz; i++)
281  {
282  Handle h(iset[i]);
283  dbgprt("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\n");
284  dbgprt("Loop candidate (%lu/%lu):\n%s\n", i+1, sz,
285  h->toShortString().c_str());
286  bool found = pme->explore_neighborhood(_root, _starter_term, h);
287 
288  // Terminate search if satisfied.
289  if (found) return true;
290  }
291 
292  // If we are here, we have searched the entire neighborhood, and
293  // no satisfiable groundings were found.
294  return false;
295 }
296 
297 /* ======================================================== */
387 {
388  dbgprt("Attempt to use node-neighbor search\n");
389  _search_fail = false;
390  bool found = neighbor_search(pme);
391  if (found) return true;
392  if (not _search_fail) return false;
393 
394  // If we are here, then we could not find a clause at which to
395  // start, which can happen if the clauses hold no variables, and
396  // they are all evaluatable. This can happen for sequence links;
397  // we want to quickly rule out this case before moving to more
398  // complex searches, below.
399  dbgprt("Cannot use node-neighbor search, use no-var search\n");
400  _search_fail = false;
401  found = no_search(pme);
402  if (found) return true;
403  if (not _search_fail) return false;
404 
405  // If we are here, then we could not find a clause at which to
406  // start, which can happen if the clauses consist entirely of
407  // variables! Which can happen (there is a unit test for this,
408  // the LoopUTest), and so instead, we search based on the link
409  // types that occur in the atomspace.
410  dbgprt("Cannot use no-var search, use link-type search\n");
411  _search_fail = false;
412  found = link_type_search(pme);
413  if (found) return true;
414  if (not _search_fail) return false;
415 
416  // The URE Reasoning case: if we found nothing, then there are no
417  // links! Ergo, every clause must be a lone variable, all by
418  // itself. This is how some URE rules may start: the specify a single
419  // variable, all by itself, and set some type restrictions on it,
420  // and that's all. We deal with this in the variable_search()
421  // method.
422  dbgprt("Cannot use link-type search, use variable-type search\n");
423  _search_fail = false;
424  found = variable_search(pme);
425  return found;
426 }
427 
428 /* ======================================================== */
435  Handle& rarest,
436  size_t& count)
437 {
438  Type t = clause->getType();
439  if (QUOTE_LINK == t) return;
440  if (CHOICE_LINK == t) return;
441 
442  LinkPtr lll(LinkCast(clause));
443  if (NULL == lll) return;
444 
445  size_t num = (size_t) _as->get_num_atoms_of_type(t);
446  if (num < count)
447  {
448  count = num;
449  rarest = clause;
450  }
451 
452  const HandleSeq& oset = lll->getOutgoingSet();
453  for (const Handle& h : oset)
454  find_rarest(h, rarest, count);
455 }
456 
457 /* ======================================================== */
465 {
466  const HandleSeq& clauses = _pattern->mandatory;
467 
468  _search_fail = false;
471  size_t count = SIZE_MAX;
472 
473  for (const Handle& cl: clauses)
474  {
475  // Evaluatables don't exist in the atomspace, in general.
476  // Cannot start a search with them.
477  if (0 < _pattern->evaluatable_holders.count(cl)) continue;
478  size_t prev = count;
479  find_rarest(cl, _starter_term, count);
480  if (count < prev)
481  {
482  prev = count;
483  _root = cl;
484  }
485  }
486 
487  // The URE Reasoning case: if we found nothing, then there are no
488  // links! Ergo, every clause must be a lone variable, all by
489  // itself. This is how some URE rules may start: the specify a single
490  // variable, all by itself, and set some type restrictions on it,
491  // and that's all. We deal with this in the variable_search()
492  // method.
493  if (Handle::UNDEFINED == _root)
494  {
495  _search_fail = true;
496  return false;
497  }
498 
499  dbgprt("Start clause is: %s\n", _root->toShortString().c_str());
500  dbgprt("Start term is: %s\n", _starter_term->toShortString().c_str());
501 
502  // Get type of the rarest link
503  Type ptype = _starter_term->getType();
504 
505  HandleSeq handle_set;
506  _as->get_handles_by_type(handle_set, ptype);
507 
508 #ifdef DEBUG
509  size_t i = 0;
510 #endif
511  for (const Handle& h : handle_set)
512  {
513  dbgprt("yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy\n");
514  dbgprt("Loop candidate (%lu/%lu):\n%s\n", ++i, handle_set.size(),
515  h->toShortString().c_str());
516  bool found = pme->explore_neighborhood(_root, _starter_term, h);
517  if (found) return true;
518  }
519  return false;
520 }
521 
522 /* ======================================================== */
536 {
537  const HandleSeq& clauses = _pattern->mandatory;
538 
539  // Find the rarest variable type;
540  size_t count = SIZE_MAX;
541  Type ptype = ATOM;
542 
543  dbgprt("varset size = %lu\n", _variables->varset.size());
546  for (const Handle& var: _variables->varset)
547  {
548  dbgprt("Examine variable %s\n", var->toShortString().c_str());
549  auto tit = _type_restrictions->find(var);
550  if (_type_restrictions->end() == tit) continue;
551  const std::set<Type>& typeset = tit->second;
552  dbgprt("Type-restictions set size = %lu\n", typeset.size());
553  for (Type t : typeset)
554  {
555  size_t num = (size_t) _as->get_num_atoms_of_type(t);
556  dbgprt("Type = %s has %lu atoms in the atomspace\n",
557  _classserver.getTypeName(t).c_str(), num);
558  if (0 < num and num < count)
559  {
560  for (const Handle& cl : clauses)
561  {
562  // Evaluatables dont' exist in the atomspace, in general.
563  // Cannot start a search with them.
564  if (0 < _pattern->evaluatable_holders.count(cl)) continue;
565  FindAtoms fa(var);
566  fa.search_set(cl);
567  if (cl == var)
568  {
569  _root = cl;
570  _starter_term = cl;
571  count = num;
572  ptype = t;
573  dbgprt("New minimum count of %lu\n", count);
574  break;
575  }
576  if (0 < fa.least_holders.size())
577  {
578  _root = cl;
579  _starter_term = *fa.least_holders.begin();
580  count = num;
581  ptype = t;
582  dbgprt("New minimum count of %lu (nonroot)\n", count);
583  break;
584  }
585  }
586  }
587  }
588  }
589 
590  // There were no type restrictions!
591  if (Handle::UNDEFINED == _root)
592  {
593  dbgprt("There were no type restrictions! That must be wrong!\n");
594  if (0 == clauses.size())
595  {
596  // This is kind-of weird, it can happen if all clauses
597  // are optional.
598  _search_fail = true;
599  return false;
600  }
601  _root = _starter_term = clauses[0];
602  }
603 
604  HandleSeq handle_set;
605  if (ptype == ATOM)
606  _as->get_handles_by_type(handle_set, ptype, true);
607  else
608  _as->get_handles_by_type(handle_set, ptype);
609 
610  dbgprt("Atomspace reported %lu atoms\n", handle_set.size());
611 
612 #ifdef DEBUG
613  size_t i = 0;
614 #endif
615  for (const Handle& h : handle_set)
616  {
617  dbgprt("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz\n");
618  dbgprt("Loop candidate (%lu/%lu):\n%s\n", ++i, handle_set.size(),
619  h->toShortString().c_str());
620  bool found = pme->explore_neighborhood(_root, _starter_term, h);
621  if (found) return true;
622  }
623 
624  return false;
625 }
626 
627 /* ======================================================== */
640 {
641  if (0 < _variables->varset.size() or
642  1 != _pattern->mandatory.size())
643  {
644  _search_fail = true;
645  return false;
646  }
647 
648  // The one-and-only clause must be evaluatable!
649  const HandleSeq& clauses = _pattern->mandatory;
650  const std::set<Handle>& evl = _pattern->evaluatable_holders;
651  if (0 == evl.count(clauses[0]))
652  {
653  _search_fail = true;
654  return false;
655  }
656 
657  dbgprt("wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww\n");
658  dbgprt("Non-search: no variables, no non-evaluatable clauses\n");
659  _root = _starter_term = clauses[0];
660  bool found = pme->explore_neighborhood(_root, _starter_term, _root);
661  return found;
662 }
663 
664 /* ===================== END OF FILE ===================== */
virtual Handle find_thinnest(const HandleSeq &, const std::set< Handle > &, Handle &, size_t &)
bool explore_neighborhood(const Handle &, const Handle &, const Handle &)
virtual bool neighbor_search(PatternMatchEngine *)
std::vector< Handle > HandleSeq
a list of handles
Definition: Handle.h:246
std::set< Handle > evaluatable_holders
Definition: Pattern.h:121
std::set< Handle > varset
Definition: Pattern.h:65
bool isNode(Type t)
Definition: ClassServer.h:182
std::set< Handle > evaluatable_terms
Definition: Pattern.h:120
virtual std::string toShortString(std::string indent="")=0
Type getType() const
Definition: Atom.h:197
HandleSeq cnf_clauses
Definition: Pattern.h:102
std::shared_ptr< Link > LinkPtr
Definition: Atom.h:53
void get_handles_by_type(HandleSeq &appendToHandles, Type type, bool subclass=false) const
Definition: AtomSpace.h:392
ClassServer & classserver(ClassServerFactory *=ClassServer::createInstance)
Definition: ClassServer.cc:159
virtual IncomingSet get_incoming_set(const Handle &h)
const Variables * _variables
virtual bool link_type_search(PatternMatchEngine *)
static const Handle UNDEFINED
Definition: Handle.h:77
const VariableTypeMap * _type_restrictions
virtual void find_rarest(const Handle &, Handle &, size_t &)
const std::set< Handle > * _dynamic
std::set< Handle > least_holders
Definition: FindUtils.h:90
int get_num_atoms_of_type(Type type, bool subclass=false) const
Definition: AtomSpace.h:93
const std::string & getTypeName(Type type)
Definition: ClassServer.cc:148
virtual bool no_search(PatternMatchEngine *)
static LinkPtr LinkCast(const Handle &h)
Definition: Link.h:263
HandleSeq mandatory
Definition: Pattern.h:105
std::vector< LinkPtr > IncomingSet
Definition: Atom.h:55
virtual Handle find_starter(const Handle &, size_t &, Handle &, size_t &)
unsigned short Type
type of Atoms, represented as short integer (16 bits)
Definition: types.h:40
size_t getIncomingSetSize()
Get the size of the incoming set.
Definition: Atom.cc:310
virtual bool variable_search(PatternMatchEngine *)
virtual bool initiate_search(PatternMatchEngine *)
void search_set(const Handle &h)
Definition: FindUtils.h:128
#define dbgprt(f, varargs...)
VariableTypeMap typemap
Definition: Pattern.h:66
virtual void set_pattern(const Variables &, const Pattern &)