OpenCog Framework  Branch: master, revision 6f0b7fc776b08468cf1b74aa9db028f387b4f0c0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
PatternLink.cc
Go to the documentation of this file.
1 /*
2  * PatternLink.cc
3  *
4  * Copyright (C) 2009, 2014, 2015 Linas Vepstas
5  *
6  * Author: Linas Vepstas <linasvepstas@gmail.com> January 2009
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU Affero General Public License v3 as
10  * published by the Free Software Foundation and including the
11  * exceptions
12  * at http://opencog.org/wiki/Licenses
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU Affero General Public
20  * License
21  * along with this program; if not, write to:
22  * Free Software Foundation, Inc.,
23  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24  */
25 
26 #include <opencog/util/Logger.h>
29 #include <opencog/atomspace/Node.h>
31 
32 #include "PatternLink.h"
33 #include "PatternUtils.h"
34 #include "VariableList.h"
35 
36 using namespace opencog;
37 
39 {
42 
43  // Locate the black-box clauses.
46  _num_virts = _virtual.size();
47 
48  // unbundle_virtual does not handle connectives. Here, we assume that
49  // we are being run with the DefaultPatternMatchCB, and so we assume
50  // that the logical connectives are AndLink, OrLink and NotLink.
51  // Tweak the evaluatable_holders to reflect this.
52  std::set<Type> connectives({AND_LINK, OR_LINK, NOT_LINK});
53  trace_connectives(connectives, _pat.clauses);
54 
55  // Split the non-virtual clauses into connected components
58  _num_comps = _components.size();
59 
60  // If there is only one connected component, then this can be
61  // handled during search by a single PatternLink. The multi-clause
62  // grounding mechanism is not required for that case.
63  if (1 == _num_comps)
64  {
65  // Each component is in connection-order. By re-assigning to
66  // _pat.cnf_clauses, they get placed in that order, this giving
67  // a minor performance boost during clause traversal.
68  // Gurk. This does not work currently; the evaluatables have been
69  // stripped out of the component. I think this is a bug ...
70  // Is this related to the other XXX for validate_clasues??
71  // _pat.cnf_clauses = _components[0];
73  }
74 
76 }
77 
78 
81 {
82  // If we are here, then set up a PatternLink for each connected
83  // component.
84  //
85  // There is a pathological case where there are no virtuals, but
86  // there are multiple disconnected components. I think that this is
87  // a user-error, but in fact PLN does have a rule which wants to
88  // explore that combinatoric explosion, on purpose. So we have to
89  // allow the multiple disconnected components for that case.
91  for (size_t i=0; i<_num_comps; i++)
92  {
95  _component_patterns.push_back(h);
96  }
97 }
98 
100 {
101  _pat.redex_name = "anonymous PatternLink";
104  common_init();
106 }
107 
108 /* ================================================================= */
109 
113 PatternLink::PatternLink(const std::set<Handle>& vars,
114  const VariableTypeMap& typemap,
115  const HandleSeq& compo,
116  const std::set<Handle>& opts)
117  : Link(PATTERN_LINK, HandleSeq())
118 {
119  // First, lets deal with the vars. We have discarded the original
120  // order of the variables, and I think that's OK, because we will
121  // never have the substitute aka beta-redex aka putlink method
122  // called on us, not directly, at least. If we need it, then the
123  // API will need to be changed...
124  // So all we need is the varset, and the subset of the typemap.
125  _varlist.varset = vars;
126  for (const Handle& v : vars)
127  {
128  auto it = typemap.find(v);
129  if (it != typemap.end())
130  _varlist.typemap.insert(*it);
131  }
132 
133  // Next, the body... there no _body for lambda. The compo is the
134  // _cnf_clauses; we have to reconstruct the optionals. We cannot
135  // use extract_optionals because opts have been stripped already.
136 
137  _pat.cnf_clauses = compo;
138  for (const Handle& h : compo)
139  {
140  bool h_is_opt = false;
141  for (const Handle& opt : opts)
142  {
143  if (is_atom_in_tree(opt, h))
144  {
145  _pat.optionals.insert(opt);
146  _pat.clauses.push_back(opt);
147  h_is_opt = true;
148  break;
149  }
150  }
151  if (not h_is_opt)
152  _pat.mandatory.push_back(h);
153  }
154 
155  // The rest is easy: the evaluatables and the connection map
158  _num_virts = _virtual.size();
159  OC_ASSERT (0 == _num_virts, "Must not have any virtuals!");
160 
161  _components.push_back(compo);
162  _num_comps = 1;
163 
165  _pat.redex_name = "Unpacked component of a virtual link";
166 
167  make_term_trees();
168 }
169 
170 /* ================================================================= */
171 
176 PatternLink::PatternLink(const std::set<Handle>& vars,
177  const HandleSeq& clauses)
178  : Link(PATTERN_LINK, HandleSeq())
179 {
180  _varlist.varset = vars;
181  _pat.clauses = clauses;
182  common_init();
184 }
185 
186 /* ================================================================= */
187 
190  : Link(PATTERN_LINK, hseq, tv, av)
191 {
192  init();
193 }
194 
197  : Link(PATTERN_LINK, HandleSeq({body}), tv, av)
198 {
199  init();
200 }
201 
202 PatternLink::PatternLink(const Handle& vars, const Handle& body,
204  : Link(PATTERN_LINK, HandleSeq({vars, body}), tv, av)
205 {
206  init();
207 }
208 
211  : Link(t, hseq, tv, av)
212 {
213  // BindLink has other init sequences
214  if (BIND_LINK == t) return;
215  init();
216 }
217 
219  : Link(l)
220 {
221  // Type must be as expected
222  Type tscope = l.getType();
223  if (not classserver().isA(tscope, PATTERN_LINK))
224  {
225  const std::string& tname = classserver().getTypeName(tscope);
226  throw InvalidParamException(TRACE_INFO,
227  "Expecting a PatternLink, got %s", tname.c_str());
228  }
229 
230  // BindLink has other init sequences
231  if (BIND_LINK == tscope) return;
232 
233  init();
234 }
235 
236 
237 /* ================================================================= */
246 {
247  size_t sz = oset.size();
248  if (2 < sz)
249  throw InvalidParamException(TRACE_INFO,
250  "Expecting an outgoing set size of at most two, got %d", sz);
251 
252  // If the outgoing set size is one, then there are no variable
253  // declarations; extract all free variables.
254  if (1 == sz)
255  {
256  _body = oset[0];
257 
258  // Use the FreeLink class to find all the variables;
259  // Use the VariableList class for build the Variables struct.
260  FreeLink fl(oset[0]);
261  VariableList vl(fl.get_vars());
262  _varlist = vl.get_variables();
263  return;
264  }
265 
266  // If we are here, then the first outgoing set member should be
267  // a variable declaration.
268  _body = oset[1];
269 
270  // Initialize _varlist with the scoped variables
271  init_scoped_variables(oset[0]);
272 }
273 
274 /* ================================================================= */
280 {
281  // Either it is a VariableList, or its a naked variable, or its a
282  // typed variable. Use the VariableList class as a tool to
283  // extract the variables for us.
284  Type t = hvar->getType();
285  if (VARIABLE_LIST == t)
286  {
287  VariableList vl(LinkCast(hvar)->getOutgoingSet());
288  _varlist = vl.get_variables();
289  }
290  else
291  {
292  VariableList vl({hvar});
293  _varlist = vl.get_variables();
294  }
295 }
296 
297 /* ================================================================= */
310 {
311  Type t = hbody->getType();
312  if (AND_LINK == t)
313  {
314  _pat.clauses = LinkCast(hbody)->getOutgoingSet();
315  }
316  else
317  {
318  // There's just one single clause!
319  _pat.clauses.push_back(hbody);
320  }
321 }
322 
323 /* ================================================================= */
333 void PatternLink::validate_clauses(std::set<Handle>& vars,
334  HandleSeq& clauses)
335 
336 {
337  // The Fuzzy matcher does some strange things: it declares no
338  // vars at all, only clauses, and then uses the pattern matcher
339  // to automatically explore nearby atoms. As a result, all of
340  // its clauses are "constant", and we allow this special case.
341  // Need to review the rationality of this design...
342  if (0 < vars.size())
343  {
344  // Make sure that the user did not pass in bogus clauses.
345  // Make sure that every clause contains at least one variable.
346  // The presence of constant clauses will mess up the current
347  // pattern matcher. Constant clauses are "trivial" to match,
348  // and so its pointless to even send them through the system.
349  bool bogus = remove_constants(vars, clauses);
350  if (bogus)
351  {
352  logger().warn("%s: Constant clauses removed from pattern",
353  __FUNCTION__);
354  }
355  }
356 
357  // Make sure that each declared variable appears in some clause.
358  // We won't (can't) ground variables that don't show up in a
359  // clause. They are presumably there due to programmer error.
360  // Quoted variables are constants, and so don't count.
361  //
362  // XXX Well, we could throw, here, but sureal gives us spurious
363  // variables, so instead of throwing, we just discard them and
364  // print a warning.
365  for (const Handle& v : vars)
366  {
367  if (not is_unquoted_in_any_tree(clauses, v))
368  {
369 /*
370  logger().warn(
371  "%s: The variable %s does not appear (unquoted) in any clause!",
372  __FUNCTION__, v->toShortString().c_str());
373 */
374  vars.erase(v);
375  throw InvalidParamException(TRACE_INFO,
376  "The variable %s does not appear (unquoted) in any clause!",
377  v->toShortString().c_str());
378  }
379  }
380 
381  // The above 1-2 combination of removing constant clauses, and
382  // removing variables, can result in an empty body. That surely
383  // warrants a throw, and BuggyQuoteUTest is expecting one.
384  if (0 == vars.size() and 0 == clauses.size())
385  throw InvalidParamException(TRACE_INFO,
386  "No variable appears (unquoted) anywhere in any clause!");
387 }
388 
389 /* ================================================================= */
394 void PatternLink::extract_optionals(const std::set<Handle> &vars,
395  const std::vector<Handle> &component)
396 {
397  // Split in positive and negative clauses
398  for (const Handle& h : component)
399  {
400  Type t = h->getType();
401  if (ABSENT_LINK == t)
402  {
403  LinkPtr lopt(LinkCast(h));
404 
405  // We insist on an arity of 1, because anything else is
406  // ambiguous: consider not(A B) is that (not(A) and not(B))
407  // or is it (not(A) or not(B))?
408  if (1 != lopt->getArity())
409  throw InvalidParamException(TRACE_INFO,
410  "NotLink and AbsentLink can have an arity of one only!");
411 
412  const Handle& inv(lopt->getOutgoingAtom(0));
413  _pat.optionals.insert(inv);
414  _pat.cnf_clauses.push_back(inv);
415  }
416  else
417  {
418  _pat.mandatory.push_back(h);
419  _pat.cnf_clauses.push_back(h);
420  }
421  }
422 }
423 
424 /* ================================================================= */
425 
426 /* utility -- every variable in the key term will get the value. */
427 static void add_to_map(std::unordered_multimap<Handle, Handle>& map,
428  const Handle& key, const Handle& value)
429 {
430  if (key->getType() == VARIABLE_NODE) map.insert({key, value});
431  LinkPtr lll(LinkCast(key));
432  if (NULL == lll) return;
433  const HandleSeq& oset = lll->getOutgoingSet();
434  for (const Handle& ho : oset) add_to_map(map, ho, value);
435 }
436 
481 void PatternLink::unbundle_virtual(const std::set<Handle>& vars,
482  const HandleSeq& clauses,
483  HandleSeq& fixed_clauses,
484  HandleSeq& virtual_clauses,
485  std::set<Handle>& black_clauses)
486 {
487  for (const Handle& clause: clauses)
488  {
489  bool is_virtual = false;
490  bool is_black = false;
491 
492 #ifdef BORKEN_DOESNT_WORK
493 // The below should have worked to set things up, but it doesn't,
494 // and I'm too lazy to investigate, because an alternate hack is
495 // working, at the moment.
496  // If a clause is a variable, we have to make the worst-case
497  // assumption that it is evaulatable, so that we can evaluate
498  // it later.
499  if (VARIABLE_NODE == clause->getType())
500  {
501  _pat.evaluatable_terms.insert(clause);
502  add_to_map(_pat.in_evaluatable, clause, clause);
503  is_black = true;
504  }
505 #endif
506 
507  FindAtoms fgpn(GROUNDED_PREDICATE_NODE, true);
508  fgpn.search_set(clause);
509  for (const Handle& sh : fgpn.least_holders)
510  {
511  _pat.evaluatable_terms.insert(sh);
512  add_to_map(_pat.in_evaluatable, sh, sh);
513  // But they're virtual only if they have two or more
514  // unquoted, bound variables in them. Otherwise, they
515  // can be evaluated on the spot.
516  if (2 <= num_unquoted_in_tree(sh, vars))
517  {
518  is_virtual = true;
519  is_black = true;
520  }
521  }
522  for (const Handle& sh : fgpn.holders)
523  _pat.evaluatable_holders.insert(sh);
524 
525  // Subclasses of VirtualLink, e.g. GreaterThanLink, which
526  // are essentially a kind of EvaluationLink holding a GPN
527  FindAtoms fgtl(VIRTUAL_LINK, true);
528  fgtl.search_set(clause);
529  // Unlike the above, its varset, not least_holders...
530  // because its a link...
531  for (const Handle& sh : fgtl.varset)
532  {
533  _pat.evaluatable_terms.insert(sh);
534  _pat.evaluatable_holders.insert(sh);
535  add_to_map(_pat.in_evaluatable, sh, sh);
536  // But they're virtual only if they have two or more
537  // unquoted, bound variables in them. Otherwise, they
538  // can be evaluated on the spot. Virtuals are not black.
539  if (2 <= num_unquoted_in_tree(sh, vars))
540  is_virtual = true;
541  }
542  for (const Handle& sh : fgtl.holders)
543  _pat.evaluatable_holders.insert(sh);
544 
545  // Subclasses of ExecutionOutputLink, e.g. PlusLink,
546  // TimesLink are executable. They get treated by the
547  // same virtual-graph algo as the virtual links.
548  FindAtoms feol(EXECUTION_OUTPUT_LINK, true);
549  feol.search_set(clause);
550 
551  for (const Handle& sh : feol.varset)
552  {
553  _pat.executable_terms.insert(sh);
554  _pat.executable_holders.insert(sh);
555  add_to_map(_pat.in_executable, sh, sh);
556  // But they're virtual only if they have two or more
557  // unquoted, bound variables in them. Otherwise, they
558  // can be evaluated on the spot.
559  if (2 <= num_unquoted_in_tree(sh, vars))
560  {
561  is_virtual = true;
562  is_black = true;
563  }
564  }
565  for (const Handle& sh : feol.holders)
566  _pat.executable_holders.insert(sh);
567 
568  if (is_virtual)
569  virtual_clauses.push_back(clause);
570  else
571  fixed_clauses.push_back(clause);
572 
573  if (is_black)
574  black_clauses.insert(clause);
575  }
576 }
577 
578 /* ================================================================= */
579 
588 void PatternLink::trace_connectives(const std::set<Type>& connectives,
589  const HandleSeq& oset)
590 {
591  for (const Handle& term: oset)
592  {
593  Type t = term->getType();
594  if (connectives.find(t) == connectives.end()) continue;
595  _pat.evaluatable_holders.insert(term);
596  add_to_map(_pat.in_evaluatable, term, term);
597  LinkPtr lp(LinkCast(term));
598  if (lp)
599  trace_connectives(connectives, lp->getOutgoingSet());
600  }
601 }
602 
603 /* ================================================================= */
614 {
615  for (const Handle& h : _pat.cnf_clauses)
616  {
617  make_map_recursive(h, h);
618  }
619 
620  // Save some minor amount of space by erasing those atoms that
621  // participate in only one clause. These atoms cannot be used
622  // to determine connectivity between clauses, and so are un-needed.
623  auto it = _pat.connectivity_map.begin();
624  auto end = _pat.connectivity_map.end();
625  while (it != end)
626  {
627  if (it->second.size() == 1)
628  it = _pat.connectivity_map.erase(it);
629  else
630  it++;
631  }
632 }
633 
634 void PatternLink::make_map_recursive(const Handle& root, const Handle& h)
635 {
636  _pat.connectivity_map[h].push_back(root);
637 
638  LinkPtr l(LinkCast(h));
639  if (l)
640  {
641  for (const Handle& ho: l->getOutgoingSet())
642  make_map_recursive(root, ho);
643  }
644 }
645 
646 // Hack alert: Definitely it should not be here. Though some refactoring
647 // regarding shared libraries circular dependencies (liblambda and libquery)
648 // need to be done before fixing...
649 const PatternTermPtr PatternTerm::UNDEFINED(std::make_shared<PatternTerm>());
650 
652 {
653  for (const Handle& clause : _pat.cnf_clauses)
654  {
655  PatternTermPtr root_term(std::make_shared<PatternTerm>());
656  make_term_tree_recursive(clause, clause, root_term);
657  }
658 }
659 
661  const Handle& h,
662  PatternTermPtr& parent)
663 {
664  PatternTermPtr ptm(std::make_shared<PatternTerm>(parent, h));
665  parent->addOutgoingTerm(ptm);
666  _pat.connected_terms_map[{h, root}].push_back(ptm);
667  LinkPtr l(LinkCast(h));
668  if (l)
669  {
670  for (const Handle& ho: l->getOutgoingSet())
671  make_term_tree_recursive(root, ho, ptm);
672  }
673 }
674 
675 /* ================================================================= */
679 void PatternLink::check_connectivity(const std::vector<HandleSeq>& components)
680 {
681  if (1 == components.size()) return;
682 
683  // Users are going to be stumped by this one, so print
684  // out a verbose, user-freindly debug message to help
685  // them out.
686  std::stringstream ss;
687  ss << "Pattern is not connected! Found "
688  << components.size() << " components:\n";
689  int cnt = 1;
690  for (const auto& comp : components)
691  {
692  ss << "Connected component " << cnt
693  << " consists of ----------------: \n";
694  for (Handle h : comp) ss << h->toString();
695  cnt++;
696  }
697  throw InvalidParamException(TRACE_INFO, ss.str().c_str());
698 }
699 
700 /* ================================================================= */
701 
702 void PatternLink::debug_print(void) const
703 {
704  // Print out the predicate ...
705  printf("\nRedex '%s' has following clauses:\n",
706  _pat.redex_name.c_str());
707  int cl = 0;
708  for (const Handle& h : _pat.mandatory)
709  {
710  printf("Mandatory %d:", cl);
711  if (_pat.evaluatable_holders.find(h) != _pat.evaluatable_holders.end())
712  printf(" (evaluatable)");
713  if (_pat.executable_holders.find(h) != _pat.executable_holders.end())
714  printf(" (executable)");
715  printf("\n");
716  prt(h);
717  cl++;
718  }
719 
720  if (0 < _pat.optionals.size())
721  {
722  printf("Predicate includes the following optional clauses:\n");
723  cl = 0;
724  for (const Handle& h : _pat.optionals)
725  {
726  printf("Optional clause %d:", cl);
727  if (_pat.evaluatable_holders.find(h) != _pat.evaluatable_holders.end())
728  printf(" (evaluatable)");
729  if (_pat.executable_holders.find(h) != _pat.executable_holders.end())
730  printf(" (executable)");
731  printf("\n");
732  prt(h);
733  cl++;
734  }
735  }
736  else
737  printf("No optional clauses\n");
738 
739  // Print out the bound variables in the predicate.
740  for (const Handle& h : _varlist.varset)
741  {
742  if (NodeCast(h))
743  printf("Bound var: "); prt(h);
744  }
745 
746  if (_varlist.varset.empty())
747  printf("There are no bound vars in this pattern\n");
748 
749  printf("\n");
750  fflush(stdout);
751 }
752 
753 /* ===================== END OF FILE ===================== */
std::set< Handle > varset
Definition: FindUtils.h:88
static bool is_atom_in_tree(const Handle &tree, const Handle &atom)
Definition: FindUtils.h:186
std::shared_ptr< PatternTerm > PatternTermPtr
Definition: PatternTerm.h:35
std::string redex_name
Definition: Pattern.h:94
std::vector< Handle > HandleSeq
a list of handles
Definition: Handle.h:246
std::map< Handle, const std::set< Type > > VariableTypeMap
Definition: Pattern.h:43
std::set< Handle > evaluatable_holders
Definition: Pattern.h:121
std::set< Handle > varset
Definition: Pattern.h:65
std::shared_ptr< TruthValue > TruthValuePtr
Definition: TruthValue.h:85
std::set< Handle > evaluatable_terms
Definition: Pattern.h:120
std::shared_ptr< AttentionValue > AttentionValuePtr
#define createPatternLink
Definition: PatternLink.h:188
std::set< Handle > holders
Definition: FindUtils.h:89
static unsigned int num_unquoted_in_tree(const Handle &tree, const std::set< Handle > &atoms)
Definition: FindUtils.h:286
Type getType() const
Definition: Atom.h:197
HandleSeq cnf_clauses
Definition: Pattern.h:102
std::shared_ptr< Link > LinkPtr
Definition: Atom.h:53
ClassServer & classserver(ClassServerFactory *=ClassServer::createInstance)
Definition: ClassServer.cc:159
static NodePtr NodeCast(const Handle &h)
Definition: Node.h:113
bool remove_constants(const std::set< Handle > &vars, std::vector< Handle > &clauses)
Definition: PatternUtils.cc:51
std::set< Handle > least_holders
Definition: FindUtils.h:90
void get_connected_components(const std::set< Handle > &vars, const HandleSeq &clauses, std::vector< HandleSeq > &components, std::vector< std::set< Handle >> &component_vars)
const std::string & getTypeName(Type type)
Definition: ClassServer.cc:148
std::unordered_multimap< Handle, Handle > in_evaluatable
Definition: Pattern.h:131
static LinkPtr LinkCast(const Handle &h)
Definition: Link.h:263
HandleSeq mandatory
Definition: Pattern.h:105
std::set< Handle > executable_holders
Definition: Pattern.h:126
static bool is_unquoted_in_any_tree(const std::vector< Handle > &trees, const Handle &atom)
Definition: FindUtils.h:315
static const PatternTermPtr UNDEFINED
Definition: PatternTerm.h:47
ConnectMap connectivity_map
Definition: Pattern.h:139
const Variables & get_variables(void) const
Definition: VariableList.h:75
HandleSeq clauses
The actual clauses. Set by validate_clauses()
Definition: Pattern.h:97
std::set< Handle > black
Definition: Pattern.h:116
unsigned short Type
type of Atoms, represented as short integer (16 bits)
Definition: types.h:40
std::set< Handle > executable_terms
Definition: Pattern.h:125
std::set< Handle > optionals
Definition: Pattern.h:110
std::unordered_multimap< Handle, Handle > in_executable
Definition: Pattern.h:132
void search_set(const Handle &h)
Definition: FindUtils.h:128
VariableTypeMap typemap
Definition: Pattern.h:66
ConnectTermMap connected_terms_map
Definition: Pattern.h:141