.. Copyright (C) 2001-2012 NLTK Project .. For license information, see LICENSE.TXT ============================== Combinatory Categorial Grammar ============================== For more information, please see: http://nltk.googlecode.com/svn/trunk/doc/contrib/ccg/ccg.pdf Relative Clauses ---------------- >>> from nltk.ccg import chart, lexicon Construct a lexicon: >>> lex = lexicon.parseLexicon(''' ... :- S, NP, N, VP ... ... Det :: NP/N ... Pro :: NP ... Modal :: S\\NP/VP ... ... TV :: VP/NP ... DTV :: TV/NP ... ... the => Det ... ... that => Det ... that => NP ... ... I => Pro ... you => Pro ... we => Pro ... ... chef => N ... cake => N ... children => N ... dough => N ... ... will => Modal ... should => Modal ... might => Modal ... must => Modal ... ... and => var\\.,var/.,var ... ... to => VP[to]/VP ... ... without => (VP\\VP)/VP[ing] ... ... be => TV ... cook => TV ... eat => TV ... ... cooking => VP[ing]/NP ... ... give => DTV ... ... is => (S\\NP)/NP ... prefer => (S\\NP)/NP ... ... which => (N\\N)/(S/NP) ... ... persuade => (VP/VP[to])/NP ... ''') >>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet) >>> for parse in parser.nbest_parse("you prefer that cake".split(),1): ... chart.printCCGDerivation(parse) ... for parse in parser.nbest_parse("that is the cake which you prefer".split(), 1): ... chart.printCCGDerivation(parse) # doctest: +NORMALIZE_WHITESPACE ... you prefer that cake NP ((S\NP)/NP) (NP/N) N --------------------->B ((S\NP)/N) ---------------------------> (S\NP) --------------------------------< S that is the cake which you prefer NP ((S\NP)/NP) (NP/N) N ((N\N)/(S/NP)) NP ((S\NP)/NP) --------------------->B ((S\NP)/N) ------>T (N/(N\N)) --------------------------->B ((S\NP)/(N\N)) ------------------------------------------->B ((S\NP)/(S/NP)) ----->T (S/(S\NP)) ------------------>B (S/NP) -------------------------------------------------------------> (S\NP) -------------------------------------------------------------------< S Some other sentences to try: "that is the cake which we will persuade the chef to cook" "that is the cake which we will persuade the chef to give the children" >>> sent = "that is the dough which you will eat without cooking".split() >>> nosub_parser = chart.CCGChartParser(lex, chart.ApplicationRuleSet + ... chart.CompositionRuleSet + chart.TypeRaiseRuleSet) Without Substitution (no output) >>> for parse in nosub_parser.nbest_parse(sent,1): ... chart.printCCGDerivation(parse) With Substitution: >>> for parse in parser.nbest_parse(sent,1): ... chart.printCCGDerivation(parse) # doctest: +NORMALIZE_WHITESPACE that is the dough which you will eat without cooking NP ((S\NP)/NP) (NP/N) N ((N\N)/(S/NP)) NP ((S\NP)/VP) (VP/NP) ((VP\VP)/VP['ing']) (VP['ing']/NP) --------------------->B ((S\NP)/N) ------->T (N/(N\N)) ---------------------------->B ((S\NP)/(N\N)) -------------------------------------------->B ((S\NP)/(S/NP)) ----->T (S/(S\NP)) ------------------>B (S/VP) ------------------------------------->B ((VP\VP)/NP) ----------------------------------------------B (S/NP) ------------------------------------------------------------------------------------------------------------> (S\NP) ------------------------------------------------------------------------------------------------------------------< S Conjunction ----------- >>> from nltk.ccg.chart import CCGChartParser, ApplicationRuleSet, CompositionRuleSet >>> from nltk.ccg.chart import SubstitutionRuleSet, TypeRaiseRuleSet, printCCGDerivation >>> from nltk.ccg import lexicon Lexicons for the tests: >>> test1_lex = ''' ... :- S,N,NP,VP ... I => NP ... you => NP ... will => S\\NP/VP ... cook => VP/NP ... which => (N\N)/(S/NP) ... and => var\\.,var/.,var ... might => S\\NP/VP ... eat => VP/NP ... the => NP/N ... mushrooms => N ... parsnips => N''' >>> test2_lex = ''' ... :- N, S, NP, VP ... articles => N ... the => NP/N ... and => var\\.,var/.,var ... which => (N\N)/(S/NP) ... I => NP ... anyone => NP ... will => (S/VP)\\NP ... file => VP/NP ... without => (VP\\VP)/VP[ing] ... forget => VP/NP ... reading => VP[ing]/NP ... ''' Tests handling of conjunctions. Note that while the two derivations are different, they are semantically equivalent. >>> lex = lexicon.parseLexicon(test1_lex) >>> parser = CCGChartParser(lex, ApplicationRuleSet + CompositionRuleSet + SubstitutionRuleSet) >>> for parse in parser.nbest_parse("I will cook and might eat the mushrooms and parsnips".split()): ... printCCGDerivation(parse) # doctest: +NORMALIZE_WHITESPACE I will cook and might eat the mushrooms and parsnips NP ((S\NP)/VP) (VP/NP) ((_var2\.,_var2)/.,_var2) ((S\NP)/VP) (VP/NP) (NP/N) N ((_var2\.,_var2)/.,_var2) N ---------------------->B ((S\NP)/NP) ---------------------->B ((S\NP)/NP) -------------------------------------------------> (((S\NP)/NP)\.,((S\NP)/NP)) -----------------------------------------------------------------------< ((S\NP)/NP) -------------------------------------> (N\.,N) ------------------------------------------------< N --------------------------------------------------------> NP -------------------------------------------------------------------------------------------------------------------------------> (S\NP) -----------------------------------------------------------------------------------------------------------------------------------< S I will cook and might eat the mushrooms and parsnips NP ((S\NP)/VP) (VP/NP) ((_var2\.,_var2)/.,_var2) ((S\NP)/VP) (VP/NP) (NP/N) N ((_var2\.,_var2)/.,_var2) N ---------------------->B ((S\NP)/NP) ---------------------->B ((S\NP)/NP) -------------------------------------------------> (((S\NP)/NP)\.,((S\NP)/NP)) -----------------------------------------------------------------------< ((S\NP)/NP) ------------------------------------------------------------------------------->B ((S\NP)/N) -------------------------------------> (N\.,N) ------------------------------------------------< N -------------------------------------------------------------------------------------------------------------------------------> (S\NP) -----------------------------------------------------------------------------------------------------------------------------------< S Tests handling subject extraction. Interesting to point that the two parses are clearly semantically different. >>> lex = lexicon.parseLexicon(test2_lex) >>> parser = CCGChartParser(lex, ApplicationRuleSet + CompositionRuleSet + SubstitutionRuleSet) >>> for parse in parser.nbest_parse("articles which I will file and forget without reading".split()): ... printCCGDerivation(parse) # doctest: +NORMALIZE_WHITESPACE articles which I will file and forget without reading N ((N\N)/(S/NP)) NP ((S/VP)\NP) (VP/NP) ((_var3\.,_var3)/.,_var3) (VP/NP) ((VP\VP)/VP['ing']) (VP['ing']/NP) -----------------< (S/VP) ------------------------------------->B ((VP\VP)/NP) ---------------------------------------------- ((VP/NP)\.,(VP/NP)) ----------------------------------------------------------------------------------< (VP/NP) --------------------------------------------------------------------------------------------------->B (S/NP) -------------------------------------------------------------------------------------------------------------------> (N\N) -----------------------------------------------------------------------------------------------------------------------------< N articles which I will file and forget without reading N ((N\N)/(S/NP)) NP ((S/VP)\NP) (VP/NP) ((_var3\.,_var3)/.,_var3) (VP/NP) ((VP\VP)/VP['ing']) (VP['ing']/NP) -----------------< (S/VP) ------------------------------------> ((VP/NP)\.,(VP/NP)) ---------------------------------------------< (VP/NP) ------------------------------------->B ((VP\VP)/NP) ----------------------------------------------------------------------------------B (S/NP) -------------------------------------------------------------------------------------------------------------------> (N\N) -----------------------------------------------------------------------------------------------------------------------------< N