ScummVM API documentation
vocabulary.h
1 /* ScummVM - Graphic Adventure Engine
2  *
3  * ScummVM is the legal property of its developers, whose names
4  * are too numerous to list here. Please refer to the COPYRIGHT
5  * file distributed with this source distribution.
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation, either version 3 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program. If not, see <http://www.gnu.org/licenses/>.
19  *
20  */
21 
22 #ifndef SCI_PARSER_VOCABULARY_H
23 #define SCI_PARSER_VOCABULARY_H
24 
25 #include "common/str.h"
26 #include "common/hashmap.h"
27 #include "common/hash-str.h"
28 #include "common/list.h"
29 
30 #include "sci/sci.h"
31 #include "sci/engine/vm_types.h"
32 #include "sci/util.h"
33 
34 namespace Common {
35 
36 class Serializer;
37 
38 }
39 
40 namespace Sci {
41 
42 class ResourceManager;
43 
44 /*#define VOCABULARY_DEBUG */
45 
46 enum { // VOCAB_RESOURCE_* ids. The SCI0 and SCI1 groups reuse 900/901 for different roles, so a value alone does not identify a role.
47  VOCAB_RESOURCE_SELECTORS = 997,
48 
49  VOCAB_RESOURCE_SCI0_MAIN_VOCAB = 0,
50  VOCAB_RESOURCE_SCUMM_LOC_VOCAB = 1, // Special fanmade format for vocab translate
51  VOCAB_RESOURCE_SCI0_PARSE_TREE_BRANCHES = 900,
52  VOCAB_RESOURCE_SCI0_SUFFIX_VOCAB = 901,
53 
54  VOCAB_RESOURCE_SCI1_MAIN_VOCAB = 900, // same value as VOCAB_RESOURCE_SCI0_PARSE_TREE_BRANCHES
55  VOCAB_RESOURCE_SCI1_PARSE_TREE_BRANCHES = 901, // same value as VOCAB_RESOURCE_SCI0_SUFFIX_VOCAB
56  VOCAB_RESOURCE_SCI1_SUFFIX_VOCAB = 902,
57 
58  VOCAB_RESOURCE_ALT_INPUTS = 913
59 };
60 
61 
62 enum { // VOCAB_CLASS_* word-class values; the first eight are distinct single bits 0x01..0x80.
63  VOCAB_CLASS_PREPOSITION = 0x01,
64  VOCAB_CLASS_ARTICLE = 0x02,
65  VOCAB_CLASS_ADJECTIVE = 0x04,
66  VOCAB_CLASS_PRONOUN = 0x08,
67  VOCAB_CLASS_NOUN = 0x10,
68  VOCAB_CLASS_INDICATIVE_VERB = 0x20,
69  VOCAB_CLASS_ADVERB = 0x40,
70  VOCAB_CLASS_IMPERATIVE_VERB = 0x80,
71  VOCAB_CLASS_NUMBER = 0x001 // NOTE(review): 0x001 == 0x01, i.e. numerically identical to VOCAB_CLASS_PREPOSITION - confirm this is intended
72 };
73 
74 enum { // kParse* constants, numbered consecutively 0..4. NOTE(review): their consumers are not visible in this header.
75  kParseEndOfInput = 0,
76  kParseOpeningParenthesis = 1,
77  kParseClosingParenthesis = 2,
78  kParseNil = 3,
79  kParseNumber = 4
80 };
81 
82 #define VOCAB_MAX_WORDLENGTH 256
83 
84 /* Anywords are ignored by the parser */
85 #define VOCAB_CLASS_ANYWORD 0xff /* equals the OR of the eight single-bit VOCAB_CLASS_* values 0x01..0x80 */
86 
87 /* This word class is used for numbers */
88 #define VOCAB_MAGIC_NUMBER_GROUP 0xffd /* 0xffe ? */
89 #define VOCAB_MAGIC_NOTHING_GROUP 0xffe /* note: this is the value the "0xffe ?" remark above refers to */
90 
91 /* Number of nodes for each ParseTreeNode structure */
92 #define VOCAB_TREE_NODES 500 /* used as the length of the Vocabulary::_parserNodes array */
93 
94 #define VOCAB_TREE_NODE_LAST_WORD_STORAGE 0x140
95 #define VOCAB_TREE_NODE_COMPARE_TYPE 0x146
96 #define VOCAB_TREE_NODE_COMPARE_GROUP 0x14d
97 #define VOCAB_TREE_NODE_FORCE_STORAGE 0x154
98 
99 #define SAID_COMMA 0xf0 /* SAID_* token values: 0xf0..0xf9 are consecutive, SAID_TERM is 0xff */
100 #define SAID_AMP 0xf1
101 #define SAID_SLASH 0xf2
102 #define SAID_PARENO 0xf3
103 #define SAID_PARENC 0xf4
104 #define SAID_BRACKO 0xf5
105 #define SAID_BRACKC 0xf6
106 #define SAID_HASH 0xf7
107 #define SAID_LT 0xf8
108 #define SAID_GT 0xf9
109 #define SAID_TERM 0xff
110 
111 #define SAID_FIRST SAID_COMMA /* alias for the lowest SAID_* token value defined above (0xf0) */
112 
113 /* There was no 'last matching word': */
114 #define SAID_FULL_MATCH 0xffff
115 #define SAID_NO_MATCH 0xfffe
116 #define SAID_PARTIAL_MATCH 0xfffd
117 
118 #define SAID_LONG(x) ((x) << 8) /* shifts x left by 8 bits; the argument is parenthesised, so expressions are safe to pass */
119 
120 struct ResultWord { // Plain pair of ints: a word's class and its group.
121  int _class; // presumably a VOCAB_CLASS_* value or mask - TODO confirm against the users of this struct
122  int _group; // presumably a word-group id (cf. VOCAB_MAGIC_*_GROUP) - TODO confirm
123 };
124 
127 
129 
130 
131 struct ParseRuleList;
132 
133 struct suffix_t { // NOTE(review): the gutter jumps from 134 to 141. The cross-reference index at the end of this listing places class_mask (135), result_class (136), alt_suffix_length (138) and word_suffix_length (139) in vocabulary.h, so those members presumably belong here and were dropped from the listing.
134 
141  const char *alt_suffix; // C string; ownership is not visible from this header
142  const char *word_suffix; // C string; ownership is not visible from this header
144 };
145 
147 
148 
149 struct synonym_t { // Pair of 16-bit values; instances are stored via Vocabulary::addSynonym().
150  uint16 replaceant; // presumably the value to be replaced - TODO confirm against synonymizeTokens()
151  uint16 replacement; // presumably the value substituted for it - TODO confirm
152 };
153 
155 
156 
157 struct AltInput { // One input/replacement string pair with explicit lengths and a prefix flag.
158  const char *_input; // C string; ownership is not visible from this header
159  const char *_replacement; // C string; ownership is not visible from this header
160  uint32 _inputLength; // presumably the length of _input - TODO confirm units (bytes vs. characters)
161  uint32 _replacementLength; // presumably the length of _replacement - TODO confirm
162  bool _prefix; // NOTE(review): meaning not shown here; see Vocabulary::checkAltInput()
163 };
164 
165 
167  int id; // NOTE(review): the struct's opening line (gutter 166) is missing from this listing; presumably this is parse_tree_branch_t, the element type of Vocabulary::_parserBranches - confirm against the real header
168  int data[10]; // fixed array of 10 ints
169 };
170 
171 enum ParseTypes { // Node kinds; used as the type of the `type` member declared just below (gutter line 178).
172  kParseTreeWordNode = 4,
173  kParseTreeLeafNode = 5,
174  kParseTreeBranchNode = 6
175 };
176 
178  ParseTypes type; // NOTE(review): the struct's opening line (gutter 177) is missing from this listing; presumably this is ParseTreeNode - confirm against the real header
179  int value; // NOTE(review): the cross-reference index at the end of this listing places `ParseTreeNode * left` (180) and `ParseTreeNode * right` (181) here; they were dropped from the listing
182 };
183 
184 enum VocabularyVersions { // Type of Vocabulary::_vocabVersion.
185  kVocabularySCI0 = 0,
186  kVocabularySCI1 = 1
187 };
188 
189 class Vocabulary { // Declares the parser vocabulary: word lookup, tokenizing, GNF parsing, synonyms, alt inputs and the lists they use. The original doc comments are missing from this listing, so the notes below only state what the declarations show.
190 public:
191  Vocabulary(ResourceManager *resMan, bool foreign);
192  ~Vocabulary();
193 
194  // reset parser status
195  void reset();
196 
201  const char *getAnyWordFromGroup(int group); // returns a C string; lifetime/ownership not visible here
202 
203 
210  void lookupWord(ResultWordList &retval, const char *word, int word_len); // result is delivered through retval
211 
221  void lookupWordPrefix(ResultWordListList &parent_retval, ResultWordList &retval, const char *word, int word_len);
222 
234  bool lookupSpecificPrefixWithMeaning(ResultWordListList &parent_retval, ResultWordList &retval, const char *word, int word_len, unsigned char prefix, const char *meaning);
235 
246  bool lookupVerbPrefix(ResultWordListList &parent_retval, ResultWordList &retval, Common::String word, int word_len, Common::String prefix); // note: word and prefix are taken by value (copied per call)
247 
259  bool tokenizeString(ResultWordListList &retval, const char *sentence, char **error); // error is an out-parameter; NOTE(review): what it points to on failure is not shown here
260 
270  int parseGNF(const ResultWordListList &words, bool verbose = false); // verbose defaults to false
271 
275  bool storePronounReference();
276 
280  void replacePronouns(ResultWordListList &words); // takes words by non-const reference
281 
293  ParseRuleList *buildGNF(bool verbose = false); // returns a raw pointer; presumably released with freeRuleList() - TODO confirm
294 
300  void debugDecipherSaidBlock(const SciSpan<const byte> &data);
301 
305  void printSuffixes() const;
306 
310  void printParserWords() const;
311 
312  uint getParserBranchesSize() const { return _parserBranches.size(); } // number of entries in _parserBranches
313  const parse_tree_branch_t &getParseTreeBranch(int number) const { return _parserBranches[number]; } // indexes _parserBranches directly; no range check is performed in this accessor
314 
318  void addSynonym(synonym_t syn) { _synonyms.push_back(syn); } // appends syn to _synonyms
319 
323  void clearSynonyms() { _synonyms.clear(); } // empties _synonyms
324 
329  void synonymizeTokens(ResultWordListList &words); // takes words by non-const reference
330 
331  void printParserNodes(int num);
332 
333  void dumpParseTree();
334 
335  int parseNodes(int *i, int *pos, int type, int nr, int argc, const char **argv);
336 
343  bool checkAltInput(Common::String& text, uint16& cursorPos); // both arguments are non-const references
344 
348  void saveLoadWithSerializer(Common::Serializer &ser);
349 
350 private:
355  bool loadParserWords();
356 
361  void loadTranslatedWords();
362 
367  bool loadSuffixes();
368 
373  void freeSuffixes();
374 
380  bool loadBranches();
381 
386  void freeRuleList(ParseRuleList *rule_list);
387 
388 
393  bool loadAltInputs();
394 
398  void freeAltInputs();
399 
400  ResourceManager *_resMan;
401  VocabularyVersions _vocabVersion;
402 
403  bool _foreign;
404  uint16 _resourceIdWords;
405  uint16 _resourceIdSuffixes;
406  uint16 _resourceIdBranches;
407 
408  // Parser-related lists
409  SuffixList _parserSuffixes;
410  ParseRuleList *_parserRules; // raw pointer; ownership not visible here
411  Common::Array<parse_tree_branch_t> _parserBranches;
412  WordMap _parserWords;
413  SynonymList _synonyms;
415 
416  struct PrefixMeaning { // private nested pair: a prefix byte and a C string
417  unsigned char prefix;
418  const char *meaning;
419  };
420 
421  int _pronounReference;
422 
423 public:
424  // Accessed by said()
425  ParseTreeNode _parserNodes[VOCAB_TREE_NODES]; // fixed-size array of VOCAB_TREE_NODES (500) nodes
427  // Parser data: (NOTE(review): gutter lines 428-429 are missing here; the cross-reference index at the end of this listing places `reg_t parser_event` and `bool parserIsValid` there)
430 };
431 
437 void vocab_dump_parse_tree(const char *tree_name, ParseTreeNode *nodes); // free function in namespace Sci; takes a tree name and a pointer to nodes. NOTE(review): output destination is not visible from this header
438 
439 
440 
447 int said(const byte *spec, bool verbose); // free function in namespace Sci; NOTE(review): the return value presumably uses the SAID_*_MATCH constants - confirm against the implementation
448 
449 } // End of namespace Sci
450 
451 #endif // SCI_PARSER_VOCABULARY_H
const char * word_suffix
Definition: vocabulary.h:142
Definition: str.h:59
Definition: vocabulary.h:120
Definition: vocabulary.h:166
const char * alt_suffix
Definition: vocabulary.h:141
void vocab_dump_parse_tree(const char *tree_name, ParseTreeNode *nodes)
Definition: list.h:44
int value
Definition: vocabulary.h:179
Definition: vocabulary.h:189
Definition: serializer.h:79
int alt_suffix_length
Definition: vocabulary.h:138
Definition: vocabulary.h:157
Definition: resource.h:327
int _group
Definition: vocabulary.h:122
int word_suffix_length
Definition: vocabulary.h:139
int _class
Definition: vocabulary.h:121
uint16 replaceant
Definition: vocabulary.h:150
void clearSynonyms()
Definition: vocabulary.h:323
ParseTypes type
Definition: vocabulary.h:178
Definition: algorithm.h:29
Definition: vocabulary.h:133
Definition: console.h:28
ParseTreeNode * left
Definition: vocabulary.h:180
uint16 replacement
Definition: vocabulary.h:151
void NORETURN_PRE error(MSVC_PRINTF const char *s,...) GCC_PRINTF(1
int said(const byte *spec, bool verbose)
Definition: vocabulary.h:149
void addSynonym(synonym_t syn)
Definition: vocabulary.h:318
int result_class
Definition: vocabulary.h:136
reg_t parser_event
Definition: vocabulary.h:428
ParseTreeNode * right
Definition: vocabulary.h:181
Definition: vocabulary.h:177
Definition: vm_types.h:39
bool parserIsValid
Definition: vocabulary.h:429
int class_mask
Definition: vocabulary.h:135