ScummVM API documentation
vocabulary.h
1 /* ScummVM - Graphic Adventure Engine
2  *
3  * ScummVM is the legal property of its developers, whose names
4  * are too numerous to list here. Please refer to the COPYRIGHT
5  * file distributed with this source distribution.
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation, either version 3 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program. If not, see <http://www.gnu.org/licenses/>.
19  *
20  */
21 
22 #ifndef SCI_PARSER_VOCABULARY_H
23 #define SCI_PARSER_VOCABULARY_H
24 
25 #include "common/str.h"
26 #include "common/hashmap.h"
27 #include "common/hash-str.h"
28 #include "common/list.h"
29 
30 #include "sci/sci.h"
31 #include "sci/engine/vm_types.h"
32 #include "sci/util.h"
33 
/* Forward declaration only: within this header Common::Serializer appears
 * solely as a reference parameter (Vocabulary::saveLoadWithSerializer), so
 * its full definition is not required here. */
34 namespace Common {
35 
36 class Serializer;
37 
38 }
39 
40 namespace Sci {
41 
/* Forward declaration: this header only uses ResourceManager through
 * pointers (the Vocabulary constructor argument and the _resMan member). */
42 class ResourceManager;
43 
44 /*#define VOCABULARY_DEBUG */
45 
/* Numeric identifiers of the vocabulary resources (VOCAB_RESOURCE_*).
 * The values 900 and 901 each appear under two names: once in the SCI0
 * group and once in the SCI1 group, with different roles in each.
 * NOTE(review): presumably the VocabularyVersions value decides which
 * group applies - confirm in the implementation. */
46 enum {
47  VOCAB_RESOURCE_SELECTORS = 997,
48 
49  VOCAB_RESOURCE_SCI0_MAIN_VOCAB = 0,
50  VOCAB_RESOURCE_SCUMM_LOC_VOCAB = 1, // Special fan-made format for vocabulary translation
51  VOCAB_RESOURCE_SCI0_PARSE_TREE_BRANCHES = 900,
52  VOCAB_RESOURCE_SCI0_SUFFIX_VOCAB = 901,
53 
54  VOCAB_RESOURCE_SCI1_MAIN_VOCAB = 900,
55  VOCAB_RESOURCE_SCI1_PARSE_TREE_BRANCHES = 901,
56  VOCAB_RESOURCE_SCI1_SUFFIX_VOCAB = 902,
57 
58  VOCAB_RESOURCE_ALT_INPUTS = 913
59 };
60 
61 
/* Word class flags. Each enumerator is a distinct single bit
 * (0x01 .. 0x80), so a value can carry several classes at once.
 * VOCAB_CLASS_ANYWORD (0xff, defined further down) equals all eight
 * bits combined. */
62 enum {
63  VOCAB_CLASS_PREPOSITION = 0x01,
64  VOCAB_CLASS_ARTICLE = 0x02,
65  VOCAB_CLASS_ADJECTIVE = 0x04,
66  VOCAB_CLASS_PRONOUN = 0x08,
67  VOCAB_CLASS_NOUN = 0x10,
68  VOCAB_CLASS_INDICATIVE_VERB = 0x20,
69  VOCAB_CLASS_ADVERB = 0x40,
70  VOCAB_CLASS_IMPERATIVE_VERB = 0x80
71 };
72 
/* Consecutive codes 0..4 with kParse* names.
 * NOTE(review): presumably token/node kinds used by the parser; where
 * they are consumed is not visible in this header - confirm. */
73 enum {
74  kParseEndOfInput = 0,
75  kParseOpeningParenthesis = 1,
76  kParseClosingParenthesis = 2,
77  kParseNil = 3,
78  kParseNumber = 4
79 };
80 
/* Parser limits, magic group numbers and SAID_* token values. */

/* NOTE(review): presumably the maximum length of a single word - confirm
 * against the tokenizer implementation. */
81 #define VOCAB_MAX_WORDLENGTH 256
82 
83 /* Anywords are ignored by the parser */
/* 0xff is the bitwise OR of all eight VOCAB_CLASS_* flags (0x01 .. 0x80). */
84 #define VOCAB_CLASS_ANYWORD 0xff
85 
86 /* This word class is used for numbers */
87 #define VOCAB_MAGIC_NUMBER_GROUP 0xffd /* 0xffe ? */
88 #define VOCAB_MAGIC_NOTHING_GROUP 0xffe
89 
90 /* Number of nodes for each ParseTreeNode structure */
/* Used below as the element count of the Vocabulary::_parserNodes array. */
91 #define VOCAB_TREE_NODES 500
92 
/* NOTE(review): the meaning of the following four values is not visible
 * in this header - see the parser implementation. */
93 #define VOCAB_TREE_NODE_LAST_WORD_STORAGE 0x140
94 #define VOCAB_TREE_NODE_COMPARE_TYPE 0x146
95 #define VOCAB_TREE_NODE_COMPARE_GROUP 0x14d
96 #define VOCAB_TREE_NODE_FORCE_STORAGE 0x154
97 
/* SAID_* token values: a contiguous range 0xf0 .. 0xf9 plus SAID_TERM (0xff).
 * NOTE(review): the names suggest the characters , & / ( ) [ ] # < > and a
 * terminator - confirm against the Said spec handling code. */
98 #define SAID_COMMA 0xf0
99 #define SAID_AMP 0xf1
100 #define SAID_SLASH 0xf2
101 #define SAID_PARENO 0xf3
102 #define SAID_PARENC 0xf4
103 #define SAID_BRACKO 0xf5
104 #define SAID_BRACKC 0xf6
105 #define SAID_HASH 0xf7
106 #define SAID_LT 0xf8
107 #define SAID_GT 0xf9
108 #define SAID_TERM 0xff
109 
/* Alias for SAID_COMMA (0xf0), the lowest SAID_* token value defined above. */
110 #define SAID_FIRST SAID_COMMA
111 
112 /* There was no 'last matching word': */
/* NOTE(review): presumably result codes of said() - confirm. */
113 #define SAID_FULL_MATCH 0xffff
114 #define SAID_NO_MATCH 0xfffe
115 #define SAID_PARTIAL_MATCH 0xfffd
116 
/* Shifts x left by 8 bits. */
117 #define SAID_LONG(x) ((x) << 8)
118 
/* A (class, group) pair of plain ints.
 * NOTE(review): presumably _class holds VOCAB_CLASS_* bits and _group a
 * word group number - confirm. The ResultWordList / ResultWordListList
 * types used by Vocabulary are not declared in the visible part of this
 * listing (numbered lines 124-128 are missing from it). */
119 struct ResultWord {
120  int _class;
121  int _group;
122 };
123 
126 
128 
129 
/* Forward declaration: this header only handles ParseRuleList through
 * pointers (Vocabulary::buildGNF, Vocabulary::freeRuleList, _parserRules). */
130 struct ParseRuleList;
131 
/* Suffix entry holding two C strings: alt_suffix and word_suffix.
 * NOTE(review): this listing omits numbered lines 134-139. According to the
 * cross-reference index at the end of this page, lines 134, 135, 137 and
 * 138 declare `int class_mask`, `int result_class`,
 * `int alt_suffix_length` and `int word_suffix_length`; restore them from
 * the real header before using this text as source. */
132 struct suffix_t {
133 
140  const char *alt_suffix;
141  const char *word_suffix;
143 };
144 
146 
147 
/* Pair of 16-bit values, passed by value to Vocabulary::addSynonym().
 * NOTE(review): presumably `replaceant` is the word group to be replaced
 * and `replacement` the group substituted for it - confirm. */
148 struct synonym_t {
149  uint16 replaceant;
150  uint16 replacement;
151 };
152 
154 
155 
/* One alternative-input entry: an input string and a replacement string,
 * each with an explicit 32-bit length, plus a `_prefix` flag.
 * NOTE(review): presumably consumed by Vocabulary::checkAltInput(); the
 * exact meaning of `_prefix` and the ownership of the two strings are not
 * visible in this header - confirm. */
156 struct AltInput {
157  const char *_input;
158  const char *_replacement;
159  uint32 _inputLength;
160  uint32 _replacementLength;
161  bool _prefix;
162 };
163 
164 
/* NOTE(review): the line that opens this struct (numbered line 165) is
 * missing from this listing. Presumably it is `struct parse_tree_branch_t {`,
 * the element type of Vocabulary::_parserBranches - confirm against the
 * real header. The members are an int id and a fixed array of 10 ints. */
166  int id;
167  int data[10];
168 };
169 
/* Node type tag stored in the `type` member of the parse tree node struct
 * declared right after this enum. The values are 4, 5 and 6. */
170 enum ParseTypes {
171  kParseTreeWordNode = 4,
172  kParseTreeLeafNode = 5,
173  kParseTreeBranchNode = 6
174 };
175 
/* NOTE(review): the line that opens this struct (numbered line 176) is
 * missing from this listing, as are lines 179-180. According to the
 * cross-reference index at the end of this page, those two lines declare
 * `ParseTreeNode * left` and `ParseTreeNode * right`, so this is presumably
 * `struct ParseTreeNode` - confirm against the real header. */
177  ParseTypes type;
178  int value;
181 };
182 
/* Vocabulary format version; stored in Vocabulary::_vocabVersion. */
183 enum VocabularyVersions {
184  kVocabularySCI0 = 0,
185  kVocabularySCI1 = 1
186 };
187 
/*
 * Vocabulary holds the parser-related lists (words, suffixes, branches,
 * rules, synonyms) and the parse node array, and declares the operations on
 * them. Only declarations are visible in this header, so any behaviour note
 * below that goes beyond a signature is marked as an assumption.
 *
 * NOTE(review): this listing omits some numbered lines of the class (e.g.
 * 413, 425, 427, 428). According to the cross-reference index at the end of
 * this page, lines 427 and 428 declare `reg_t parser_event` and
 * `bool parserIsValid`; restore them from the real header before using this
 * text as source.
 */
188 class Vocabulary {
189 public:
  // NOTE(review): presumably resMan and foreign initialise _resMan and
  // _foreign - confirm in the implementation.
190  Vocabulary(ResourceManager *resMan, bool foreign);
191  ~Vocabulary();
192 
193  // reset parser status
194  void reset();
195 
  // Returns a C string for the given group.
  // NOTE(review): the result when the group has no word is not visible
  // here - confirm.
200  const char *getAnyWordFromGroup(int group);
201 
202 
  // Takes a word as pointer plus explicit length; returns nothing, so
  // results are presumably delivered through `retval` - confirm.
209  void lookupWord(ResultWordList &retval, const char *word, int word_len);
210 
  // Same inputs as lookupWord() plus `parent_retval`, a second non-const
  // list reference. NOTE(review): how the two lists are filled is not
  // visible here.
220  void lookupWordPrefix(ResultWordListList &parent_retval, ResultWordList &retval, const char *word, int word_len);
221 
  // NOTE(review): presumably returns true when `word` matched the given
  // prefix/meaning pair - confirm.
233  bool lookupSpecificPrefixWithMeaning(ResultWordListList &parent_retval, ResultWordList &retval, const char *word, int word_len, unsigned char prefix, const char *meaning);
234 
  // `word` and `prefix` are taken by value (copied on each call).
245  bool lookupVerbPrefix(ResultWordListList &parent_retval, ResultWordList &retval, Common::String word, int word_len, Common::String prefix);
246 
  // NOTE(review): presumably splits `sentence` into word lists in `retval`
  // and reports a failure position/text through `*error` - confirm.
258  bool tokenizeString(ResultWordListList &retval, const char *sentence, char **error);
259 
  // `verbose` defaults to false. NOTE(review): the meaning of the int
  // result is not visible here.
269  int parseGNF(const ResultWordListList &words, bool verbose = false);
270 
  // NOTE(review): presumably updates _pronounReference - confirm.
274  bool storePronounReference();
275 
  // May modify `words` in place (non-const reference).
279  void replacePronouns(ResultWordListList &words);
280 
  // Returns a ParseRuleList pointer; `verbose` defaults to false.
  // NOTE(review): ownership of the returned list is not visible here
  // (see freeRuleList()).
292  ParseRuleList *buildGNF(bool verbose = false);
293 
  // Debug helper taking a read-only byte span.
299  void debugDecipherSaidBlock(const SciSpan<const byte> &data);
300 
  // Const debug output helpers.
304  void printSuffixes() const;
305 
309  void printParserWords() const;
310 
  // Number of entries in _parserBranches.
311  uint getParserBranchesSize() const { return _parserBranches.size(); }
  // Returns a const reference to _parserBranches[number]. No range check is
  // made here; any checking is up to the array's operator[].
312  const parse_tree_branch_t &getParseTreeBranch(int number) const { return _parserBranches[number]; }
313 
  // Appends `syn` to _synonyms.
317  void addSynonym(synonym_t syn) { _synonyms.push_back(syn); }
318 
  // Empties _synonyms.
322  void clearSynonyms() { _synonyms.clear(); }
323 
  // May modify `words` in place. NOTE(review): presumably applies the
  // entries of _synonyms - confirm.
328  void synonymizeTokens(ResultWordListList &words);
329 
330  void printParserNodes(int num);
331 
332  void dumpParseTree();
333 
  // NOTE(review): `i` and `pos` are pointers and presumably in/out cursors
  // into argv and _parserNodes - confirm in the implementation.
334  int parseNodes(int *i, int *pos, int type, int nr, int argc, const char **argv);
335 
  // Both arguments are non-const references and may be modified.
342  bool checkAltInput(Common::String& text, uint16& cursorPos);
343 
347  void saveLoadWithSerializer(Common::Serializer &ser);
348 
349 private:
  // Load/free helpers. NOTE(review): the bool results presumably signal
  // success - confirm.
354  bool loadParserWords();
355 
360  void loadTranslatedWords();
361 
366  bool loadSuffixes();
367 
372  void freeSuffixes();
373 
379  bool loadBranches();
380 
385  void freeRuleList(ParseRuleList *rule_list);
386 
387 
392  bool loadAltInputs();
393 
397  void freeAltInputs();
398 
399  ResourceManager *_resMan;
400  VocabularyVersions _vocabVersion;
401 
402  bool _foreign;
  // NOTE(review): presumably hold VOCAB_RESOURCE_* values chosen according
  // to _vocabVersion - confirm.
403  uint16 _resourceIdWords;
404  uint16 _resourceIdSuffixes;
405  uint16 _resourceIdBranches;
406 
407  // Parser-related lists
408  SuffixList _parserSuffixes;
409  ParseRuleList *_parserRules;
410  Common::Array<parse_tree_branch_t> _parserBranches;
411  WordMap _parserWords;
412  SynonymList _synonyms;
414 
  // Pairs a prefix character with a meaning string (private helper type).
415  struct PrefixMeaning {
416  unsigned char prefix;
417  const char *meaning;
418  };
419 
420  int _pronounReference;
421 
422 public:
423  // Accessed by said()
  // Fixed-size array of VOCAB_TREE_NODES (500) parse tree nodes.
424  ParseTreeNode _parserNodes[VOCAB_TREE_NODES];
426  // Parser data:
429 };
430 
/* Free function in namespace Sci taking a tree name and a pointer to parse
 * tree nodes.
 * NOTE(review): presumably prints the tree starting at `nodes` for
 * debugging - confirm in the implementation. */
436 void vocab_dump_parse_tree(const char *tree_name, ParseTreeNode *nodes);
437 
438 
439 
/* Free function in namespace Sci taking a read-only byte pointer `spec` and
 * a verbosity flag.
 * NOTE(review): presumably matches the Said spec against the parse tree in
 * Vocabulary::_parserNodes (that member is commented "Accessed by said()")
 * and returns one of the SAID_*_MATCH values or a word position - confirm. */
446 int said(const byte *spec, bool verbose);
447 
448 } // End of namespace Sci
449 
450 #endif // SCI_PARSER_VOCABULARY_H
const char * word_suffix
Definition: vocabulary.h:141
Definition: str.h:59
Definition: vocabulary.h:119
Definition: vocabulary.h:165
const char * alt_suffix
Definition: vocabulary.h:140
void vocab_dump_parse_tree(const char *tree_name, ParseTreeNode *nodes)
Definition: list.h:44
int value
Definition: vocabulary.h:178
Definition: vocabulary.h:188
Definition: serializer.h:79
int alt_suffix_length
Definition: vocabulary.h:137
Definition: vocabulary.h:156
Definition: resource.h:327
int _group
Definition: vocabulary.h:121
int word_suffix_length
Definition: vocabulary.h:138
int _class
Definition: vocabulary.h:120
uint16 replaceant
Definition: vocabulary.h:149
void clearSynonyms()
Definition: vocabulary.h:322
ParseTypes type
Definition: vocabulary.h:177
Definition: algorithm.h:29
Definition: vocabulary.h:132
Definition: console.h:28
ParseTreeNode * left
Definition: vocabulary.h:179
uint16 replacement
Definition: vocabulary.h:150
void NORETURN_PRE error(MSVC_PRINTF const char *s,...) GCC_PRINTF(1
int said(const byte *spec, bool verbose)
Definition: vocabulary.h:148
void addSynonym(synonym_t syn)
Definition: vocabulary.h:317
int result_class
Definition: vocabulary.h:135
reg_t parser_event
Definition: vocabulary.h:427
ParseTreeNode * right
Definition: vocabulary.h:180
Definition: vocabulary.h:176
Definition: vm_types.h:39
bool parserIsValid
Definition: vocabulary.h:428
int class_mask
Definition: vocabulary.h:134