ScummVM API documentation
vocabulary.h
1 /* ScummVM - Graphic Adventure Engine
2  *
3  * ScummVM is the legal property of its developers, whose names
4  * are too numerous to list here. Please refer to the COPYRIGHT
5  * file distributed with this source distribution.
6  *
7  * This program is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation, either version 3 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program. If not, see <http://www.gnu.org/licenses/>.
19  *
20  */
21 
22 #ifndef SCI_PARSER_VOCABULARY_H
23 #define SCI_PARSER_VOCABULARY_H
24 
25 #include "common/str.h"
26 #include "common/hashmap.h"
27 #include "common/hash-str.h"
28 #include "common/list.h"
29 
30 #include "sci/sci.h"
31 #include "sci/engine/vm_types.h"
32 #include "sci/util.h"
33 
34 namespace Common {
35 
36 class Serializer;
37 
38 }
39 
40 namespace Sci {
41 
42 class ResourceManager;
43 
44 /*#define VOCABULARY_DEBUG */
45 
// Numeric identifiers for the vocabulary data used by the parser
// (presumably resource numbers handed to the ResourceManager -- confirm
// against the loader code, which is not part of this header).
// The SCI0 and SCI1 groups assign different numbers to the same kinds of
// data, so the values 900 and 901 each appear twice with different meanings;
// which group applies has to be decided by the caller.
46 enum {
47  VOCAB_RESOURCE_SELECTORS = 997,
48 
49  VOCAB_RESOURCE_SCI0_MAIN_VOCAB = 0,
50  VOCAB_RESOURCE_SCUMM_LOC_VOCAB = 1, // Special fanmade format for vocab translate
51  VOCAB_RESOURCE_SCI0_PARSE_TREE_BRANCHES = 900,
52  VOCAB_RESOURCE_SCI0_SUFFIX_VOCAB = 901,
53 
54  VOCAB_RESOURCE_SCI1_MAIN_VOCAB = 900,
55  VOCAB_RESOURCE_SCI1_PARSE_TREE_BRANCHES = 901,
56  VOCAB_RESOURCE_SCI1_SUFFIX_VOCAB = 902,
57 
58  VOCAB_RESOURCE_ALT_INPUTS = 913
59 };
60 
61 
// Word class flags. The first eight values are distinct single bits
// (0x01 .. 0x80), so they can be OR-ed together into a mask.
// NOTE(review): VOCAB_CLASS_NUMBER is written as 0x001, which is numerically
// identical to VOCAB_CLASS_PREPOSITION (0x01). The three-digit spelling
// suggests 0x100 may have been intended -- confirm before relying on
// VOCAB_CLASS_NUMBER being distinguishable from VOCAB_CLASS_PREPOSITION.
62 enum {
63  VOCAB_CLASS_PREPOSITION = 0x01,
64  VOCAB_CLASS_ARTICLE = 0x02,
65  VOCAB_CLASS_ADJECTIVE = 0x04,
66  VOCAB_CLASS_PRONOUN = 0x08,
67  VOCAB_CLASS_NOUN = 0x10,
68  VOCAB_CLASS_INDICATIVE_VERB = 0x20,
69  VOCAB_CLASS_ADVERB = 0x40,
70  VOCAB_CLASS_IMPERATIVE_VERB = 0x80,
71  VOCAB_CLASS_NUMBER = 0x001
72 };
73 
// Small consecutive codes (0 .. 4) naming special parser items: end of
// input, opening/closing parenthesis, nil and number.
// NOTE(review): where these codes are produced and consumed is not visible
// in this header -- presumably the parse-rule/tree code; confirm there.
74 enum {
75  kParseEndOfInput = 0,
76  kParseOpeningParenthesis = 1,
77  kParseClosingParenthesis = 2,
78  kParseNil = 3,
79  kParseNumber = 4
80 };
81 
/* Upper bound on the length of a single word (presumably in characters,
 * used to size word buffers -- confirm at the use sites) */
82 #define VOCAB_MAX_WORDLENGTH 256
83 
84 /* Anywords are ignored by the parser */
85 #define VOCAB_CLASS_ANYWORD 0xff
86 
87 /* This word class is used for numbers */
88 #define VOCAB_MAGIC_NUMBER_GROUP 0xffd /* 0xffe ? */
89 #define VOCAB_MAGIC_NOTHING_GROUP 0xffe
90 
91 /* Number of ParseTreeNode entries in a parse tree array; this is the size
92  * of Vocabulary::_parserNodes */
92 #define VOCAB_TREE_NODES 500
93 
/* Special node values in the 0x140 .. 0x154 range.
 * NOTE(review): their exact meaning is not shown in this header; the names
 * suggest markers used while comparing parse trees -- confirm in said(). */
94 #define VOCAB_TREE_NODE_LAST_WORD_STORAGE 0x140
95 #define VOCAB_TREE_NODE_COMPARE_TYPE 0x146
96 #define VOCAB_TREE_NODE_COMPARE_GROUP 0x14d
97 #define VOCAB_TREE_NODE_FORCE_STORAGE 0x154
98 
/* Single-byte token codes for a "said" specification. The operator codes
 * occupy the contiguous range 0xf0 .. 0xf9; SAID_TERM (0xff) is separate.
 * The names mirror punctuation characters (comma, ampersand, slash,
 * parentheses, brackets, hash, less-than, greater-than). */
99 #define SAID_COMMA 0xf0
100 #define SAID_AMP 0xf1
101 #define SAID_SLASH 0xf2
102 #define SAID_PARENO 0xf3
103 #define SAID_PARENC 0xf4
104 #define SAID_BRACKO 0xf5
105 #define SAID_BRACKC 0xf6
106 #define SAID_HASH 0xf7
107 #define SAID_LT 0xf8
108 #define SAID_GT 0xf9
109 #define SAID_TERM 0xff
110 
/* Lowest token code; an alias for SAID_COMMA (0xf0) */
111 #define SAID_FIRST SAID_COMMA
112 
113 /* There was no 'last matching word': */
/* (three distinct 16-bit result codes at the top of the 0xffff range) */
114 #define SAID_FULL_MATCH 0xffff
115 #define SAID_NO_MATCH 0xfffe
116 #define SAID_PARTIAL_MATCH 0xfffd
117 
/* Shifts x left by 8 bits, i.e. moves a one-byte code into the high byte
 * of a 16-bit value */
118 #define SAID_LONG(x) ((x) << 8)
119 
// One lookup result: a (class, group) pair for a word.
// presumably _class holds VOCAB_CLASS_* flags and _group a word group id
// such as VOCAB_MAGIC_NUMBER_GROUP -- TODO confirm against lookupWord().
120 struct ResultWord {
121  int _class;
122  int _group;
123 };
124 
127 
129 
130 
131 struct ParseRuleList;
132 
// Describes one suffix entry as a pair of strings.
// NOTE(review): this listing shows only the two string members. The
// cross-reference index at the end of the listing also attributes
// `int class_mask` (line 135), `int result_class` (136),
// `int alt_suffix_length` (138) and `int word_suffix_length` (139) to this
// struct, so the real definition has more fields than are visible here.
// The pointers are non-owning from the point of view of this declaration;
// who allocates and frees the strings is not visible in this header.
133 struct suffix_t {
134 
141  const char *alt_suffix;
142  const char *word_suffix;
144 };
145 
147 
148 
// A pair of 16-bit values: the value to be replaced and the value that
// replaces it. Instances are collected via Vocabulary::addSynonym().
// presumably both fields are word group ids -- TODO confirm in
// Vocabulary::synonymizeTokens().
149 struct synonym_t {
150  uint16 replaceant;
151  uint16 replacement;
152 };
153 
155 
156 
// One alternative-input entry: an input string, its replacement string,
// the stored length of the input, and a prefix flag.
// NOTE(review): the semantics of _prefix (and whether _inputLength is in
// bytes) are not visible in this header -- confirm in
// Vocabulary::checkAltInput() / loadAltInputs().
157 struct AltInput {
158  const char *_input;
159  const char *_replacement;
160  uint32 _inputLength;
161  bool _prefix;
162 };
163 
164 
166  int id;
167  int data[10];
168 };
169 
// Kind tag stored in ParseTreeNode::type: word node, leaf node or branch
// node. The numbering starts at 4, not 0.
170 enum ParseTypes {
171  kParseTreeWordNode = 4,
172  kParseTreeLeafNode = 5,
173  kParseTreeBranchNode = 6
174 };
175 
177  ParseTypes type;
178  int value;
181 };
182 
// Vocabulary format variants; the type of Vocabulary::_vocabVersion.
// presumably selects between the VOCAB_RESOURCE_SCI0_* and
// VOCAB_RESOURCE_SCI1_* resource numbers -- TODO confirm in the constructor.
183 enum VocabularyVersions {
184  kVocabularySCI0 = 0,
185  kVocabularySCI1 = 1
186 };
187 
// Holds the parser vocabulary state: words, suffixes, parse tree branches,
// grammar rules, synonyms and the parse tree node array.
// Only the four inline accessors below have bodies in this header; every
// other member function is defined elsewhere, so comments on those describe
// the signature and hedge anything the signature does not establish.
// NOTE(review): the types ResultWordList, ResultWordListList, SuffixList,
// WordMap, SynonymList, parse_tree_branch_t and ParseTreeNode are used here
// but their declarations are not visible in this listing.
188 class Vocabulary {
189 public:
 // resMan: resource manager pointer; foreign: flag (presumably stored in
 // _resMan and _foreign respectively -- confirm in the definition).
190  Vocabulary(ResourceManager *resMan, bool foreign);
191  ~Vocabulary();
192 
193  // reset parser status
194  void reset();
195 
 // Returns a C string for the given word group (presumably some word
 // belonging to that group; ownership of the string is not visible here).
200  const char *getAnyWordFromGroup(int group);
201 
202 
 // Looks up `word` (length `word_len`) and delivers the results through
 // the `retval` out-parameter; there is no return value.
209  void lookupWord(ResultWordList &retval, const char *word, int word_len);
210 
 // Prefix-aware lookup variant; may write to both `parent_retval` and
 // `retval` (both are non-const references).
220  void lookupWordPrefix(ResultWordListList &parent_retval, ResultWordList &retval, const char *word, int word_len);
221 
 // Lookup for one specific prefix character with an associated meaning
 // string; returns bool (presumably true when the prefix matched).
233  bool lookupSpecificPrefixWithMeaning(ResultWordListList &parent_retval, ResultWordList &retval, const char *word, int word_len, unsigned char prefix, const char *meaning);
234 
 // Verb-prefix lookup; `word` and `prefix` are passed by value here.
 // Returns bool (presumably true when the prefix matched).
245  bool lookupVerbPrefix(ResultWordListList &parent_retval, ResultWordList &retval, Common::String word, int word_len, Common::String prefix);
246 
 // Splits `sentence` into word results written to `retval`. `error` is an
 // out-parameter (presumably set to describe the failing word when the
 // function returns false -- confirm who owns the pointed-to memory).
258  bool tokenizeString(ResultWordListList &retval, const char *sentence, char **error);
259 
 // Parses the tokenized `words`; `verbose` defaults to false. The meaning
 // of the int result is not visible in this header.
269  int parseGNF(const ResultWordListList &words, bool verbose = false);
270 
274  bool storePronounReference();
275 
 // May modify `words` in place (non-const reference).
279  void replacePronouns(ResultWordListList &words);
280 
 // Returns a ParseRuleList pointer; `verbose` defaults to false.
 // NOTE(review): ownership of the returned list is not stated here --
 // see freeRuleList() below and confirm in the definition.
292  ParseRuleList *buildGNF(bool verbose = false);
293 
299  void debugDecipherSaidBlock(const SciSpan<const byte> &data);
300 
304  void printSuffixes() const;
305 
309  void printParserWords() const;
310 
 // Number of entries in _parserBranches.
311  uint getParserBranchesSize() const { return _parserBranches.size(); }
 // Direct index into _parserBranches; this accessor does no range check
 // of its own, so keep `number` below getParserBranchesSize().
312  const parse_tree_branch_t &getParseTreeBranch(int number) const { return _parserBranches[number]; }
313 
 // Appends `syn` to the synonym list.
317  void addSynonym(synonym_t syn) { _synonyms.push_back(syn); }
318 
 // Empties the synonym list.
322  void clearSynonyms() { _synonyms.clear(); }
323 
 // May modify `words` in place (non-const reference); presumably applies
 // the entries collected via addSynonym().
328  void synonymizeTokens(ResultWordListList &words);
329 
330  void printParserNodes(int num);
331 
332  void dumpParseTree();
333 
 // Takes argc/argv-style arguments plus in/out cursors `i` and `pos`
 // (presumably a debugger-console helper -- confirm at the call site).
334  int parseNodes(int *i, int *pos, int type, int nr, int argc, const char **argv);
335 
 // Both arguments are non-const references, so the text and the cursor
 // position may be rewritten; returns bool (presumably true on change).
342  bool checkAltInput(Common::String& text, uint16& cursorPos);
343 
347  void saveLoadWithSerializer(Common::Serializer &ser);
348 
349 private:
 // load*/free* pairs for the individual data sets. The bool-returning
 // loaders presumably report success; definitions are not in this header.
354  bool loadParserWords();
355 
360  void loadTranslatedWords();
361 
366  bool loadSuffixes();
367 
372  void freeSuffixes();
373 
379  bool loadBranches();
380 
385  void freeRuleList(ParseRuleList *rule_list);
386 
387 
392  bool loadAltInputs();
393 
397  void freeAltInputs();
398 
399  ResourceManager *_resMan;
400  VocabularyVersions _vocabVersion;
401 
402  bool _foreign;
 // Identifiers of the word, suffix and branch data in use (presumably
 // chosen from the VOCAB_RESOURCE_* values according to _vocabVersion).
403  uint16 _resourceIdWords;
404  uint16 _resourceIdSuffixes;
405  uint16 _resourceIdBranches;
406 
407  // Parser-related lists
408  SuffixList _parserSuffixes;
409  ParseRuleList *_parserRules;
410  Common::Array<parse_tree_branch_t> _parserBranches;
411  WordMap _parserWords;
412  SynonymList _synonyms;
414 
 // A prefix character paired with a meaning string; mirrors the last two
 // parameters of lookupSpecificPrefixWithMeaning().
415  struct PrefixMeaning {
416  unsigned char prefix;
417  const char *meaning;
418  };
419 
420  int _pronounReference;
421 
422 public:
423  // Accessed by said()
424  ParseTreeNode _parserNodes[VOCAB_TREE_NODES];
426  // Parser data:
 // NOTE(review): the cross-reference index at the end of this listing
 // attributes `reg_t parser_event` (line 427) and `bool parserIsValid`
 // (line 428) to this file; those declarations are missing from the
 // listing at this point.
429 };
430 
// Free function taking a tree name and a pointer to parse tree nodes.
// presumably prints the tree rooted at `nodes` labelled with `tree_name`
// for debugging -- the definition is not in this header; confirm there.
436 void vocab_dump_parse_tree(const char *tree_name, ParseTreeNode *nodes);
437 
438 
439 
// Evaluates the said specification bytes at `spec`; `verbose` is a flag.
// NOTE(review): the int result is presumably one of SAID_FULL_MATCH,
// SAID_NO_MATCH, SAID_PARTIAL_MATCH or a word index -- the definition is
// not in this header; confirm there. Per the comment on
// Vocabulary::_parserNodes, this function accesses that node array.
446 int said(const byte *spec, bool verbose);
447 
448 } // End of namespace Sci
449 
450 #endif // SCI_PARSER_VOCABULARY_H
const char * word_suffix
Definition: vocabulary.h:142
Definition: str.h:59
Definition: vocabulary.h:120
Definition: vocabulary.h:165
const char * alt_suffix
Definition: vocabulary.h:141
void vocab_dump_parse_tree(const char *tree_name, ParseTreeNode *nodes)
Definition: list.h:44
int value
Definition: vocabulary.h:178
Definition: vocabulary.h:188
Definition: serializer.h:79
int alt_suffix_length
Definition: vocabulary.h:138
Definition: vocabulary.h:157
Definition: resource.h:327
int _group
Definition: vocabulary.h:122
int word_suffix_length
Definition: vocabulary.h:139
int _class
Definition: vocabulary.h:121
uint16 replaceant
Definition: vocabulary.h:150
void clearSynonyms()
Definition: vocabulary.h:322
ParseTypes type
Definition: vocabulary.h:177
Definition: algorithm.h:29
Definition: vocabulary.h:133
Definition: console.h:28
ParseTreeNode * left
Definition: vocabulary.h:179
uint16 replacement
Definition: vocabulary.h:151
void NORETURN_PRE error(MSVC_PRINTF const char *s,...) GCC_PRINTF(1
int said(const byte *spec, bool verbose)
Definition: vocabulary.h:149
void addSynonym(synonym_t syn)
Definition: vocabulary.h:317
int result_class
Definition: vocabulary.h:136
reg_t parser_event
Definition: vocabulary.h:427
ParseTreeNode * right
Definition: vocabulary.h:180
Definition: vocabulary.h:176
Definition: vm_types.h:39
bool parserIsValid
Definition: vocabulary.h:428
int class_mask
Definition: vocabulary.h:135