@@ -10,14 +10,15 @@ type 's pluralform =
1010
1111type  's pastform  =
1212  | Ed 
13-   | Dded  (*  consonant doubling *) 
14-   | Yied 
13+   | Dded  (*  consonant doubling: "clapped" *) 
14+   | Yied  (*  change y to i *) 
15+   | Edden  (*  for the past participle "ridden" *) 
1516  | Irreg  of  's 
1617
1718type  's ingform  =
1819  | Ing 
19-   | Dding 
20-   | Eing   (*  what was this for?  *) 
20+   | Dding   (*  consonant doubling  *) 
21+   | Eing   (*  drop the e  *) 
2122  | Irreg  of  's 
2223
2324(*  Need to store noun and verb /types/ also: transitive/intransitive,
@@ -33,24 +34,26 @@ type 's ingform =
3334(*  Just put the tag string in the record, and when I make the map, I just
3435 * pull it out and add the tag separately. That's how a map works anyway. *)  
3536
36- type  's nouninfo  = { tag : string ; 
37-                      countable : bool ; 
37+ type  's nouninfo  = { countable : bool ; 
3838                     plural : 's  pluralform  } (*  Gerund? Infinitive? *) 
3939
4040(*  thinking that different forms of a verb can be in one record. *) 
41- type  's verbinfo  = { tag : string ; 
42-                      vbs : 's  pluralform ; 
41+ type  's verbinfo  = { vbs : 's  pluralform ; 
4342                     prespart : 's  ingform ; 
4443                     past : 's  pastform ; 
4544                     pastpart : 's  pastform  }
4645
4746(*  Still need a dictionary to POS lookup of a word *) 
4847(*  Actually, a datatype for a word with specific POS *) 
49- type  's posinfo   =
48+ type  's lexinfo   =
5049  | Nn  of  's  nouninfo 
5150  | Vb  of  's  verbinfo 
5251  | Jj 
5352  | Rb 
53+   | Prep 
54+   | Pron 
55+   | Conj 
56+   | Det 
5457
5558(*  idea: word module parameterized by a module that has a 
5659 * "string" type that supports character indexing. This way we can support  
@@ -65,7 +68,9 @@ module type STRINGY = sig
6568  val  length  : t  -> int 
6669  val  sub  : t  -> int  -> int  -> t 
6770  val  of_string  : string  -> t 
71+   val  show  : t  -> string 
6872  val  append  : t  -> t  -> t 
73+   val  append_r  : t  -> t  -> t 
6974  val  compare  : t  -> t  -> int 
7075end 
7176
@@ -76,39 +81,97 @@ module Word = functor (St: STRINGY) -> struct
7681  module  StSet  =  Set. Make (St )
7782  type  t  = {
7883      baseform : St .t ; 
79-       posdata : St .t  posinfo  list ; 
80-       posdict : St .t  posinfo  PosMap .t ; 
81-       allforms : StSet .t 
84+       lexdata : St .t  lexinfo  list ; 
85+       (*   posdict: St.t lexinfo  PosMap.t;  (*  gen_entry takes care...  *) 
86+       allforms: StSet.t  *)  
8287    }
8388  type  sentence  = (St .t  *  t ) list 
84-   let  create  ~baseform   ~posdata   =  {
85-       baseform; posdata;
86-       posdict =  PosMap. empty;
87-       allforms =  StSet. empty
89+   let  create  ~baseform   ~lexdata   =  {
90+       baseform;
91+       lexdata;
92+       (*  posdict = PosMap.empty;
93+       allforms = StSet.empty *)  
8894    }
8995  (*  Parse from JSON/Xml? *) 
9096  let  rtrim  s  n  =  St. sub s 0  (St. length s -  n)
91-   let  to_string  w  =  w.baseform  (*  maybe not actual string *) 
92-   (*  let add_plural w pl = match pl with
93-     | S -> { w with plural = St.append w.baseform (St.of_string "s") } 
94-     | Es -> { w with plural = St.append w.baseform (St.of_string "es") } 
95-     | Yies -> { w with 
96-                 plural = St.append (rtrim w.baseform 1) (St.of_string "ies") } 
97-     | Irreg s -> { w with plural = s } *)  
97+   let  to_string  w  =  w.baseform  (*  just a hack to print the base form *) 
98+   let  pluralize  baseform  pltype  =  match  pltype with 
99+     |  S  -> St. append baseform (St. of_string " s"  )
100+     |  Es  -> St. append baseform (St. of_string " es"  )
101+     |  Yies  -> St. append (rtrim baseform 1 ) (St. of_string " ies"  )
102+     |  Irreg  s  -> s
103+   let  add_ing  baseform  ingtype  =  match  ingtype with 
104+     |  Ing  -> St. append baseform (St. of_string " ing"  )
105+     |  Dding  ->
106+        let  dcons =  St. sub baseform (St. length baseform -  1 ) 1 
107+        in 
108+        St. append baseform dcons
109+        |>  St. append_r (St. of_string " ing"  )
110+     |  Eing  -> St. append (rtrim baseform 1 ) (St. of_string " ing"  )
111+     |  Irreg  s  -> s
112+   let  add_past  baseform  pastform  =  match  pastform with 
113+     |  Ed  -> St. append baseform (St. of_string " ed"  )
114+     |  Dded  ->
115+        let  dcons =  St. sub baseform (St. length baseform -  1 ) 1 
116+        in 
117+        St. append baseform dcons
118+        |>  St. append_r (St. of_string " ed"  )
119+     |  Yied  -> rtrim baseform 1 
120+               |>  St. append_r (St. of_string " ied"  )
121+     |  Edden  ->
122+        let  len =  St. length baseform in 
123+        let  dcons =  St. sub baseform (len-2 ) 1 
124+        in 
125+        St. append (rtrim baseform 1 ) dcons
126+        |>  St. append_r (St. of_string " en"  )
127+     |  Irreg  s  -> s
128+   (*  Generate all lexicon entries from string to (string, POS, t) triples 
129+    * (originally just lexinfo, but t has the base in it too, so good) *)  
130+   let  gen_entry  w  = 
131+     w.lexdata
132+     |>  List. map (function 
133+            |  Nn  { countable; plural }  ->
134+               if  countable then  [
135+                   (w.baseform, " NN-S"  , w);
136+                   (pluralize w.baseform plural, " NN-P"  , w)
137+                 ]
138+               else  [] 
139+            |  Vb  { vbs; prespart; past; pastpart }  -> [
140+                (w.baseform, " VP-S"  , w);
141+                (pluralize w.baseform vbs, " VB-P"  , w);
142+                (add_ing w.baseform prespart, " VB-G"  , w);
143+                (add_past w.baseform past, " VB-D"  , w);
144+                (add_past w.baseform pastpart, " VB-N"  , w)
145+              ]
146+            |  Jj  -> [ (w.baseform, " JJ-S"  , w) ]
147+            |  Rb  -> [ (w.baseform, " RB-S"  , w) ]
148+            |  Conj  -> [ (w.baseform, " CJ"  , w) ]
149+            |  Det  -> [ (w.baseform, " DT"  , w) ]
150+            |  Prep  -> [ (w.baseform, " IN"  , w) ]
151+            |  Pron  -> [ (w.baseform, " PN"  , w) ]
152+          )
153+     |>  List. concat
98154end 
99155
100156(*  String-specific module implementations start here. *) 
101157
102- module  StringyString  : STRINGY  =  struct 
158+ module  StringEx  : STRINGY  =  struct 
103159  include  String 
104160  type  c  = char 
105161  let  of_string  s  =  s
162+   let  show  s  =  s
106163  let  append  s1  s2  =  s1 ^  s2
164+   let  append_r  s1  s2  =  s2 ^  s1
107165end 
108166
167+ (*  For the toplevel pretty-printer *) 
168+ let  format_stringex  fmt  ss  = 
169+   Format. fprintf fmt " \" %s\" "   (StringEx. show ss)
170+ 
171+                                
109172(*  Eventually want to parameterize by the language too...or,
110173 * Just have a different Word module of same signature *)  
111- module  StrWord  =  Word (StringyString )
174+ module  StrWord  =  Word (StringEx )
112175
113176(*  Assumes word also includes punctuation. *) 
114177type  sentence  = (string  *  StrWord .t ) list 
0 commit comments