Skip to content

Commit 1488db5

Browse files
committed
tried to migrate C code automatically
1 parent d286bf0 commit 1488db5

File tree

3 files changed

+152
-3
lines changed

3 files changed

+152
-3
lines changed

poc/grammar/ctok.js

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
/**
 * Minimal C source tokenizer used by the grammar-migration PoC.
 *
 * Splits `str` into an ordered list of [state, text] pairs, where state is
 * one of the tokenizer.STATE_* constants: runs of whitespace, identifier /
 * number characters, string and character literals (escapes preserved),
 * `//` and `/* ... *``/` comments, preprocessor directives (`#...` to end of
 * line), and single STATE_OTHER ("any") characters for everything else.
 *
 * @param {string} str - C source text
 * @returns {Array<[string, string]>} list of [state, token] pairs
 */
const tokenizer = function(str) {
    const tokens = [];
    let state = 0;
    let token = null;

    // Append to the pending token while the state is unchanged, otherwise
    // emit the finished token and start a new one. Calling flush() with no
    // arguments force-emits whatever is pending.
    const flush = function(nextState, init) {
        if (state === nextState) {
            token += init;
        } else {
            if (token) {
                tokens.push([state, token]);
            }
            state = nextState;
            token = init;
        }
    };

    for (let i = 0; i < str.length; i++) {
        const char = str[i];

        // Whitespace run.
        if (char === ' ' || char === '\t' || char === '\r' || char === '\n') {
            if (state === tokenizer.STATE_WS) {
                token += char;
            } else {
                flush(tokenizer.STATE_WS, char);
            }
            continue;
        }

        // String or character literal; the two cases only differ in the
        // terminating quote, so they share one branch.
        if (char === '"' || char === '\'') {
            flush(tokenizer.STATE_TXT, char);
            // `i + 1 < str.length` keeps every `str[++i]` read in bounds so
            // an unterminated literal never appends "undefined".
            while (i + 1 < str.length) {
                if (str[++i] === '\\') {
                    // Keep the backslash and the escaped character together.
                    // BUGFIX: the terminator check must NOT run here — the
                    // original broke out on escaped quotes (e.g. "a\"b").
                    i++;
                    token += '\\' + (str[i] || '');
                } else {
                    token += str[i];
                    if (str[i] === char) break;
                }
            }
            flush();
            continue;
        }

        const ch = str.charCodeAt(i);
        // Identifier/number characters: [a-zA-Z0-9_] plus anything non-ASCII.
        if (
            (ch > 96 && ch < 123) ||
            (ch > 64 && ch < 91) ||
            ch === 95 ||
            (ch > 47 && ch < 58) ||
            ch > 126
        ) {
            if (state === tokenizer.STATE_ID) {
                token += char;
            } else {
                flush(tokenizer.STATE_ID, char);
            }
            continue;
        }

        // BUGFIX: the original tested `token === '/'`, but '/' characters are
        // emitted immediately as STATE_OTHER and never stored in `token`, so
        // comments were never recognized. Test the current character instead.
        if (char === '/') {
            const next = str[i + 1];
            if (next === '/') {
                // Line comment, up to and including the line terminator.
                flush(tokenizer.STATE_COM, char);
                while (i + 1 < str.length) {
                    token += str[++i];
                    if (str[i] === '\r' || str[i] === '\n') break;
                }
                flush();
                continue;
            }
            if (next === '*') {
                // Block comment, up to and including the closing "*/".
                // BUGFIX: look ahead before consuming the '/' so a lone '*'
                // inside the comment cannot swallow the following character.
                flush(tokenizer.STATE_COM, char);
                while (i + 1 < str.length) {
                    token += str[++i];
                    if (str[i] === '*' && str[i + 1] === '/') {
                        token += str[++i];
                        break;
                    }
                }
                flush();
                continue;
            }
        }

        // BUGFIX: same as above — test `char`, not `token`, for directives.
        if (char === '#') {
            flush(tokenizer.STATE_DIR, char);
            while (i + 1 < str.length) {
                token += str[++i];
                if (str[i] === '\r' || str[i] === '\n') break;
            }
            flush();
            continue;
        }

        // Any other single character is its own STATE_OTHER token.
        if (token) {
            flush(tokenizer.STATE_OTHER, null);
        }
        tokens.push([tokenizer.STATE_OTHER, char]);
    }
    flush();
    return tokens;
};

tokenizer.STATE_WS = "whitespace";
tokenizer.STATE_ID = "identifier";
tokenizer.STATE_COM = "comment";
tokenizer.STATE_TXT = "string";
tokenizer.STATE_OTHER = "any";
tokenizer.STATE_DIR = "directive";

// Guarded so the file also loads in an ES-module context; under CommonJS
// `module` is always defined and this behaves exactly as before.
if (typeof module !== 'undefined' && module.exports) {
    module.exports = tokenizer;
}

poc/grammar/lexer.js

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
*/
44
const lexer = require('jison-gho').lexParser;
55
const fs = require('fs');
6+
const tokenize = require('./ctok');
67
module.exports = function(filename, destination) {
78
fs.readFile(filename, function(err, contents) {
89
contents = contents.toString();
@@ -17,12 +18,37 @@ module.exports = function(filename, destination) {
1718
// handle script blocks
1819
contents = contents.replace(/(\<[^\>]+[^\n]+)\s+{\n*/g, '$1 %{\n');
1920
contents = contents.replace(/\n+\}\n/g, '\n%}\n');
21+
2022
// locate macros
2123
let macro = contents.indexOf('\n<');
22-
contents = contents.substring(0, macro - 1) + '\n%%\n' + contents.substring(macro + 1);
24+
contents = contents.substring(0, macro) + '\n%options case-insensitive\n\n%%\n\n' + contents.substring(macro + 1);
25+
2326
// migrate each macro
27+
let lexerTokens = [];
2428
contents = contents.replace(/^(\<.*?\>)([^\n]+)\s+\%\{(.*?)\%\}/gms, function(text, state, tag, script) {
25-
return state + tag + '\treturn null /* @todo ' + tag + '*/ ;';
29+
console.log(script);
30+
let src = '';
31+
const tokens = tokenize(script);
32+
for(let i = 0; i < tokens.length; i++) {
33+
let token = tokens[i];
34+
switch(token[1]) {
35+
case 'RETURN_TOKEN':
36+
let tok = tokens[i + 2][1];
37+
src += 'return ' + tok + ';';
38+
if (lexerTokens.indexOf(tok) === -1) {
39+
lexerTokens.push(tok);
40+
}
41+
i += 4;
42+
break;
43+
case 'goto':
44+
// ignore goto
45+
i += 3;
46+
break;
47+
default:
48+
src += token[1];
49+
}
50+
}
51+
return state + tag + '\t{ \n ' + src + '\n}';
2652
});
2753
try {
2854
const ast = lexer.parse(contents);

poc/zend/php7/lexer.l

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2005,7 +2005,7 @@ string:
20052005
RETURN_TOKEN(T_NS_C);
20062006
}
20072007

2008-
<SHEBANG>"#!" .* {NEWLINE} {
2008+
<SHEBANG>"#!".*{NEWLINE} {
20092009
CG(zend_lineno)++;
20102010
BEGIN(INITIAL);
20112011
goto restart;

0 commit comments

Comments
 (0)