Skip to content

Commit d286bf0

Browse files
committed
glayzzle#372 - progress on lexer
1 parent b0cf55a commit d286bf0

File tree

7 files changed

+8138
-5
lines changed

7 files changed

+8138
-5
lines changed

poc/README.md

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@ The idea is to batch the update of the parser by syncing with php source.
22

33
# Goals / Requisites :
44

5-
- Avoid updating by hand lexer or grammar
6-
- Avoid forking PHP files (just keep them in sync)
7-
- Having exactly the same behavior (precedence, expression parsing, error messages)
8-
- Have a decent parsing speed
5+
1. Avoid updating by hand lexer or grammar
6+
2. Avoid forking PHP files (just keep them in sync)
7+
3. Have exactly the same behavior (precedence, expression parsing, error messages)
8+
4. Have a decent parsing speed
99

1010
## PHP Parser from nikic
1111

@@ -17,8 +17,15 @@ It will fail as it depends on php tokenizer launched from the php runtime, and i
1717

1818
Second approach (in progress) is to :
1919

20-
- [ ] Download grammar from PHP src
20+
- [x] Download grammar from PHP src
2121
- [ ] Automate C specific code
2222
- [ ] Inject javascript code
2323
- [ ] Generate lexer & grammar files
2424
- [ ] Generate files with JISON
25+
26+
Lexer Problems :
27+
1. Stripping bison not supported tags - should be fine, but needs maintenance on future upgrades (breaks requisite 1)
28+
2. Stripping C code - can be done but demands reimplementation (breaks requisite 3)
29+
30+
Grammar Problems :
31+
@todo

poc/bin/build.js

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#!/usr/bin/node

/**
 * Build script: (optionally) downloads the PHP scanner / parser sources into
 * the `zend` folder, then runs the lexer conversion on the PHP 7 scanner file.
 */
const https = require('https');
const fs = require('fs');
const lexer = require('../grammar/lexer');

// The download step is currently switched off: the script works with the
// files already present under `target`. Set to `false` to fetch fresh copies.
const SKIP_DOWNLOAD = true;

// 1. Downloading lexer files from PHP
const files = {
  "php7": {
    "lexer.l": "https://raw.githubusercontent.com/php/php-src/master/Zend/zend_language_scanner.l",
    "grammar.y": "https://raw.githubusercontent.com/php/php-src/master/Zend/zend_language_parser.y"
  },
  "php5": {
    "lexer.l": "https://raw.githubusercontent.com/php/php-src/PHP-5.6.40/Zend/zend_language_scanner.l",
    "grammar.y": "https://raw.githubusercontent.com/php/php-src/PHP-5.6.40/Zend/zend_language_parser.y"
  }
};
const target = __dirname + '/../zend';

/**
 * Downloads one source file into `target/<version>/<filename>`.
 *
 * @param {string} version - key of `files` (also the destination sub-folder)
 * @param {string} filename - destination file name
 * @param {string} url - address to fetch
 * @returns {Promise<boolean>} resolves with `true` once the file is fully
 *   written; rejects on a network error, a write error or a non-200 status
 */
function download(version, filename, url) {
  return new Promise(function(done, reject) {
    console.log('Downloading ' + filename + ' for ' + version);
    const request = https.get(url, function(response) {
      if (response.statusCode !== 200) {
        // drain the body so the socket is released, and keep the existing
        // file untouched instead of overwriting it with an error page
        response.resume();
        reject(new Error('Unexpected HTTP status ' + response.statusCode + ' for ' + url));
        return;
      }
      const file = fs.createWriteStream(target + '/' + version + '/' + filename);
      // wait for the write stream (not the response) to finish, so the data
      // is flushed to disk before the lexer reads it
      file.on('finish', function() {
        done(true);
      });
      file.on('error', reject);
      response.on('error', reject);
      response.pipe(file);
    });
    // connection-level failures (DNS, refused, TLS...) are emitted here
    request.on('error', reject);
  });
}

const dl = [];
for (const [version, sources] of Object.entries(files)) {
  for (const [filename, url] of Object.entries(sources)) {
    dl.push(SKIP_DOWNLOAD ? Promise.resolve(true) : download(version, filename, url));
  }
}

Promise.all(dl).then(function() {
  console.log('Download is ready');
  // upgrading the lexer
  lexer(target + '/php7/lexer.l', null);
}).catch(function(e) {
  console.error(e);
  process.exit(1);
});

poc/grammar/lexer.js

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/**
2+
* This module open a lexer file and extract it's parts
3+
*/
4+
const lexer = require('jison-gho').lexParser;
5+
const fs = require('fs');
6+
module.exports = function(filename, destination) {
7+
fs.readFile(filename, function(err, contents) {
8+
contents = contents.toString();
9+
// remove header / footer
10+
let header = contents.indexOf('/*!re2c');
11+
let footer = contents.indexOf('\n*/\n', header);
12+
contents = contents.substring(header + 7, footer);
13+
// remove macro
14+
contents = contents.replace('re2c:yyfill:check = 0;', '');
15+
// remove trigger on states
16+
contents = contents.replace(/\<\!\*\>[^;]+;\n*/g, '');
17+
// handle script blocks
18+
contents = contents.replace(/(\<[^\>]+[^\n]+)\s+{\n*/g, '$1 %{\n');
19+
contents = contents.replace(/\n+\}\n/g, '\n%}\n');
20+
// locate macros
21+
let macro = contents.indexOf('\n<');
22+
contents = contents.substring(0, macro - 1) + '\n%%\n' + contents.substring(macro + 1);
23+
// migrate each macro
24+
contents = contents.replace(/^(\<.*?\>)([^\n]+)\s+\%\{(.*?)\%\}/gms, function(text, state, tag, script) {
25+
return state + tag + '\treturn null /* @todo ' + tag + '*/ ;';
26+
});
27+
try {
28+
const ast = lexer.parse(contents);
29+
console.log(ast);
30+
} catch(e) {
31+
console.error(e.message);
32+
}
33+
});
34+
};

0 commit comments

Comments
 (0)