我手头最小的文件也超过 85 万行,而且每一行的长度未知。目标是在浏览器中从该文件读取 n 行;把整个文件完整读入是不可行的。
这是我现有的 HTML:<input type="file" name="file" id="file">,以及 JS:
// Number of lines to print from the start of the file.
var n = 10;
var reader = new FileReader();

// NOTE: this handler runs only after the reader has finished loading, and
// this.result holds everything that was read. The content is therefore fully
// in memory before the first line is printed.
reader.onload = function(progressEvent) {
  // Entire file
  console.log(this.result);

  // By lines
  var lines = this.result.split('\n');
  // Never index past the end when the file has fewer than n lines.
  var count = Math.min(n, lines.length);
  for (var line = 0; line < count; line++) {
    console.log(lines[line]);
  }
};
显然,这里的问题在于它会先把整个文件读入内存,然后再按换行符拆分。因此无论 n 是多少,它都会尝试读取整个文件,而当文件很大时最终什么也读不出来。
我该怎么办?
注意:我愿意删掉整个函数从头重写,只要能对读到的每一行调用 console.log() 即可。
* “每行的长度未知”的意思是,文件的内容类似这样:
(0,(1,2)) (1,(4,5,6)) (2,(7)) (3,(8))
编辑:
可行的思路类似于“filereader api on big files”这个问题中的做法,但我不知道该如何修改它来读取文件的 n 行……
借助“Uint8Array to string in Javascript”中的方法,可以在此基础上这样做:
// Interpret the bytes of the chunk that was just read as UTF-8 text.
var bytes = new Uint8Array(fr.result);
var utf8Decoder = new TextDecoder("utf-8");
var text = utf8Decoder.decode(bytes);
console.log("Chunk " + text);
但这样可能无法把最后一行完整读出来,那之后要怎样确定各行的边界呢?例如,这是它打印出的内容:
((7202),(u'11330875493',u'2554375661')) ((1667),(u'9079074735',u'6883914476',
解决方法
逻辑与我在“filereader api on big files”的回答中所写的非常相似,只是你还需要记录到目前为止已处理的行数(以及到目前为止读到的最后一行,因为它可能还没有结束)。下面的示例适用于任何与 UTF-8 兼容的编码;如果需要其他编码,请查看 TextDecoder 构造函数的选项。
如果您确定输入是 ASCII(或其他任何单字节编码),也可以不使用 TextDecoder,而是用 FileReader 的 readAsText 方法直接把输入读取为文本。
// This is just an example of the function below.
// Guarded so that this file can also be loaded where no DOM is available.
if (typeof document !== 'undefined') {
  document.getElementById('start').onclick = function() {
    var file = document.getElementById('infile').files[0];
    if (!file) {
      console.log('No file selected.');
      return;
    }
    var maxlines = parseInt(document.getElementById('maxlines').value, 10);
    var lineno = 1;
    // readSomeLines is defined below.
    readSomeLines(file, maxlines, function(line) {
      console.log("Line: " + (lineno++) + line);
    }, function onComplete() {
      console.log('Read all lines');
    });
  };
}

/**
 * Read up to and including |maxlines| lines from |file|.
 *
 * The file is read in fixed-size slices, so at most one slice plus the
 * not-yet-terminated tail of the previous slice is held in memory.
 *
 * @param {Blob} file - The file to be read.
 * @param {integer} maxlines - The maximum number of lines to read.
 * @param {function(string)} forEachLine - Called for each line. Every line
 *    that was terminated by '\n' is passed including that '\n'; a final
 *    unterminated line is passed as-is.
 * @param {function(error)} onComplete - Called when the end of the file
 *    is reached or when |maxlines| lines have been read. Receives the
 *    FileReader error if reading failed, and no argument otherwise.
 */
function readSomeLines(file, maxlines, forEachLine, onComplete) {
  var CHUNK_SIZE = 50000; // 50kb, arbitrarily chosen.
  var decoder = new TextDecoder();
  var offset = 0;
  var linecount = 0;
  // Text decoded so far that has not been terminated by '\n' yet.
  var results = '';
  var fr = new FileReader();

  fr.onload = function() {
    // Use stream:true in case we cut the file
    // in the middle of a multi-byte character
    results += decoder.decode(fr.result, {stream: true});
    var lines = results.split('\n');
    results = lines.pop(); // In case the line did not end yet.
    linecount += lines.length;

    if (linecount > maxlines) {
      // Read too many lines? Truncate the results.
      lines.length -= linecount - maxlines;
      linecount = maxlines;
    }

    for (var i = 0; i < lines.length; ++i) {
      forEachLine(lines[i] + '\n');
    }
    offset += CHUNK_SIZE;
    seek();
  };

  fr.onerror = function() {
    onComplete(fr.error);
  };

  seek();

  // Either finish, or schedule the read of the next slice.
  function seek() {
    // ">=" rather than "===" so that a zero or negative limit stops at once.
    if (linecount >= maxlines) {
      // We found enough lines.
      onComplete(); // Done.
      return;
    }
    if (offset !== 0 && offset >= file.size) {
      // We did not find all lines, but there are no more lines.
      // Flush bytes the streaming decoder may still be holding back.
      results += decoder.decode();
      // An empty remainder means the file ended with '\n' (or was empty),
      // so there is no further line to report.
      if (results.length > 0) {
        forEachLine(results); // This is from lines.pop(), before.
      }
      onComplete(); // Done
      return;
    }
    var slice = file.slice(offset, offset + CHUNK_SIZE);
    fr.readAsArrayBuffer(slice);
  }
}
Read <input type="number" id="maxlines"> lines from <input type="file" id="infile">. <input type="button" id="start" value="Print lines to console">