using System;
using System.Text;
using System.IO;
using System.Collections.Generic;
using System.Drawing;

namespace Cynosura
{
    /// <summary>
    /// A StreamReader which records the lengths of the lines it passes
    /// as the reader progresses along a given stream of characters.
    /// A line is terminated by "\n", "\r" or "\r\n"; the terminator is
    /// counted as part of the line's length.
    /// </summary>
    public class LineMappingStreamReader : StreamReader
    {
        const char lineFeedChar = '\n';
        const char carriageReturnChar = '\r';

        // ordered list of line lengths (terminator included).
        List<int> lineLengths;

        // the last line terminator character that was processed
        // (used for finding windows "\r\n" line breaks
        // split across two read operations)
        char lastCarriageReturn;

        // the stream being read
        Stream stream;

        // number of processed (terminated) lines
        int linesRead;

        // number of characters read on the current, unterminated line
        int currentColumn;

        // number of chars read
        long position;

        // scratch buffer reused by the single-character Read()
        readonly char[] singleCharBuffer = new char[1];

        /// <summary>
        /// The one-based number of the line the reader is currently on,
        /// or 0 when nothing has been read yet.
        /// </summary>
        public int CurrentLine
        {
            get
            {
                // if position isn't zero, we are at least
                // reading on the first line
                if (position > 0)
                {
                    return linesRead + 1;
                }
                return 0;
            }
        }

        /// <summary>
        /// The number of characters read so far on the current
        /// (not yet terminated) line.
        /// </summary>
        public int CurrentColumn
        {
            get { return currentColumn; }
        }

        /// <summary>
        /// The number of lines which have been processed by the reader.
        /// Equal to the number of line breaks processed.
        /// </summary>
        public int LinesRead
        {
            get { return this.linesRead; }
        }

        /// <summary>
        /// The number of characters processed by the
        /// LineMappingStreamReader.
        /// </summary>
        public long Position
        {
            get { return this.position; }
        }

        /// <summary>
        /// An enumerator over the line lengths, in the order
        /// the lines were found.
        /// </summary>
        public IEnumerator<int> LineLengths
        {
            get { return this.lineLengths.GetEnumerator(); }
        }

        /// <summary>
        /// Initializes a LineMappingStreamReader over the given stream.
        /// The arguments are forwarded unchanged to the StreamReader
        /// constructor.
        /// </summary>
        public LineMappingStreamReader(
            Stream stream,
            Encoding encoding,
            bool detectEncodingFromByteOrderMarks,
            int bufferSize)
            : base(stream, encoding, detectEncodingFromByteOrderMarks, bufferSize)
        {
            this.stream = stream;
            this.lineLengths = new List<int>();
        }

        /// <summary>
        /// Returns the character offset at which a line starts.
        /// </summary>
        /// <param name="line">zero-based line index</param>
        /// <returns>The offset of the first character of the line</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="line"/> is negative or beyond the lines read so far.
        /// </exception>
        public int GetOffsetAtLine(int line)
        {
            return PositionToOffset(new Point(0, line));
        }

        /// <summary>
        /// Returns the length of a line, terminator included.
        /// </summary>
        /// <remarks>
        /// For the last (not yet terminated) line the result is derived
        /// from the Length of the underlying stream, so it covers all
        /// unprocessed text, which may itself contain line breaks. Stream
        /// Length is a byte count, so for multi-byte encodings this is
        /// only an approximation of a character count.
        /// </remarks>
        /// <param name="line">zero-based line index</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="line"/> is negative or beyond the lines read so far.
        /// </exception>
        public long GetLengthAtLine(int line)
        {
            if (line < 0)
            {
                throw new ArgumentOutOfRangeException(
                    "line",
                    "argument 'line' must be greater than or equal to 0");
            }

            int knownLines = this.lineLengths.Count;

            if (line < knownLines)
            {
                // a fully read line: its length was recorded
                return this.lineLengths[line];
            }

            if (line > knownLines)
            {
                // neither a recorded line nor the trailing unterminated one
                throw new ArgumentOutOfRangeException(
                    "line",
                    "argument 'line' must not be greater than the total number of read lines");
            }

            // line == knownLines: the trailing line with no terminator yet
            if (knownLines == 0)
            {
                // no line breaks found so far:
                // return the entire length of the text
                return stream.Length;
            }

            int offset = GetOffsetAtLine(line - 1);
            int length = this.lineLengths[line - 1];

            // the rest of the text hasn't been processed thus far, so
            // return the entire length of the unprocessed text
            return this.stream.Length - (offset + length);
        }

        /// <summary>
        /// Converts a given two dimensional coordinate into
        /// its equivalent offset.
        /// </summary>
        /// <param name="location">
        /// the location to convert; X is the zero-based column,
        /// Y the zero-based line
        /// </param>
        /// <returns>The offset</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The coordinate is negative, on a line that has not been read,
        /// or past the end of its line.
        /// </exception>
        public int PositionToOffset(Point location)
        {
            if (location.IsEmpty)
            {
                return 0;
            }

            // sanity
            if (location.Y < 0 || location.X < 0)
            {
                throw new ArgumentOutOfRangeException(
                    "location",
                    "argument 'location' must point to a non-negative coordinate");
            }

            if (location.Y > lineLengths.Count)
            {
                throw new ArgumentOutOfRangeException(
                    "location",
                    "argument 'location' must not be greater than the total number of read lines");
            }

            // build the total offset for the target line by aggregating
            // the lengths of the lines which precede it
            int totalOffset = 0;
            for (int precedingLine = 0; precedingLine < location.Y; precedingLine++)
            {
                totalOffset += this.lineLengths[precedingLine];
            }

            // the length of the line at the target line
            long lengthAtTarget = GetLengthAtLine(location.Y);

            if (location.X > lengthAtTarget)
            {
                // the column number is invalid for this line
                throw new ArgumentOutOfRangeException(
                    "location",
                    "argument 'location' must not be greater than the total number of columns in the line");
            }

            return totalOffset + location.X;
        }

        /// <summary>
        /// Updates the line map and counters for characters that were
        /// just read into <paramref name="buffer"/>.
        /// </summary>
        /// <returns>The number of line breaks discovered</returns>
        int Process(char[] buffer, int index, int count)
        {
            if (count <= 0)
            {
                // nothing was read (end of stream)
                return 0;
            }

            int result = NumLines(buffer, index, count);

            linesRead += result;
            position += count;

            return result;
        }

        /// <summary>
        /// Scans a range of characters, recording the length of every
        /// line that ends inside it.
        /// </summary>
        /// <param name="buffer">the characters to be searched</param>
        /// <param name="index">the position in the buffer to begin
        /// searching for line breaks</param>
        /// <param name="count">the number of characters to process</param>
        /// <returns>The number of line breaks discovered</returns>
        int NumLines(char[] buffer, int index, int count)
        {
            int linesFound = 0;

            // iterate rather than recurse, so a buffer with many line
            // breaks does not need one stack frame per line
            while (count > 0)
            {
                int lf;
                int cr;

                DiscoverLineDelimiters(buffer, index, count, out lf, out cr);

                if (lf == -1 && cr == -1)
                {
                    // No end of line marker in the rest of the range
                    currentColumn += count;
                    break;
                }

                int consumed;

                if (lf != -1 && (cr == -1 || lf < cr))
                {
                    // LF ('\n')
                    // currentColumn == 0 guarantees the '\r' was the very
                    // last character processed, not a stale one from an
                    // earlier line break.
                    if (lf == 0
                        && currentColumn == 0
                        && lastCarriageReturn == carriageReturnChar)
                    {
                        // this is a windows line break split
                        // across two read operations: the '\r' already
                        // closed the line, so just lengthen it
                        lineLengths[lineLengths.Count - 1] =
                            lineLengths[lineLengths.Count - 1] + 1;
                    }
                    else
                    {
                        linesFound++;
                        lineLengths.Add(currentColumn + lf + 1);
                    }

                    consumed = lf + 1;
                    lastCarriageReturn = lineFeedChar;
                }
                else if (lf == cr + 1)
                {
                    // CRLF ('\r\n')
                    linesFound++;
                    lineLengths.Add(currentColumn + lf + 1);
                    consumed = lf + 1;
                    lastCarriageReturn = lineFeedChar;
                }
                else
                {
                    // CR ('\r')
                    linesFound++;
                    lineLengths.Add(currentColumn + cr + 1);
                    consumed = cr + 1;
                    lastCarriageReturn = carriageReturnChar;
                }

                currentColumn = 0;
                index += consumed;
                count -= consumed;
            }

            return linesFound;
        }

        /// <summary>
        /// Finds the first line break in
        /// [startIndex, startIndex + count).
        /// </summary>
        /// <param name="lineFeedCharPosition">
        /// Offset (relative to startIndex) of a '\n' that either is the
        /// first line break or directly follows the first '\r';
        /// -1 otherwise.
        /// </param>
        /// <param name="carriageReturnCharPosition">
        /// Offset (relative to startIndex) of the first '\r' when it
        /// occurs before any '\n'; -1 otherwise.
        /// </param>
        void DiscoverLineDelimiters(char[] buffer, int startIndex, int count,
            out int lineFeedCharPosition, out int carriageReturnCharPosition)
        {
            lineFeedCharPosition = -1;
            carriageReturnCharPosition = -1;

            // exclusive upper bound: never look at characters beyond the
            // requested range (they may be stale or out of bounds)
            int endIndex = startIndex + count;

            for (int i = startIndex; i < endIndex; i++)
            {
                char current = buffer[i];

                if (current == lineFeedChar)
                {
                    // either the first delimiter, or the LF of a CRLF pair
                    lineFeedCharPosition = i - startIndex;
                    break;
                }

                if (carriageReturnCharPosition > -1)
                {
                    // the character after the CR is not an LF:
                    // a lone CR, nothing more to learn
                    break;
                }

                if (current == carriageReturnChar)
                {
                    // keep going one more character to look for CRLF
                    carriageReturnCharPosition = i - startIndex;
                }
            }
        }

        /// <summary>
        /// Reads a block of characters from the stream and records any
        /// line breaks it contains.
        /// </summary>
        /// <param name="buffer">When this method returns, contains the
        /// specified character array with the values between index and
        /// (index + count - 1) replaced by the characters read
        /// from the current source.</param>
        /// <param name="index">The position in buffer at which to begin
        /// writing</param>
        /// <param name="count">The maximum number of characters to read</param>
        /// <returns>The total number of characters read into the buffer.
        /// This can be less than the number of characters requested if
        /// that many characters are not available,
        /// or zero if the end of the stream has been reached.
        /// </returns>
        public override int Read(char[] buffer, int index, int count)
        {
            // read the chars
            int read = base.Read(buffer, index, count);

            // only the characters actually read are mapped
            Process(buffer, index, read);

            return read;
        }

        /// <summary>
        /// Reads the next character and records it in the line map.
        /// </summary>
        /// <returns>The character read, or -1 at the end of the stream</returns>
        public override int Read()
        {
            int result = base.Read();

            // -1 signals end of stream and is not a character
            if (result != -1)
            {
                singleCharBuffer[0] = (char)result;
                Process(singleCharBuffer, 0, 1);
            }

            return result;
        }

        /// <summary>
        /// Reads a line of characters, without its terminator.
        /// </summary>
        /// <returns>The next line, or null if the end of the stream
        /// has been reached</returns>
        public override string ReadLine()
        {
            StringBuilder sb = new StringBuilder();
            char[] buffer = new char[1];
            int linesAtStart = linesRead;
            bool readAny = false;

            while (Read(buffer, 0, 1) > 0)
            {
                readAny = true;

                if (linesRead == linesAtStart)
                {
                    sb.Append(buffer[0]);
                }
                else
                {
                    // a new line discovered
                    // check for a windows line break and swallow its LF
                    if (lastCarriageReturn == carriageReturnChar &&
                        base.Peek() == lineFeedChar)
                    {
                        Read(buffer, 0, 1);
                    }
                    break;
                }
            }

            // distinguish "empty line" from "nothing left to read"
            return readAny ? sb.ToString() : null;
        }

        /// <summary>
        /// Reads all remaining characters, recording every line break
        /// on the way.
        /// </summary>
        /// <returns>The rest of the stream as a string</returns>
        public override string ReadToEnd()
        {
            int num;
            char[] buffer = new char[0x1000];
            StringBuilder builder = new StringBuilder(0x1000);

            while ((num = this.Read(buffer, 0, buffer.Length)) != 0)
            {
                builder.Append(buffer, 0, num);
            }

            return builder.ToString();
        }
    }
}