{ For reading Excel XP sheets and maybe generating some wicked } { output. This source code compiles with Free Pascal / win32. } { FPC gives some warnings about my pointer conversions. Tough. } { } { License: } { If you can make sense of the code, you can use it } { at your own discretion and responsibility. } { } { The OpenOffice.Org Source Project has produced commendable } { documentation, which has been heavily tapped to get this program } { to work with the minimum necessary effort. } { } { To make sense of the code, I recommend using their guides on both } { compound files and Excel files. Both documents are available at } { http://sc.openoffice.org/ } { } { MS themselves may also have made some documentation available. } { Refer to theirs instead if the purity of your soul is no object. } //uses windows; const stringtab : array[0..15] of string = ( 'The file &1 does not seem to be a compound document Excel sheet.' + chr(0), 'The file &1 does not exist.' + chr(0), 'Could not load the Excel sheet due to a programming error.' + chr(0), 'Too much data in the file, program upgrade needed.' + chr(0), 'The file &1 is locked by another program.' + chr(0), 'Unknown error! Consult xlog.txt and your friendly programmer.' + chr(0), '', '', '', '', '', '', '', '', '', ''); max_streams = 64; // increase this if more files exist inside CD file max_sheets = 256; // total number of worksheets that can be handled max_books = 4; max_cols = 127; // worksheets may have width spanning 0..max_cols type bytearray = array[0..9999999] of byte; lonkero = array[0..999999] of dword; dirtype = record unicodenamez : array[0..31] of word; unicodenamezlen : word; // length in bytes, incl. zero, up to 64 entrytype, nodecolor : byte; DIDleft, DIDright, DIDroot : dword; junk : array[0..35] of byte; SID : dword; streamsize : dword; notused : dword; end; rowtype = record genus : array[0..max_cols] of byte; // 0 - Blank cell // 1 - Reference to shared string table // 2 - integer RK value // 3 - Boolean // 4 - Error value // 5 - Double valus : array[0..max_cols] of longint; dublus : array[0..max_cols] of double; // 64-bit floating point! end; var workdir : string; f : file; conf : text; // this is xlog.txt for keeping a "console" log datapoku : pointer; datasize : dword; namu : string; strutsi : pchar; i, j : dword; sheetpoku : word; // points to the next free slot in sheet[] suppoku : byte; stripoku : dword; // number of loaded strings in stringtable varp : array[0..250000] of byte; cdheader : record // Excel-files use a compound document structure... idlo, idhi : dword; uid : array[0..15] of byte; revision, version, endianness, sectorsize, shortsize : word; bonky : array[0..9] of byte; totalsects, dirsid, frompy, minstreamsize, firstshortsid, totalshortsects, firstmastersid, totalmastersects : dword; end; ssecsize, secsize : dword; msat, sat : pointer; ssat : pointer; ssatz : dword; stream : array[0..max_streams] of record content : pointer; sizu : dword; end; dirtable : array[0..max_sheets] of dirtype; dirtentries : byte; wbpoku : byte; workbook : array[0..max_books] of record filunamu : string; datemode : byte; sup : record internalsheets : byte; ext : array[0..15] of record numsheets : byte; URL : array[0..191] of word; sheetname : array[0..31] of array[0..63] of word; end; ref : array[0..255] of record supdex, firstsheet, lastsheet : word; end; end; stringtable : pointer; // Strings are loaded into the table like this: // length in characters : word // unicode character : words // number of formatting runs : word // followed by the runs, if any : dwords stlist : pointer; // A sequential list, one dword for each stringtable // entry - gives the offset from the beginning of the // stringtable to each entry's beginning sheet : array[0..max_sheets] of record data : pointer; sheettype : byte; sheetnamelen : word; sheetname : array[0..63] of word; firstrow, lastrow : word; end; end; {$inline on} {$I-} procedure outcon(whut : string); inline; // To easily shift all debug output into a file rather than in a console! begin writeln(conf, whut); end; function strdec(luku : dword) : string; // Takes a value and returns it in plain numbers in an ascii string var tempstr : string; begin tempstr := ''; while luku > 9 do begin tempstr := chr(luku mod 10 + 48) + tempstr; luku := luku div 10; end; strdec := chr(luku + 48) + tempstr; end; function strhex(luku : dword) : string; // Takes a value and returns it in hex in an ascii string. var tempstr:string; begin tempstr:=''; while luku>15 do begin if luku and 15<10 then tempstr:=chr(luku mod 16+48)+tempstr else tempstr:=chr(luku mod 16+55)+tempstr; luku:=luku shr 4; end; if luku and 15<10 then strhex:=chr(luku mod 16+48)+tempstr else strhex:=chr(luku mod 16+55)+tempstr; end; procedure releaseall; // This is set as the exit procedure, which gets automatically called // upon program termination. begin if ssat <> NIL then freemem(ssat, ssatz); if sat <> NIL then freemem(sat, cdheader.totalsects * secsize); if datapoku <> NIL then freemem(datapoku, datasize); for i := 0 to max_streams do if stream[i].content <> NIL then freemem(stream[i].content, stream[i].sizu); for i := 0 to max_books do if workbook[i].sup.internalsheets > 0 then begin if workbook[i].stringtable <> NIL then freemem(workbook[i].stringtable); if workbook[i].stlist <> NIL then freemem(workbook[i].stlist); for j := 0 to workbook[i].sup.internalsheets - 1 do begin with workbook[i].sheet[j] do if data <> NIL then freemem(data, lastrow * sizeof(rowtype)); end; end; // Error handling rests here if erroraddr <> NIL then case exitcode of 200: outcon('+++ Division by zero error +++ don''t implode the universe +++'); 201: outcon('+++ Range check error +++'); 204: outcon('+++ Freeing a null pointer error +++ nothing comes from nothing +++'); 207: outcon('+++ Invalid floating point operation error +++ avoid esoteric math +++'); 216: outcon('+++ GPF error +++ we tried to sneak into forbidden memory and got caught +++'); else outcon('Crash! Runtime error ' + strhex(exitcode) + 'h! (' + strdec(exitcode) + ')'); end; outcon('Bye!'); // Even in case of runtime errors, the output file is properly closed! close(conf); end; procedure buildbrook(sourcesid : dword; whither : word); // Concatenates short sectors into a full stream. // Whither must point to a stream record. var ofsu : dword; begin if stream[whither].content <> nil then begin outcon('Stream $' + strhex(whither) + ' is being overwritten...'); freemem(stream[whither].content, stream[whither].sizu); stream[whither].content := NIL; end; getmem(stream[whither].content, datasize); ofsu := 0; while sourcesid <> $FFFFFFFE do begin move(pointer(dword(stream[0].content) + sourcesid * ssecsize)^, pointer(dword(stream[whither].content) + ofsu)^, ssecsize); inc(ofsu, ssecsize); sourcesid := lonkero(ssat^)[sourcesid]; end; reallocmem(stream[whither].content, ofsu); stream[whither].sizu := ofsu; end; procedure buildstream(sourcesid : dword; whither : word); // Concatenates compound document sectors into a stream. // Whither must point to a stream record. var ofsu : dword; begin if stream[whither].content <> nil then begin outcon('Stream $' + strhex(whither) + ' is being overwritten...'); freemem(stream[whither].content, stream[whither].sizu); stream[whither].content := NIL; end; getmem(stream[whither].content, datasize); ofsu := 0; while sourcesid <> $FFFFFFFE do begin move(pointer(dword(datapoku) + sourcesid * secsize + 512)^, pointer(dword(stream[whither].content) + ofsu)^, secsize); inc(ofsu, secsize); sourcesid := lonkero(sat^)[sourcesid]; end; reallocmem(stream[whither].content, ofsu); stream[whither].sizu := ofsu; end; function readunicode(sorsa, desti : pointer; lenku : word) : pointer; // Tries to interpret an excel unicode string from sorsa^, and unpacks // it at desti^; return value is sorsa moved to the end of the string data. // This function ignores rich-text formatting runs and is not not used to // load up the string table due to the Continue-record exception with it... var ops : byte; runs : word; begin ops := byte(sorsa^); if ops and 4 = 4 then begin outcon('Asian phonetic settings in unicode string... potential mess there.'); inc(dword(sorsa), 4); end; inc(dword(sorsa)); runs := 0; if ops and 8 = 8 then begin runs := word(sorsa^); inc(dword(sorsa), 2); end; if ops and 1 = 1 then begin move(sorsa^, desti^, lenku * 2); inc(sorsa, lenku * 2); end else while lenku > 0 do begin word(desti^) := byte(sorsa^); inc(dword(desti), 2); inc(dword(sorsa)); dec(lenku); end; inc(sorsa, runs * 4); readunicode := sorsa; end; procedure inputstream(skolka : word); // Unpacks data records from an excel datastream. var boo, muah, vard, varl, varm, vart, vars : dword; varr, varg, varv : word; streeng : array[0..191] of word; // fog : file; begin if dirtable[skolka].entrytype <> 2 then exit; outcon('===== Stream ' + strdec(skolka) + ' ====='); boo := 0; varv := 0; // This code dumps the streams as files for manual inspection... { assign(fog,strhex(skolka)+'.str'); rewrite(fog,1); blockwrite(fog,stream[skolka].content^,stream[skolka].sizu); close(fog); exit;} while boo < stream[skolka].sizu do begin vard := 0; // continue records are unloaded at this offset varr := word(pointer(dword(stream[skolka].content) + boo)^); // record ID repeat // Concatenate this record and any following continue records for easier // handling... unless it is the shared string table, in which case the // continue record is handled during processing. muah := dword(stream[skolka].content) + boo; varl := word(pointer(muah + 2)^); // record length move(pointer(muah + 4)^, varp[vard], varl); if (word(pointer(muah + varl + 4)^) <> $3C) or (varr = $FC) then break; inc(boo, varl + 4); inc(vard, varl); outcon('Continue block encountered after record ' + strhex(varr) + 'h...'); until false = true; inc(boo, varl + 4); inc(varl, vard); if varl > 250000 then outcon('+++ Record ' + strhex(varr) + 'h has length ' + strdec(varl) + '! Varp overflow error +++'); muah := dword(@varp[0]); // outcon('wb['+strdec(wbpoku)+'].sheet['+strdec(sheetpoku)+'].data = '+strdec(dword(workbook[wbpoku].sheet[sheetpoku].data))+' ... next record '+strhex(varr)+' ... addr '+strhex(boo)); // Varv identifies the type of data this pseudofile contains. // $005 means the Workbook Globals section, $010 is worksheet data. // The others can mostly be ignored, except for the Beginning and Ending // records, $809 and $00A. // Varr identifies the record that was just loaded into Varp[]. if (varv = $005) or (varv = $010) or (varr = $A) or (varr = $809) then case varr of $0006: begin // FORMULA - why recalculate when the result is right there? varm := word(pointer(muah)^); // row vart := word(pointer(muah + 2)^); // column with workbook[wbpoku].sheet[sheetpoku] do begin varl := dword(data) + varm * word(sizeof(rowtype)); // To determine type of result: // If last word of result is FFFF, it is probably not a number; // The first byte of result determines the special type... // and if it doesn't match, it was a 64-bit float after all. // If last word is not FFFF, it is a 64-bit float. // However, is the result is, say, 100000000000FFFFh, and // intended to be a float, we are screwed. Properly identifying // the result would take a lot of work with formula tokens... if word(pointer(muah + 12)^) = $FFFF then case byte(pointer(muah + 6)^) of 0: // string begin rowtype(pointer(varl)^).valus[vart] := stripoku; rowtype(pointer(varl)^).genus[vart] := 1; end; 1: begin // boolean rowtype(pointer(varl)^).genus[vart] := 3; rowtype(pointer(varl)^).valus[vart] := byte(pointer(muah + 8)^); end; 2: begin // error rowtype(pointer(varl)^).genus[vart] := 4; rowtype(pointer(varl)^).valus[vart] := byte(pointer(muah + 8)^); end; 3: begin // empty cell rowtype(pointer(varl)^).genus[vart] := 0; end; else begin // double float rowtype(pointer(varl)^).genus[vart] := 5; rowtype(pointer(varl)^).dublus[vart] := double(pointer(muah + 6)^); end; end else begin // double float rowtype(pointer(varl)^).genus[vart] := 5; rowtype(pointer(varl)^).dublus[vart] := double(pointer(muah + 6)^); end; end; end; $000A: begin // EOF - end of a pseudofile within the stream if word(pointer(dword(stream[skolka].content) + boo)^) = 0 then exit; // A zero-record after EOF is probably end of stream // When finished with workbook globals, reset the sheet pointer... if varv = $005 then begin workbook[wbpoku].sup.internalsheets := sheetpoku; sheetpoku := $FFFF; end; end; $000C: ; // CALCCOUNT $000D: ; // CALCMODE $000E: ; // PRECISION (assume maximal precision) $000F: ; // REFMODE $0010: ; // DELTA $0011: ; // ITERATION $0012: begin // PROTECT varm := word(pointer(muah)^); if varm <> 0 then outcon('!!!Security Breach!!! (ignoring sheet protection order :p)'); end; $0013: begin // PASSWORD varm := word(pointer(muah)^); if varm <> 0 then outcon('FYI, the password for this stream gives a hash value of ' + strhex(varm) + 'h.'); end; $0014: ; // HEADER $0015: ; // FOOTER $0017: begin // EXTERNSHEET varm := word(pointer(muah)^); outcon('Externsheet has ' + strhex(varm) + 'h structures.'); move(pointer(muah + 2)^, workbook[wbpoku].sup.ref[0], varm * 6); end; $0018: begin // NAME (4DD5) end; $0019: ; // WINDOWPROTECT $001B: ; // HORIZONTALPAGEBREAKS $001C: ; // NOTE $001D: ; // SELECTION $0022: workbook[wbpoku].datemode := byte(pointer(muah)^); // DATEMODE $0026..$0029: ; // margin settings... $002A: ; // PRINTHEADERS $002B: ; // PRINTGRIDLINES $0031: ; // FONT $003D: ; // WINDOW1 $0040: ; // BACKUP $0041: ; // PANE $0042: ; // CODEPAGE $004D: ; // PLS, environment-specific print record $0055: ; // DEFCOLWIDTH $0059: begin // XCT - number of cache records following end; $005A: begin // CRN - cache values from external documents end; $005C: ; // WRITEACCESS $005D: ; // OBJ, describes a graphic object!? $005F: ; // SAVERECALC $0063: ; // OBJECTPROTECT - ignore it :p $007D: ; // COLINFO $0080: ; // GUTS $0081: ; // WSBOOL $0082: ; // GRIDSET $0083: ; // HCENTER $0084: ; // VCENTER $0085: begin // BOUNDSHEET if sheetpoku > max_sheets then begin outcon('+++ Painful Death by Paper Cuts +++'); strutsi := @stringtab[3][1]; messageBox(0, strutsi, nil, 0); halt(69); end; if byte(pointer(muah + 4)^) > 1 then outcon('Strongly hidden sheet found. Pretty pitiful protection.'); workbook[wbpoku].sheet[sheetpoku].sheettype := byte(pointer(muah + 5)^); inc(muah, 6); workbook[wbpoku].sheet[sheetpoku].sheetnamelen := byte(pointer(muah)^); readunicode(pointer(muah + 1), @workbook[wbpoku].sheet[sheetpoku].sheetname, byte(pointer(muah)^)); inc(sheetpoku); end; $008C: ; // COUNTRY $008D: ; // HIDEOBJ $0090: ; // SORT $0092: ; // PALETTE $009C: ; // one of word data, value $000E... "FNGROUPCOUNT" $00A0: ; // SCL $00A1: ; // SETUP $00BD: begin // MULRK varm := word(pointer(muah)^); // row vart := word(pointer(muah + 2)^); // beginning column varg := word(pointer(muah + varl - 2)^); // concluding column inc(muah, 6); with workbook[wbpoku].sheet[sheetpoku] do begin varl := dword(data) + varm * word(sizeof(rowtype)); repeat if dword(pointer(muah)^) and 2 = 2 then begin rowtype(pointer(varl)^).genus[vart] := 2; rowtype(pointer(varl)^).valus[vart] := dword(pointer(muah)^) shr 2; // if the value is negative, replicate signed bit in MSBs if dword(pointer(muah)^) and $80000000 <> 0 then rowtype(pointer(varl)^).valus[vart] := rowtype(pointer(varl)^).valus[vart] or $C0000000; if dword(pointer(muah)^) and 1 = 1 then begin rowtype(pointer(varl)^).genus[vart] := 5; rowtype(pointer(varl)^).dublus[vart] := rowtype(pointer(varl)^).valus[vart] / 100; end; end else begin rowtype(pointer(varl)^).genus[vart] := 5; vard := dword(@rowtype(pointer(varl)^).dublus[vart]); dword(pointer(vard)^) := 0; dword(pointer(vard + 4)^) := dword(pointer(muah)^) and $FFFFFFFC; if dword(pointer(muah)^) and 1 = 1 then rowtype(pointer(varl)^).dublus[vart] := rowtype(pointer(varl)^).dublus[vart] / 100; end; inc(vart); inc(muah, 6); until vart > varg; end; end; $00BE: begin // MULBLANK varm := word(pointer(muah)^); // row vart := word(pointer(muah + 2)^); // beginning column varg := word(pointer(muah + varl - 2)^); // concluding column with workbook[wbpoku].sheet[sheetpoku] do begin varl := dword(data) + varm * word(sizeof(rowtype)); repeat rowtype(pointer(varl)^).genus[vart] := 0; inc(vart); until vart > varg; end; end; $00C1: ; // one word of data, value 0... useless? $00D3: ; // empty record, marks something? $00D7: ; // DBCELL $00DA: ; // BOOKBOOL $00DD: ; // SCENPROTECT - we don't care about no protection $00E0: ; // XF (contains data on various cell formatting... including // a pointer to a default Number Format type) $00E1: ; // one word of data, likely refers to codepage... $00E2: ; // an empty record, marking something? $00E5: ; // MERGEDCELLS $00EB..$00ED: ; // Office drawing stuff $00EF: ; // PHONETIC $00FC: begin // SST, the ancient and terrible if workbook[wbpoku].stringtable <> NIL then begin outcon('More than one SST block in this workbook!'); outcon('old SST is at ' + strdec(dword(workbook[wbpoku].stringtable))); freemem(workbook[wbpoku].stringtable); workbook[wbpoku].stringtable := NIL; end; outcon('String table'); stripoku := dword(pointer(muah + 4)^); // num of strings in table getmem(workbook[wbpoku].stlist, stripoku * 4); getmem(workbook[wbpoku].stringtable, 4); // first dword contains dword(workbook[wbpoku].stringtable^) := 4; // size of table varm := 4; // ofs --> stringtable vard := muah + 8; // ofs --> record data vars := stripoku; repeat // Read the unicode string header varg := word(pointer(vard)^); // <-- string length vart := byte(pointer(vard + 2)^); // <-- option byte inc(vard, 3); if vart and 8 = 8 then begin vart := vart or (word(pointer(vard)^) shl 16); // RTF-ing runs inc(vard, 2); end; // Write the output offset into our lookup table lonkero(workbook[wbpoku].stlist^)[stripoku - vars] := varm; // Reserve space in the stringtable for this string inc(dword(workbook[wbpoku].stringtable^), varg * 2 + 4 + word(vart shr 16) * 4); reallocmem(workbook[wbpoku].stringtable, dword(workbook[wbpoku].stringtable^)); // Output the string length word(pointer(dword(workbook[wbpoku].stringtable) + varm)^) := varg; inc(varm, 2); // Read, process and output the string itself : varg = length while varg > 0 do begin // If reached end of record, grab the next record if vard - muah >= varl then begin muah := dword(stream[skolka].content) + boo; varl := word(pointer(muah + 2)^); // record length move(pointer(muah + 4)^, varp[0], varl); inc(boo, varl + 4); muah := dword(@varp[0]); vard := muah; // Read the bloody option byte vart := (vart and $FFFFFF00) or byte(pointer(vard)^); inc(vard, 1); end; if vart and 1 = 1 then begin // 16-bit char word(pointer(dword(workbook[wbpoku].stringtable) + varm)^) := word(pointer(vard)^); inc(vard, 2); inc(varm, 2); end else begin // 8-bit char word(pointer(dword(workbook[wbpoku].stringtable) + varm)^) := byte(pointer(vard)^); inc(vard); inc(varm, 2); end; dec(varg); end; if vard - muah >= varl then begin muah := dword(stream[skolka].content) + boo; varl := word(pointer(muah + 2)^); // record length move(pointer(muah + 4)^, varp[0], varl); inc(boo, varl + 4); muah := dword(@varp[0]); vard := muah; // No bloody option byte here end; vart := vart shr 16; // now vart = rich text f-ing runs word(pointer(dword(workbook[wbpoku].stringtable) + varm)^) := word(vart); inc(varm, 2); while vart > 0 do begin if vard - muah >= varl then begin muah := dword(stream[skolka].content) + boo; varl := word(pointer(muah + 2)^); // record length move(pointer(muah + 4)^, varp[0], varl); inc(boo, varl + 4); muah := dword(@varp[0]); vard := muah; // No bloody option byte here either end; dword(pointer(dword(workbook[wbpoku].stringtable) + varm)^) := dword(pointer(vard)^); inc(varm, 4); inc(vard, 4); dec(vart); end; // Done with this string, on to next one if any are left dec(vars); until vars = 0; outcon('Hopefully done with SST.'); end; $00FD: begin // LABELSST varm := word(pointer(muah)^); // row vart := word(pointer(muah + 2)^); // column with workbook[wbpoku].sheet[sheetpoku] do begin varl := dword(data) + varm * word(sizeof(rowtype)); rowtype(pointer(varl)^).genus[vart] := 1; rowtype(pointer(varl)^).valus[vart] := dword(pointer(muah + 6)^); end; end; $00FF: ; // EXTSST - pointless hash table into SST $013D: ; // 18 bytes of unknown stuff; values 0001 .. 000A except 0008 $0160: begin // USESELFS varm := word(pointer(muah)^); if varm and 1 = 1 then outcon('Ack! It wants to use natural language formulas! I don''t speak natural!'); if varm > 1 then outcon('USESELFS = ' + strdec(varm) + '. Something undocumented this way comes.'); end; $0161: begin // DSF varm := word(pointer(muah)^); if varm <> 0 then outcon('An additional "Book" stream is hiding somewhere! It may cause trouble...'); end; $01AE: begin // SUPBOOK varm := word(pointer(muah)^); vart := word(pointer(muah + 2)^); if varm = 0 then outcon('DDE or OLE link in supbook!') else case vart of $13A: outcon('Add-in function in supbook!'); $401: begin outcon('Supbook: ' + strdec(varm) + ' sheets here?'); end; else begin outcon('Supbook sheet list #' + strdec(suppoku)); inc(muah, 4); muah := dword(readunicode(pointer(muah), @workbook[wbpoku].sup.ext[suppoku].URL[1], vart)); move(workbook[wbpoku].sup.ext[suppoku].URL[1], streeng[0], vart * 2); // write('URL '); for varg := 0 to vart - 1 do write(chr(streeng[varg])); writeln; workbook[wbpoku].sup.ext[suppoku].URL[0] := vart; workbook[wbpoku].sup.ext[suppoku].numsheets := varm; for varg := 0 to varm - 1 do begin vart := word(pointer(muah)^); workbook[wbpoku].sup.ext[suppoku].sheetname[varg][0] := vart; muah := dword(readunicode(pointer(muah + 2), @workbook[wbpoku].sup.ext[suppoku].sheetname[varg][1], vart)); end; inc(suppoku); end; end; end; $01B6: ; // ??? these are big records, multiply present in some files $01AF, $01B7, $01BC: ; // one word of data, value 0... useless? $01B8: ; // HLINK $01BA: ; // 13 bytes of stuff? $01BD, $01C0: ; // an empty record, marking something? $01C1: ; // 8 bytes of data? C1 01 00 00 54 8D 01 00 $01C2: ; // 24 bytes of data? $0200: begin // DIMENSIONS workbook[wbpoku].sheet[sheetpoku].firstrow := dword(pointer(muah)^); workbook[wbpoku].sheet[sheetpoku].lastrow := dword(pointer(muah + 4)^); varg := word(pointer(muah + 10)^); if varg > max_cols then outcon('Worksheet says last column + 1 is ' + strdec(varg) + ' while max_cols is ' + strdec(max_cols) + '!'); // Since this appears at the beginning of each worksheet, I use // this record to get memory for the data about to be loaded with workbook[wbpoku].sheet[sheetpoku] do begin getmem(data, lastrow * sizeof(rowtype)); fillbyte(data^, lastrow * sizeof(rowtype), 0); end; end; $0201: begin // BLANK varm := word(pointer(muah)^); // row vart := word(pointer(muah + 2)^); // column rowtype(pointer(dword(workbook[wbpoku].sheet[sheetpoku].data) + varm * word(sizeof(rowtype)))^).genus[vart] := 0; end; $0203: begin // NUMBER varm := word(pointer(muah)^); // row vart := word(pointer(muah + 2)^); // column with workbook[wbpoku].sheet[sheetpoku] do begin varl := dword(data) + varm * word(sizeof(rowtype)); rowtype(pointer(varl)^).genus[vart] := 5; rowtype(pointer(varl)^).dublus[vart] := double(pointer(muah + 6)^); end; end; $0205: begin // BOOLERR varm := word(pointer(muah)^); // row vart := word(pointer(muah + 2)^); // column with workbook[wbpoku].sheet[sheetpoku] do begin varl := dword(data) + varm * word(sizeof(rowtype)); if byte(pointer(muah + 6)^) = 1 then rowtype(pointer(varl)^).genus[vart] := 4 else rowtype(pointer(varl)^).genus[vart] := 3; rowtype(pointer(varl)^).valus[vart] := byte(pointer(muah + 6)^); end; end; $0207: begin // STRING if workbook[wbpoku].stringtable = NIL then outcon('+++ String without SST error +++'); inc(stripoku); // Read the unicode string header varg := word(pointer(muah)^); // <-- string length vart := byte(pointer(muah + 2)^); // <-- option byte inc(muah, 3); if vart and 8 = 8 then begin vart := vart or (word(pointer(muah)^) shl 16); // RTF-ing runs inc(muah, 2); end; // Reserve space in the stringtable for this string varm := dword(workbook[wbpoku].stringtable^); inc(dword(workbook[wbpoku].stringtable^), varg * 2 + 4 + word(vart shr 16) * 4); reallocmem(workbook[wbpoku].stringtable, dword(workbook[wbpoku].stringtable^)); // Write the output offset into our lookup table reallocmem(workbook[wbpoku].stlist, stripoku * 4); lonkero(workbook[wbpoku].stlist^)[stripoku - 1] := varm; // Output the string length word(pointer(dword(workbook[wbpoku].stringtable) + varm)^) := varg; inc(varm, 2); // Read, process and output the string itself : varg = length while varg > 0 do begin if vart and 1 = 1 then begin // 16-bit char word(pointer(dword(workbook[wbpoku].stringtable) + varm)^) := word(pointer(muah)^); inc(muah, 2); inc(varm, 2); end else begin // 8-bit char word(pointer(dword(workbook[wbpoku].stringtable) + varm)^) := byte(pointer(muah)^); inc(muah); inc(varm, 2); end; dec(varg); end; vart := vart shr 16; // now vart = rich text f-ing runs word(pointer(dword(workbook[wbpoku].stringtable) + varm)^) := word(vart); inc(varm, 2); while vart > 0 do begin dword(pointer(dword(workbook[wbpoku].stringtable) + varm)^) := dword(pointer(muah)^); inc(varm, 4); inc(muah, 4); dec(vart); end; end; $0208: ; // ROW $020B: begin // INDEX workbook[wbpoku].sheet[sheetpoku].firstrow := dword(pointer(muah + 4)^); end; $0225: ; // DEFAULTROWHEIGHT $023E: ; // WINDOW2 $027E: begin // RK varm := word(pointer(muah)^); // row vart := word(pointer(muah + 2)^); // column with workbook[wbpoku].sheet[sheetpoku] do begin varl := dword(data) + varm * word(sizeof(rowtype)); if dword(pointer(muah + 6)^) and 2 = 2 then begin rowtype(pointer(varl)^).genus[vart] := 2; rowtype(pointer(varl)^).valus[vart] := dword(pointer(muah + 6)^) shr 2; // if the value is negative, replicate signed bit in MSBs if dword(pointer(muah + 6)^) and $80000000 <> 0 then rowtype(pointer(varl)^).valus[vart] := rowtype(pointer(varl)^).valus[vart] or $C0000000; if dword(pointer(muah + 6)^) and 1 = 1 then begin rowtype(pointer(varl)^).genus[vart] := 5; rowtype(pointer(varl)^).dublus[vart] := rowtype(pointer(varl)^).valus[vart] / 100; end; end else begin rowtype(pointer(varl)^).genus[vart] := 5; vard := dword(@rowtype(pointer(varl)^).dublus[vart]); dword(pointer(vard)^) := 0; dword(pointer(vard + 4)^) := dword(pointer(muah + 6)^) and $FFFFFFFC; if dword(pointer(muah + 6)^) and 1 = 1 then rowtype(pointer(varl)^).dublus[vart] := rowtype(pointer(varl)^).dublus[vart] / 100; end; end; end; $0293: ; // STYLE $041E: ; // FORMAT $04BC: ; // SHRFMLA insanity $0801: ; // 202 bytes of data?? $0809: begin // Beginning of file varm := word(pointer(muah)^); if varm <> $0600 then outcon('Got ' + strhex(varm) + 'h for BIFF version instead of 0600h??'); varv := word(pointer(muah + 2)^); case varv of $005: outcon('--- Workbook globals ---'); $006: outcon('--- VB module!! :o ---'); $010: begin if sheetpoku = $FFFF then sheetpoku := 0 else inc(sheetpoku); if sheetpoku > max_sheets then begin outcon('+++ Super Sheet Maximum Attack error +++'); strutsi := @stringtab[3][1]; messageBox(0, strutsi, nil, 0); halt(69); end; outcon('--- Worksheet --- #' + strdec(sheetpoku + 1)); end; $020: outcon('--- Chart ---'); $040: outcon('--- Macro sheet ---'); $100: outcon('--- Workspace file ---'); else outcon('--- something weird ---'); end; end; $0862: ; // SHEETLAYOUT $0863: ; // 20 bytes of data?? $0866: ; // 230 bytes of data??? $0867: ; // SHEETPROTECTION gets no respect $087C: ; // ????? $087D: ; // ????? $088B: ; // ????? $088C: ; // ????? $088E: ; // ????? $0892: ; // ????? $0896: ; // ????? $089A: ; // ????? $089B: ; // ????? $089C: ; // ????? $08A3: ; // ????? $FFFE: begin // Summaries outcon('Nothing to see here...'); exit; end; else outcon('? - ' + strhex(varr) + 'h @ stream offset ' + strhex(boo - varl - 4) + 'h'); end; end; end; function readFileIntoMemory (fileid : string; tsuda : pointer; size : pointer) : byte; // Gets memory for tsuda^, reads the file "fileid" into tsuda^^. // The caller must free the pointer afterward, unless this function returned // non-zero, which means an error. // dword(size^) gets the file size. var loopvar : byte; begin assign(f, fileid); outcon('Accessing file ' + fileid + '...'); loopvar := 5; reset(f, 1); readFileIntoMemory := ioresult; // In case of errors, try again a few times in case the file was locked... while (readFileIntoMemory <> 0) and (loopvar > 0) do begin dec(loopvar); sleep(90); reset(f, 1); readFileIntoMemory := ioresult; end; case readFileIntoMemory of 0: ; 2: outcon('+++ File inexistence error +++'); 3: outcon('+++ To doom leads the wide path, and many programs take it +++'); 5: outcon('+++ ACCESS: DENIED +++ but thank you for playing +++'); else outcon('+++ Some sort of funky error #' + strdec(readFileIntoMemory) + ' +++'); end; if readFileIntoMemory <> 0 then exit; outcon('File opened. Reading...'); dword(size^) := filesize(f); getmem(pointer(tsuda^), dword(size^)); blockread(f, pointer(tsuda^)^, dword(size^)); outcon('Data happily residing in memory. Closing file.'); close(f); end; function inputfile(imya : string) : byte; // Unpacks datastreams from an excel compound document file, // returns 0 if successful, otherwise an error code. begin inputfile := readFileIntoMemory(imya, @datapoku, @datasize); if inputfile <> 0 then exit; // Process the compound document header move(datapoku^, cdheader, 76); if (cdheader.idlo <> $E011CFD0) or (cdheader.idhi <> $E11AB1A1) then begin outcon('+++ Case of mistaken file ID error +++'); strutsi := @stringtab[0][1]; messageBox(0, strutsi, nil, 0); inputfile := $FF; exit; end; secsize := 1 shl cdheader.sectorsize; ssecsize := 1 shl cdheader.shortsize; // The list of sector IDs may be spread outside sector 0 (the header)... getmem(msat, cdheader.totalsects * 4); if cdheader.totalsects <= 109 then move(pointer(dword(datapoku) + 76)^, msat^, cdheader.totalsects * 4) else begin move(pointer(dword(datapoku) + 76)^, msat^, 109 * 4); i := cdheader.firstmastersid; j := 109; // i tracks msat sid, j is sids loaded while (i <> $FFFFFFFE) and (j < cdheader.totalsects) do begin if (cdheader.totalsects - j) * 4 <= secsize then move(pointer(dword(datapoku) + i * secsize + 512)^, pointer(dword(msat) + j * 4)^, (cdheader.totalsects - j) * 4) else move(pointer(dword(datapoku) + i * secsize + 512)^, pointer(dword(msat) + j * 4)^, secsize - 4); i := lonkero(datapoku^)[(i + 1) * secsize - 4 + 512]; inc(j, secsize div 4 - 4); end; end; // Concatenate contents of sectors indicated by the master sector table // to create the plain sector allocation table... getmem(sat, cdheader.totalsects * secsize); for i := 0 to cdheader.totalsects - 1 do move(pointer(dword(datapoku) + lonkero(msat^)[i] * secsize + 512)^, pointer(dword(sat) + i * secsize)^, secsize); freemem(msat, cdheader.totalsects * 4); msat := nil; // Create the short sector allocation table... if cdheader.firstshortsid <> $FFFFFFFE then begin outcon('Building short sector allocation table...'); buildstream(cdheader.firstshortsid, max_streams); ssatz := stream[max_streams].sizu; getmem(ssat, ssatz); move(stream[max_streams].content^, ssat^, ssatz); freemem(stream[max_streams].content, ssatz); stream[max_streams].content := NIL; end else outcon('No short sector allocation table present. Whew!'); // Read the compound file's internal directory structure... outcon('Building the directory structure...'); buildstream(cdheader.dirsid, max_streams); if stream[max_streams].sizu > sizeof(dirtable) then begin outcon('+++ Super Directory Attack Error +++'); strutsi := @stringtab[3][1]; messageBox(0, strutsi, nil, 0); halt(69); end; move(stream[max_streams].content^, dirtable, stream[max_streams].sizu); dirtentries := stream[max_streams].sizu div 128; outcon('Found $' + strhex(dirtentries) + ' entries.'); i := 0; while i < dirtentries do begin namu := ''; if dirtable[i].unicodenamezlen = 0 then namu := '' else begin for j := 0 to (dirtable[i].unicodenamezlen shr 1) - 2 do if (dirtable[i].unicodenamez[j] > 31) and (dirtable[i].unicodenamez[j] < 255) then namu := namu + chr(dirtable[i].unicodenamez[j]) else namu := namu + '<' + strhex(dirtable[i].unicodenamez[j]) + '>'; end; outcon(strdec(i) + ': ' + namu + space(31 - length(namu)) + ', type ' + strdec(dirtable[i].entrytype) + ', sid ' + strdec(dirtable[i].sid) + ', size ' + strdec(dirtable[i].streamsize)); if i = max_streams then begin outcon('+++ Flooding error +++ someone please help +++ noooo +++ blub blub blub +++'); strutsi := @stringtab[3][1]; messageBox(0, strutsi, nil, 0); halt(69); end; case dirtable[i].entrytype of // These are the stream types we are interested in handling... // 1 is a storage or virtual directory 1: outcon('Encountered a "storage" container... ignore and hope it goes away.'); // 5 is the root entry, pointing to the short sector storage stream, // and so cannot itself be saved as a short stream. 5: buildstream(dirtable[i].SID, i); // 2 means all other streams, mainly real data. These can be stored in // the regular way or in the short sector storage. 2: if dirtable[i].streamsize < cdheader.minstreamsize then buildbrook(dirtable[i].SID, i) else buildstream(dirtable[i].SID, i); end; inc(i); end; outcon('File dissected successfully! Building the worksheets...'); workbook[wbpoku].sup.internalsheets := 0; // Now process the streams... for j := 0 to i - 1 do inputstream(j); // Clean up... if ssat <> NIL then freemem(ssat, ssatz); freemem(sat, cdheader.totalsects * secsize); freemem(datapoku, datasize); ssat := NIL; sat := NIL; datapoku := NIL; for i := 0 to max_streams do if stream[i].content <> NIL then begin freemem(stream[i].content, stream[i].sizu); stream[i].content := NIL; end; outcon('Finished with this file.'); outcon(''); end; function findlastcolumn(boku, shetu : word) : byte; // Returns the rightmost column number in the given sheet. var flci : word; flcj : dword; begin findlastcolumn := max_cols; if workbook[boku].sheet[shetu].lastrow = 0 then exit; flci := workbook[boku].sheet[shetu].lastrow; while findlastcolumn > 0 do begin dec(flci); flcj := dword(workbook[boku].sheet[shetu].data) + dword(flci * sizeof(rowtype)); if rowtype(pointer(flcj)^).genus[findlastcolumn] <> 0 then break; if flci = 0 then begin flci := workbook[boku].sheet[shetu].lastrow; dec(findlastcolumn); end; end; end; function crunch(whence : pointer) : string; // Reads an uncompressed unicode string from whence^, which should probably // point into one of our loaded string tables... and returns an 8-bit string // in the windows cyrillic codepage. var lund : word; pond : byte; begin lund := word(whence^); if lund > 254 then begin lund := 254; outcon('Too much to crunch!'); end; crunch[0] := char(lund); pond := 0; while lund > 0 do begin inc(pond); inc(dword(whence), 2); case word(whence^) of 1..127, 171, 187: crunch[pond] := char(whence^); 160: ; // funky whitespace 1025: crunch[pond] := chr(168); // capital YO 1028: crunch[pond] := chr(170); // Ukrainian capital YE 1030: crunch[pond] := chr(178); // Ukrainian capital sharp I 1031: crunch[pond] := chr(175); // Ukrainian capital YI 1040..1103: crunch[pond] := char(word(whence^) - 848); 1105: crunch[pond] := chr(184); // lowercase YO 1108: crunch[pond] := chr(186); // Ukrainian lowercase YE 1110: crunch[pond] := chr(179); // Ukrainian lowercase sharp I 1111: crunch[pond] := chr(191); // Ukrainian lowercase YI 8211: crunch[pond] := chr(45); // silly long dash 8217: crunch[pond] := chr(39); // silly fake ' 8220, 8221, 8222: crunch[pond] := '"'; // silly quote-endquote pair 8470: crunch[pond] := chr(185); // Nommer else begin outcon('Funky character with code ' + strhex(word(whence^)) + 'h while crunching.'); crunch[pond] := chr(63); end; end; dec(lund); end; end; function getstr(wub, she : byte; column, row : word) : string; // Returns a string from a loaded workbook. var pork : pointer; begin if row >= workbook[wub].sheet[she].lastrow then begin outcon('GetStr called to fetch WB ' + strdec(wub) + ' sheet ' + strdec(she + 1) + ' column ' + strdec(column + 1) + ' row ' + strdec(row + 1) + '; doesn''t exist.'); getstr := '+++ OOB +++'; exit; end; pork := pointer(dword(workbook[wub].sheet[she].data) + dword(row * sizeof(rowtype))); if rowtype(pork^).genus[column] <> 1 then begin getstr := '+++ NON +++'; exit; end; getstr := crunch(pointer(dword(workbook[wub].stringtable) + lonkero(workbook[wub].stlist^)[rowtype(pork^).valus[column]])); end; function getval(wub, she : byte; column, row : word) : double; // Returns a number from a loaded workbook. // In case of errors, returns 0. Sorry. Test specifically with GetType. var pork : pointer; begin if row >= workbook[wub].sheet[she].lastrow then begin outcon('GetVal called to fetch WB ' + strdec(wub) + ' sheet ' + strdec(she + 1) + ' column ' + strdec(column + 1) + ' row ' + strdec(row + 1) + '; doesn''t exist.'); getval := 0; exit; end; pork := pointer(dword(workbook[wub].sheet[she].data) + dword(row * sizeof(rowtype))); case rowtype(pork^).genus[column] of 2: getval := rowtype(pork^).valus[column]; 5: getval := rowtype(pork^).dublus[column]; else begin getval := 0; outcon('GetVal tried to access non-value type!'); end; end; end; function gettype(wub, she : byte; column, row : word) : byte; inline; // Returns a cell content type from a loaded workbook. // All variables are zero-based. begin if row >= workbook[wub].sheet[she].lastrow then begin outcon('GetType called to fetch WB ' + strdec(wub) + ' sheet ' + strdec(she + 1) + ' column ' + strdec(column + 1) + ' row ' + strdec(row + 1) + '; doesn''t exist.'); gettype := $FF; end else gettype := rowtype(pointer(dword(workbook[wub].sheet[she].data) + dword(row * sizeof(rowtype)))^).genus[column]; end; {$ifdef bonk} procedure dumpbook(numor : byte); // Attempts to unload all worksheets of workbook[numor] into a non-standard // barebones hack RTF-file which loads just fine under OpenOffice. // Everything is output into table cells. // Beware, some worksheets may have thousands of useless cells just because // some idiot fell asleep on the enter key and never bothered to delete the // empty rows at the end of the sheet... output will still work, but handling // the file will be SLOW. var landfill : text; orc, goblin : dword; troll, kobold : byte; ogre : pointer; begin outcon('Dumping workbook ' + workbook[numor].filunamu); if workbook[numor].sup.internalsheets = 0 then begin outcon('No sheets loaded in it?'); exit; end; assign(landfill, workbook[numor].filunamu[1] + workbook[numor].filunamu[2] + '.rtf'); rewrite(landfill); if ioresult <> 0 then outcon('bonk! output RTF target file is not available'); writeln(landfill, '{\rtf1\ansi\deff0\adeflang1025'); writeln(landfill, '\paperh16837\paperw11905\margl1134\margr1134\margt1134\margb1134\fs28'); writeln(landfill, 'Workbook ' + upcase(workbook[numor].filunamu) + '\par'); writeln(landfill, 'Contains ' + strdec(workbook[numor].sup.internalsheets) + ' sheets\par\par'); for sheetpoku := 0 to workbook[numor].sup.internalsheets - 1 do begin writeln(landfill, '\fs24---------------------------------------------------------------------\par'); write(landfill, 'Sheet ', sheetpoku + 1,': '); for i := 0 to workbook[numor].sheet[sheetpoku].sheetnamelen - 1 do with workbook[numor].sheet[sheetpoku] do if sheetname[i] < 255 then write(landfill, chr(sheetname[i])) else write(landfill, '\u' + strdec(sheetname[i]) + space(5 - length(strdec(sheetname[i]))) + '?'); writeln(landfill, '\par\par\fs18'); orc := dword(workbook[numor].sheet[sheetpoku].data); troll := findlastcolumn(numor, sheetpoku); if workbook[numor].sheet[sheetpoku].lastrow > 0 then for i := 0 to workbook[numor].sheet[sheetpoku].lastrow - 1 do begin for j := 0 to troll do writeln(landfill, '\clbrdrt\brdrs\clbrdrl\brdrs\clbrdrb\brdrs\clbrdrr\brdrs\cellx' + strdec((j + 1) * 9637 div byte(troll + 1))); for j := 0 to troll do begin // cell content output if rowtype(pointer(orc)^).genus[j] = 1 then begin // string goblin := rowtype(pointer(orc)^).valus[j]; ogre := pointer(dword(workbook[numor].stringtable) + lonkero(workbook[numor].stlist^)[goblin]); kobold := word(ogre^); inc(dword(ogre), 2); while kobold > 0 do begin // Characters may be vanilla ASCII... if word(ogre^) < 128 then write(landfill, chr(word(ogre^))) // or a funky piece of whitespace... else if word(ogre^) = 160 then write(landfill, ' ') // or a glorious 16-bit unicode character! else write(landfill, '\u' + strdec(word(ogre^)) + space(5 - length(strdec(word(ogre^)))) + '?'); inc(dword(ogre), 2); dec(kobold); end; end; if rowtype(pointer(orc)^).genus[j] = 3 then // boolean write(landfill, boolean(rowtype(pointer(orc)^).valus[j])); if rowtype(pointer(orc)^).genus[j] = 4 then // error write(landfill, 'ERROR ', rowtype(pointer(orc)^).valus[j]); if rowtype(pointer(orc)^).genus[j] = 2 then // int write(landfill, rowtype(pointer(orc)^).valus[j]); if rowtype(pointer(orc)^).genus[j] = 5 then // doublefloat write(landfill, rowtype(pointer(orc)^).dublus[j]:1:2); // end cell content output writeln(landfill, '\cell'); end; inc(orc, sizeof(rowtype)); writeln(landfill, '\row'); end; writeln(landfill, '\pard'); end; writeln(landfill, '}'); close(landfill); outcon('Finished the dump.'); end; // Below is a suggested main program... include at least the first two // paragraphs to ensure happiness. begin // In case of any program termination, my ReleaseAll procedure is called, // to clean up memory and die gracefully if possible. addExitProc(releaseall); // This is the logfile printed into by Outcon(string), handy for debugging. // Question: Why am I not using stdout? assign(conf, 'xlog.txt'); rewrite(conf); // Assign your filenames here! Adjust the string's maximum length if needed. // Also, workbooks may only go from 0 up to max_books, also adjustable. workdir := ''; workbook[0].filunamu := 'data.xls'; workbook[1].filunamu := 'genesis.xls'; workbook[2].filunamu := 'exodus.xls'; workbook[3].filunamu := 'numbers.xls'; workbook[4].filunamu := 'deuteronomy.xls'; for wbpoku := 0 to 4 do begin sheetpoku := 0; suppoku := 0; stripoku := 0; inputfile(workdir + workbook[wbpoku].filunamu); dumpbook(wbpoku); end; // Program end automatically calls my exitproc, releaseall. ^_^ end. {$endif}