/*  Copyright (c) MediaArea.net SARL. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license that can
 *  be found in the License.html file in the root of the source tree.
 */
 
//---------------------------------------------------------------------------
// Pre-compilation
#include "MediaInfo/PreComp.h"
#ifdef __BORLANDC__
    #pragma hdrstop
#endif
//---------------------------------------------------------------------------
 
//---------------------------------------------------------------------------
#include "MediaInfo/Setup.h"
//---------------------------------------------------------------------------
 
//---------------------------------------------------------------------------
#if defined(MEDIAINFO_PDF_YES)
//---------------------------------------------------------------------------
 
//---------------------------------------------------------------------------
#include "MediaInfo/Text/File_Pdf.h"
#include "MediaInfo/Tag/File_Xmp.h"
#include <cstdlib>
#include <algorithm>
using namespace std;
//---------------------------------------------------------------------------
 
namespace MediaInfoLib
{
 
//***************************************************************************
// Constructor/Destructor
//***************************************************************************
 
//---------------------------------------------------------------------------
File_Pdf::File_Pdf()
:File__Analyze()
{
}
 
//***************************************************************************
// Streams management
//***************************************************************************
 
//---------------------------------------------------------------------------
void File_Pdf::Streams_Accept()
{
    Fill(Stream_General, 0, General_Format, "PDF");
 
    Stream_Prepare(Stream_Text);
    Fill(Stream_Text, 0, "Format", "PDF");
}
 
//***************************************************************************
// Buffer - File header
//***************************************************************************
 
//---------------------------------------------------------------------------
bool File_Pdf::FileHeader_Begin()
{
    //Synchro
    if (5>Buffer_Size)
        return false;
    if (Buffer[0]!=0x25 //"%PDF-"
     || Buffer[1]!=0x50
     || Buffer[2]!=0x44
     || Buffer[3]!=0x46
     || Buffer[4]!=0x2D)
    {
        Reject();
        return false;
    }
 
    Accept();
 
    //Temp
    Catalog_Level=0;
    Offsets_Max=0;
    Objects_Current=Objects.end();
 
    //All should be OK...
    return true;
}
 
//---------------------------------------------------------------------------
void File_Pdf::FileHeader_Parse()
{
    string PdfHeader;
    Get_String(SizeOfLine(), PdfHeader,                         "Header");
    for (;;)
    {
        int64u CommentSize=SizeOfLine();
        if (Buffer_Offset+Element_Offset>=Buffer_Size)
        {
            Element_WaitForMoreData();
            return;
        }
        if (Buffer[Buffer_Offset+Element_Offset]!='%')
            break;
        Skip_String(CommentSize,                                "Comment");
    }
 
    //Filling
    Fill(Stream_General, 0, General_Format_Version, PdfHeader.substr(5));
 
    GoToFromEnd(9+2+10+2+5+2); // "startxref" + EOL +  10max digits + EOL + "%%EOF" + EOL
    State=State_Parsing_startxref;
}
 
//***************************************************************************
// Buffer - Global
//***************************************************************************
 
//---------------------------------------------------------------------------
void File_Pdf::Read_Buffer_Continue()
{
    switch (State)
    {
        case State_Parsing_xref         : xref(); if (!Element_IsWaitingForMoreData()) trailer(); break;
        case State_Parsing_startxref    : eof(); startxref(); break;
        case State_Parsing_object       : break; //Using elements
        default                         : Finish();
    }
}
 
//***************************************************************************
// Buffer - Per element
//***************************************************************************
 
//---------------------------------------------------------------------------
bool File_Pdf::Header_Begin()
{
    //Offsets_Current=Offsets.find(Objects_Current->second.Offset);
    //offsets::iterator Offsets_Next=Offsets_Current;
    //Offsets_Next++;
    //if (Offsets_Next!=Offsets.end() && Offsets_Next->first>File_Offset+Buffer_Size)
    //{
    //    Element_WaitForMoreData();
    //    return false;
    //}
 
    return true;
}
 
//---------------------------------------------------------------------------
void File_Pdf::Header_Parse()
{
    offsets::iterator Offsets_Next=upper_bound(Offsets.begin(), Offsets.end(), (int32u)(File_Offset+Buffer_Offset));
    if (Offsets_Next!=Offsets.end() && *Offsets_Next>File_Offset+Buffer_Size)
    {
        Element_WaitForMoreData();
        return;
    }
 
    int64u Size;
    //if (Offsets_Current==Offsets.end())
    //    Size=Offsets_Max-(File_Offset+Buffer_Offset);
    //else
    //    Size=Offsets_Current->first-(File_Offset+Buffer_Offset);
    if (Offsets_Next==Offsets.end())
        Size=Offsets_Max-(File_Offset+Buffer_Offset);
    else
        Size=*Offsets_Next-(File_Offset+Buffer_Offset);
 
    Header_Fill_Size(Size);
}
 
//---------------------------------------------------------------------------
void File_Pdf::Data_Parse()
{
    Element_Name("Object");
 
    string Line;
    Get_String(SizeOfLine(), Line,                              "Header");
 
    size_t Space_Pos=Line.find(' ');
    int32u ObjectNumber=Ztring().From_UTF8(Line.substr(0, Space_Pos)).To_int32u();
    Element_Info1(ObjectNumber);
    objects::iterator Object=Objects.find(ObjectNumber);
    if (Object==Objects.end())
        Skip_XX(Element_Size-Element_Offset,                    "Data");
    else
        switch(Object->second.Type)
        {
            case Type_Root      : Object_Root(); break;
            case Type_Info      : Object_Info(); break;
            case Type_Metadata  : Object_Metadata(); break;
            default             : Skip_XX(Element_Size-Element_Offset, "Data");
        }
 
    for (;;)
    {
        if (Objects_Current==Objects.end())
            break;
 
        Objects_Current->second.BottomPos++;
        if (Objects_Current->second.BottomPos>=Objects_Current->second.Bottoms.size())
        {
            if (Objects_Current->first==(int32u)-1)
            {
                //No more to parse
                Objects_Current=Objects.end();
                Objects.clear();
                Finish();
                break;
            }
 
            Objects_Current=Objects.find(Objects_Current->second.TopObject);
            continue;
        }
 
         Objects_Current=Objects.find(Objects_Current->second.Bottoms[Objects_Current->second.BottomPos]);
         GoTo(Objects_Current->second.Offset);
         break;
    }
}
 
//***************************************************************************
// Elements
//***************************************************************************
 
//---------------------------------------------------------------------------
void File_Pdf::xref()
{
    //Parsing
    Element_Begin1("Cross-Reference Table");
 
    Element_Begin1("Cross-Reference Section");
 
    string FirstLine;
    Skip_String(SizeOfLine(),                                       "Object name");
    Element_Begin1("Cross-Reference SubSection");
        Get_String(SizeOfLine(), FirstLine,                         "Header");
        size_t FirstLine_Space=FirstLine.find(' ');
        int32u Base=atoi((const char*)FirstLine.c_str());
        int32u Count=0;
        if (FirstLine_Space!=string::npos)
            Count=atoi((const char*)FirstLine.c_str()+FirstLine_Space+1);
 
        if (0x10000+20*Count>Buffer_Size && File_Offset+Buffer_Size<File_Size)
        {
            // We wait for more data
            Buffer_Offset=0;
            Element_Offset=0;
            Element_DoNotShow();
            Element_End0();
            Element_End0();
            Element_End0();
            Element_WaitForMoreData();
            return;
        }
 
        while (Element_Offset<Element_Size && (Buffer[Buffer_Offset+(size_t)Element_Offset]=='\r' || Buffer[Buffer_Offset+(size_t)Element_Offset]=='\n'))
            Element_Offset++;
        const int8u* Buffer_Temp=Buffer+Buffer_Offset+(size_t)Element_Offset+17;
        for (int32u Pos=0; Pos<Count; ++Pos)
        {
            if (*Buffer_Temp=='n')
            {
                char atoi_buf[18];
                atoi_buf[17]=0;
                memcpy(atoi_buf,(const char*)Buffer_Temp-17,17);
                const int32u Offset=(int32u)atoi(atoi_buf);
                Objects[Base+Pos].Offset=Offset;
                //Offsets[Offset]=Base+Pos;
                Offsets.push_back(Offset);
            }
            Buffer_Temp+=20;
            if (Pos>100)
                Element_Offset+=20;
            else
            {
                Skip_String(18,                                 "Entry"); Param_Info1(Base+Pos);
                Element_Offset+=2; //Skipping spaces at end and line return
            }
        }
    Element_End0();
 
    Element_End0();
 
    Element_End0();
 
    if (File_Offset+Buffer_Offset>Offsets_Max)
        Offsets_Max=(int32u)(File_Offset+Buffer_Offset);
}
 
//---------------------------------------------------------------------------
void File_Pdf::trailer()
{
    Element_Begin1("Trailer");
 
    //Parsing
    int32u Prev=(int32u)-1;
    string Key;
    Ztring Value;
    Skip_String(SizeOfLine(),                                       "Object name");
    while (Element_Offset<Element_Size)
    {
        if (Get_Next(Key, Value))
        {
            for (;;)
            {
                Get_Next(Key, Value);
                if (Key.empty())
                    break;
                else if (Key=="Root")
                {
                    int32u ObjectNumber=Value.To_int32u();
                    Objects[ObjectNumber].Type=Type_Root;
                    Objects[ObjectNumber].TopObject=(int32u)-1;
                    Objects[(int32u)-1].Bottoms.push_back(ObjectNumber);
                    Param_Info1(__T("Document Catalog is at offset 0x"+Ztring().From_Number(Objects[ObjectNumber].Offset, 16)));
                }
                else if (Key=="Info")
                {
                    int32u ObjectNumber=Value.To_int32u();
                    Objects[ObjectNumber].Type=Type_Info;
                    Objects[ObjectNumber].TopObject=(int32u)-1;
                    Objects[(int32u)-1].Bottoms.push_back(ObjectNumber);
                    Param_Info1(__T("Info is at offset 0x"+Ztring().From_Number(Objects[ObjectNumber].Offset, 16)));
                }
                else if (Key=="Prev")
                {
                    Prev=Value.To_int32u();
                    Param_Info1(__T("Previous Cross-Reference Table is at offset 0x"+Ztring().From_Number(Prev, 16)));
                }
            }
 
            continue;
        }
 
        if (Key.empty())
            break;
    }
 
    Element_End0();
 
    //Previous Cross-Reference Table
    if (Prev!=(int32u)-1)
    {
        GoTo(Prev);
        return;
    }
 
    objects::iterator Object_Top=Objects.find((int32u)-1);
    if (Offsets.empty() || Object_Top==Objects.end())
    {
        ForceFinish();
        return;
    }
 
    sort(Offsets.begin(), Offsets.end());
 
    //Offsets_Current=Offsets.end(); //No more used for the moment
    Objects[(int32u)-1].BottomPos=0;
    Objects_Current=Objects.find(Object_Top->second.Bottoms[0]);
    GoTo(Objects_Current->second.Offset);
    State=State_Parsing_object;
}
 
//---------------------------------------------------------------------------
void File_Pdf::startxref()
{
    //We need to find the exact begin
    Buffer_Offset=Buffer_Size-1;
    while (Buffer_Offset && (Buffer[Buffer_Offset]=='\r' || Buffer[Buffer_Offset]=='\n'))
        Buffer_Offset--;
    Buffer_Offset-=5; // "%%EOF"
    while (Buffer_Offset && (Buffer[Buffer_Offset]=='\r' || Buffer[Buffer_Offset]=='\n'))
        Buffer_Offset--;
    while (Buffer_Offset && Buffer[Buffer_Offset]>='0' && Buffer[Buffer_Offset]<='9') // Value
        Buffer_Offset--;
    while (Buffer_Offset && (Buffer[Buffer_Offset]=='\r' || Buffer[Buffer_Offset]=='\n'))
        Buffer_Offset--;
 
    Buffer_Offset-=8;
 
    //Parsing
    Element_Begin1("Cross-Reference Table Offset");
 
    string xrefOffsetS;
    Skip_String(SizeOfLine(),                                   "Object name");
    Get_String (SizeOfLine(), xrefOffsetS,                      "xref Offset");
    while (Buffer_Offset<Buffer_Size && (Buffer[Buffer_Offset]=='\r' || Buffer[Buffer_Offset]=='\n'))
        ++Buffer_Offset;
    int32u xref_Offset=atoi(xrefOffsetS.c_str());
 
    Element_End0();
 
    //Going to xref
    if (xref_Offset>Offsets_Max)
        Offsets_Max=xref_Offset;
    GoTo (xref_Offset);
    State=State_Parsing_xref;
}
 
//---------------------------------------------------------------------------
void File_Pdf::eof()
{
    if (File_Size!=(int64u)-1 && File_Offset+Buffer_Size<File_Size)
    {
        Element_WaitForMoreData();
        return;
    }
 
    //We need to find the exact begin
    Buffer_Offset=Buffer_Size-1;
    while (Buffer_Offset && (Buffer[Buffer_Offset]=='\r' || Buffer[Buffer_Offset]=='\n'))
        Buffer_Offset--;
 
    Buffer_Offset-=5;
 
    //Parsing
    Element_Begin1("End Of File");
 
    Skip_String(SizeOfLine(),                                   "Object name");
 
    Element_End0();
}
 
//---------------------------------------------------------------------------
void File_Pdf::Object_Root()
{
    Element_Info1("Document Catalog");
 
    //Parsing
    string Key;
    Ztring Value;
    while (Element_Offset<Element_Size)
    {
        if (Get_Next(Key, Value))
        {
            for (;;)
            {
                Get_Next(Key, Value);
                if (Key.empty())
                    break;
                else if (Key=="Metadata")
                {
                    int32u ObjectNumber=Value.To_int32u();
                    Objects[ObjectNumber].Type=Type_Metadata;
                    Objects[ObjectNumber].TopObject=Objects_Current->first;
                    Objects[Objects_Current->first].Bottoms.push_back(ObjectNumber);
                    Param_Info1(__T("Metadata is at offset 0x"+Ztring().From_Number(Objects[ObjectNumber].Offset)));
                }
            }
 
            continue;
        }
 
        if (Key.empty())
            break;
    }
}
 
//---------------------------------------------------------------------------
void File_Pdf::Object_Info()
{
    Element_Info1("Info");
 
    //Parsing
    string Key;
    Ztring Value;
    while (Element_Offset<Element_Size)
    {
        if (Get_Next(Key, Value))
        {
            for (;;)
            {
                Get_Next(Key, Value);
                if (Key.empty())
                    break;
            }
 
            continue;
        }
 
        if (Key.empty())
            break;
    }
}
 
//---------------------------------------------------------------------------
void File_Pdf::Object_Metadata()
{
    Element_Info1("Metadata");
 
    //Parsing
    string Key;
    Ztring Value;
    int32u Length=0;
    while (Element_Offset<Element_Size)
    {
        if (Get_Next(Key, Value))
        {
            for (;;)
            {
                Get_Next(Key, Value);
                if (Key.empty())
                    break;
                else if (Key=="Length")
                {
                    Length=Value.To_int32u();
                }
            }
 
            continue;
        }
 
        if (Key.empty())
            break;
        else if (Key=="stream")
        {
            //Removig end of lines
            if (Element_Offset<Element_Size && Buffer[Buffer_Offset+(size_t)Element_Offset]=='\r')
                Element_Offset++;
            if (Element_Offset<Element_Size && Buffer[Buffer_Offset+(size_t)Element_Offset]=='\n')
                Element_Offset++;
 
            File_Xmp MI;
            Open_Buffer_Init(&MI, Length);
            Open_Buffer_Continue(&MI, Buffer+Buffer_Offset+(size_t)Element_Offset, Length);
            Skip_XX(Length,                                     "Stream, Data");
            Open_Buffer_Finalize(&MI);
            Merge(MI, Stream_General, 0, 0);
        }
    }
}
 
//***************************************************************************
// Helpers
//***************************************************************************
 
//---------------------------------------------------------------------------
int64u File_Pdf::SizeOfLine()
{
    //while (Element_Offset<Element_Size && (Buffer[Buffer_Offset+(size_t)Element_Offset]=='\r' || Buffer[Buffer_Offset+(size_t)Element_Offset]=='\n' || Buffer[Buffer_Offset+(size_t)Element_Offset]=='<' || Buffer[Buffer_Offset+(size_t)Element_Offset]=='>'))
    while (Element_Offset<Element_Size && (Buffer[Buffer_Offset+(size_t)Element_Offset]=='\r' || Buffer[Buffer_Offset+(size_t)Element_Offset]=='\n' || Buffer[Buffer_Offset+(size_t)Element_Offset]==' '))
        Element_Offset++;
    size_t End=Buffer_Offset+(size_t)Element_Offset;
    while (End<Buffer_Size && Buffer[End]!='\r' && Buffer[End]!='\n' && !(End+1<Buffer_Size && Buffer[End]=='<' && Buffer[End+1]=='<') && !(End+1<Buffer_Size && Buffer[End]=='>' && Buffer[End+1]=='>'))
        End++;
    return End-(Buffer_Offset+(size_t)Element_Offset);
}
 
//---------------------------------------------------------------------------
bool File_Pdf::Get_Next(string &Key, Ztring &Value)
{
    Key.clear();
    Value.clear();
 
    string Line;
 
    //Removig end of lines
    while (Element_Offset<Element_Size && (Buffer[Buffer_Offset+(size_t)Element_Offset]=='\r' || Buffer[Buffer_Offset+(size_t)Element_Offset]=='\n' || Buffer[Buffer_Offset+(size_t)Element_Offset]==' '))
        Element_Offset++;
 
    //End
    if (Element_Offset>=Element_Size)
        return true;
 
    //Testing Catalog
    Peek_String (2, Line);
    if (Line=="<<")
    {
        Element_Offset+=2;
        Catalog_Level++;
        return true;
    }
    else if (Line==">>")
    {
        Element_Offset+=2;
        Catalog_Level--;
        return true;
    }
 
    //Getting a complete line
    Peek_String (SizeOfLine(), Line);
 
    //Testing Catalog
    size_t Catalog_End=Line.find(">>");
    if (Catalog_End!=String::npos)
        Line.resize(Catalog_End);
 
    //Testing stream
    if (Line=="stream")
    {
        Skip_String(Line.size(),                                "Stream, Header");
        Key=Line;
        return false;
    }
    if (Line=="endstream")
    {
        Skip_String(Line.size(),                                "Stream, Footer");
        Key=Line;
        return false;
    }
 
    //Testing object
    if (Line=="endobj")
    {
        Skip_String(Line.size(),                                "Footer");
        Key=Line;
        return false;
    }
 
    //Base
    int64u Line_Base=Element_Offset;
 
    //Testing next key
    size_t Line_End=0;
    size_t Line_Begin=Line_End;
 
    // Key-Value
    if (Line_Begin<Line.size() && Line[Line_Begin]=='/')
    {
        Line_End= Line_Begin+1;
        size_t HasParenthesis=0;
        size_t HasBracket=0;
        size_t HasSpace=0;
        size_t HasValue=0;
        for (;;)
        {
            if (Line_End==Line.size())
                break;
 
            if (!HasParenthesis && !HasBracket && HasValue && Line[Line_End]=='<' && Line_End+1<Line.size() && Line[Line_End+1]=='<')
                break;
            if (!HasParenthesis && !HasBracket && HasValue && Line[Line_End]=='/')
                break;
            else if (!HasValue && Line[Line_End]=='/')
                ++HasValue;
            else if (!HasValue && HasSpace)
                ++HasValue;
 
            if (Line[Line_End]==' ')
                ++HasSpace;
 
            if (Line[Line_End]=='(')
                ++HasParenthesis;
            if (HasParenthesis && Line[Line_End]==')')
                --HasParenthesis;
 
            if (Line[Line_End]=='[')
                ++HasBracket;
            if (HasBracket && Line[Line_End]==']')
                --HasBracket;
 
            ++Line_End;
        }
        while(Line_End && Line[Line_End-1]==' ')
            Line_End--; //Removing trailing spaces
 
        Element_Offset=Line_Base+Line_Begin;
        string KeyValue;
        Get_String(Line_End-Line_Begin, KeyValue,               "Key-Value");
 
        size_t Key_Max=KeyValue.find_first_of(" (");
        if (Key_Max==string::npos)
            Key_Max=KeyValue.size();
        Key=KeyValue.substr(1, Key_Max-1);
        size_t Value_Min=Key_Max;
        while (Value_Min<KeyValue.size() && KeyValue[Value_Min]==' ')
            ++Value_Min;
        if (Value_Min<KeyValue.size() && KeyValue[Value_Min]=='(')
        {
            ++Value_Min;
            size_t Value_Max=KeyValue.find(')', Value_Min);
            if (Value_Max!=string::npos)
            {
                //TODO
                Value.From_UTF8(KeyValue.c_str()+Value_Min, Value_Max-Value_Min);
            }
            else
                Value.From_UTF8(KeyValue.c_str()+Value_Min);
        }
        else
                Value.From_UTF8(KeyValue.c_str()+Value_Min);
        return false;
    }
 
    return false;
}
 
} //NameSpace
 
#endif //MEDIAINFO_PDF_YES

V1051 Consider checking for misprints. It's possible that the 'CommentSize' should be checked here.

V730 Not all members of a class are initialized inside the constructor. Consider inspecting: State, Offsets_Max, Catalog_Level.

V688 The 'Buffer_Temp' local variable possesses the same name as one of the class members, which can result in a confusion.