I've been using the IFilter COM object to extract text from files. I've managed to extract the OLE property values (such as the Author value, the Company value, etc.), but I could not figure out how to tell which value is the Author, which is the Company, and so on.
CoInitialize(NULL);
IFilter *pFilt;
HRESULT hr = LoadIFilter( L"c:\\bla.docx", 0, (void**)&pFilt );
if ( FAILED( hr ) )
{
cout<<"Bla"<<endl;
}
ULONG flags;
hr = pFilt->Init( IFILTER_INIT_APPLY_INDEX_ATTRIBUTES, 0, 0, &flags );
if ( FAILED( hr ) )
{
cout<<"Bla"<<endl;
}
if(flags == 1)
{
cout<<"With OLE!"<<endl;
}
STAT_CHUNK chunk;
while ( SUCCEEDED( hr = pFilt->GetChunk( &chunk ) ) )
{
if ( CHUNK_TEXT == chunk.flags )
{
WCHAR awc[100];
ULONG cwc = 100;
while ( SUCCEEDED( hr = pFilt->GetText( &cwc, awc ) ) )
{
cout<<awc<<endl;
// process the text buffer. . .
}
}
else // CHUNK_VALUE
{
PROPVARIANT *pVar;
while ( SUCCEEDED( hr = pFilt->GetValue( &pVar ) ) )
{
**// Right here, i can see the value of pVar is the correct author, but i dont know how to tell this is the author, or the company etc..**
PropVariantClear( pVar );
CoTaskMemFree( pVar );
}
}
}
In other words, I need to know what the property ID is, and match it to the value of the property.
I've seen solutions using IPropertyStorage->ReadMultiple, but I'm trying to get the same result using IFilter.
Thanks a lot! I hope you can find the answer.
The property that a value belongs to is defined in the STAT_CHUNK's attribute field. It is defined as a FULLPROPSPEC structure, which can (most of the time) be mapped directly to the Windows Property System.
A FULLPROPSPEC can point either to a GUID + ID property, or to a custom property defined by its name (ideally, you should check psProperty.ulKind to determine which). Today, most implementations don't use the name form and stick to the GUID (property set) + PROPID (int) definition of a "property".
For example, here is sample code that determines the property's name, and its value formatted as a string, using PSGetNameFromPropertyKey and IPropertyDescription::FormatForDisplay:
...
// Handles a property-value chunk: chunk.attribute (a FULLPROPSPEC) says which
// property the value returned by IFilter::GetValue belongs to.
if (CHUNK_VALUE == chunk.flags)
{
    if (chunk.attribute.psProperty.ulKind == PRSPEC_PROPID)
    {
        // build a Windows Property System property key
        // need propsys.h & propsys.lib
        PROPERTYKEY pk;
        pk.fmtid = chunk.attribute.guidPropSet;
        pk.pid = chunk.attribute.psProperty.propid;

        // Print the property's name when the property system knows this key.
        PWSTR name = NULL;
        if (SUCCEEDED(PSGetNameFromPropertyKey(pk, &name)))
        {
            wprintf(L" name:'%s'\n", name);
            CoTaskMemFree(name);
        }

        // The property description is what formats the raw value for display.
        IPropertyDescription *pd = NULL;
        if (SUCCEEDED(PSGetPropertyDescription(pk, IID_PPV_ARGS(&pd))))
        {
            PROPVARIANT *pVar = NULL;
            hr = pFilt->GetValue(&pVar);
            if (SUCCEEDED(hr))
            {
                LPWSTR display = NULL;
                if (SUCCEEDED(pd->FormatForDisplay(*pVar, PDFF_DEFAULT, &display)))
                {
                    wprintf(L" value:'%s'\n", display);
                    CoTaskMemFree(display);
                }
                // PropVariantClear only frees what the variant points to; the
                // PROPVARIANT structure itself must be freed by the caller too.
                PropVariantClear(pVar);
                CoTaskMemFree(pVar);
            }
            pd->Release();
        }
        continue;
    } // otherwise the property is identified by a name string, not a PROPID

    PROPVARIANT *pVar = NULL;
    hr = pFilt->GetValue(&pVar);
    if (SUCCEEDED(hr))
    {
        // do something with the value
        PropVariantClear(pVar);
        CoTaskMemFree(pVar);  // release the structure allocated by the filter
    }
}