sourceforge.net logo  
Charset Detector
 
API
 
Types
Return values
  NS_OK = 0;
  NS_ERROR_OUT_OF_MEMORY = $8007000e;


Returned types
  rCharsetInfo = record
    Name: pChar;                // charset name
    CodePage: integer;          // MS Windows CodePage id
    Language: pChar;            //
  end;

  rAboutHolder = record
    MajorVersionNr: Cardinal;   // Library's Major Version #
    MinorVersionNr: Cardinal;   // Library's Minor Version #
    BuildVersionNr: Cardinal;   // Library's Build/Release Version #
    About: pChar;               // Copyleft information
  end;

Exported functions
  procedure chsd_Reset; stdcall;
Resets the Charset Detector state and prepares it for a new analysis.


  function chsd_HandleData(aBuf: PChar; aLen: integer): integer; stdcall;
Analyses the given buffer.
Parameters
  aBuf - pointer to buffer with text.
  aLen - buffer length.

Return value
  NS_ERROR_OUT_OF_MEMORY - failure. Unable to create internal objects.
  NS_OK - success.

Note
  The function can be called more than once to continue guessing. The Charset Detector remembers its last state until chsd_Reset is called.


  function chsd_Done: Boolean; stdcall;
Return value
  TRUE - Charset Detector is sure about text encoding.
  FALSE - Otherwise.

Note
  If the input buffer is smaller than 1K, chsd_Done always returns FALSE.


  procedure chsd_DataEnd; stdcall;
Signals the end of the data.
If the Charset Detector has no definite result (i.e. chsd_Done = FALSE), the best-guessed encoding is set as the result.


  function chsd_GetDetectedCharset: rCharsetInfo; stdcall;
Returns guessed charset.


  procedure chsd_GetKnownCharsets(var KnownCharsets: pChar); stdcall;
Fills the parameter with all supported charsets in form "CodePage - Name LineFeed".


  procedure chsd_GetAbout(var About: rAboutHolder); stdcall;
Fills the parameter with version and copyleft information.
Sample
  
  // WS: WideString; // Wide string which can be used in Unicode controls.
  
  // Get encoding of some buffer
  chsd_Reset;	
  chsd_HandleData(aBuf, aLen);

  if not chsd_Done then
    chsd_DataEnd;

  ChSInfo := chsd_GetDetectedCharset();
  
  if (ChSInfo.CodePage > 0) and
      Windows.IsValidCodePage(ChSInfo.CodePage) then
    begin
      // convert buffer to WideString
      OutputLength := MultiByteToWideChar(ChSInfo.CodePage, 0, aBuf, aLen, 
                                               nil, 0);
      SetLength(WS, OutputLength);
      MultiByteToWideChar(ChSInfo.CodePage, 0, aBuf, aLen, 
                               PWideChar(WS), OutputLength);

      // If you are using Unicode SynEdit
      SynEdit.Lines.Text := WS;
    end
  else
  ....
Nick Yakowlew © 2006