hellorahulk committed on
Commit
5c197b6
·
1 Parent(s): 6c30c7d

Fix Docling import and usage with DocumentConverter

Browse files
Files changed (2) hide show
  1. dockling_parser/parser.py +4 -2
  2. requirements.txt +1 -1
dockling_parser/parser.py CHANGED
@@ -2,7 +2,7 @@ import os
2
  from pathlib import Path
3
  from typing import Optional, Dict, Any, Union
4
  import magic
5
- from docling import DoclingDocument
6
  from datetime import datetime
7
 
8
  from .types import ParsedDocument, DocumentMetadata
@@ -23,6 +23,7 @@ class DocumentParser:
23
 
24
  def __init__(self, config: Optional[Dict[str, Any]] = None):
25
  self.config = config or {}
 
26
 
27
  def parse(self, file_path: Union[str, Path]) -> ParsedDocument:
28
  """
@@ -59,7 +60,8 @@ class DocumentParser:
59
  )
60
 
61
  # Parse document using Docling
62
- doc = DoclingDocument.from_file(str(file_path))
 
63
 
64
  # Extract content and structure
65
  content = doc.text
 
2
  from pathlib import Path
3
  from typing import Optional, Dict, Any, Union
4
  import magic
5
+ from docling.document_converter import DocumentConverter
6
  from datetime import datetime
7
 
8
  from .types import ParsedDocument, DocumentMetadata
 
23
 
24
  def __init__(self, config: Optional[Dict[str, Any]] = None):
25
  self.config = config or {}
26
+ self.converter = DocumentConverter()
27
 
28
  def parse(self, file_path: Union[str, Path]) -> ParsedDocument:
29
  """
 
60
  )
61
 
62
  # Parse document using Docling
63
+ result = self.converter.convert(str(file_path))
64
+ doc = result.document
65
 
66
  # Extract content and structure
67
  content = doc.text
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- docling>=0.1.0
2
  pydantic>=2.0.0
3
  python-magic>=0.4.27
4
  python-docx>=0.8.11
 
1
+ docling>=0.2.0
2
  pydantic>=2.0.0
3
  python-magic>=0.4.27
4
  python-docx>=0.8.11