Skip to content

Commit

Permalink
WIP refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
VikParuchuri committed Jan 8, 2025
1 parent 89b2767 commit f75cc90
Show file tree
Hide file tree
Showing 14 changed files with 765 additions and 905 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ tabled_gui

```python
from tabled.extract import extract_tables
-from tabled.fileinput import load_pdfs_images
+from tabled.input.fileinput import load_pdfs_images
from tabled.inference.models import load_detection_models, load_recognition_models, load_layout_models

det_models, rec_models, layout_models = load_detection_models(), load_recognition_models(), load_layout_models()
Expand Down
2 changes: 1 addition & 1 deletion extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from tabled.extract import extract_tables
from tabled.formats import formatter
-from tabled.fileinput import load_pdfs_images
+from tabled.input.fileinput import load_pdfs_images
from tabled.inference.models import load_detection_models, load_recognition_models, load_layout_models


Expand Down
1,155 changes: 573 additions & 582 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ include = [

[tool.poetry.dependencies]
python = "^3.10"
-surya-ocr = "~0.8.0"
+surya-ocr = "~0.9.0"
click = "^8.1.7"
pypdfium2 = "^4.30.0"
pydantic-settings = "^2.5.2"
Expand Down
2 changes: 1 addition & 1 deletion table_app.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os

from tabled.assignment import assign_rows_columns
-from tabled.fileinput import load_pdfs_images
+from tabled.input.fileinput import load_pdfs_images
from tabled.formats.markdown import markdown_format
from tabled.inference.detection import detect_tables
from tabled.inference.recognition import get_cells, recognize_tables
Expand Down
Empty file added tabled/__init__.py
Empty file.
245 changes: 0 additions & 245 deletions tabled/assignment.py

This file was deleted.

17 changes: 0 additions & 17 deletions tabled/fileinput.py

This file was deleted.

11 changes: 4 additions & 7 deletions tabled/inference/detection.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
-from surya.layout import batch_layout_detection
from surya.postprocessing.util import rescale_bbox
-from surya.schema import Bbox

from tabled.settings import settings
+from surya.common.polygon import PolygonBox


def merge_boxes(box1, box2):
Expand All @@ -24,16 +22,15 @@ def merge_tables(page_table_boxes):
page_table_boxes[i][2] * expansion_factor, page_table_boxes[i][3]]
expanded_box2 = [page_table_boxes[j][0] * shrink_factor, page_table_boxes[j][1],
page_table_boxes[j][2] * expansion_factor, page_table_boxes[j][3]]
-if Bbox(bbox=expanded_box1).intersection_pct(Bbox(bbox=expanded_box2)) > 0:
+if PolygonBox(polygon=expanded_box1).intersection_pct(PolygonBox(polygon=expanded_box2)) > 0:
page_table_boxes[i] = merge_boxes(page_table_boxes[i], page_table_boxes[j])
ignore_boxes.add(j)

return [b for i, b in enumerate(page_table_boxes) if i not in ignore_boxes]


-def detect_tables(images, highres_images, models, layout_batch_size=settings.LAYOUT_BATCH_SIZE):
-layout_model, layout_processor = models
-layout_predictions = batch_layout_detection(images, layout_model, layout_processor, batch_size=layout_batch_size)
+def detect_tables(images, highres_images, layout_predictor, layout_batch_size=settings.LAYOUT_BATCH_SIZE):
+layout_predictions = layout_predictor(images, batch_size=layout_batch_size)

table_imgs = []
table_counts = []
Expand Down
26 changes: 0 additions & 26 deletions tabled/inference/models.py

This file was deleted.

Loading

0 comments on commit f75cc90

Please sign in to comment.