-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathkathak_trainer_mudra_detection.py
150 lines (108 loc) · 4.44 KB
/
kathak_trainer_mudra_detection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# -*- coding: utf-8 -*-
"""Kathak-Trainer - Mudra Detection.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1_A72P9l4JsDiFVs8fmrBECqO9hDx0SMk
## Checking the Driver
"""
# Shell magic (Colab-only): show the GPU and driver available in this runtime.
!nvidia-smi
"""## Mounting Drive"""
import os, sys
from google.colab import drive
# Mount Google Drive so datasets, checkpoints and results persist across sessions.
drive.mount('/content/drive')
"""### Changing Directory to Store all results
"""
# Commented out IPython magic to ensure Python compatibility.
# %cd /content/drive/MyDrive/Kathak_Trainer
"""### Checking the Main Directory"""
import os
# HOME is the current working directory; after the %cd above it is the Drive
# project folder. It is used below to build paths to training-run artifacts.
HOME = os.getcwd()
print(HOME)
"""## Installing Dependencies"""
!pip install roboflow
!pip install requests
!apt-get install ffmpeg
!pip install Pillow
!pip install ultralytics==8.0.196
from IPython import display
display.clear_output()
import ultralytics
ultralytics.checks()
"""### Download Yolov8 from Ultralytics"""
!pip install roboflow
from roboflow import Roboflow
import requests
import subprocess
from google.colab import files
from ultralytics import YOLO
from IPython.display import display, Image
from PIL import Image
"""### Add Roboflow"""
rf = Roboflow(api_key="6e1btkHFoBqSU5NzPKG3")
project = rf.workspace("aniruddha-jmp5a").project("kathak-trainer")
version = project.version(8)
dataset = version.download("yolov8")
"""# Training of the Custom Dataset
https://docs.ultralytics.com/modes/train/#train-settings
yolov8 Documentation link
"""
# Train, warm-starting from the train7 run's last.pt checkpoint, for up to
# 300 epochs at 640px; early-stops after 150 epochs without improvement.
!yolo detect train model=/content/drive/MyDrive/Kathak_Trainer/runs/detect/train7/weights/last.pt data=/content/drive/MyDrive/Kathak_Trainer/Kathak-Trainer-8/data.yaml epochs=300 imgsz=640 patience=150 save=true
# Resume an interrupted run from the default `train` directory's checkpoint.
!yolo train resume model={HOME}/runs/detect/train/weights/last.pt epochs=150
# List the artifacts produced by the latest run (train8).
!ls {HOME}/runs/detect/train8/
# Commented out IPython magic to ensure Python compatibility.
# %cd {HOME}
# Per-class confusion matrix from the train8 run (PIL; displays in a notebook cell).
Image.open(f'{HOME}/runs/detect/train8/confusion_matrix.png')
# Commented out IPython magic to ensure Python compatibility.
# %cd {HOME}
# Loss / mAP curves over the training epochs.
Image.open(f'{HOME}/runs/detect/train8/results.png')
# Commented out IPython magic to ensure Python compatibility.
# %cd {HOME}
# Sample validation batch with predicted boxes drawn.
Image.open(f'{HOME}/runs/detect/train8/val_batch0_pred.jpg')
# Commented out IPython magic to ensure Python compatibility.
# %cd {HOME}
# Validate the best train8 weights against the dataset's validation split.
!yolo task=detect mode=val model={HOME}/runs/detect/train8/weights/best.pt data={dataset.location}/data.yaml
# Commented out IPython magic to ensure Python compatibility.
# %cd {HOME}
# Run inference on a hand-gesture video. NOTE(review): this uses the `train/`
# run's weights while every other cell uses `train8/` — confirm which run was intended.
!yolo task=detect mode=predict model={HOME}/runs/detect/train/weights/best.pt source=/content/drive/MyDrive/Kathak_Trainer/Hand_Gesture.mp4
import glob
from IPython.display import Image, display

# Preview the first three annotated frames written by the predict run above.
# (Indentation of the loop body was lost in the copied source; restored here —
# `display(...)` and `print(...)` must run once per image.)
for image_path in glob.glob(f'{HOME}/runs/detect/predict3/*.jpg')[:3]:
    display(Image(filename=image_path, width=600))
    print("\n")
# Upload the trained train8 weights back to Roboflow so the model can be served.
project.version(dataset.version).deploy(model_type="yolov8", model_path=f"{HOME}/runs/detect/train8/")
# Install the Roboflow streaming-inference stack and visualization helpers.
!pip install inference
!pip install supervision
!pip install opencv-python
from inference import InferencePipeline
from inference.core.interfaces.camera.entities import VideoFrame
# import opencv to display our annotated images
import cv2
# import supervision to help visualize our predictions
import supervision as sv
# create a bounding box annotator and label annotator to use in our custom sink
# (module-level so the per-frame sink below can reuse them without re-allocating)
label_annotator = sv.LabelAnnotator()
box_annotator = sv.BoxAnnotator()
def my_custom_sink(predictions: dict, video_frame: VideoFrame):
    """Per-frame callback for InferencePipeline: annotate and display one frame.

    Args:
        predictions: Inference response dict; ``predictions["predictions"]``
            holds one dict per detection with at least a ``"class"`` key.
        video_frame: The captured frame the predictions were computed on.
    """
    # get the text labels for each prediction
    labels = [p["class"] for p in predictions["predictions"]]
    # load our predictions into the Supervision Detections api
    detections = sv.Detections.from_inference(predictions)
    # annotate a copy of the frame so the original buffer stays untouched
    image = label_annotator.annotate(
        scene=video_frame.image.copy(), detections=detections, labels=labels
    )
    image = box_annotator.annotate(image, detections=detections)
    # cv2.imshow / cv2.waitKey are disabled inside Colab notebooks (they raise
    # DisabledFunctionError); use Colab's patched helper to render inline.
    from google.colab.patches import cv2_imshow
    cv2_imshow(image)
# Build a streaming pipeline that runs the model over the video file and calls
# my_custom_sink once per frame.
# NOTE(review): `model_id` conventionally takes a Roboflow "project/version"
# id; confirm this inference version accepts a local .pt path here.
pipeline = InferencePipeline.init(
model_id="/content/drive/MyDrive/Kathak_Trainer/runs/detect/train8/weights/best.pt",
video_reference="/content/Y2meta.app-ASAMYUKTA HASTA _ single hand gestures (with meaning) _ kaladarpan _ bharatnatyam _ classical dance-(1080p).mp4",
on_prediction=my_custom_sink,
)
# Start the worker threads, then block until the video is fully processed.
pipeline.start()
pipeline.join()
"""## Problems That I tackles
the map50-95 was lower then map50
- tackled by changing the IOU to more stricter images for those classes
"""