diff --git a/README.md b/README.md
index 6335ebc..14cd1fe 100644
--- a/README.md
+++ b/README.md
@@ -1,14 +1,14 @@
-# bedmess
+# BEDMS
 
-bedmess is a tool used to standardize genomics/epigenomics metadata based on a schema chosen by the user ( eg. ENCODE, FAIRTRACKS).
+BEDMS (BED Metadata Standardizer) is a tool used to standardize genomics/epigenomics metadata based on a schema chosen by the user (e.g. ENCODE, FAIRTRACKS, BEDBASE).
 
-To install `attribute-standardizer` , you need to clone this repository first. Follow the steps given below to install:
+To install `attribute-standardizer`, you need to clone this repository first. Follow the steps given below to install:
 
 ```
-git clone https://github.com/databio/bedmess.git
+git clone https://github.com/databio/bedms.git
 
-cd bedmess
+cd bedms
 
 pip install .
 
 ```
@@ -16,13 +16,28 @@
 
 ## Usage
 
-Using Python, this is how you can run `attribute_standardizer` :
+Using Python, this is how you can run `attribute_standardizer` and print the results:
 
 ```
-from attribute_standardizer.attribute_standardizer import attr_standardizer
+from attribute_standardizer import AttrStandardizer
 
-attr_standardizer(pep=/path/to/pep, schema="ENCODE")
+model = AttrStandardizer("ENCODE")
+# or: model = AttrStandardizer("FAIRTRACKS")
+
+results = model.standardize(pep="geo/gse178283:default")
+
+print(results)
+
+```
+
+To see the available schemas, you can run:
+```
+schemas = model.get_available_schemas()
+
+print(schemas)
 
 ```
+This will print the available schemas as a list.
+
 
 You can use the format provided in the `trial.py` script in this repository as a reference.
\ No newline at end of file
diff --git a/attribute_standardizer/__init__.py b/attribute_standardizer/__init__.py
index e5081d0..374c0be 100644
--- a/attribute_standardizer/__init__.py
+++ b/attribute_standardizer/__init__.py
@@ -1 +1 @@
-from .attribute_standardizer import attr_standardizer
+from .attr_standardizer import AttrStandardizer
diff --git a/attribute_standardizer/attr_standardizer.py b/attribute_standardizer/attr_standardizer.py
new file mode 100644
index 0000000..13bf949
--- /dev/null
+++ b/attribute_standardizer/attr_standardizer.py
@@ -0,0 +1,207 @@
+import logging
+from typing import Dict, Tuple, Union
+
+import peppy
+import torch
+import torch.nn as nn
+import torch.nn.functional as torch_functional
+
+from .const import (
+    CONFIDENCE_THRESHOLD,
+    DROPOUT_PROB,
+    EMBEDDING_SIZE,
+    HIDDEN_SIZE,
+    INPUT_SIZE_BOW_BEDBASE,
+    INPUT_SIZE_BOW_ENCODE,
+    INPUT_SIZE_BOW_FAIRTRACKS,
+    OUTPUT_SIZE_BEDBASE,
+    OUTPUT_SIZE_ENCODE,
+    OUTPUT_SIZE_FAIRTRACKS,
+    SENTENCE_TRANSFORMER_MODEL,
+    PROJECT_NAME,
+)
+from .model import BoWSTModel
+from .utils import (
+    data_encoding,
+    data_preprocessing,
+    fetch_from_pephub,
+    get_any_pep,
+    load_from_huggingface,
+)
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(PROJECT_NAME)
+
+
+class AttrStandardizer:
+    def __init__(self, schema: str, confidence: float = CONFIDENCE_THRESHOLD) -> None:
+        """
+        Initializes the attribute standardizer with the user-provided schema and loads the model.
+
+        :param str schema: User-provided schema, can be "ENCODE", "FAIRTRACKS" or "BEDBASE".
+        :param float confidence: Confidence threshold for the predictions.
+        """
+        self.schema = schema
+        self.model = self._load_model()
+        self.conf_threshold = confidence
+
+    def _get_parameters(self) -> Tuple[int, int, int, int, int, float]:
+        """
+        Get the model parameters as per the chosen schema.
+
+        :return Tuple[int, int, int, int, int, float]: Tuple containing the model parameters.
+        """
+        if self.schema == "ENCODE":
+            return (
+                INPUT_SIZE_BOW_ENCODE,
+                EMBEDDING_SIZE,
+                EMBEDDING_SIZE,
+                HIDDEN_SIZE,
+                OUTPUT_SIZE_ENCODE,
+                DROPOUT_PROB,
+            )
+        elif self.schema == "FAIRTRACKS":
+            return (
+                INPUT_SIZE_BOW_FAIRTRACKS,
+                EMBEDDING_SIZE,
+                EMBEDDING_SIZE,
+                HIDDEN_SIZE,
+                OUTPUT_SIZE_FAIRTRACKS,
+                DROPOUT_PROB,
+            )
+        elif self.schema == "BEDBASE":
+            return (
+                INPUT_SIZE_BOW_BEDBASE,
+                EMBEDDING_SIZE,
+                EMBEDDING_SIZE,
+                HIDDEN_SIZE,
+                OUTPUT_SIZE_BEDBASE,
+                DROPOUT_PROB,
+            )
+        else:
+            raise ValueError(
+                f"Schema not available: {self.schema}. Presently, three schemas are available: ENCODE, FAIRTRACKS, BEDBASE"
+            )
+
+    def _load_model(self) -> nn.Module:
+        """
+        Loads the model from the HuggingFace repository and sets it to eval() mode.
+
+        :return nn.Module: Loaded Neural Network Model.
+        """
+        try:
+            model = load_from_huggingface(self.schema)
+            state_dict = torch.load(model)
+
+            (
+                input_size_values,
+                input_size_values_embeddings,
+                input_size_headers,
+                hidden_size,
+                output_size,
+                dropout_prob,
+            ) = self._get_parameters()
+
+            model = BoWSTModel(
+                input_size_values,
+                input_size_values_embeddings,
+                input_size_headers,
+                hidden_size,
+                output_size,
+                dropout_prob,
+            )
+            model.load_state_dict(state_dict)
+            model.eval()
+            return model
+
+        except Exception as e:
+            logger.error(f"Error loading the model: {str(e)}")
+            raise
+
+    def standardize(
+        self, pep: Union[str, peppy.Project]
+    ) -> Dict[str, Dict[str, float]]:
+        """
+        Fetches the user-provided PEP from the PEPHub registry path and returns the predictions.
+
+        :param Union[str, peppy.Project] pep: peppy.Project object or PEPHub registry path to the PEP.
+        :return Dict[str, Dict[str, float]]: Suggestions to the user.
+        """
+        if isinstance(pep, str):
+            pep = get_any_pep(pep)
+        elif isinstance(pep, peppy.Project):
+            pass
+        else:
+            raise ValueError(
+                "PEP should be either a path to PEPHub registry or peppy.Project object."
+            )
+        try:
+            df = fetch_from_pephub(pep)
+
+            X_values_st, X_headers_st, X_values_bow, num_rows = data_preprocessing(
+                df
+            )
+            (
+                X_headers_embeddings_tensor,
+                X_values_embeddings_tensor,
+                X_values_bow_tensor,
+                label_encoder,
+            ) = data_encoding(
+                num_rows,
+                X_values_st,
+                X_headers_st,
+                X_values_bow,
+                self.schema,
+                model_name=SENTENCE_TRANSFORMER_MODEL,
+            )
+
+            logger.info("Data Preprocessing completed.")
+
+            with torch.no_grad():
+                outputs = self.model(
+                    X_values_bow_tensor,
+                    X_values_embeddings_tensor,
+                    X_headers_embeddings_tensor,
+                )
+                probabilities = torch_functional.softmax(outputs, dim=1)
+
+                values, indices = torch.topk(probabilities, k=3, dim=1)
+                top_preds = indices.tolist()
+                top_confidences = values.tolist()
+
+                decoded_predictions = [
+                    label_encoder.inverse_transform(indices) for indices in top_preds
+                ]
+
+            suggestions = {}
+            for i, category in enumerate(X_headers_st):
+                category_suggestions = {}
+                if top_confidences[i][0] >= self.conf_threshold:
+                    for j in range(3):
+                        prediction = decoded_predictions[i][j]
+                        probability = top_confidences[i][j]
+                        if probability >= self.conf_threshold:
+                            category_suggestions[prediction] = probability
+                        else:
+                            break
+                else:
+                    category_suggestions["Not Predictable"] = 0.0
+
+                suggestions[category] = category_suggestions
+
+            return suggestions
+
+        except Exception as e:
+            logger.error(
+                f"Error occurred during standardization in standardize function: {str(e)}"
+            )
+            raise
+
+    @staticmethod
+    def get_available_schemas() -> list[str]:
+        """
+        Returns the list of available schemas.
+        :return list: List of available schemas.
+ """ + schemas = ["ENCODE", "FAIRTRACKS", "BEDBASE"] + return schemas diff --git a/attribute_standardizer/attribute_standardizer.py b/attribute_standardizer/attribute_standardizer.py deleted file mode 100644 index fcc82d4..0000000 --- a/attribute_standardizer/attribute_standardizer.py +++ /dev/null @@ -1,148 +0,0 @@ -import pandas as pd -import numpy as np -import torch -import torch.nn as nn -import torch.nn.functional as F -import logging -from .const import ( - HIDDEN_SIZE, - DROPOUT_PROB, - CONFIDENCE_THRESHOLD, - EMBEDDING_SIZE, - SENTENCE_TRANSFORMER_MODEL, -) - -from .utils import ( - fetch_from_pephub, - load_from_huggingface, - data_preprocessing, - data_encoding, -) -from .model import BoWSTModel -from huggingface_hub import hf_hub_download -from typing import Dict, List, Tuple, Any, Union - - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -def standardize_attr_names(csv_file: str, schema: str) -> Dict[str, Dict[str, float]]: - """ - Standardize attribute names. - - :param str csv_file: Path to the CSV file containing metadata to be standardized. - :param str schema: Schema type. - :return Dict[str, Dict[str, float]]: Suggestions for standardized attribute names. - """ - try: - X_values_st, X_headers_st, X_values_bow = data_preprocessing(csv_file) - ( - X_headers_embeddings_tensor, - X_values_embeddings_tensor, - X_values_bow_tensor, - label_encoder, - ) = data_encoding( - X_values_st, - X_headers_st, - X_values_bow, - schema, - model_name=SENTENCE_TRANSFORMER_MODEL, - ) - logger.info("Data Preprocessing completed.") - - model = load_from_huggingface(schema) - # print(model) - state_dict = torch.load(model) - - """Padding the input tensors.""" - - padded_data_values_tensor = torch.zeros( - X_values_bow_tensor.shape[0], state_dict["fc_values1.weight"].shape[1] - ) - padded_data_headers_tensor = torch.zeros( - X_headers_embeddings_tensor.shape[0], - state_dict["fc_headers1.weight"].shape[1], - ) - padded_data_values_embeddings_tensor = torch.zeros( - X_values_embeddings_tensor.shape[0], - state_dict["fc_values_embeddings1.weight"].shape[1], - ) - - padded_data_values_tensor[:, : X_values_bow_tensor.shape[1]] = ( - X_values_bow_tensor - ) - padded_data_headers_tensor[:, : X_headers_embeddings_tensor.shape[1]] = ( - X_headers_embeddings_tensor - ) - padded_data_values_embeddings_tensor[ - :, : X_values_embeddings_tensor.shape[1] - ] = X_values_embeddings_tensor - - input_size_values = padded_data_values_tensor.shape[1] - input_size_headers = EMBEDDING_SIZE - input_size_values_embeddings = EMBEDDING_SIZE - hidden_size = HIDDEN_SIZE - output_size = len(label_encoder.classes_) - dropout_prob = DROPOUT_PROB - model = BoWSTModel( - input_size_values, - input_size_values_embeddings, - input_size_headers, - hidden_size, - output_size, - dropout_prob, - ) - - model.load_state_dict(state_dict) - - model.eval() - - all_preds = [] - all_confidences = [] - with torch.no_grad(): - outputs = model( - padded_data_values_tensor, - padded_data_values_embeddings_tensor, - padded_data_headers_tensor, - ) - probabilities = F.softmax(outputs, dim=1) - confidence, predicted = torch.max(probabilities, 1) - all_preds.extend(predicted.tolist()) - all_confidences.extend(confidence.tolist()) - - decoded_predictions = label_encoder.inverse_transform(all_preds) - - suggestions = {} - for i, category in enumerate(X_headers_st): - if all_confidences[i] >= CONFIDENCE_THRESHOLD: - prediction = decoded_predictions[i] - probability = all_confidences[i] - else: - prediction = "Not 
Predictable" - probability = 0.0 - suggestions[category] = {prediction: probability} - - return suggestions - except Exception as e: - logger.error(f"Error occured in standardize_attr_names: {str(e)}") - return {} - - -def attr_standardizer(pep: str, schema: str) -> None: - """ - :param str pep: Path to the PEPhub registry containing the metadata csv file. - :param str schema: Schema Type chosen by the user. - """ - if not pep: - raise ValueError( - "pep argument is missing or empty. Please provide the PEPHub registry path to PEP" - ) - if not schema: - raise ValueError( - "schema argument is missing or empty. Please mention the schema of choice: ENCODE or FAIRTRACKS." - ) - csv_file = fetch_from_pephub(pep) - suggestions = standardize_attr_names(csv_file, schema) - - logger.info(suggestions) diff --git a/attribute_standardizer/const.py b/attribute_standardizer/const.py index 7ed657a..54e9b06 100644 --- a/attribute_standardizer/const.py +++ b/attribute_standardizer/const.py @@ -1,12 +1,24 @@ +PROJECT_NAME = "bedmess" + REPO_ID = "databio/attribute-standardizer-model6" -FILENAME_ENCODE = "model_encode.pth" -FILENAME_FAIRTRACKS = "model_fairtracks.pth" +MODEL_ENCODE = "model_encode.pth" +MODEL_FAIRTRACKS = "model_fairtracks.pth" +MODEL_BEDBASE = "model_bedbase.pth" ENCODE_VECTORIZER_FILENAME = "vectorizer_encode.pkl" FAIRTRACKS_VECTORIZER_FILENAME = "vectorizer_fairtracks.pkl" +BEDBASE_VECTORIZER_FILENAME = "vectorizer_bedbase.pkl" ENCODE_LABEL_ENCODER_FILENAME = "label_encoder_encode.pkl" FAIRTRACKS_LABEL_ENCODER_FILENAME = "label_encoder_fairtracks.pkl" +BEDBASE_LABEL_ENCODER_FILENAME = "label_encoder_bedbase.pkl" SENTENCE_TRANSFORMER_MODEL = "all-MiniLM-L6-v2" -HIDDEN_SIZE = 256 -DROPOUT_PROB = 0.203 -CONFIDENCE_THRESHOLD = 0.9 +HIDDEN_SIZE = 32 +DROPOUT_PROB = 0.113 +CONFIDENCE_THRESHOLD = 0.70 EMBEDDING_SIZE = 384 +INPUT_SIZE_BOW_ENCODE = 10459 +INPUT_SIZE_BOW_FAIRTRACKS = 13617 +OUTPUT_SIZE_FAIRTRACKS = 15 +OUTPUT_SIZE_ENCODE = 18 +NUM_CLUSTERS = 3 +INPUT_SIZE_BOW_BEDBASE = 13708 +OUTPUT_SIZE_BEDBASE = 12 diff --git a/attribute_standardizer/model.py b/attribute_standardizer/model.py index 23b9109..af212bc 100644 --- a/attribute_standardizer/model.py +++ b/attribute_standardizer/model.py @@ -29,18 +29,12 @@ def __init__( super(BoWSTModel, self).__init__() self.fc_values1 = nn.Linear(input_size_values, hidden_size) self.dropout_values1 = nn.Dropout(dropout_prob) - self.fc_values2 = nn.Linear(hidden_size, hidden_size) - self.dropout_values2 = nn.Dropout(dropout_prob) self.fc_values_embeddings1 = nn.Linear( input_size_values_embeddings, hidden_size ) self.dropout_values_embeddings1 = nn.Dropout(dropout_prob) - self.fc_values_embeddings2 = nn.Linear(hidden_size, hidden_size) - self.dropout_values_embeddings2 = nn.Dropout(dropout_prob) self.fc_headers1 = nn.Linear(input_size_headers, hidden_size) self.dropout_headers1 = nn.Dropout(dropout_prob) - self.fc_headers2 = nn.Linear(hidden_size, hidden_size) - self.dropout_headers2 = nn.Dropout(dropout_prob) self.fc_combined1 = nn.Linear(hidden_size * 3, hidden_size) self.dropout_combined1 = nn.Dropout(dropout_prob) self.fc_combined2 = nn.Linear(hidden_size, output_size) @@ -61,16 +55,10 @@ def forward( """ x_values = F.relu(self.fc_values1(x_values)) x_values = self.dropout_values1(x_values) - x_values = F.relu(self.fc_values2(x_values)) - x_values = self.dropout_values2(x_values) x_values_embeddings = F.relu(self.fc_values_embeddings1(x_values_embeddings)) x_values_embeddings = self.dropout_values_embeddings1(x_values_embeddings) - 
x_values_embeddings = F.relu(self.fc_values_embeddings2(x_values_embeddings))
-        x_values_embeddings = self.dropout_values_embeddings2(x_values_embeddings)
         x_headers = F.relu(self.fc_headers1(x_headers))
         x_headers = self.dropout_headers1(x_headers)
-        x_headers = F.relu(self.fc_headers2(x_headers))
-        x_headers = self.dropout_headers2(x_headers)
 
         x_combined = torch.cat((x_values, x_values_embeddings, x_headers), dim=1)
         x_combined = F.relu(self.fc_combined1(x_combined))
diff --git a/attribute_standardizer/utils.py b/attribute_standardizer/utils.py
index c16f025..aff492e 100644
--- a/attribute_standardizer/utils.py
+++ b/attribute_standardizer/utils.py
@@ -1,35 +1,49 @@
-import pandas as pd
+import pickle
+import warnings
+from collections import Counter
+from typing import Any, List, Optional, Tuple, Union
+
 import numpy as np
+import pandas as pd
+import peppy
 import torch
+from huggingface_hub import hf_hub_download
 from pephubclient import PEPHubClient
 from sentence_transformers import SentenceTransformer
-import pickle
-from sklearn.preprocessing import LabelEncoder
+from sklearn.cluster import KMeans
 from sklearn.feature_extraction.text import CountVectorizer
-from collections import Counter
-from huggingface_hub import hf_hub_download
-from typing import Optional, Any, List, Tuple, Union
+from sklearn.preprocessing import LabelEncoder
+
 from .const import (
-    REPO_ID,
-    FILENAME_ENCODE,
-    FILENAME_FAIRTRACKS,
+    BEDBASE_LABEL_ENCODER_FILENAME,
+    BEDBASE_VECTORIZER_FILENAME,
     ENCODE_LABEL_ENCODER_FILENAME,
-    FAIRTRACKS_LABEL_ENCODER_FILENAME,
     ENCODE_VECTORIZER_FILENAME,
+    FAIRTRACKS_LABEL_ENCODER_FILENAME,
     FAIRTRACKS_VECTORIZER_FILENAME,
-    SENTENCE_TRANSFORMER_MODEL,
+    MODEL_BEDBASE,
+    MODEL_ENCODE,
+    MODEL_FAIRTRACKS,
+    NUM_CLUSTERS,
+    REPO_ID,
+)
+
+# TODO : convert to single np array before converting to tensor
+warnings.filterwarnings(
+    "ignore",
+    category=UserWarning,
+    message="Creating a tensor from a list of numpy.ndarrays is extremely slow.",
 )
 
 
-def fetch_from_pephub(pep: str) -> pd.DataFrame:
+def fetch_from_pephub(project: peppy.Project) -> pd.DataFrame:
     """
     Fetches metadata from PEPhub registry.
 
-    :param str pep: Path to the PEPhub registry containing the metadata csv file
-    :return pd.DataFrame: path to the CSV file on the local system.
+    :param peppy.Project project: peppy.Project object containing the sample metadata.
+    :return pd.DataFrame: Sample table of the PEP as a DataFrame.
     """
-    phc = PEPHubClient()
-    project = phc.load_project(pep)
+    sample_table = project.sample_table
     csv_file_df = pd.DataFrame(sample_table)
     return csv_file_df
 
@@ -43,15 +57,17 @@ def load_from_huggingface(schema: str) -> Optional[Any]:
     :return Optional[Any]: Loaded model object
     """
     if schema == "ENCODE":
-        model = hf_hub_download(repo_id=REPO_ID, filename=FILENAME_ENCODE)
+        model = hf_hub_download(repo_id=REPO_ID, filename=MODEL_ENCODE)
     elif schema == "FAIRTRACKS":
-        model = hf_hub_download(repo_id=REPO_ID, filename=FILENAME_FAIRTRACKS)
+        model = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FAIRTRACKS)
+    elif schema == "BEDBASE":
+        model = hf_hub_download(repo_id=REPO_ID, filename=MODEL_BEDBASE)
 
     return model
 
 
 def data_preprocessing(
     df: pd.DataFrame,
-) -> Tuple[List[List[str]], List[str], List[List[str]]]:
+) -> Tuple[List[List[str]], List[str], List[List[str]], int]:
     """
     Preprocessing the DataFrame by extracting the column values and headers.
@@ -60,13 +76,16 @@
         - Nested list containing the comma separated values in each column for sentence transformer embeddings.
        - List containing the headers of the DataFrame.
        - Nested list containing the comma separated values in each column for Bag of Words encoding.
+        - Number of rows in the metadata CSV
     """
 
     X_values_st = [df[column].astype(str).tolist() for column in df.columns]
     X_headers_st = df.columns.tolist()
     X_values_bow = [df[column].astype(str).tolist() for column in df.columns]
 
-    return X_values_st, X_headers_st, X_values_bow
+    num_rows = df.shape[0]
+
+    return X_values_st, X_headers_st, X_values_bow, num_rows
 
 
 def get_top_k_average(val_embedding: List[np.ndarray], k: int) -> np.ndarray:
@@ -89,7 +108,50 @@
     return column_embedding_mean.numpy()
 
 
+def get_top_cluster_averaged(embeddings: List[np.ndarray]) -> np.ndarray:
+    """
+    Calculates the average of the largest embedding cluster.
+
+    :param list embeddings: List of embeddings, each embedding is a vector of values.
+    :return np.ndarray: The mean of the largest cluster as a NumPy array.
+    """
+    flattened_embeddings = [embedding.tolist() for embedding in embeddings]
+    kmeans = KMeans(n_clusters=NUM_CLUSTERS, random_state=0).fit(flattened_embeddings)
+    labels_kmeans = kmeans.labels_
+    cluster_counts = Counter(labels_kmeans)
+    most_common_cluster = max(cluster_counts, key=cluster_counts.get)
+    most_common_indices = [
+        idx for idx, label in enumerate(labels_kmeans) if label == most_common_cluster
+    ]
+    most_common_embeddings = [
+        torch.tensor(embeddings[idx]) for idx in most_common_indices
+    ]
+
+    if most_common_embeddings:
+        top_k_average = torch.mean(
+            torch.stack(most_common_embeddings), dim=0
+        ).unsqueeze(0)
+    else:
+        top_k_average = torch.zeros(1, len(embeddings[0]))  # most_common_embeddings is empty here, so fall back to a zero embedding
+
+    return top_k_average.numpy()
+
+
+def get_averaged(embeddings: List[np.ndarray]) -> np.ndarray:
+    """
+    Averages the embeddings.
+    :param list embeddings: List of embeddings, each embedding is a vector of values.
+    :return np.ndarray: The mean of all the embeddings as a NumPy array.
+    """
+    flattened_embeddings = [embedding.tolist() for embedding in embeddings]
+    flattened_embeddings_array = np.array(flattened_embeddings)
+    averaged_embedding = np.mean(flattened_embeddings_array, axis=0)
+
+    return averaged_embedding
+
+
 def data_encoding(
+    num_rows: int,
     X_values_st: List[List[str]],
     X_headers_st: List[str],
     X_values_bow: List[List[str]],
@@ -99,6 +161,7 @@
     """
     Encode input data in accordance with the user-specified schemas.
 
+    :param int num_rows: Number of rows in the sample metadata
     :param list X_values_st: Nested list containing the comma separated values in each column for sentence transformer embeddings.
     :param list X_headers_st: List containing the headers of the DataFrame.
     :param list X_values_bow: Nested list containing the comma separated values in each column for Bag of Words encoding.
@@ -114,7 +177,11 @@
     embeddings = []
     for column in X_values_st:
         val_embedding = sentence_encoder.encode(column, show_progress_bar=False)
-        embedding = get_top_k_average(val_embedding, k=3)
+        if num_rows >= 10:
+            embedding = get_top_cluster_averaged(val_embedding)
+        else:
+            embedding = get_averaged(val_embedding)
+
         embeddings.append(embedding)
     X_values_embeddings = embeddings
     if schema == "ENCODE":
@@ -167,11 +234,35 @@
     with open(lb_path, "rb") as f:
         label_encoder = pickle.load(f)
 
+    elif schema == "BEDBASE":
+        vectorizer = CountVectorizer()
+        vc_path = hf_hub_download(repo_id=REPO_ID, filename=BEDBASE_VECTORIZER_FILENAME)
+        with open(vc_path, "rb") as f:
+            vectorizer = pickle.load(f)
+        transformed_columns = []
+        for column in X_values_bow:
+            column_text = " ".join(column)
+            transformed_column = vectorizer.transform([column_text])
+            transformed_columns.append(transformed_column.toarray()[0])
+        transformed_columns = np.array(transformed_columns)
+        X_values_bow = transformed_columns
+        # Label Encoding
+        label_encoder = LabelEncoder()
+        lb_path = hf_hub_download(
+            repo_id=REPO_ID,
+            filename=BEDBASE_LABEL_ENCODER_FILENAME,
+        )
+        with open(lb_path, "rb") as f:
+            label_encoder = pickle.load(f)
+
     X_headers_embeddings_tensor = torch.tensor(
         X_headers_embeddings, dtype=torch.float32
     )
     X_values_embeddings_tensor = torch.tensor(X_values_embeddings, dtype=torch.float32)
     X_values_bow_tensor = torch.tensor(X_values_bow, dtype=torch.float32)
+    X_values_embeddings_tensor = X_values_embeddings_tensor.squeeze(
+        1
+    )  # brings the shape to [num_cols, embedding_size]
 
     return (
         X_headers_embeddings_tensor,
@@ -179,3 +270,21 @@
         X_values_bow_tensor,
         label_encoder,
     )
+
+
+def get_any_pep(pep: str) -> peppy.Project:
+    """
+    Get the PEP file from the local system or from PEPhub.
+
+    :param pep: Path to the PEP file or PEPhub registry path.
+
+    :return: peppy.Project object.
+    """
+
+    PEP_FILE_TYPES = ["yaml", "csv"]
+
+    is_local_file = any(pep.endswith(ext) for ext in PEP_FILE_TYPES)
+    if is_local_file:
+        return peppy.Project(pep)
+    else:
+        return peppy.Project.from_pephub(pep)
diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt
index 6642681..848e7e8 100644
--- a/requirements/requirements-all.txt
+++ b/requirements/requirements-all.txt
@@ -2,5 +2,5 @@ pandas
 numpy
 torch
 sentence-transformers
-pephubclient
-
+pephubclient>=0.4.2
+peppy>=0.40.5
diff --git a/scripts/model1.py b/scripts/model1.py
index bef41fb..0118add 100644
--- a/scripts/model1.py
+++ b/scripts/model1.py
@@ -29,7 +29,8 @@
 
 
 class NN1(nn.Module):
-    """ Simple Neural Network with a single Hidden Layer."""
+    """Simple Neural Network with a single Hidden Layer."""
+
     def __init__(self, input_size, hidden_size, output_size):
         """
         Initializes the NN1 model.
@@ -45,7 +46,7 @@ def __init__(self, input_size, hidden_size, output_size):
 
     def forward(self, x):
         """
-        Defines the forward pass of the neural network. 
+        Defines the forward pass of the neural network.
 
         :param torch.Tensor x: Input tensor.
         :return torch.Tensor: Output tensor after passing through the network.
@@ -86,14 +87,14 @@
         df_values_temp, test_size=0.5, random_state=42
     )
 
-    #Snippet for testing on unseen data
+    # Snippet for testing on unseen data
     """
     df_values_test = pd.read_csv(
         "/home/saanika/curation/scripts/bedmess_archive/data/encode_metadata_values_moderate.csv",
         sep=",",
     )
     """
-    #Comment out the above for training on seen data.
+    # Comment out the above for training on seen data.
 
X_values_train = [ df_values_train[column].astype(str).tolist() @@ -135,9 +136,9 @@ def data_split(df_values): def encoding(X_values_train, X_values_test, X_values_val, y_train, y_test, y_val): """ - Encodes the values for the model. + Encodes the values for the model. - :param list X_values_train: Training features. + :param list X_values_train: Training features. :param list X_values_test: Testing features. :param list X_values_val: Validation features. :param list y_train: Training labels. diff --git a/trial.py b/trial.py index 160ae30..1df22e1 100644 --- a/trial.py +++ b/trial.py @@ -1,3 +1,12 @@ -from attribute_standardizer.attribute_standardizer import attr_standardizer +from attribute_standardizer.attr_standardizer import AttrStandardizer -attr_standardizer(pep="geo/gse178283:default", schema="ENCODE") +model = AttrStandardizer("ENCODE") + +schemas = model.get_available_schemas() + +print(schemas) + +# results = model.standardize(pep="geo/gse178283:default") +results = model.standardize(pep="geo/gse228634:default") + +print(results)
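
---

A minimal usage sketch of the class-based API this patch introduces (the registry path is the one exercised in `trial.py`; selecting the top suggestion per attribute is illustrative glue code, not part of the library):

```python
from attribute_standardizer import AttrStandardizer

# Standardize sample metadata attribute names against the ENCODE schema.
model = AttrStandardizer("ENCODE")
suggestions = model.standardize(pep="geo/gse228634:default")

# `standardize` returns {attribute: {prediction: confidence, ...}} with up to
# three predictions per attribute; keep the highest-confidence suggestion.
for attribute, predictions in suggestions.items():
    best = max(predictions, key=predictions.get)
    print(f"{attribute} -> {best} ({predictions[best]:.2f})")
```

Attributes whose top prediction falls below the confidence threshold (`CONFIDENCE_THRESHOLD`, 0.70 after this change) come back as `{"Not Predictable": 0.0}`.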