Kave Bahraman committed on
Commit
9896b0f
2 Parent(s): 5f923af a99b495

Merge pull request #3 from BerserkerMother/dev

Browse files
.gitignore CHANGED
@@ -3,6 +3,14 @@ __pycache__/
3
  *.py[cod]
4
  *$py.class
5
 
 
 
 
 
 
 
 
 
6
  # IDEs file
7
  .idea
8
 
 
3
  *.py[cod]
4
  *$py.class
5
 
6
+ # Project files
7
+ mlruns/
8
+ mlartifacts/
9
+ experiment_models/
10
+
11
+ # mlflow databases
12
+ mlflow.db
13
+
14
  # IDEs file
15
  .idea
16
 
.pylintrc CHANGED
@@ -428,7 +428,8 @@ disable=raw-checker-failed,
428
  suppressed-message,
429
  useless-suppression,
430
  deprecated-pragma,
431
- use-symbolic-message-instead
 
432
 
433
  # Enable the message, report, category or checker with the given id(s). You can
434
  # either give multiple identifier separated by comma (,) or put this option
 
428
  suppressed-message,
429
  useless-suppression,
430
  deprecated-pragma,
431
+ use-symbolic-message-instead,
432
+ R0902
433
 
434
  # Enable the message, report, category or checker with the given id(s). You can
435
  # either give multiple identifier separated by comma (,) or put this option
MLproject ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ name: Afterhours Elise Model pipeline
2
+
3
+ entry_points:
4
+ train_t5:
5
+ command: "python elise/src/train_t5_seq2seq.py"
6
+
Makefile CHANGED
@@ -1,10 +1,14 @@
1
  setup:
2
  pip install -r requirements.txt
 
 
3
  format:
4
  black elise/src/
5
  lint:
6
  pylint elise/src/
7
  gradio:
8
  python elise/src/app.py
 
 
9
  dev:
10
  make format lint gradio
 
1
  setup:
2
  pip install -r requirements.txt
3
+ test:
4
+ pytest
5
  format:
6
  black elise/src/
7
  lint:
8
  pylint elise/src/
9
  gradio:
10
  python elise/src/app.py
11
+ train_t5:
12
+ python elise/src/train_t5_seq2seq.py
13
  dev:
14
  make format lint gradio
elise/data_generation/data_generation_prompts.txt ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Imagine you are assisting me in generating data for training a T5 language model. Each record contains a user prompt where the user describes a place they want to dine, along with user intentions and an intention category, which is the label for training the model. The labels are user intentions.
2
+ Intentions categories are:
3
+ - Cuisine
4
+ - Location
5
+ - Price
6
+ - Atmosphere
7
+ - Service
8
+ - Reviews
9
+ - Accessibility
10
+ - Amenity & Special features
11
+ - Offerings
12
+ - Recommendations
13
+ - Crowd
14
+ - Payment
15
+ - Category
16
+
17
+ Here is one example:
18
+ Prompt: I have a gluten allergy and need to find a restaurant with gluten-free options. Do you know any good ones in this area?
19
+ Label: { "Location": "in this area", "Dietary restrictions": "gluten-free" }
20
+
21
+ Write 5 random records in json format containing user's prompts and user's intentions.
elise/data_generation/prompt_generation.txt ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Your task is to parse an unstructured job posting and turn it into a JSON containing the most important information. The job posting can describe one or more jobs at the same company. The JSON should consist of the following information:
2
+ - The company name (field name: "companyName", field type: string)
3
+ - the location of the company (field name: "companyLocation", field type: string); if not explicitly stated, you can try to infer the company's actual location from other clues, e.g., something like "Remote (US)" usually means that the company is located in the US; if the location cannot be inferred, set it to null
4
+ - a short description of what the company is doing or building (field name: "companyDescription", field type: string); try to keep it short (max length: ca. 300 characters)
5
+ - a list of advertised jobs (field name: "jobs", field type: array).
6
+ Each element of the "jobs" array should contain the following fields:
7
+ - The job title (field name: "jobTitle", field type: string); the job title should be given in the singular form (i.e., Frontend Developer instead of Frontend Developers)
8
+ - the salary range (field name: "salary", field type: string); only include explicitly stated salary amounts, otherwise set to null
9
+ - whether equity is part of the compensation (field name: "equity", field type: boolean)
10
+ - the benefits (field name: "benefits", field type: string); include things like 401k, insurance, equipment, child care, etc. if stated, otherwise set to null
11
+ - the location of the job (field name: "location", field type: string)
12
+ - whether this is a job for senior/experienced candidates (field name: "senior", field type: boolean); typically senior, staff, lead, principal, vp, cto, etc. positions are all regarded as senior level
13
+ - whether it is a remote opportunity (field name: "remote", field type: boolean)
14
+ - whether it can be done onsite from an office (field name: "onsite", field type: boolean)
15
+ - whether it can be done part-time (field name: "partTime", field type: boolean)
16
+ - whether it can be done full-time (field name: "fullTime", field type: boolean)
17
+ - the URL to the specific job description (field name: "jobUrl", field type: string)
18
+ - and any specific requirements/skills that might be stated (field name: "requirements", field type: string).
19
+ In general, if certain information is not stated, set the respective field to null. If the company seeks more than one person for the same role, include the role only once.
20
+
21
+ This is the job posting:
22
+
23
+ %s
24
+
25
+ The structured JSON representation is:
26
+ ```json
27
+ {"companyName":
elise/src/app.py CHANGED
@@ -10,7 +10,9 @@ from utils import df_to_json
10
 
11
 
12
  # prep models
13
- MODEL_CHECKPOINT = "BerserkerMother/restaurant_ner"
 
 
14
  parser = SentenceParser.from_huggingface(MODEL_CHECKPOINT)
15
 
16
  embedder = SentenceTransformer("all-MiniLM-L6-v2")
 
10
 
11
 
12
  # prep models
13
+ MODEL_CHECKPOINT = (
14
+ "tner/roberta-large-mit-restaurant" # "BerserkerMother/restaurant_ner"
15
+ )
16
  parser = SentenceParser.from_huggingface(MODEL_CHECKPOINT)
17
 
18
  embedder = SentenceTransformer("all-MiniLM-L6-v2")
elise/src/configs/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ """
2
+ All configs for ML project
3
+ """
4
+ from .train_t5 import T5TrainingConfig
elise/src/configs/logging_config.yaml CHANGED
@@ -1,12 +1,14 @@
1
  version: 1
 
2
  formatters:
3
  simple:
4
  format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
5
  handlers:
6
  console:
 
7
  class: logging.StreamHandler
8
  formatter: simple
9
  stream: ext://sys.stdout
10
- Root:
11
  Level: DEBUG
12
  handlers: [console]
 
1
  version: 1
2
+ disable_existing_loggers: False
3
  formatters:
4
  simple:
5
  format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
6
  handlers:
7
  console:
8
+ level: DEBUG
9
  class: logging.StreamHandler
10
  formatter: simple
11
  stream: ext://sys.stdout
12
+ root:
13
  level: DEBUG
14
  handlers: [console]
elise/src/configs/train_t5.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Training config for T5 Seq2Seq training
3
+ """
4
+
5
+ from dataclasses import dataclass
6
+
7
+
8
@dataclass
class T5TrainingConfig:
    """Training configuration for T5 seq2seq fine-tuning.

    Plain hyperparameter container consumed by the training entry point
    (elise/src/train_t5_seq2seq.py). All values can be overridden via the
    dataclass constructor.
    """

    # per-device batch sizes
    train_batch_size: int = 32
    eval_batch_size: int = 32
    epochs: int = 10
    # maximum tokenized sequence length fed to the model
    max_length: int = 512
    learning_rate: float = 3e-4
    # warmup steps for the LR scheduler
    num_warmup_steps: int = 200
    # mixed-precision mode passed to accelerate (e.g. "bf16", "fp16", "no")
    mixed_precision: str = "bf16"
    gradient_accumulation_steps: int = 4
    # directory where the fine-tuned model is written
    output_dir: str = "FlanT5_MIT_ner"
elise/src/data/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ """
2
+ Contians datasets and their connectors for model training
3
+ """
4
+
5
+ from .mit_seq2seq_dataset import MITRestaurants, get_default_transforms
elise/src/data/mit_seq2seq_dataset.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ seq2seq models datasets
3
+
4
+ Classes:
5
+ MITRestaurants: tner/mit_restaurant dataset to seq2seq
6
+
7
+ Functions:
8
+ get_default_transforms: default transforms for mit dataset
9
+ """
10
+ import datasets
11
+
12
+
13
class MITRestaurants:
    """
    Wrapper around the tner/mit_restaurant NER dataset for seq2seq training.

    Attributes
    ----------
    ner_tags: mapping from BIO tag name to integer id (mit_restaurant scheme)
    dataset: the underlying huggingface DatasetDict
    transforms: optional list of per-example map functions, applied lazily
        in hf_training()
    """

    # BIO tag -> id mapping used by tner/mit_restaurant
    ner_tags = {
        "O": 0,
        "B-Rating": 1,
        "I-Rating": 2,
        "B-Amenity": 3,
        "I-Amenity": 4,
        "B-Location": 5,
        "I-Location": 6,
        "B-Restaurant_Name": 7,
        "I-Restaurant_Name": 8,
        "B-Price": 9,
        "B-Hours": 10,
        "I-Hours": 11,
        "B-Dish": 12,
        "I-Dish": 13,
        "B-Cuisine": 14,
        "I-Price": 15,
        "I-Cuisine": 16,
    }

    # annotation kept as a string so importing this class does not require
    # the third-party `datasets` name to be resolved eagerly
    def __init__(self, dataset: "datasets.DatasetDict", transforms=None):
        """
        Construct the dataset wrapper.

        Parameters:
            dataset: huggingface mit_restaurant DatasetDict
            transforms: optional list of per-example transform functions
        """
        self.dataset = dataset
        self.transforms = transforms

    def hf_training(self):
        """
        Return the dataset with all transforms applied via Dataset.map,
        ready for the huggingface training ecosystem.
        """
        dataset_ = self.dataset
        if self.transforms:
            for transform in self.transforms:
                dataset_ = dataset_.map(transform)
        return dataset_

    def set_transforms(self, transforms):
        """
        Append (or initialize) the transform functions.

        Parameters:
            transforms: list of transform functions

        Returns:
            self, to allow call chaining.
        """
        if self.transforms:
            self.transforms += transforms
        else:
            self.transforms = transforms
        return self

    @classmethod
    def from_hf(cls, hf_path: str):
        """
        Alternate constructor: download the dataset from the huggingface hub.

        Parameters:
            hf_path: path/name of the dataset repo on the hub
        """
        return cls(datasets.load_dataset(hf_path))
87
+
88
+
89
def get_default_transforms():
    """
    Default transformations converting the MIT NER dataset to seq2seq format.

    Returns a single-element list containing a map function that joins the
    example tokens into a plain-text input and renders the BIO-tagged
    entities as "Category: value, value" lines for the target text.
    """
    # invert tag-name -> id into id -> tag-name for decoding
    label_names = {v: k for k, v in MITRestaurants.ner_tags.items()}

    def decode_tags(tags, words):
        """Group BIO-tagged words into {category: [phrases]}.

        Iterates in reverse so each I-run is fully accumulated before its
        opening B-tag closes the phrase.
        """
        dict_out = {}
        word_ = ""
        for tag, word in zip(tags[::-1], words[::-1]):
            if tag == 0:
                # "O" tag: drop any partial accumulation so a malformed
                # I-run (an I-tag with no opening B) cannot leak its words
                # into the next entity. No-op for well-formed BIO.
                word_ = ""
                continue
            word_ = word + " " + word_
            if label_names[tag].startswith("B"):
                # strip the "B-" prefix to get the category name
                tag_name = label_names[tag][2:]
                word_ = word_.strip()
                if tag_name not in dict_out:
                    dict_out[tag_name] = [word_]
                else:
                    dict_out[tag_name].append(word_)
                word_ = ""
        return dict_out

    def format_to_text(decoded):
        """Render {category: [phrases]} as one "key: v1, v2" line per key."""
        text = ""
        for key, value in decoded.items():
            text += f"{key}: {', '.join(value)}\n"
        return text

    def generate_seq2seq_data(example):
        """Map one NER example to {"tokens": input_text, "labels": target}."""
        decoded = decode_tags(example["tags"], example["tokens"])
        return {
            "tokens": " ".join(example["tokens"]),
            "labels": format_to_text(decoded),
        }

    return [generate_seq2seq_data]
elise/src/excutors/__init__.py ADDED
File without changes
elise/src/models/__init__.py ADDED
File without changes
elise/src/notebooks/flant_t5_playground.ipynb ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import transformers"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "code",
14
+ "execution_count": 2,
15
+ "metadata": {},
16
+ "outputs": [],
17
+ "source": [
18
+ "pipe = transformers.pipeline(\n",
19
+ " \"text2text-generation\", model=\"/home/kave/work/Elise/output_dir/\"\n",
20
+ ")"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": 7,
26
+ "metadata": {},
27
+ "outputs": [
28
+ {
29
+ "data": {
30
+ "text/plain": [
31
+ "[{'generated_text': ''}]"
32
+ ]
33
+ },
34
+ "execution_count": 7,
35
+ "metadata": {},
36
+ "output_type": "execute_result"
37
+ }
38
+ ],
39
+ "source": [
40
+ "pipe(\"What are you?\")"
41
+ ]
42
+ },
43
+ {
44
+ "cell_type": "code",
45
+ "execution_count": null,
46
+ "metadata": {},
47
+ "outputs": [],
48
+ "source": []
49
+ }
50
+ ],
51
+ "metadata": {
52
+ "kernelspec": {
53
+ "display_name": "Python 3 (ipykernel)",
54
+ "language": "python",
55
+ "name": "python3"
56
+ },
57
+ "language_info": {
58
+ "codemirror_mode": {
59
+ "name": "ipython",
60
+ "version": 3
61
+ },
62
+ "file_extension": ".py",
63
+ "mimetype": "text/x-python",
64
+ "name": "python",
65
+ "nbconvert_exporter": "python",
66
+ "pygments_lexer": "ipython3",
67
+ "version": "3.10.0"
68
+ }
69
+ },
70
+ "nbformat": 4,
71
+ "nbformat_minor": 2
72
+ }
elise/src/notebooks/play.ipynb ADDED
@@ -0,0 +1,443 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "from pprint import pprint"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "code",
14
+ "execution_count": 2,
15
+ "metadata": {},
16
+ "outputs": [],
17
+ "source": [
18
+ "from playground_prompt.version1 import prompts\n",
19
+ "\n",
20
+ "test_p = prompts[\"test_prompts\"]"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": 3,
26
+ "metadata": {},
27
+ "outputs": [],
28
+ "source": [
29
+ "import sys\n",
30
+ "\n",
31
+ "sys.path.append(\"/home/kave/work/Elise/elise/src\")"
32
+ ]
33
+ },
34
+ {
35
+ "cell_type": "code",
36
+ "execution_count": 4,
37
+ "metadata": {},
38
+ "outputs": [
39
+ {
40
+ "name": "stderr",
41
+ "output_type": "stream",
42
+ "text": [
43
+ "Using /home/kave/.cache/torch_extensions/py310_cu117 as PyTorch extensions root...\n",
44
+ "Detected CUDA files, patching ldflags\n",
45
+ "Emitting ninja build file /home/kave/.cache/torch_extensions/py310_cu117/cuda_kernel/build.ninja...\n",
46
+ "Building extension module cuda_kernel...\n",
47
+ "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n",
48
+ "Loading extension module cuda_kernel...\n",
49
+ "Failed to load CUDA kernels. Mra requires custom CUDA kernels. Please verify that compatible versions of PyTorch and CUDA Toolkit are installed: /home/kave/miniconda3/envs/afterhours_dev/lib/python3.10/site-packages/zmq/backend/cython/../../../../.././libstdc++.so.6: version `GLIBCXX_3.4.32' not found (required by /home/kave/.cache/torch_extensions/py310_cu117/cuda_kernel/cuda_kernel.so)\n"
50
+ ]
51
+ },
52
+ {
53
+ "name": "stdout",
54
+ "output_type": "stream",
55
+ "text": [
56
+ "ninja: no work to do.\n"
57
+ ]
58
+ }
59
+ ],
60
+ "source": [
61
+ "from parser import SentenceParser\n",
62
+ "\n",
63
+ "parser = SentenceParser.from_huggingface(\"BerserkerMother/restaurant_ner\")"
64
+ ]
65
+ },
66
+ {
67
+ "cell_type": "code",
68
+ "execution_count": 5,
69
+ "metadata": {},
70
+ "outputs": [
71
+ {
72
+ "name": "stdout",
73
+ "output_type": "stream",
74
+ "text": [
75
+ "[{'Cuisine': ['fast -']},\n",
76
+ " {'Amenity': ['lively']},\n",
77
+ " {'Amenity': ['nice', 'fun'],\n",
78
+ " 'Hours': ['dinner'],\n",
79
+ " 'Rating': ['good', 'good'],\n",
80
+ " 'Services': ['ambian']},\n",
81
+ " {'Cuisine': ['authentic indian']},\n",
82
+ " {'Amenity': ['romantic', 'nice']},\n",
83
+ " {},\n",
84
+ " {},\n",
85
+ " {'Amenity': ['live music']},\n",
86
+ " {'DS': ['gluten allergy', 'gluten - free']},\n",
87
+ " {'Amenity': ['cozy']}]\n"
88
+ ]
89
+ }
90
+ ],
91
+ "source": [
92
+ "ners = parser.get_ner(test_p)\n",
93
+ "pprint(ners)"
94
+ ]
95
+ },
96
+ {
97
+ "cell_type": "code",
98
+ "execution_count": 6,
99
+ "metadata": {},
100
+ "outputs": [
101
+ {
102
+ "name": "stdout",
103
+ "output_type": "stream",
104
+ "text": [
105
+ "[{'Semantic': ['fast -']},\n",
106
+ " {'Semantic': ['lively']},\n",
107
+ " {'Hours': ['dinner'], 'Semantic': ['nice', 'fun']},\n",
108
+ " {'Semantic': ['authentic indian']},\n",
109
+ " {'Semantic': ['romantic', 'nice']},\n",
110
+ " {},\n",
111
+ " {},\n",
112
+ " {'Semantic': ['live music']},\n",
113
+ " {},\n",
114
+ " {'Semantic': ['cozy']}]\n"
115
+ ]
116
+ }
117
+ ],
118
+ "source": [
119
+ "parsed_prompts = parser.parse(ners)\n",
120
+ "pprint(parsed_prompts)"
121
+ ]
122
+ },
123
+ {
124
+ "cell_type": "code",
125
+ "execution_count": 7,
126
+ "metadata": {},
127
+ "outputs": [],
128
+ "source": [
129
+ "from sentence_transformers import SentenceTransformer\n",
130
+ "\n",
131
+ "embedder = SentenceTransformer(\"all-MiniLM-L6-v2\")"
132
+ ]
133
+ },
134
+ {
135
+ "cell_type": "code",
136
+ "execution_count": 8,
137
+ "metadata": {},
138
+ "outputs": [
139
+ {
140
+ "name": "stdout",
141
+ "output_type": "stream",
142
+ "text": [
143
+ " Accessibility \\\n",
144
+ "0 Wheelchair accessible entrance, Wheelchair acc... \n",
145
+ "1 Wheelchair accessible entrance, Wheelchair acc... \n",
146
+ "2 Wheelchair accessible entrance, Wheelchair acc... \n",
147
+ "3 Wheelchair accessible entrance, Wheelchair acc... \n",
148
+ "4 Wheelchair accessible entrance, Wheelchair acc... \n",
149
+ ".. ... \n",
150
+ "274 Wheelchair accessible entrance, Wheelchair acc... \n",
151
+ "275 Wheelchair accessible elevator, Wheelchair acc... \n",
152
+ "276 Wheelchair accessible entrance, Wheelchair acc... \n",
153
+ "277 Wheelchair accessible seating, Wheelchair acce... \n",
154
+ "278 Wheelchair accessible parking lot, Wheelchair ... \n",
155
+ "\n",
156
+ " Amenities & Special featrures \\\n",
157
+ "0 Bar onsite, Good for kids, High chairs, Restro... \n",
158
+ "1 Good for kids, High chairs, Restroom \n",
159
+ "2 Bar onsite, Dogs allowed, Good for kids, High ... \n",
160
+ "3 Restroom \n",
161
+ "4 Restroom \n",
162
+ ".. ... \n",
163
+ "274 Bar onsite, Good for kids, High chairs, Restro... \n",
164
+ "275 Bar onsite, High chairs, Restroom, Wi-Fi \n",
165
+ "276 Bar onsite, Dogs allowed, Good for kids, High ... \n",
166
+ "277 Bar onsite, High chairs, Restroom \n",
167
+ "278 Bar onsite, High chairs, Restroom \n",
168
+ "\n",
169
+ " Atmosphere \\\n",
170
+ "0 Casual, Cozy \n",
171
+ "1 Casual, Cozy \n",
172
+ "2 Casual, Cozy \n",
173
+ "3 Casual, Cozy \n",
174
+ "4 Casual, Cozy, Romantic \n",
175
+ ".. ... \n",
176
+ "274 Casual, Cozy \n",
177
+ "275 Casual, Cozy \n",
178
+ "276 Casual, Cozy, Romantic \n",
179
+ "277 Casual, Cozy \n",
180
+ "278 Casual, Cozy \n",
181
+ "\n",
182
+ " Crowd \\\n",
183
+ "0 Family friendly, Groups, LGBTQ+ friendly, Tran... \n",
184
+ "1 Family friendly, Groups \n",
185
+ "2 Family friendly, Groups \n",
186
+ "3 NaN \n",
187
+ "4 Groups \n",
188
+ ".. ... \n",
189
+ "274 Family friendly, Groups, LGBTQ+ friendly \n",
190
+ "275 Groups \n",
191
+ "276 Groups, LGBTQ+ friendly, Transgender safespace \n",
192
+ "277 Groups \n",
193
+ "278 Groups \n",
194
+ "\n",
195
+ " Dining options \\\n",
196
+ "0 Lunch, Dinner, Dessert, Seating \n",
197
+ "1 Lunch, Dinner, Dessert, Seating \n",
198
+ "2 Lunch, Dinner, Catering, Dessert, Seating \n",
199
+ "3 Breakfast, Lunch, Dessert, Seating \n",
200
+ "4 Dinner, Dessert, Seating \n",
201
+ ".. ... \n",
202
+ "274 Dinner, Counter service, Dessert, Seating \n",
203
+ "275 Breakfast, Brunch, Lunch, Dinner, Dessert, Sea... \n",
204
+ "276 Breakfast, Brunch, Lunch, Dinner, Dessert, Sea... \n",
205
+ "277 Brunch, Lunch, Dinner, Dessert, Seating \n",
206
+ "278 Brunch, Lunch, Dinner, Dessert, Seating \n",
207
+ "\n",
208
+ " Offerings \\\n",
209
+ "0 Alcohol, All you can eat, Beer, Cocktails, Cof... \n",
210
+ "1 Alcohol, All you can eat, Beer, Coffee, Halal,... \n",
211
+ "2 Alcohol, Beer, Coffee, Halal, Healthy options,... \n",
212
+ "3 Coffee \n",
213
+ "4 Alcohol, Beer, Cocktails, Coffee, Hard liquor,... \n",
214
+ ".. ... \n",
215
+ "274 Alcohol, Beer, Coffee, Healthy options, Kids' ... \n",
216
+ "275 Alcohol, Beer, Cocktails, Coffee, Hard liquor,... \n",
217
+ "276 Alcohol, Beer, Cocktails, Coffee, Hard liquor,... \n",
218
+ "277 Alcohol, Beer, Cocktails, Coffee, Hard liquor,... \n",
219
+ "278 Alcohol, Beer, Cocktails, Coffee, Hard liquor,... \n",
220
+ "\n",
221
+ " Payment Planning \\\n",
222
+ "0 Debit cards, NFC mobile payments, Credit cards Accepts reservations \n",
223
+ "1 Cash-only, Debit cards, NFC mobile payments Accepts reservations \n",
224
+ "2 Debit cards, NFC mobile payments, Credit cards Accepts reservations \n",
225
+ "3 Debit cards NaN \n",
226
+ "4 NaN Accepts reservations \n",
227
+ ".. ... ... \n",
228
+ "274 Debit cards, NFC mobile payments, Credit cards NaN \n",
229
+ "275 Debit cards, NFC mobile payments, Credit cards Accepts reservations \n",
230
+ "276 Debit cards, NFC mobile payments, Credit cards Accepts reservations \n",
231
+ "277 Debit cards, NFC mobile payments, Credit cards Accepts reservations \n",
232
+ "278 Debit cards, NFC mobile payments Accepts reservations \n",
233
+ "\n",
234
+ " Service \\\n",
235
+ "0 Outdoor seating, Delivery, Takeout, Dine-in \n",
236
+ "1 Outdoor seating, Dine-in \n",
237
+ "2 Outdoor seating, Curbside pickup, No-contact d... \n",
238
+ "3 Outdoor seating, Takeout, Dine-in \n",
239
+ "4 Outdoor seating, Delivery, Takeout, Dine-in \n",
240
+ ".. ... \n",
241
+ "274 Outdoor seating, Takeout, Dine-in, Delivery \n",
242
+ "275 Outdoor seating, Dine-in, Delivery, Takeout \n",
243
+ "276 Outdoor seating, Curbside pickup, Takeout, Din... \n",
244
+ "277 Outdoor seating, Takeout, Dine-in, Delivery \n",
245
+ "278 Outdoor seating, Dine-in, Delivery, Takeout \n",
246
+ "\n",
247
+ " categories \\\n",
248
+ "0 NaN \n",
249
+ "1 Korean barbecue restaurant \n",
250
+ "2 Indian restaurant, Asian restaurant, Health fo... \n",
251
+ "3 Cafe, Breakfast restaurant, Brunch restaurant,... \n",
252
+ "4 Thai restaurant \n",
253
+ ".. ... \n",
254
+ "274 NaN \n",
255
+ "275 , Cocktail bar, Coffee shop, Coworking space, ... \n",
256
+ "276 NaN \n",
257
+ "277 , Cafe \n",
258
+ "278 , Culinary school, Event venue, Function room ... \n",
259
+ "\n",
260
+ " Category \\\n",
261
+ "0 NaN \n",
262
+ "1 Korean barbecue restaurant \n",
263
+ "2 Indian restaurant \n",
264
+ "3 Cafe \n",
265
+ "4 Thai restaurant \n",
266
+ ".. ... \n",
267
+ "274 NaN \n",
268
+ "275 NaN \n",
269
+ "276 NaN \n",
270
+ "277 NaN \n",
271
+ "278 NaN \n",
272
+ "\n",
273
+ " description \n",
274
+ "0 NaN \n",
275
+ "1 NaN \n",
276
+ "2 Light-filled restaurant with colorful seating ... \n",
277
+ "3 Seattle-based coffeehouse chain known for its ... \n",
278
+ "4 NaN \n",
279
+ ".. ... \n",
280
+ "274 NaN \n",
281
+ "275 NaN \n",
282
+ "276 NaN \n",
283
+ "277 NaN \n",
284
+ "278 NaN \n",
285
+ "\n",
286
+ "[279 rows x 12 columns]\n"
287
+ ]
288
+ }
289
+ ],
290
+ "source": [
291
+ "from matcher import Matcher\n",
292
+ "\n",
293
+ "matcher = Matcher.from_path(\"/home/kave/work/Elise/elise/data/final_data.csv\", embedder)"
294
+ ]
295
+ },
296
+ {
297
+ "cell_type": "code",
298
+ "execution_count": 9,
299
+ "metadata": {},
300
+ "outputs": [
301
+ {
302
+ "data": {
303
+ "text/plain": [
304
+ "tensor([[ 0.0409, -0.0148, 0.0419, ..., 0.0347, -0.0333, 0.0381],\n",
305
+ " [ 0.0090, 0.0329, 0.0421, ..., 0.0387, -0.0885, 0.0523],\n",
306
+ " [ 0.0155, -0.0316, 0.0217, ..., 0.0344, -0.0364, 0.0223],\n",
307
+ " ...,\n",
308
+ " [ 0.0363, -0.0182, 0.0596, ..., 0.0276, -0.0117, 0.0335],\n",
309
+ " [ 0.0415, -0.0292, 0.0533, ..., 0.0515, -0.0225, 0.0368],\n",
310
+ " [ 0.0540, -0.0334, 0.0415, ..., 0.0756, -0.0274, 0.0454]],\n",
311
+ " device='cuda:0')"
312
+ ]
313
+ },
314
+ "execution_count": 9,
315
+ "metadata": {},
316
+ "output_type": "execute_result"
317
+ }
318
+ ],
319
+ "source": [
320
+ "matcher.semantics"
321
+ ]
322
+ },
323
+ {
324
+ "cell_type": "code",
325
+ "execution_count": 10,
326
+ "metadata": {},
327
+ "outputs": [
328
+ {
329
+ "name": "stdout",
330
+ "output_type": "stream",
331
+ "text": [
332
+ "CPU times: user 331 ms, sys: 0 ns, total: 331 ms\n",
333
+ "Wall time: 91.5 ms\n"
334
+ ]
335
+ }
336
+ ],
337
+ "source": [
338
+ "%%time\n",
339
+ "ners = parser.get_ner(test_p)\n",
340
+ "parsed_prompts = parser.parse(ners)\n",
341
+ "kk = matcher.handle_jobs(parsed_prompts)"
342
+ ]
343
+ },
344
+ {
345
+ "cell_type": "code",
346
+ "execution_count": 12,
347
+ "metadata": {},
348
+ "outputs": [],
349
+ "source": [
350
+ "from utils import df_to_json"
351
+ ]
352
+ },
353
+ {
354
+ "cell_type": "code",
355
+ "execution_count": 13,
356
+ "metadata": {},
357
+ "outputs": [
358
+ {
359
+ "data": {
360
+ "text/plain": [
361
+ "{\"I don't feel like cooking tonight. Where's a good place to get fast-food?\": [{'Name': 'Cafetaria Edison',\n",
362
+ " 'Score': 20.519733428955078},\n",
363
+ " {'Name': 'Five Guys', 'Score': 20.5447940826416},\n",
364
+ " {'Name': 'Five Guys', 'Score': 20.5447940826416}],\n",
365
+ " \"I'm planning a dinner with some friends. Any recommendations for a restaurant with a lively atmosphere?\": [{'Name': 'De Garde',\n",
366
+ " 'Score': 25.97490882873535},\n",
367
+ " {'Name': 'Hemel & Aarde', 'Score': 26.467164993286133},\n",
368
+ " {'Name': 'The Thai Orchid', 'Score': 36.23405838012695}],\n",
369
+ " \"I want to celebrate my graduation with a nice dinner out. What's a good restaurant with good food and a fun ambiance?\": [{'Name': 'Luc Utrecht',\n",
370
+ " 'Score': 20.497264862060547},\n",
371
+ " {'Name': 'Spice Monkey', 'Score': 20.64722442626953},\n",
372
+ " {'Name': 'Ethiopian Sunshine', 'Score': 20.84449005126953}],\n",
373
+ " \"I want to try some new cuisines I've never had before. Can you recommend a restaurant with authentic Indian food?\": [{'Name': 'India Port',\n",
374
+ " 'Score': 35.02317428588867},\n",
375
+ " {'Name': 'Kashmir Kitchen Utrecht', 'Score': 35.351844787597656},\n",
376
+ " {'Name': 'Surya Utrecht | Indiaas & Nepalees restaurant & bar',\n",
377
+ " 'Score': 40.53645706176758}],\n",
378
+ " \"I'm planning a special date night and want to go somewhere romantic. What's a good restaurant with a nice view?\": [{'Name': 'Sevilla',\n",
379
+ " 'Score': 20.176685333251953},\n",
380
+ " {'Name': 'Pand 33 Utrecht', 'Score': 20.325525283813477},\n",
381
+ " {'Name': 'Hemel & Aarde', 'Score': 23.112428665161133}],\n",
382
+ " \"I'm meeting a client for lunch. Can you recommend a good restaurant for a business meeting?\": [{'Name': 'Sevilla',\n",
383
+ " 'Score': 20.176685333251953},\n",
384
+ " {'Name': 'Pand 33 Utrecht', 'Score': 20.325525283813477},\n",
385
+ " {'Name': 'Hemel & Aarde', 'Score': 23.112428665161133}],\n",
386
+ " \"I'm traveling through this city and need to find a good place to eat. Any recommendations near the airport?\": [{'Name': 'Sevilla',\n",
387
+ " 'Score': 20.176685333251953},\n",
388
+ " {'Name': 'Pand 33 Utrecht', 'Score': 20.325525283813477},\n",
389
+ " {'Name': 'Hemel & Aarde', 'Score': 23.112428665161133}],\n",
390
+ " \"I'm looking for a restaurant with live music or other entertainment. Any suggestions?\": [{'Name': 'Silk Road Utrecht',\n",
391
+ " 'Score': 19.077302932739258},\n",
392
+ " {'Name': 'Hemel & Aarde', 'Score': 20.864463806152344},\n",
393
+ " {'Name': 'The Thai Orchid', 'Score': 26.012712478637695}],\n",
394
+ " 'I have a gluten allergy and need to find a restaurant with gluten-free options. Do you know any good ones in this area?': [{'Name': 'Silk Road Utrecht',\n",
395
+ " 'Score': 19.077302932739258},\n",
396
+ " {'Name': 'Hemel & Aarde', 'Score': 20.864463806152344},\n",
397
+ " {'Name': 'The Thai Orchid', 'Score': 26.012712478637695}],\n",
398
+ " \"I just want to relax and have a nice meal out. What's a good restaurant with a cozy atmosphere?\": [{'Name': 'De Garde',\n",
399
+ " 'Score': 34.23313903808594},\n",
400
+ " {'Name': 'Asia Street Cooking', 'Score': 34.71759796142578},\n",
401
+ " {'Name': 'Hemel & Aarde', 'Score': 37.8213005065918}]}"
402
+ ]
403
+ },
404
+ "execution_count": 13,
405
+ "metadata": {},
406
+ "output_type": "execute_result"
407
+ }
408
+ ],
409
+ "source": [
410
+ "df_to_json(kk, test_p)"
411
+ ]
412
+ },
413
+ {
414
+ "cell_type": "code",
415
+ "execution_count": null,
416
+ "metadata": {},
417
+ "outputs": [],
418
+ "source": []
419
+ }
420
+ ],
421
+ "metadata": {
422
+ "kernelspec": {
423
+ "display_name": "afterhours_dev",
424
+ "language": "python",
425
+ "name": "python3"
426
+ },
427
+ "language_info": {
428
+ "codemirror_mode": {
429
+ "name": "ipython",
430
+ "version": 3
431
+ },
432
+ "file_extension": ".py",
433
+ "mimetype": "text/x-python",
434
+ "name": "python",
435
+ "nbconvert_exporter": "python",
436
+ "pygments_lexer": "ipython3",
437
+ "version": "3.10.0"
438
+ },
439
+ "orig_nbformat": 4
440
+ },
441
+ "nbformat": 4,
442
+ "nbformat_minor": 2
443
+ }
elise/src/notebooks/playground_prompt/version1.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Prompt bundle for manual playground experiments (version 1): an
# instruction, one worked example, and ten user test prompts.
prompts = {
    "instruction": """Identify user needs from the prompts.""",
    "examples": """
Prompt: I want to celebrate my graduation with my friend. Recommend me somewhere nice with live music near the Harvard campus.
Identification:
occasion: celebrating graduation
features: somewhere nice, live music
location: near Harvard campus
""",
    "test_prompts": [
        "I don't feel like cooking tonight. Where's a good place to get fast-food?",
        "I'm planning a dinner with some friends. Any recommendations for a restaurant with a lively atmosphere?",
        "I want to celebrate my graduation with a nice dinner out. What's a good restaurant with good food and a fun ambiance?",
        "I want to try some new cuisines I've never had before. Can you recommend a restaurant with authentic Indian food?",
        "I'm planning a special date night and want to go somewhere romantic. What's a good restaurant with a nice view?",
        "I'm meeting a client for lunch. Can you recommend a good restaurant for a business meeting?",
        "I'm traveling through this city and need to find a good place to eat. Any recommendations near the airport?",
        "I'm looking for a restaurant with live music or other entertainment. Any suggestions?",
        "I have a gluten allergy and need to find a restaurant with gluten-free options. Do you know any good ones in this area?",
        "I just want to relax and have a nice meal out. What's a good restaurant with a cozy atmosphere?",
    ],
}
elise/src/notebooks/t5 funetinung.ipynb ADDED
@@ -0,0 +1,449 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "9510dd98",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "from transformers import AutoTokenizer, AutoModelForSeq2SeqLM\n",
11
+ "from datasets import load_dataset\n",
12
+ "from transformers import get_scheduler\n",
13
+ "import torch\n",
14
+ "from torch.utils.data import DataLoader\n",
15
+ "from datasets import load_dataset\n",
16
+ "from transformers import AutoTokenizer, AutoModelForTokenClassification\n",
17
+ "from transformers import DataCollatorForSeq2Seq\n",
18
+ "from accelerate import Accelerator\n",
19
+ "import evaluate\n",
20
+ "import datasets\n",
21
+ "\n",
22
+ "from tqdm.auto import tqdm"
23
+ ]
24
+ },
25
+ {
26
+ "cell_type": "code",
27
+ "execution_count": 2,
28
+ "id": "f1da6c6c",
29
+ "metadata": {
30
+ "scrolled": true
31
+ },
32
+ "outputs": [
33
+ {
34
+ "name": "stderr",
35
+ "output_type": "stream",
36
+ "text": [
37
+ "/home/kave/miniconda3/envs/afterhours_dev/lib/python3.10/site-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
38
+ "For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
39
+ "- Be aware that you SHOULD NOT rely on t5-base automatically truncating your input to 512 when padding/encoding.\n",
40
+ "- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
41
+ "- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
42
+ " warnings.warn(\n"
43
+ ]
44
+ }
45
+ ],
46
+ "source": [
47
+ "tokenizer = AutoTokenizer.from_pretrained(\"t5-base\")\n",
48
+ "model = AutoModelForSeq2SeqLM.from_pretrained(\"t5-base\")"
49
+ ]
50
+ },
51
+ {
52
+ "cell_type": "code",
53
+ "execution_count": 3,
54
+ "id": "a6de1719",
55
+ "metadata": {},
56
+ "outputs": [],
57
+ "source": [
58
+ "# prep dataset\n",
59
+ "dataset = load_dataset(\"tner/mit_restaurant\")"
60
+ ]
61
+ },
62
+ {
63
+ "cell_type": "code",
64
+ "execution_count": 4,
65
+ "id": "8617d7d6",
66
+ "metadata": {},
67
+ "outputs": [],
68
+ "source": [
69
+ "ner_tags = {\n",
70
+ " \"O\": 0,\n",
71
+ " \"B-Rating\": 1,\n",
72
+ " \"I-Rating\": 2,\n",
73
+ " \"B-Amenity\": 3,\n",
74
+ " \"I-Amenity\": 4,\n",
75
+ " \"B-Location\": 5,\n",
76
+ " \"I-Location\": 6,\n",
77
+ " \"B-Restaurant_Name\": 7,\n",
78
+ " \"I-Restaurant_Name\": 8,\n",
79
+ " \"B-Price\": 9,\n",
80
+ " \"B-Hours\": 10,\n",
81
+ " \"I-Hours\": 11,\n",
82
+ " \"B-Dish\": 12,\n",
83
+ " \"I-Dish\": 13,\n",
84
+ " \"B-Cuisine\": 14,\n",
85
+ " \"I-Price\": 15,\n",
86
+ " \"I-Cuisine\": 16,\n",
87
+ "}\n",
88
+ "\n",
89
+ "\n",
90
+ "label_names = {v: k for k, v in ner_tags.items()}"
91
+ ]
92
+ },
93
+ {
94
+ "cell_type": "code",
95
+ "execution_count": 5,
96
+ "id": "de52b597",
97
+ "metadata": {},
98
+ "outputs": [],
99
+ "source": [
100
+ "def decode_tags(tags, words):\n",
101
+ " dict_out = {}\n",
102
+ " word_ = \"\"\n",
103
+ " for tag, word in zip(tags[::-1], words[::-1]):\n",
104
+ " if tag == 0:\n",
105
+ " continue\n",
106
+ " word_ = word_ + \" \" + word\n",
107
+ " if label_names[tag].startswith(\"B\"):\n",
108
+ " tag_name = label_names[tag][2:]\n",
109
+ " word_ = word_.strip()\n",
110
+ " if tag_name not in dict_out:\n",
111
+ " dict_out[tag_name] = [word_]\n",
112
+ " else:\n",
113
+ " dict_out[tag_name].append(word_)\n",
114
+ " word_ = \"\"\n",
115
+ " return dict_out\n",
116
+ "\n",
117
+ "\n",
118
+ "def format_to_text(decoded):\n",
119
+ " text = \"\"\n",
120
+ " for key, value in decoded.items():\n",
121
+ " text += f\"{key}: {', '.join(value)}\\n\"\n",
122
+ " return text"
123
+ ]
124
+ },
125
+ {
126
+ "cell_type": "code",
127
+ "execution_count": 6,
128
+ "id": "5da715a8",
129
+ "metadata": {},
130
+ "outputs": [],
131
+ "source": [
132
+ "def generate_t5_data(example):\n",
133
+ " decoded = decode_tags(example[\"tags\"], example[\"tokens\"])\n",
134
+ " return {\"tokens\": \" \".join(example[\"tokens\"]), \"labels\": format_to_text(decoded)}"
135
+ ]
136
+ },
137
+ {
138
+ "cell_type": "code",
139
+ "execution_count": 7,
140
+ "id": "57416e20",
141
+ "metadata": {},
142
+ "outputs": [],
143
+ "source": [
144
+ "from transformers import T5Tokenizer, T5ForConditionalGeneration\n",
145
+ "import torch\n",
146
+ "\n",
147
+ "# the following 2 hyperparameters are task-specific\n",
148
+ "max_source_length = 512\n",
149
+ "max_target_length = 128\n",
150
+ "\n",
151
+ "# encode the inputs\n",
152
+ "task_prefix = \"What is the user intent?\"\n",
153
+ "\n",
154
+ "\n",
155
+ "def tokenize(example):\n",
156
+ " tokenized = tokenizer(\n",
157
+ " task_prefix + example[\"tokens\"],\n",
158
+ " text_target=example[\"labels\"],\n",
159
+ " max_length=512,\n",
160
+ " truncation=True,\n",
161
+ " )\n",
162
+ " return tokenized"
163
+ ]
164
+ },
165
+ {
166
+ "cell_type": "code",
167
+ "execution_count": 8,
168
+ "id": "137905d7",
169
+ "metadata": {
170
+ "scrolled": true
171
+ },
172
+ "outputs": [
173
+ {
174
+ "data": {
175
+ "application/vnd.jupyter.widget-view+json": {
176
+ "model_id": "23bafa0f97bc4d4da8a96397f0f3bd5a",
177
+ "version_major": 2,
178
+ "version_minor": 0
179
+ },
180
+ "text/plain": [
181
+ "Map: 0%| | 0/6900 [00:00<?, ? examples/s]"
182
+ ]
183
+ },
184
+ "metadata": {},
185
+ "output_type": "display_data"
186
+ }
187
+ ],
188
+ "source": [
189
+ "tokenized_datasets = dataset.map(generate_t5_data)\n",
190
+ "tokenized_datasets = tokenized_datasets.remove_columns([\"tags\"])\n",
191
+ "tokenized_datasets = tokenized_datasets.map(tokenize)"
192
+ ]
193
+ },
194
+ {
195
+ "cell_type": "code",
196
+ "execution_count": 9,
197
+ "id": "e2bdf1b0",
198
+ "metadata": {},
199
+ "outputs": [],
200
+ "source": [
201
+ "import evaluate\n",
202
+ "\n",
203
+ "metric = evaluate.load(\"sacrebleu\")"
204
+ ]
205
+ },
206
+ {
207
+ "cell_type": "code",
208
+ "execution_count": 10,
209
+ "id": "cd9871bf",
210
+ "metadata": {},
211
+ "outputs": [],
212
+ "source": [
213
+ "import numpy as np\n",
214
+ "\n",
215
+ "\n",
216
+ "def compute_metrics(eval_preds):\n",
217
+ " preds, labels = eval_preds\n",
218
+ " # In case the model returns more than the prediction logits\n",
219
+ " if isinstance(preds, tuple):\n",
220
+ " preds = preds[0]\n",
221
+ "\n",
222
+ " decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)\n",
223
+ "\n",
224
+ " # Replace -100s in the labels as we can't decode them\n",
225
+ " labels = np.where(labels != -100, labels, tokenizer.pad_token_id)\n",
226
+ " decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)\n",
227
+ "\n",
228
+ " # Some simple post-processing\n",
229
+ " decoded_preds = [pred.strip() for pred in decoded_preds]\n",
230
+ " decoded_labels = [[label.strip()] for label in decoded_labels]\n",
231
+ "\n",
232
+ " result = metric.compute(predictions=decoded_preds, references=decoded_labels)\n",
233
+ " return {\"bleu\": result[\"score\"]}"
234
+ ]
235
+ },
236
+ {
237
+ "cell_type": "code",
238
+ "execution_count": 11,
239
+ "id": "09afe1d0",
240
+ "metadata": {},
241
+ "outputs": [],
242
+ "source": [
243
+ "data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)"
244
+ ]
245
+ },
246
+ {
247
+ "cell_type": "code",
248
+ "execution_count": 12,
249
+ "id": "58e84fd1",
250
+ "metadata": {},
251
+ "outputs": [],
252
+ "source": [
253
+ "from transformers import Seq2SeqTrainingArguments\n",
254
+ "\n",
255
+ "args = Seq2SeqTrainingArguments(\n",
256
+ " f\"T5 test\",\n",
257
+ " evaluation_strategy=\"no\",\n",
258
+ " save_strategy=\"epoch\",\n",
259
+ " learning_rate=3e-4,\n",
260
+ " per_device_train_batch_size=64,\n",
261
+ " per_device_eval_batch_size=32,\n",
262
+ " weight_decay=0.01,\n",
263
+ " save_total_limit=3,\n",
264
+ " num_train_epochs=20,\n",
265
+ " predict_with_generate=True,\n",
266
+ " fp16=True,\n",
267
+ ")"
268
+ ]
269
+ },
270
+ {
271
+ "cell_type": "code",
272
+ "execution_count": 13,
273
+ "id": "edfcbac1",
274
+ "metadata": {},
275
+ "outputs": [],
276
+ "source": [
277
+ "from transformers import Seq2SeqTrainer\n",
278
+ "\n",
279
+ "trainer = Seq2SeqTrainer(\n",
280
+ " model,\n",
281
+ " args,\n",
282
+ " train_dataset=tokenized_datasets[\"train\"],\n",
283
+ " eval_dataset=tokenized_datasets[\"validation\"],\n",
284
+ " data_collator=data_collator,\n",
285
+ " tokenizer=tokenizer,\n",
286
+ " compute_metrics=compute_metrics,\n",
287
+ ")"
288
+ ]
289
+ },
290
+ {
291
+ "cell_type": "code",
292
+ "execution_count": 14,
293
+ "id": "e0065364",
294
+ "metadata": {},
295
+ "outputs": [
296
+ {
297
+ "name": "stderr",
298
+ "output_type": "stream",
299
+ "text": [
300
+ "You're using a T5TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n"
301
+ ]
302
+ },
303
+ {
304
+ "data": {
305
+ "text/html": [
306
+ "\n",
307
+ " <div>\n",
308
+ " \n",
309
+ " <progress value='12' max='12' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
310
+ " [12/12 00:15]\n",
311
+ " </div>\n",
312
+ " "
313
+ ],
314
+ "text/plain": [
315
+ "<IPython.core.display.HTML object>"
316
+ ]
317
+ },
318
+ "metadata": {},
319
+ "output_type": "display_data"
320
+ },
321
+ {
322
+ "name": "stderr",
323
+ "output_type": "stream",
324
+ "text": [
325
+ "Trainer is attempting to log a value of \"{'summarization': {'early_stopping': True, 'length_penalty': 2.0, 'max_length': 200, 'min_length': 30, 'no_repeat_ngram_size': 3, 'num_beams': 4, 'prefix': 'summarize: '}, 'translation_en_to_de': {'early_stopping': True, 'max_length': 300, 'num_beams': 4, 'prefix': 'translate English to German: '}, 'translation_en_to_fr': {'early_stopping': True, 'max_length': 300, 'num_beams': 4, 'prefix': 'translate English to French: '}, 'translation_en_to_ro': {'early_stopping': True, 'max_length': 300, 'num_beams': 4, 'prefix': 'translate English to Romanian: '}}\" for key \"task_specific_params\" as a parameter. MLflow's log_param() only accepts values no longer than 250 characters so we dropped this attribute. You can use `MLFLOW_FLATTEN_PARAMS` environment variable to flatten the parameters and avoid this message.\n"
326
+ ]
327
+ },
328
+ {
329
+ "data": {
330
+ "text/plain": [
331
+ "{'eval_loss': 6.675447940826416,\n",
332
+ " 'eval_bleu': 0.006728795795564811,\n",
333
+ " 'eval_runtime': 17.5858,\n",
334
+ " 'eval_samples_per_second': 43.217,\n",
335
+ " 'eval_steps_per_second': 0.682}"
336
+ ]
337
+ },
338
+ "execution_count": 14,
339
+ "metadata": {},
340
+ "output_type": "execute_result"
341
+ }
342
+ ],
343
+ "source": [
344
+ "trainer.evaluate(max_length=512)"
345
+ ]
346
+ },
347
+ {
348
+ "cell_type": "code",
349
+ "execution_count": 15,
350
+ "id": "64ad307b",
351
+ "metadata": {},
352
+ "outputs": [
353
+ {
354
+ "name": "stderr",
355
+ "output_type": "stream",
356
+ "text": [
357
+ "/home/kave/miniconda3/envs/afterhours_dev/lib/python3.10/site-packages/transformers/optimization.py:411: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
358
+ " warnings.warn(\n"
359
+ ]
360
+ },
361
+ {
362
+ "data": {
363
+ "text/html": [
364
+ "\n",
365
+ " <div>\n",
366
+ " \n",
367
+ " <progress value='4' max='1080' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
368
+ " [ 4/1080 00:01 < 09:22, 1.91 it/s, Epoch 0.06/20]\n",
369
+ " </div>\n",
370
+ " <table border=\"1\" class=\"dataframe\">\n",
371
+ " <thead>\n",
372
+ " <tr style=\"text-align: left;\">\n",
373
+ " <th>Step</th>\n",
374
+ " <th>Training Loss</th>\n",
375
+ " </tr>\n",
376
+ " </thead>\n",
377
+ " <tbody>\n",
378
+ " </tbody>\n",
379
+ "</table><p>"
380
+ ],
381
+ "text/plain": [
382
+ "<IPython.core.display.HTML object>"
383
+ ]
384
+ },
385
+ "metadata": {},
386
+ "output_type": "display_data"
387
+ },
388
+ {
389
+ "ename": "OutOfMemoryError",
390
+ "evalue": "CUDA out of memory. Tried to allocate 456.00 MiB (GPU 0; 11.75 GiB total capacity; 10.26 GiB already allocated; 131.12 MiB free; 10.83 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF",
391
+ "output_type": "error",
392
+ "traceback": [
393
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
394
+ "\u001b[0;31mOutOfMemoryError\u001b[0m Traceback (most recent call last)",
395
+ "Cell \u001b[0;32mIn[15], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
396
+ "File \u001b[0;32m~/miniconda3/envs/afterhours_dev/lib/python3.10/site-packages/transformers/trainer.py:1539\u001b[0m, in \u001b[0;36mTrainer.train\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 1534\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_wrapped \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel\n\u001b[1;32m 1536\u001b[0m inner_training_loop \u001b[38;5;241m=\u001b[39m find_executable_batch_size(\n\u001b[1;32m 1537\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_inner_training_loop, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_train_batch_size, args\u001b[38;5;241m.\u001b[39mauto_find_batch_size\n\u001b[1;32m 1538\u001b[0m )\n\u001b[0;32m-> 1539\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43minner_training_loop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1540\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1541\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1542\u001b[0m \u001b[43m \u001b[49m\u001b[43mtrial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1543\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1544\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
397
+ "File \u001b[0;32m~/miniconda3/envs/afterhours_dev/lib/python3.10/site-packages/transformers/trainer.py:1809\u001b[0m, in \u001b[0;36mTrainer._inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 1806\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcallback_handler\u001b[38;5;241m.\u001b[39mon_step_begin(args, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol)\n\u001b[1;32m 1808\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maccelerator\u001b[38;5;241m.\u001b[39maccumulate(model):\n\u001b[0;32m-> 1809\u001b[0m tr_loss_step \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtraining_step\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1811\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 1812\u001b[0m args\u001b[38;5;241m.\u001b[39mlogging_nan_inf_filter\n\u001b[1;32m 1813\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_torch_tpu_available()\n\u001b[1;32m 1814\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m (torch\u001b[38;5;241m.\u001b[39misnan(tr_loss_step) \u001b[38;5;129;01mor\u001b[39;00m torch\u001b[38;5;241m.\u001b[39misinf(tr_loss_step))\n\u001b[1;32m 1815\u001b[0m ):\n\u001b[1;32m 1816\u001b[0m \u001b[38;5;66;03m# if loss is nan or inf simply add the average of previous logged losses\u001b[39;00m\n\u001b[1;32m 1817\u001b[0m tr_loss \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m tr_loss \u001b[38;5;241m/\u001b[39m (\u001b[38;5;241m1\u001b[39m \u001b[38;5;241m+\u001b[39m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mglobal_step \u001b[38;5;241m-\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_globalstep_last_logged)\n",
398
+ "File \u001b[0;32m~/miniconda3/envs/afterhours_dev/lib/python3.10/site-packages/transformers/trainer.py:2654\u001b[0m, in \u001b[0;36mTrainer.training_step\u001b[0;34m(self, model, inputs)\u001b[0m\n\u001b[1;32m 2651\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m loss_mb\u001b[38;5;241m.\u001b[39mreduce_mean()\u001b[38;5;241m.\u001b[39mdetach()\u001b[38;5;241m.\u001b[39mto(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mdevice)\n\u001b[1;32m 2653\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcompute_loss_context_manager():\n\u001b[0;32m-> 2654\u001b[0m loss \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcompute_loss\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2656\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mn_gpu \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m 2657\u001b[0m loss \u001b[38;5;241m=\u001b[39m loss\u001b[38;5;241m.\u001b[39mmean() \u001b[38;5;66;03m# mean() to average on multi-gpu parallel training\u001b[39;00m\n",
399
+ "File \u001b[0;32m~/miniconda3/envs/afterhours_dev/lib/python3.10/site-packages/transformers/trainer.py:2679\u001b[0m, in \u001b[0;36mTrainer.compute_loss\u001b[0;34m(self, model, inputs, return_outputs)\u001b[0m\n\u001b[1;32m 2677\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 2678\u001b[0m labels \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 2679\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2680\u001b[0m \u001b[38;5;66;03m# Save past state if it exists\u001b[39;00m\n\u001b[1;32m 2681\u001b[0m \u001b[38;5;66;03m# TODO: this needs to be fixed and made cleaner later.\u001b[39;00m\n\u001b[1;32m 2682\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mpast_index \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n",
400
+ "File \u001b[0;32m~/miniconda3/envs/afterhours_dev/lib/python3.10/site-packages/torch/nn/modules/module.py:1501\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1496\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1497\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1498\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1499\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1500\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1501\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1502\u001b[0m \u001b[38;5;66;03m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m 1503\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[38;5;241m=\u001b[39m [], []\n",
401
+ "File \u001b[0;32m~/miniconda3/envs/afterhours_dev/lib/python3.10/site-packages/accelerate/utils/operations.py:581\u001b[0m, in \u001b[0;36mconvert_outputs_to_fp32.<locals>.forward\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 580\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 581\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmodel_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
402
+ "File \u001b[0;32m~/miniconda3/envs/afterhours_dev/lib/python3.10/site-packages/accelerate/utils/operations.py:569\u001b[0m, in \u001b[0;36mConvertOutputsToFp32.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 568\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 569\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m convert_to_fp32(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m)\n",
403
+ "File \u001b[0;32m~/miniconda3/envs/afterhours_dev/lib/python3.10/site-packages/torch/amp/autocast_mode.py:14\u001b[0m, in \u001b[0;36mautocast_decorator.<locals>.decorate_autocast\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 12\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorate_autocast\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m autocast_instance:\n\u001b[0;32m---> 14\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
404
+ "File \u001b[0;32m~/miniconda3/envs/afterhours_dev/lib/python3.10/site-packages/accelerate/utils/operations.py:581\u001b[0m, in \u001b[0;36mconvert_outputs_to_fp32.<locals>.forward\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 580\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 581\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmodel_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
405
+ "File \u001b[0;32m~/miniconda3/envs/afterhours_dev/lib/python3.10/site-packages/accelerate/utils/operations.py:569\u001b[0m, in \u001b[0;36mConvertOutputsToFp32.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 568\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 569\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mconvert_to_fp32\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n",
406
+ "File \u001b[0;32m~/miniconda3/envs/afterhours_dev/lib/python3.10/site-packages/accelerate/utils/operations.py:548\u001b[0m, in \u001b[0;36mconvert_to_fp32\u001b[0;34m(tensor)\u001b[0m\n\u001b[1;32m 545\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_is_fp16_bf16_tensor\u001b[39m(tensor):\n\u001b[1;32m 546\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(tensor, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdtype\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m tensor\u001b[38;5;241m.\u001b[39mdtype \u001b[38;5;129;01min\u001b[39;00m (torch\u001b[38;5;241m.\u001b[39mfloat16, torch\u001b[38;5;241m.\u001b[39mbfloat16)\n\u001b[0;32m--> 548\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mrecursively_apply\u001b[49m\u001b[43m(\u001b[49m\u001b[43m_convert_to_fp32\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtensor\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtest_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_is_fp16_bf16_tensor\u001b[49m\u001b[43m)\u001b[49m\n",
407
+ "File \u001b[0;32m~/miniconda3/envs/afterhours_dev/lib/python3.10/site-packages/accelerate/utils/operations.py:120\u001b[0m, in \u001b[0;36mrecursively_apply\u001b[0;34m(func, data, test_type, error_on_other_type, *args, **kwargs)\u001b[0m\n\u001b[1;32m 109\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m honor_type(\n\u001b[1;32m 110\u001b[0m data,\n\u001b[1;32m 111\u001b[0m (\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 116\u001b[0m ),\n\u001b[1;32m 117\u001b[0m )\n\u001b[1;32m 118\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(data, Mapping):\n\u001b[1;32m 119\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(data)(\n\u001b[0;32m--> 120\u001b[0m {\n\u001b[1;32m 121\u001b[0m k: recursively_apply(\n\u001b[1;32m 122\u001b[0m func, v, \u001b[38;5;241m*\u001b[39margs, test_type\u001b[38;5;241m=\u001b[39mtest_type, error_on_other_type\u001b[38;5;241m=\u001b[39merror_on_other_type, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs\n\u001b[1;32m 123\u001b[0m )\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m data\u001b[38;5;241m.\u001b[39mitems()\n\u001b[1;32m 125\u001b[0m }\n\u001b[1;32m 126\u001b[0m )\n\u001b[1;32m 127\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m test_type(data):\n\u001b[1;32m 128\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func(data, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
408
+ "File \u001b[0;32m~/miniconda3/envs/afterhours_dev/lib/python3.10/site-packages/accelerate/utils/operations.py:121\u001b[0m, in \u001b[0;36m<dictcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 109\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m honor_type(\n\u001b[1;32m 110\u001b[0m data,\n\u001b[1;32m 111\u001b[0m (\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 116\u001b[0m ),\n\u001b[1;32m 117\u001b[0m )\n\u001b[1;32m 118\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(data, Mapping):\n\u001b[1;32m 119\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(data)(\n\u001b[1;32m 120\u001b[0m {\n\u001b[0;32m--> 121\u001b[0m k: \u001b[43mrecursively_apply\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 122\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtest_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtest_type\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merror_on_other_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merror_on_other_type\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\n\u001b[1;32m 123\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m data\u001b[38;5;241m.\u001b[39mitems()\n\u001b[1;32m 125\u001b[0m }\n\u001b[1;32m 126\u001b[0m )\n\u001b[1;32m 127\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m test_type(data):\n\u001b[1;32m 128\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func(data, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
409
+ "File \u001b[0;32m~/miniconda3/envs/afterhours_dev/lib/python3.10/site-packages/accelerate/utils/operations.py:128\u001b[0m, in \u001b[0;36mrecursively_apply\u001b[0;34m(func, data, test_type, error_on_other_type, *args, **kwargs)\u001b[0m\n\u001b[1;32m 119\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(data)(\n\u001b[1;32m 120\u001b[0m {\n\u001b[1;32m 121\u001b[0m k: recursively_apply(\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 125\u001b[0m }\n\u001b[1;32m 126\u001b[0m )\n\u001b[1;32m 127\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m test_type(data):\n\u001b[0;32m--> 128\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 129\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m error_on_other_type:\n\u001b[1;32m 130\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\n\u001b[1;32m 131\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnsupported types (\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(data)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m) passed to `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfunc\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m`. 
Only nested list/tuple/dicts of \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 132\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mobjects that are valid for `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtest_type\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` should be passed.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 133\u001b[0m )\n",
410
+ "File \u001b[0;32m~/miniconda3/envs/afterhours_dev/lib/python3.10/site-packages/accelerate/utils/operations.py:543\u001b[0m, in \u001b[0;36mconvert_to_fp32.<locals>._convert_to_fp32\u001b[0;34m(tensor)\u001b[0m\n\u001b[1;32m 542\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_convert_to_fp32\u001b[39m(tensor):\n\u001b[0;32m--> 543\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mtensor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfloat\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
411
+ "\u001b[0;31mOutOfMemoryError\u001b[0m: CUDA out of memory. Tried to allocate 456.00 MiB (GPU 0; 11.75 GiB total capacity; 10.26 GiB already allocated; 131.12 MiB free; 10.83 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF"
412
+ ]
413
+ }
414
+ ],
415
+ "source": [
416
+ "trainer.train()"
417
+ ]
418
+ },
419
+ {
420
+ "cell_type": "code",
421
+ "execution_count": null,
422
+ "id": "6672ff53",
423
+ "metadata": {},
424
+ "outputs": [],
425
+ "source": []
426
+ }
427
+ ],
428
+ "metadata": {
429
+ "kernelspec": {
430
+ "display_name": "Python 3 (ipykernel)",
431
+ "language": "python",
432
+ "name": "python3"
433
+ },
434
+ "language_info": {
435
+ "codemirror_mode": {
436
+ "name": "ipython",
437
+ "version": 3
438
+ },
439
+ "file_extension": ".py",
440
+ "mimetype": "text/x-python",
441
+ "name": "python",
442
+ "nbconvert_exporter": "python",
443
+ "pygments_lexer": "ipython3",
444
+ "version": "3.10.0"
445
+ }
446
+ },
447
+ "nbformat": 4,
448
+ "nbformat_minor": 5
449
+ }
elise/src/train_t5_seq2seq.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Training Flant_T5 model on tner/mit_restaurant on seq2seq task
3
+ """
4
+ from dataclasses import asdict
5
+
6
+ import torch
7
+ import evaluate
8
+ import datasets
9
+ from torch.utils.data import DataLoader
10
+ from transformers import (
11
+ AutoTokenizer,
12
+ AutoModelForSeq2SeqLM,
13
+ DataCollatorForSeq2Seq,
14
+ get_scheduler,
15
+ )
16
+ from accelerate import Accelerator
17
+ import numpy as np
18
+ import mlflow
19
+
20
+ from tqdm.auto import tqdm
21
+
22
+ from utils.logger import get_logger
23
+ from configs import T5TrainingConfig
24
+ from data import MITRestaurants, get_default_transforms
25
+
26
+ log = get_logger("Flan_T5")
27
+ log.debug("heloooooooooooo?")
28
+
29
+ # get dataset
30
+ transforms = get_default_transforms()
31
+ dataset = (
32
+ MITRestaurants.from_hf("tner/mit_restaurant")
33
+ .set_transforms(transforms)
34
+ .hf_training()
35
+ )
36
+ dataset["train"] = datasets.concatenate_datasets([dataset["train"], dataset["test"]])
37
+ # log.info(dataset)
38
+ print(dataset)
39
+ tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
40
+ model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")
41
+
42
+
43
def tokenize(example):
    """Tokenize a batch of examples for the seq2seq task.

    Encodes ``example["tokens"]`` as the model input and
    ``example["labels"]`` as the generation target, truncating both
    to 512 tokens. Relies on the module-level ``tokenizer``.
    """
    return tokenizer(
        example["tokens"],
        text_target=example["labels"],
        max_length=512,
        truncation=True,
    )
53
+
54
+
55
# Tokenize every split; drop the raw columns so only model inputs remain.
tokenized_datasets = dataset.map(
    tokenize,
    batched=True,
    remove_columns=dataset["train"].column_names,
)

# sacrebleu gives a corpus-level BLEU score for the generated sequences.
metric = evaluate.load("sacrebleu")
63
+
64
+
65
def postprocess(predictions, labels):
    """Convert raw model output tensors into text for metric evaluation.

    Returns a pair ``(preds, refs)``: ``preds`` is a list of stripped
    prediction strings, ``refs`` a list of single-element reference
    lists — the shape sacrebleu expects.
    """
    pred_ids = predictions.cpu().numpy()
    label_ids = labels.cpu().numpy()

    decoded_preds = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)

    # -100 marks ignored label positions; swap in the pad id so they decode.
    label_ids = np.where(label_ids != -100, label_ids, tokenizer.pad_token_id)
    decoded_labels = tokenizer.batch_decode(label_ids, skip_special_tokens=True)

    # Light cleanup: strip whitespace, wrap each reference in a list.
    cleaned_preds = [text.strip() for text in decoded_preds]
    cleaned_labels = [[text.strip()] for text in decoded_labels]
    return cleaned_preds, cleaned_labels
80
+
81
+
82
config = T5TrainingConfig()

# The collator pads inputs and labels dynamically per batch.
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

# Dataloaders over the tokenized splits (torch tensors).
tokenized_datasets.set_format("torch")
train_dataloader = DataLoader(
    tokenized_datasets["train"],
    shuffle=True,
    collate_fn=data_collator,
    batch_size=config.train_batch_size,
)
eval_dataloader = DataLoader(
    tokenized_datasets["validation"],
    collate_fn=data_collator,
    batch_size=config.eval_batch_size,
)

# Optimizer plus a linear warmup/decay schedule over all training steps.
optimizer = torch.optim.AdamW(model.parameters(), lr=config.learning_rate)
num_update_steps_per_epoch = len(train_dataloader)
num_training_steps = config.epochs * num_update_steps_per_epoch

lr_scheduler = get_scheduler(
    "linear",
    optimizer=optimizer,
    num_warmup_steps=config.num_warmup_steps,
    num_training_steps=num_training_steps,
)

# Accelerate handles device placement, mixed precision and accumulation.
accelerator = Accelerator(
    mixed_precision=config.mixed_precision,
    gradient_accumulation_steps=config.gradient_accumulation_steps,
)
model, optimizer, train_dataloader, eval_dataloader = accelerator.prepare(
    model, optimizer, train_dataloader, eval_dataloader
)

progress_bar = tqdm(range(num_training_steps))
123
+
124
+
125
def train():
    """Fine-tune Flan-T5 with Accelerate, evaluating BLEU each epoch.

    Per epoch: one pass over ``train_dataloader`` with gradient
    accumulation, then generation on ``eval_dataloader`` scored with
    sacrebleu. After every epoch the unwrapped model is saved to
    ``config.output_dir`` and registered with MLflow — so the final
    epoch's weights are what remains on disk and in the registry.
    """
    print("Starting Training")  # fixed typo: was "Starting Traning"
    for epoch in range(config.epochs):
        # ---- Training pass ----
        model.train()
        for batch in train_dataloader:
            with accelerator.accumulate(model):
                outputs = model(**batch)
                loss = outputs.loss
                accelerator.backward(loss)

                optimizer.step()
                lr_scheduler.step()
                optimizer.zero_grad()
                progress_bar.update(1)

        # ---- Evaluation pass ----
        model.eval()
        for batch in tqdm(eval_dataloader):
            with torch.no_grad():
                generated_tokens = accelerator.unwrap_model(model).generate(
                    batch["input_ids"],
                    attention_mask=batch["attention_mask"],
                    max_length=128,
                )
            labels = batch["labels"]

            # Pad predictions and labels so gather() sees equal lengths
            # across processes.
            generated_tokens = accelerator.pad_across_processes(
                generated_tokens, dim=1, pad_index=tokenizer.pad_token_id
            )
            labels = accelerator.pad_across_processes(labels, dim=1, pad_index=-100)

            predictions_gathered = accelerator.gather(generated_tokens)
            labels_gathered = accelerator.gather(labels)

            decoded_preds, decoded_labels = postprocess(
                predictions_gathered, labels_gathered
            )
            metric.add_batch(predictions=decoded_preds, references=decoded_labels)

        results = metric.compute()
        mlflow.log_metrics({"epoch": epoch, "BLEU score": results["score"]})
        print(f"epoch {epoch}, BLEU score: {results['score']:.2f}")

        # ---- Save and register (runs every epoch; last epoch wins) ----
        accelerator.wait_for_everyone()
        unwrapped_model = accelerator.unwrap_model(model)
        unwrapped_model.save_pretrained(
            config.output_dir, save_function=accelerator.save
        )
        if accelerator.is_main_process:
            tokenizer.save_pretrained(config.output_dir)
            # Register model + tokenizer with MLflow's transformers flavor.
            mlflow.transformers.log_model(
                transformers_model={"model": unwrapped_model, "tokenizer": tokenizer},
                task="text2text-generation",
                artifact_path="seq2seq_model",
                registered_model_name="FlanT5_MIT",
            )
187
+
188
+
189
# Track this run on the local MLflow server.
# NOTE(review): the tracking URI is hard-coded; consider reading it from
# the MLFLOW_TRACKING_URI environment variable instead.
mlflow.set_tracking_uri("http://127.0.0.1:5000")
with mlflow.start_run():  # removed unused binding `as mlflow_run`
    mlflow.log_params(asdict(config))
    train()
elise/src/utils/logger.py CHANGED
@@ -4,7 +4,9 @@ Logging helper module
4
  import logging.config
5
  import yaml
6
 
7
- with open("elise/src/configs/logging_config.yaml", "r", encoding="utf-8") as f:
 
 
8
  config = yaml.safe_load(f.read())
9
  logging.config.dictConfig(config)
10
  logging.captureWarnings(True)
 
4
import logging.config
from pathlib import Path

import yaml

# Resolve the logging config relative to this file (utils/ -> src/configs/)
# instead of the merge's hard-coded absolute path, which only worked on one
# developer's machine.
_LOGGING_CONFIG = (
    Path(__file__).resolve().parent.parent / "configs" / "logging_config.yaml"
)

with open(_LOGGING_CONFIG, "r", encoding="utf-8") as f:
    config = yaml.safe_load(f.read())
logging.config.dictConfig(config)
logging.captureWarnings(True)
requirements.txt CHANGED
@@ -11,3 +11,5 @@ transformers==4.31.0
11
  pylint==2.17.5
12
  gradio==3.39.0
13
  gradio_client==0.3.0
 
 
 
11
  pylint==2.17.5
12
  gradio==3.39.0
13
  gradio_client==0.3.0
14
+ accelerate==0.21.0
15
+ evaluate==0.4.0