fiddler-labs · lestan · Mar 10, 2024 · Mar 10, 2024 · Mar 11, 2024 · Mar 11, 2024
@@ -0,0 +1,7 @@
+{
+    "python.testing.pytestArgs": [
+        "tests"
+    ],
+    "python.testing.unittestEnabled": false,
+    "python.testing.pytestEnabled": true
+}
@@ -45,6 +45,22 @@ Fiddler Auditor supports
 
 ## Installation
 
+### From Poetry
+Auditor supports the Poetry package management system, and we test on Python 3.8 and above. We recommend using Poetry to create a virtual Python environment and installing with the following command:
+
+```bash
+poetry install
+```
+This installs the dependencies listed in the `pyproject.toml` file and creates a local virtual environment if Poetry is configured to do so.
+
+After installing, run the tests to confirm all dependencies are installed and fiddler-auditor is functioning.  
+```bash
+poetry run pytest -v -s
+```
+
+>**NOTE:** Run the above test command twice: the first run triggers downloads of transformer assets and fails, but rerunning the tests should pass.
+
+
 ### From PyPI
 Auditor is available on PyPI and we test on Python 3.8 and above. We recommend creating a virtual python environment and installing using the following command
 

@@ -0,0 +1,82 @@
+# Upgrading Fiddler
+
+## Poetry upgrade
+
+These instructions create a version of the **pyproject.toml** file that is compatible with the Poetry build system instead of setuptools:
+
+```
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"
+
+[tool.poetry]
+name = "auditor"
+version = "0.0.5"
+authors = ["Fiddler Labs <[email protected]>"]
+description = "Auditing large language models made easy."
+readme = "README.md"
+license = "Elastic License 2.0 (ELv2)"
+repository = "https://github.com/fiddler-labs/fiddler-auditor"
+
+[tool.poetry.dependencies]
+python = ">=3.8.1,<4.0"
+notebook = "^6.0.1"
+fiddler-checklist = "0.0.1"
+pandas = "^1.3.5"
+spacy-transformers = "^1.1.8"
+jinja2 = "3.1.2"
+langchain = ">=0.0.158,<=0.0.330"
+openai = ">=0.27.0,<=0.28.1"
+sentence-transformers = "^2.2.2"
+tqdm = "^4.66.1"
+httplib2 = "~0.22.0"
+
+[tool.poetry.dev-dependencies]
+pytest = "*"
+build = "*"
+twine = "*"
+flake8 = "*"
+
+```
+
+## Install Poetry dependencies
+
+This will set up the virtual environment, install Python and begin to install dependencies
+
+    poetry install
+
+### Install iso-639
+
+Poetry will complain about a compatibility issue with fiddler-checklist and iso-639.  To get around this, install iso-639 first using pip
+
+    poetry run pip install iso-639 
+
+### Install poetry dependencies
+
+    poetry install --with=dev
+
+### Upgrade packages
+
+    poetry add openai@latest
+    poetry add langchain@latest
+    poetry add spacy-transformers@latest
+    poetry add sentence-transformers@latest
+
+### Add LangChain OpenAI integration
+
+This adds support for openai and other models
+
+    poetry add langchain-openai@latest
+
+## Build auditor package
+
+    poetry build
+
+This should create the `dist/` directory containing
+the wheel and tar.gz package assets.
+
+## Run tests
+
+    poetry run pytest -v -s
+
+This should set up the test environment and run all the tests in the /tests folder
@@ -6,7 +6,8 @@
 import numpy as np
 from sentence_transformers.SentenceTransformer import SentenceTransformer
 from transformers import pipeline
-from langchain.llms import OpenAI
+#from langchain_community.llms.openai import OpenAI
+from langchain_openai import ChatOpenAI
 
 from auditor.utils.progress_logger import ProgressLogger
 from auditor.utils.similarity import compute_similarity
@@ -213,7 +214,7 @@ def __init__(
         metric_key: str = 'Rationale',
     ) -> None:
         self.grading_model = grading_model
-        self.model = OpenAI(model_name=grading_model, temperature=0.0)
+        self.model = ChatOpenAI(model_name=grading_model, temperature=0.0)
         self.metric_key = metric_key
         self.descriptor = (
             f'Model response graded using {self.grading_model}.'
@@ -277,13 +278,14 @@ def _grade(
             f'Begin your response by providing the reason for your conclusion and avoid simply stating the correct answer.'  # noqa: E501
             f'End the response by printing only a single character "Y" or "N" on a separate line.'  # noqa: E501
         )
-        resp = self.model(grading_str)
+        resp = self.model.invoke(grading_str)
         return self._process_str(resp)
 
     def _process_str(
         self,
         resp: str
     ):
+        resp = resp.content
         rationale = resp.split('\n')[0]
         conclusion = resp.split('\n')[-1]
         if conclusion == 'Y':

@@ -150,7 +150,7 @@ def _get_generation(
                 pre_context,
                 post_context
             )
-            response = str(self.llm(llm_input))
+            response = str(self.llm.invoke(llm_input))
         except Exception as err:
             LOG.error('Unable to fetch generations from the model.')
             raise err

@@ -1,6 +1,6 @@
 from typing import List, Optional
 import os
-import openai
+from openai import OpenAI
 
 from auditor.perturbations.constants import OPENAI_CHAT_COMPLETION
 
@@ -16,7 +16,6 @@ def generate_similar_sentences(
     model: str = OPENAI_CHAT_COMPLETION,
     num_sentences: int = 5,
     temperature: float = 0.0,
-    api_version: Optional[str] = None,
 ) -> List[str]:
     prompt = SIMILAR_SENTENCES_PROMPT.format(
         n=num_sentences,
@@ -30,26 +29,24 @@ def generate_similar_sentences(
     ]
     if api_key is None:
         api_key = os.getenv("OPENAI_API_KEY")
-    openai.api_key = api_key
 
     engine = None
-    if openai.api_type == "azure":
-        engine = model
-        api_version = api_version
+    #if openai.api_type == "azure":
+    #    engine = model
+    #    api_version = api_version
 
-    response = openai.ChatCompletion.create(
+    client = OpenAI(api_key=api_key)
+    response = client.chat.completions.create(
       model=model,
       messages=payload,
       temperature=temperature,
-      engine=engine,
-      api_version=api_version
     )
     return _process_similar_sentence_reponse(response)
 
 
 def _process_similar_sentence_reponse(response):
     sim_sent = []
-    lines = response['choices'][0]['message']['content'].split('\n')
+    lines = response.choices[0].message.content.split('\n')
     for ln in lines:
         r = ln.split('.')[1]
         sim_sent.append(r.strip())

@@ -2,7 +2,7 @@
 import os
 import re
 
-import openai
+from openai import OpenAI
 
 from auditor.perturbations.base import TransformBase
 from auditor.perturbations.constants import OPENAI_CHAT_COMPLETION
@@ -40,8 +40,9 @@ def _init_key(self, api_key: str):
         """Initialize API key"""
         if api_key is None:
             api_key = os.getenv("OPENAI_API_KEY")
+
         self.api_key = api_key
-        openai.api_key = api_key
+        self.client = OpenAI(api_key=api_key)
         return
 
     def _init_model(
@@ -52,11 +53,12 @@ def _init_model(
         """Initialize model, engine and api version"""
         self.model = model
         self.api_version = api_version
-        if openai.api_type == "azure":
-            self.engine = model
-            self.api_version = api_version
-        else:
-            self.engine = None
+        #if openai.api_type == "azure":
+        #    self.engine = model
+        #    self.api_version = api_version
+        #else:
+        #    self.engine = None
+        self.engine = None
         return
 
     def transform(
@@ -73,21 +75,19 @@ def transform(
                 "content": prompt
             }
         ]
-        response = openai.ChatCompletion.create(
+        response = self.client.chat.completions.create(
             model=self.model,
             messages=payload,
             temperature=self.temperature,
-            engine=self.engine,
-            api_version=self.api_version,
         )
         return Paraphrase._process_similar_sentence_reponse(response)
 
     @staticmethod
     def _process_similar_sentence_reponse(response):
-        generation = response['choices'][0]['message']['content']
+        generation = response.choices[0].message.content
         # Use a combination of lookahead and lookback
         # Expr extracts generations between the
-        # bulltet '-' and newline character
+        # bullet '-' and newline character
         sim_sent = re.findall(
             r'(?<=\n-)(.*?)(?=\n)',
             '\n'+generation+'\n'

@@ -300,7 +300,6 @@ def paraphrase(
         temperature: float = 0.0,
         api_key: Optional[str] = None,
         similarity_model: Optional[str] = None,
-        api_version: Optional[str] = None,
     ) -> PerturbedTextDataset:
         """Perturb the sentence by paraphrasing.
 
@@ -312,7 +311,6 @@ def paraphrase(
             api_key (str) : openai API key
             similarity_model : Model to use for scoring the similarity of
                 perturbations.
-            api_version (str, optional): openai API version
 
         Returns:
             PerturbedTextDataset: Perturbed dataset object
@@ -331,7 +329,6 @@ def paraphrase(
                 sentence=sentence,
                 api_key=api_key,
                 model=model,
-                api_version=api_version,
                 num_sentences=self.perturbations_per_sample,
                 temperature=temperature,
             )