from openai import OpenAI
from math import exp
import numpy as np
from IPython.display import display, HTML
import os
# SECURITY: never hard-code API keys in source (the previous revision committed
# two live-looking keys). Read the key from the environment instead; OpenAI()
# picks up OPENAI_API_KEY automatically.
open_ai_key = os.environ.get("OPENAI_API_KEY")
if not open_ai_key:
    raise RuntimeError("Set the OPENAI_API_KEY environment variable before running.")
os.environ["OPENAI_API_KEY"] = open_ai_key
client = OpenAI()
def get_completion(
    messages: list[dict[str, str]],
    model: str = "gpt-4o-mini",
    max_tokens: int = 500,
    temperature: float = 0,
    stop=None,
    seed: int = 123,
    tools=None,
    logprobs: bool | None = None,  # if True, return log probabilities of each output token
    top_logprobs: int | None = None,  # number of most-likely alternatives to return per token
):
    """Call the Chat Completions API and return the full completion object.

    NOTE: the original annotated the return as ``str``, but callers below
    access ``.choices[0].logprobs`` — the raw ``ChatCompletion`` response is
    returned, so the wrong annotation has been removed.
    """
    params = {
        "model": model,
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "stop": stop,
        "seed": seed,
        "logprobs": logprobs,
        "top_logprobs": top_logprobs,
    }
    # "tools" is only legal when non-empty, so it is added conditionally.
    if tools:
        params["tools"] = tools
    completion = client.chat.completions.create(**params)
    return completion
# Prompt template for single-label headline classification; fills {headline}.
# Fixed the "GIve" typo in the instruction text.
CLASSIFICATION_PROMPT = """You will be given a headline of a news article.
Classify the article into one of the following categories: Technology, Politics, Sports, and Art.
MAKE SURE your output is one of the four categories stated. Give a short explanation (at most a 5 word sentence). Start with the explanation and an 'a'
Article headline: {headline}"""
# Sample headlines used to exercise the classifier, roughly one per category.
# (Removed the stray "Package" token fused after the closing bracket, which was
# a syntax error left by notebook extraction.)
headlines = [
    "Tech Giant Unveils Latest Smartphone Model with Advanced Photo-Editing Features.",
    "Local Mayor Launches Initiative to Enhance Urban Public Transport.",
    "Tennis Champion Showcases Hidden Talents in Symphony Orchestra Debut",
]
# Classify each headline and render the top-3 first-token candidates as HTML.
for headline in headlines:
    print(f"\nHeadline: {headline}")
    API_RESPONSE = get_completion(
        [{"role": "user", "content": CLASSIFICATION_PROMPT.format(headline=headline)}],
        model="gpt-4",
        logprobs=True,
        top_logprobs=3,
    )
    # Candidates for the FIRST generated token only (the category word).
    # Renamed from the misleading "top_two_logprobs": the request asks for 3.
    top_three_logprobs = API_RESPONSE.choices[0].logprobs.content[0].top_logprobs
    html_content = ""
    for i, logprob in enumerate(top_three_logprobs, start=1):
        html_content += (
            f"<span style='color: cyan'>Output token {i}:</span> {logprob.token}, "
            f"<span style='color: darkorange'>logprobs:</span> {logprob.logprob}, "
            # exp(logprob) converts a log probability back to a linear one.
            f"<span style='color: magenta'>linear probability:</span> {np.round(np.exp(logprob.logprob)*100,2)}%<br>"
        )
    display(HTML(html_content))
    print("\n")
Headline: Tech Giant Unveils Latest Smartphone Model with Advanced Photo-Editing Features.
Output token 1: Technology, logprobs: -0.58375543, linear probability: 55.78%
Output token 2: Explanation, logprobs: -1.008667, linear probability: 36.47%
Output token 3: a, logprobs: -3.0114517, linear probability: 4.92%
Headline: Local Mayor Launches Initiative to Enhance Urban Public Transport.
Output token 1: Politics, logprobs: -0.0012809455, linear probability: 99.87%
Output token 2: a, logprobs: -7.3205748, linear probability: 0.07%
Output token 3: Explanation, logprobs: -7.4592924, linear probability: 0.06%
Headline: Tennis Champion Showcases Hidden Talents in Symphony Orchestra Debut
Output token 1: Art, logprobs: -0.09257728, linear probability: 91.16%
Output token 2: Sports, logprobs: -2.4768393, linear probability: 8.4%
Output token 3: A, logprobs: -5.93566, linear probability: 0.26%
# Re-run the classification request, capping generation at a single token so the
# response contains only the first token of the category word.
API_RESPONSE = get_completion(
    [{"role": "user", "content": CLASSIFICATION_PROMPT.format(headline=headline)}],
    model="gpt-4",
    logprobs=True,
    top_logprobs=3,
    max_tokens=1,
)


def get_next_token(
    messages: list[dict[str, str]],
    model: str = "gpt-4o-mini",
    temperature=0,
    stop=None,
    seed=123,
    tools=None,
    logprobs=None,
    top_logprobs=None,
) -> str:
    """Request exactly one token of completion and return its text."""
    params = {
        "model": model,
        "messages": messages,
        "max_tokens": 1,  # Limit to only the next token
        "temperature": temperature,
        "stop": stop,
        "seed": seed,
        "logprobs": logprobs,
        "top_logprobs": top_logprobs,
    }
    if tools:
        params["tools"] = tools
    completion = client.chat.completions.create(**params)
    # BUG FIX: ChatCompletionMessage is an object, not a dict. Subscripting it
    # (message['content']) raised "TypeError: 'ChatCompletionMessage' object is
    # not subscriptable" — use attribute access instead.
    return completion.choices[0].message.content


def generate_sequentially(
    initial_messages: list[dict[str, str]],
    model: str = "gpt-4o-mini",
    max_tokens: int = 50,
    temperature: float = 0,
    stop=None,
    seed=123,
    tools=None,
    logprobs=None,
    top_logprobs=None,
) -> str:
    """Generate up to ``max_tokens`` tokens one API call at a time.

    After each call the partial response is fed back to the model as a single
    assistant message so the model continues where it left off.
    """
    # Copy so the caller's message list is not mutated.
    messages = initial_messages.copy()
    current_response = ""
    for _ in range(max_tokens):
        next_token = get_next_token(
            messages=messages,
            model=model,
            temperature=temperature,
            stop=stop,
            seed=seed,
            tools=tools,
            logprobs=logprobs,
            top_logprobs=top_logprobs,
        )
        # Treat a missing token or the sentinel as end-of-sequence.
        if next_token is None or next_token == "<EOS>":
            break
        current_response += next_token
        # BUG FIX: the original APPENDED a new cumulative assistant message on
        # every iteration, duplicating the partial response in the history
        # ("a", "ab", "abc", ...). Keep exactly one assistant message and
        # update it in place.
        if messages and messages[-1]["role"] == "assistant":
            messages[-1] = {"role": "assistant", "content": current_response}
        else:
            messages.append({"role": "assistant", "content": current_response})
    return current_response


API_RESPONSE = generate_sequentially(
    [{"role": "user", "content": CLASSIFICATION_PROMPT.format(headline=headline)}],
    model="gpt-4",
    logprobs=True,
    top_logprobs=3,
    max_tokens=2,
)
# parameters:
** Model
** sampling/ search
*** Greedy, temperature, beam search, top-k, top-p
** Sampling keywords
** selection strategy
*** Varentropy, Entropy

This package allows exploring an LLM by showing the effect of different sampling and selection strategies.
Demo
model.predict(sampling=['top-k', 'blub'], ordering='heuristic', n_samples=20, max_token=128)