Salamandra2b-Instruct-Aina-hack (Recommended)
Salamandra7b-Instruct-Aina-hack (Recommended)
# pip install openai python-dotenv
from dotenv import load_dotenv
import os
from openai import OpenAI

# Load the HF token and endpoint URL from a local .env file.
load_dotenv(".env")
HF_TOKEN = os.environ["HF_TOKEN"]
BASE_URL = os.environ["BASE_URL"]

# The TGI endpoint exposes an OpenAI-compatible /v1/ API, so the stock
# OpenAI client works against it, using the HF token as the API key.
client = OpenAI(
    base_url=BASE_URL + "/v1/",
    api_key=HF_TOKEN,
)

messages = [{"role": "system", "content": "you are a helpful assistant"}]
messages.append({"role": "user", "content": "Tell me something about AI"})

stream = False
chat_completion = client.chat.completions.create(
    model="tgi",  # TGI serves the deployed model under this fixed name
    messages=messages,
    stream=stream,
    max_tokens=1000,
    # temperature=0.1,
    # top_p=0.95,
    # frequency_penalty=0.2,
)

text = ""
if stream:
    for message in chat_completion:
        # delta.content is None on role/terminal chunks; guard against
        # "can only concatenate str (not NoneType)" crashes.
        chunk = message.choices[0].delta.content
        if chunk:
            text += chunk
            print(chunk, end="")
    print(text)
else:
    text = chat_completion.choices[0].message.content
    print(text)
import requests
# Fix: AutoTokenizer was used below without ever being imported (NameError).
from transformers import AutoTokenizer

HF_TOKEN = os.environ["HF_TOKEN"]
BASE_URL = os.environ["BASE_URL"]

# Build the raw prompt locally with the model's own chat template, then call
# TGI's low-level /generate endpoint directly.
model_name = "BSC-LT/salamandra-7b-instruct-aina-hack"
tokenizer = AutoTokenizer.from_pretrained(model_name)

headers = {
    "Accept": "application/json",
    "Authorization": f"Bearer {HF_TOKEN}",
    "Content-Type": "application/json",
}

system_prompt = "you are a helpful assistant"
text = "Tell me something about AI"
message = [{"role": "system", "content": system_prompt}]
message += [{"role": "user", "content": text}]

# tokenize=False returns the templated string (not token ids);
# add_generation_prompt appends the assistant-turn marker so the model answers.
prompt = tokenizer.apply_chat_template(
    message,
    tokenize=False,
    add_generation_prompt=True,
)

payload = {
    "inputs": prompt,
    "parameters": {},  # e.g. {"max_new_tokens": 150, "temperature": 0.1}
}

api_url = BASE_URL + "/generate"
response = requests.post(api_url, headers=headers, json=payload, timeout=60)
response.raise_for_status()  # fail loudly on HTTP errors instead of parsing an error body
print(response.json())
# Replace the placeholders with your HF Inference Endpoint URL and access token.
URL=replace_with_endpoint_hf_url
TOKEN=replace_with_provided_token
# OpenAI-compatible chat endpoint; "stream": true returns server-sent events.
curl "${URL}/v1/chat/completions" -X POST -H "Authorization: Bearer $TOKEN" -H "Content-Type: application/json" -d '{
"model": "tgi",
"messages": [
{
"role": "user",
"content": "What is deep learning?"
}
],
"max_tokens": 150,
"stream": true
}'
# Replace the placeholders with your HF Inference Endpoint URL and access token.
URL=replace_with_endpoint_hf_url
TOKEN=replace_with_provided_token
# Fix: the Content-Type header was split mid-word across two lines
# ("appli / cation/json"), which produced a broken header/command.
# NOTE(review): TGI's /generate schema puts generation options under
# "parameters" (e.g. {"max_new_tokens": 150}); top-level "model"/"max_tokens"/
# "stream" may be rejected by strict versions — confirm against your TGI release.
curl "${URL}/generate" -X POST -H "Authorization: Bearer $TOKEN" -H "Content-Type: application/json" -d '{
"model": "tgi",
"inputs": "what is AI",
"max_tokens": 150,
"stream": false
}'
You can follow this example from Meta, just pointing to the Salamandra models instead of Llama: