
Groq Function Calling

https://github.com/unclecode/funckycall

git clone https://github.com/unclecode/funckycall
cd funckycall
export GROQ_API_KEY=xxxxxxxxxxxxxxx
mkdir .logs
uvicorn app.main:app --reload

Note: keep this terminal running.

Run the following code examples in a new terminal.

Groq base URL (OpenAI-compatible): http://127.0.0.1:8000/proxy/groq/v1
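A minimal sanity check against this endpoint, assuming the GROQ_API_KEY exported above and the /chat/completions route used in the examples further down:

import os, requests

# Send a plain chat request (no tools) through the local proxy
response = requests.post(
    "http://127.0.0.1:8000/proxy/groq/v1/chat/completions",
    headers={
        "Authorization": f"Bearer {os.environ['GROQ_API_KEY']}",
        "Content-Type": "application/json",
    },
    json={
        "model": "mixtral-8x7b-32768",
        "messages": [{"role": "user", "content": "Say hello in one sentence."}],
    },
)
print(response.status_code)
print(response.json()["choices"][0]["message"]["content"])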

Phidata

from phi.llm.openai.like import OpenAILike
from phi.assistant import Assistant
from phi.tools.duckduckgo import DuckDuckGo
import os, json

my_groq = OpenAILike(
    model="mixtral-8x7b-32768",
    api_key=os.environ["GROQ_API_KEY"],
    base_url="http://127.0.0.1:8000/proxy/groq/v1"
)
assistant = Assistant(
    llm=my_groq,
    tools=[DuckDuckGo()], show_tool_calls=True, markdown=True
)
assistant.print_response("Whats happening in France? Summarize top stories with sources, very short and concise.", stream=False)

# Ollama
my_ollama = OpenAILike(
    model="mistral",
    api_key="",
    base_url="http://localhost:11235/proxy/ollama/v1"
)
ollama_assistant = Assistant(
    llm=my_ollama,
    tools=[DuckDuckGo()], show_tool_calls=True, markdown=True
)
ollama_assistant.print_response("Whats happening in France? Summarize top stories with sources, very short and concise.", stream=False)
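Phidata can also wrap plain Python functions as tools, so custom helpers can sit alongside DuckDuckGo. A minimal sketch reusing the my_groq LLM defined above (the get_current_time helper is hypothetical, not part of phidata):

from datetime import datetime

def get_current_time() -> str:
    """Returns the current local date and time as a string."""
    return datetime.now().strftime("%Y-%m-%d %H:%M:%S")

time_assistant = Assistant(
    llm=my_groq,
    tools=[get_current_time], show_tool_calls=True, markdown=True
)
time_assistant.print_response("What is the current date and time?", stream=False)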

Function Definition

from duckduckgo_search import DDGS
import requests, os
api_key=os.environ["GROQ_API_KEY"]
import json
header = {
    "Authorization": f"Bearer {api_key}",
    "Content-Type": "application/json"
}
proxy_url = "http://127.0.0.1:8000/proxy/groq/v1/chat/completions"


def duckduckgo_search(query, max_results=None):
    """
    Use this function to search DuckDuckGo for a query.
    """
    with DDGS() as ddgs:
        return [r for r in ddgs.text(query, safesearch='off', max_results=max_results)]
    
def duckduckgo_news(query, max_results=None):
    """
    Use this function to get the latest news from DuckDuckGo.
    """    
    with DDGS() as ddgs:
        return [r for r in ddgs.news(query, safesearch='off', max_results=max_results)]

function_map = {
    "duckduckgo_search": duckduckgo_search,
    "duckduckgo_news": duckduckgo_news,
}

request = {
    "messages": [
        {
            "role": "system",
            "content": "YOU MUST FOLLOW THESE INSTRUCTIONS CAREFULLY.\n<instructions>\n1. Use markdown to format your answers.\n</instructions>"
        },
        {
            "role": "user",
            "content": "Whats happening in France? Summarize top stories with sources, very short and concise."
        }
    ],
    "model": "mixtral-8x7b-32768",
    "tool_choice": "auto",
    "tools": [
        {
            "type": "function",
            "function": {
                "name": "duckduckgo_search",
                "description": "Use this function to search DuckDuckGo for a query.\n\nArgs:\n    query(str): The query to search for.\n    max_results (optional, default=5): The maximum number of results to return.\n\nReturns:\n    The result from DuckDuckGo.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "query": {
                            "type": "string"
                        },
                        "max_results": {
                            "type": [
                                "number",
                                "null"
                            ]
                        }
                    }
                }
            }
        },
        {
            "type": "function",
            "function": {
                "name": "duckduckgo_news",
                "description": "Use this function to get the latest news from DuckDuckGo.\n\nArgs:\n    query(str): The query to search for.\n    max_results (optional, default=5): The maximum number of results to return.\n\nReturns:\n    The latest news from DuckDuckGo.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "query": {
                            "type": "string"
                        },
                        "max_results": {
                            "type": [
                                "number",
                                "null"
                            ]
                        }
                    }
                }
            }
        }
    ]
}

response = requests.post(
    proxy_url,
    headers=header,
    json=request
)
# Check if the request was successful
if response.status_code == 200:
    # Process the response data (if needed)
    res = response.json()
    message = res['choices'][0]['message']
    tools_response_messages = []
    if not message['content'] and 'tool_calls' in message:
        for tool_call in message['tool_calls']:
            tool_name = tool_call['function']['name']
            tool_args = tool_call['function']['arguments']
            tool_args = json.loads(tool_args)
            if tool_name not in function_map:
                print(f"Error: {tool_name} is not a valid function name.")
                continue
            tool_func = function_map[tool_name]
            tool_response = tool_func(**tool_args)
            tools_response_messages.append({
                "role": "tool", "content": json.dumps(tool_response)
            })
    
        if tools_response_messages:
            request['messages'] += tools_response_messages
            response = requests.post(
                proxy_url,
                headers=header,
                json=request
            )
            if response.status_code == 200:
                res = response.json()
                print(res['choices'][0]['message']['content'])
            else:
                print("Error:", response.status_code, response.text)
    else:
        print(message['content'])
else:
    print("Error:", response.status_code, response.text)

Tools

import requests, os
api_key = os.environ["GROQ_API_KEY"]
header = {
    "Authorization": f"Bearer {api_key}",
    "Content-Type": "application/json"
}

proxy_url = "http://127.0.0.1:8000/proxy/groq/v1/chat/completions"


request = {
    "messages": [
        {
            "role": "system",
            "content": "YOU MUST FOLLOW THESE INSTRUCTIONS CAREFULLY.\n<instructions>\n1. Use markdown to format your answers.\n</instructions>",
        },
        {
            "role": "user",
            "content": "Whats happening in France? Summarize top stories with sources, very short and concise. Also please search about the histoy of france as well.",
        },
    ],
    "model": "mixtral-8x7b-32768",
    "tool_choice": "auto",
    "tools": [
        {
            "type": "function",
            "function": {
                "name": "duckduck.search",
            },
        },
        {
            "type": "function",
            "function": {
                "name": "duckduck.news",
            },
        },
    ],
}

response = requests.post(
    proxy_url,
    headers=header,
    json=request,
)

if response.status_code == 200:
    res = response.json()
    print(res["choices"][0]["message"]["content"])
else:
    print("Error:", response.status_code, response.text)

AI Real-Time Code Iteration

Repo: https://github.com/freuk/iter

  1. Download Nix: https://nixos.org/download

  2. On Mac, install with:

sh <(curl -L https://nixos.org/nix/install)

  3. Install iter via a Nix profile:

nix profile install github:freuk/iter#iter --extra-experimental-features nix-command --extra-experimental-features flakes

Example 1

app.yml

model: "mixtral-8x7b-32768"
feedback:
  execute: ["python", "{file}"]
  pylint: ["pylint", "{file}"]
legacy-api: false

app.py

# Empty

Example 2

demo.yml

model: "mixtral-8x7b-32768"
# model: "llama2-70b-4096" <- somewhat better at Haskell
# model: "gemma-7b-it" <- doesn't follow the hardcoded prompts very well

# these available templates get replaced in strings: {file}, {basename}
feedback:
  # [<cmd>, <arg1>, <arg2>, ..]
  execute: ["python", "{file}"]
  country: ["python", "{file}", "--country=USA"]
  pylint: ["pylint", "{file}"]

# set to true if you have an old-style 'request_manager' token.
legacy-api: false

# define this if you'd like to log the full LLM interaction sequence to a
# file.
# log-llm: "/tmp/iter.log"

demo.py

import argparse
import json

# List of customer dictionaries
customers = [
    {"name": "John Doe", "country": "USA" },
    {"name": "Jane Smith", "country": "Canada"},
    {"name": "Alice Johnson", "country": "UK"},
    {"name": "Bob Brown", "country": "Australia"}
]

def filter_customers_by_country(country):
    """
    Filter customers based on the given country.

    :param country: The target country for filtering.
    :return: A list of customers who are from the given country.
    """
    return [customer for customer in customers if customer["country"] == country]

def load_customers_from_file(file_path):
    """
    Load customers from a JSON file.

    :param file_path: The path to the JSON file containing customers.
    """
    with open(file_path, 'r') as file:
        global customers
        customers = json.load(file)

def main(country=None, file_path=None):
    """
    The main function to filter and print customers.

    :param country: Filter customers by country.
    :param file_path: Path to the file containing customers.
    """
    if file_path:
        load_customers_from_file(file_path)
    if country:
        filtered_customers = filter_customers_by_country(country)
    else:
        filtered_customers = customers
    for customer in filtered_customers:
        print(customer)

if __name__ == "__main__":
    # Initialize the argument parser
    parser = argparse.ArgumentParser()

    # Add arguments
    parser.add_argument("--country", help="Filter customers by country")
    parser.add_argument("--file", help="Path to the file containing customers")

    # Parse arguments
    args = parser.parse_args()

    # Call the main function
    main(args.country, args.file)


Example prompts and commands to try inside iter:

  • Remove the ability to load customers from a file
  • :f execute
  • :f pylint
  • Improve this file for SWE best practices. Don’t make it longer
  • reflection

Note: Examples are in the demo folder of the main repo


MusicLang AI Music Generator

conda create -n musiclang python=3.10 -y
conda activate musiclang
pip install musiclang_predict

import time
from musiclang_predict import MusicLangPredictor, corpus

# Initialize MusicLangPredictor
ml = MusicLangPredictor('musiclang/musiclang-v2')

# 1. Generating a free music idea
# 1024 tokens ~ 25s of music (depending on the number of instruments)
nb_tokens_free = 1024 # {type:"slider", min:32, max:1024, step:32}
temperature_free = 0.9 # {type:"slider", min:0.0, max:1, step:0.05}
top_p_free = 1.0 # {type:"slider", min:0.0, max:1.0, step:0.05}
seed_free = 16 # {type: "integer"} Set to 0 to unset seed

start = time.time()
score_free = ml.predict(
    nb_tokens=nb_tokens_free,
    temperature=temperature_free,
    topp=top_p_free,
    rng_seed=seed_free
)
end = time.time()
print(f"Free music idea generated in {end - start} seconds")
score_free.to_midi('music.mid')

# 2. Controlling the chord progression
# Chord qualities: M, m, 7, m7b5, sus2, sus4, m7, M7, dim, dim7.
chord_progression = "Am CM Dm E7 Am" # {type: "string"}
nb_tokens_chords = 1024 # {type:"slider", min:32, max:1024, step:32}
temperature_chords = 0.8 # {type:"slider", min:0.0, max:1, step:0.05}
top_p_chords = 1.0 # {type:"slider", min:0.0, max:1.0, step:0.05}
seed_chords = 42 # {type: "integer"}

start = time.time()
score_chords = ml.predict_chords(
    chord_progression,
    time_signature=(4, 4),
    temperature=temperature_chords,
    topp=top_p_chords,
    rng_seed=seed_chords
)
end = time.time()
print(f"Music with chord progression generated in {end - start} seconds")
score_chords.to_midi('chord.mid', tempo=110, time_signature=(4, 4))

# 3. Using a prompt to continue your own music
song_name_continue = 'boney_m_ma_baker' # ['bach_847', 'bob_marley_jammin', 'boney_m_ma_baker', 'mozart_alla_turca', 'white_stripes_seven_nation_army']
nb_tokens_continue = 1504 # {type:"slider", min:32, max:2048, step:32}
temperature_continue = 0.85 # {type:"slider", min:0.0, max:1, step:0.05}
top_p_continue = 1.0 # {type:"slider", min:0.0, max:1.0, step:0.05}
seed_continue = 1000 # {type: "integer"}

start = time.time()
score_continue = ml.predict(
    score=corpus.get_midi_path_from_corpus(song_name_continue),
    nb_tokens=nb_tokens_continue,
    prompt_chord_range=(0,4),
    temperature=temperature_continue,
    topp=top_p_continue,
    rng_seed=seed_continue
)
end = time.time()
print(f"Continued music generated in {end - start} seconds")
score_continue.to_midi('continue.mid', tempo=120, time_signature=(4, 4))

# 4. Combining both prompt and chord progression
song_name_with_chords = 'bach_847' # ['bach_847', 'bob_marley_jammin', 'boney_m_ma_baker', 'mozart_alla_turca', 'white_stripes_seven_nation_army']
chord_progression_with_prompt = "Cm C7/E Fm F#dim G7 Cm" # {type: "string"}
nb_tokens_with_chords = 1024 # {type:"slider", min:32, max:1024, step:32}
temperature_with_chords = 0.8 # {type:"slider", min:0.0, max:1, step:0.05}
top_p_with_chords = 1.0 # {type:"slider", min:0.0, max:1.0, step:0.05}
seed_with_chords = 3666 # {type: "integer"}

start = time.time()
score_combined = ml.predict_chords(
    chord_progression_with_prompt,
    score=corpus.get_midi_path_from_corpus(song_name_with_chords),
    time_signature=(4, 4),
    nb_tokens=nb_tokens_with_chords,
    prompt_chord_range=(0,4),
    temperature=temperature_with_chords,
    topp=top_p_with_chords,
    rng_seed=seed_with_chords
)
end = time.time()
print(f"Combined music generated in {end - start} seconds")
score_combined.to_midi('songchord.mid', tempo=110, time_signature=(4, 4))
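To listen to the generated MIDI files without opening a DAW, one option is the midi2audio package; a minimal sketch, assuming the fluidsynth binary and a SoundFont are installed (this is separate from MusicLang itself):

# pip install midi2audio  (also requires fluidsynth and a .sf2 soundfont on the system)
from midi2audio import FluidSynth

fs = FluidSynth()  # or FluidSynth('/path/to/soundfont.sf2')
fs.midi_to_audio('music.mid', 'music.wav')  # render the free-generation output to WAV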

https://github.com/MusicLang/musiclang_predict


Tamil LLM

Tamil Large Language Model

from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("mervinpraison/tamil-large-language-model-7b-v1.0")
model = AutoModelForCausalLM.from_pretrained("mervinpraison/tamil-large-language-model-7b-v1.0")

query_to_llm = "ஆரோக்கியமாக இருப்பதற்கான இரண்டு வழிகள்"
inputs = tokenizer.encode(query_to_llm, return_tensors="pt")
outputs = model.generate(inputs, max_length=200)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)

Output

ஆரோக்கியமாக இருப்பதற்கான இரண்டு வழிகள்:

1. உடல் நலம்
2. மன நலம்

To speed up model downloads from Hugging Face (optional):

pip install huggingface_hub[hf_transfer]
export HF_HUB_ENABLE_HF_TRANSFER=1

Metrics in Fine Tuning a Large Language Model

  • Identifying overfitting:
    • train/loss decreases, but train/train_loss stays flat or increases.
  • Signs of a good fine-tune:
    • Decreasing train/loss and train/train_loss.
    • Stable or decreasing train/grad_norm.
    • An appropriate train/learning_rate.
Metric | Description | Identification | Calculation
--- | --- | --- | ---
epoch | Number of full passes over the training dataset. | Higher values indicate more training. | Total dataset passes.
global_step | Total number of training steps completed. | Tracks progress. High value indicates more training steps. | Incremented with each batch processed.
grad_norm | Norm of gradients. | Indicates if gradients are exploding/vanishing. Stable or decreasing values are good. | Norm calculation of gradients.
learning_rate | Learning rate at the current step. | Optimal learning rate improves learning efficiency. | Set by the learning rate scheduler.
loss | Average loss over the current training step. | Decreasing loss indicates model learning. | Mean of loss function values.
total_flos | Total floating-point operations performed. | High value indicates more computation. | Sum of all FLOPs in training.
train_loss | Loss on the training dataset. | Decreasing value indicates effective learning. | Calculated similarly to loss.
train_runtime | Total runtime of the training process. | Efficiency measure. Shorter is better, but depends on dataset size and model complexity. | Sum of all training times.
train_samples_per_second | Training throughput, samples processed per second. | Higher values indicate better performance. | Total samples / total training time.
train_steps_per_second | Number of training steps processed per second. | Higher values indicate better performance. | Total steps / total training time.
The corresponding keys as logged during training:

train/epoch
train/global_step
train/grad_norm
train/learning_rate
train/loss
train/total_flos
train/train_loss
train/train_runtime
train/train_samples_per_second
train/train_steps_per_second
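If training goes through the Hugging Face Trainer (or trl's SFTTrainer), these values can be read back from the trainer's log history after the run; a minimal sketch, assuming an existing trainer object:

# Assumes `trainer` is a transformers Trainer / trl SFTTrainer that has finished training
history = trainer.state.log_history  # list of dicts, one per logging step plus a final summary

step_loss = [(h["step"], h["loss"]) for h in history if "loss" in h]
lr = [(h["step"], h["learning_rate"]) for h in history if "learning_rate" in h]

print(step_loss[-5:])  # last few (step, train/loss) pairs
print(lr[-1])          # most recent learning rate
print(history[-1])     # final entry usually holds train_loss, train_runtime, etc.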
Metric | Uses | Impact on Training
--- | --- | ---
Disk I/O Utilization (MB) | Measures the amount of data read/written to disk, indicating I/O performance. | High I/O can slow down data loading, affecting training speed.
Disk Utilization (%) | Shows how much of the disk capacity is in use, can indicate if storage is running out. | Insufficient disk space can interrupt training or limit dataset size.
Disk Utilization (GB) | Absolute disk space used, useful for monitoring available storage. | Running out of space can halt training processes.
GPU Memory Allocated (%) | Percentage of GPU memory in use, helps in understanding memory usage and limits. | High memory usage can lead to out-of-memory errors, interrupting training.
GPU Power Usage (%) | Indicates how much of the GPU’s power capacity is being used, can signal efficiency or overload. | Excessive power usage can indicate inefficient training, potentially leading to thermal throttling.
GPU Power Usage (W) | Measures the actual power consumption of the GPU in watts, important for energy management. | Directly affects energy cost and can indicate inefficiency in training algorithms.
GPU Temp (℃) | GPU temperature, critical for preventing overheating and managing cooling solutions. | Overheating can lead to thermal throttling, reducing training performance.
GPU Time Spent Accessing Memory (%) | Shows the percentage of time the GPU spends accessing memory, indicating potential memory bottlenecks. | High values may indicate memory access inefficiencies, slowing down training.
GPU Utilization (%) | Overall GPU usage, useful for monitoring workload and performance optimization. | Low utilization can indicate inefficiencies in training, while high usage can signal good resource use.
Network Traffic (bytes) | Amount of incoming/outgoing network data, crucial for bandwidth management and performance analysis. | High network traffic can slow down training, especially in distributed setups.
Process CPU Threads In Use | Number of CPU threads a process is using, indicates process concurrency and CPU usage. | More threads can improve training speed, but excessive use may lead to contention.
Process CPU Utilization (%) | How much CPU a process is using, helps identify resource-intensive processes. | High CPU usage by non-training processes can detract from training resources.
Process GPU Memory Allocated (%) | GPU memory usage by a specific process, useful for identifying memory-intensive processes. | Excessive memory use by one process can limit resources for training other models.
Process GPU Power Usage (%) | Percentage of GPU power used by a process, helps in identifying power-intensive applications. | High power usage by training can increase costs and risk overheating.
Process GPU Power Usage (W) | Actual GPU power consumption by a process, important for detailed energy management. | Directly impacts energy efficiency and costs of training.
Process GPU Temp (℃) | Temperature of GPU for a specific process, useful for monitoring thermal performance per process. | High temperatures during training can lead to hardware throttling, slowing down processes.
Process GPU Time Spent Accessing Memory (%) | Shows how much time a process’s GPU operations spend accessing memory, indicating efficiency. | Inefficient memory access can slow down training, especially for large models.
Process GPU Utilization (%) | Percentage of GPU used by a process, helps in workload distribution and optimization. | Optimizing GPU utilization can enhance training efficiency and speed.
Process Memory Available (non-swap) (MB) | Memory available to a process, important for resource allocation and avoiding out-of-memory errors. | Insufficient memory can interrupt or slow down training processes.
Process Memory In Use (non-swap) (%) | Percentage of memory used by a process, indicates if a process is memory-intensive. | High memory usage can limit the ability to run multiple training jobs simultaneously.
Process Memory In Use (non-swap) (MB) | Absolute memory used by a process, useful for memory management and optimization. | Managing this can help prevent out-of-memory errors during training.
System CPU Utilization (per core) (%) | Usage of CPU per core, important for identifying imbalanced workloads and performance bottlenecks. | Imbalanced utilization can indicate that the training is not efficiently using available CPU resources.
System Memory Utilization (%) | Overall system memory usage, critical for understanding system load and preventing memory exhaustion. | High system memory use can slow down training or lead to swapping, significantly impacting performance.
system/gpu.0.memoryAllocatedBytes | GPU memory allocated, typically used for detailed monitoring of GPU resource usage. | Can indicate if the training is using GPU resources efficiently, affecting model complexity and batch size.
system/gpu.process.0.memoryAllocatedBy | Identifies processes by how much GPU memory they have allocated, crucial for resource management and optimization. | Helps in optimizing memory use among multiple training jobs, ensuring efficient use of GPU memory.

Configuration | Optimal Number/Range | Impact on Training | Explanation | Optimal Number/Range Explanation
--- | --- | --- | --- | ---
output_dir | N/A | No direct impact | Determines where to save model checkpoints and predictions. | User-specific, based on storage preferences.
num_train_epochs | 1-5 | Medium | Defines how many times the entire dataset is passed through the model, affecting overall training duration. | Depends on dataset size and complexity; more epochs can improve accuracy up to a point.
fp16/bf16 | True (if supported) | High | Enables mixed-precision training, significantly improving speed and reducing memory usage on compatible hardware. | Use if hardware supports it for efficiency; A100 for bf16, most modern GPUs for fp16.
per_device_train_batch_size | 8-32 | High | Determines how much data is processed per GPU, impacting memory usage and potentially training speed. | Adjust based on GPU memory; larger batches can improve efficiency but require more memory.
per_device_eval_batch_size | 8-32 | Medium | Affects evaluation speed and memory usage but does not directly impact training effectiveness. | Similar to training batch size, adjust based on memory and evaluation speed requirements.
gradient_accumulation_steps | 1-32 | High | Allows for larger effective batch sizes without increasing GPU memory requirements, affecting convergence and stability. | Increases effective batch size, improving training stability; optimal value depends on memory constraints.
gradient_checkpointing | TRUE | Medium | Reduces memory usage at the cost of computational overhead, enabling training of larger models. | Useful for training large models on hardware with limited memory.
max_grad_norm | 0.1-10 | Medium | Prevents exploding gradients by clipping, crucial for model stability. | Prevents instability in training; optimal range depends on model and data.
learning_rate | 1e-5 to 5e-4 | High | Directly influences the speed and quality of convergence. | Depends on model size and dataset; smaller models and datasets might require lower rates.
weight_decay | 0-0.1 | Medium | Regularization parameter, helps prevent overfitting by penalizing large weights. | Helps in controlling overfitting; exact value depends on model complexity and dataset.
optim | "adamw", "sgd", etc. | Medium | Choice of optimizer affects convergence speed and stability. | "adamw" is widely used for its balance between performance and stability.
lr_scheduler_type | "linear", "cosine", etc. | Medium | Influences how learning rate changes over time, impacting convergence and training effectiveness. | "cosine" for natural decay, "linear" for steady decrease; choice depends on training length and preference.
max_steps | -1 or positive integer | Medium | If set, defines the total number of training steps, overriding num_train_epochs, affecting training length. | -1 for epoch-based training, positive integer for fine-grained control over training duration.
warmup_ratio | 0-0.1 | Medium | Gradually increases learning rate at the start of training, can improve model stability and performance. | 0.06 is a common starting point; adjust based on total training steps.
group_by_length | TRUE | Medium | Increases training efficiency by reducing padding needs, can speed up training and reduce memory usage. | Recommended for efficiency, especially with variable-length sequences.
save_steps | 100-1000 | Low | Frequency of saving model checkpoints, impacts disk usage but not training performance directly. | Adjust based on training length and checkpoint management preferences.
logging_steps | 10-100 | Low | Frequency of logging metrics, useful for monitoring but does not impact training effectiveness. | Adjust for balance between detailed monitoring and logging overhead.
packing | TRUE | High | Increases data efficiency for short sequences, significantly speeding up training and reducing memory usage. | Recommended for datasets with many short sequences for efficiency gains.
dataset_text_field | N/A | No direct impact | Specifies which dataset field to use for text, crucial for correctly loading data. | Depends on dataset structure.
dataset_num_proc | 1-4 | Low | Number of processes for dataset loading and processing, can speed up data preparation. | Depends on available CPU resources; higher values can speed up preprocessing.
device_map | "auto" or specific mapping | Low to Medium | Automates device allocation for model and data, potentially optimizing GPU utilization. | "auto" for convenience, specific mapping for manual control over multi-GPU training.
report_to | "wandb", "none", etc. | Low | Determines where to send training logs and metrics, useful for monitoring but does not directly affect training performance. | "wandb" for comprehensive monitoring; "none" if logging is not required.
max_seq_length | 128-512 for tasks, up to 2048 for specific needs | High | Determines the maximum sequence length for model inputs, affecting memory usage and processing speed. | Depends on task and hardware limitations; longer lengths may require more memory.
dtype | "float16", "bfloat16", or None for auto | High | Specifies data type for training, affecting memory usage and computational efficiency. | "float16" for efficiency on compatible GPUs; "bfloat16" for newer architectures.
load_in_4bit | False or True | Medium | Enables 4-bit quantization, potentially reducing memory usage further. | Experimental; use if supported and memory constraints are very tight.
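Many of the parameters above map directly onto transformers.TrainingArguments; a minimal sketch using mid-range values from the table (the output directory and logging target are placeholders):

from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./results",          # placeholder path
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,
    learning_rate=2e-4,
    weight_decay=0.01,
    max_grad_norm=1.0,
    optim="adamw_torch",
    lr_scheduler_type="cosine",
    warmup_ratio=0.06,
    group_by_length=True,
    fp16=True,                       # or bf16=True on supported hardware
    logging_steps=25,
    save_steps=500,
    report_to="none",                # or "wandb" for the metrics discussed above
)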

Axolotl Fine Tuning

In Terminal

git clone https://github.com/OpenAccess-AI-Collective/axolotl
cd axolotl
sudo usermod -aG docker $USER # Add the current user to the docker group
newgrp docker
docker run --gpus '"all"' --rm -it winglian/axolotl:main-latest
accelerate launch -m axolotl.cli.train examples/openllama-3b/qlora.yml

Editing the qlora.yml training file from the terminal using nano

nano examples/openllama-3b/qlora.yml
(Ctrl+o) = Save
(Ctrl+x) = Exit

OpenLlama 3B QLoRA

base_model: openlm-research/open_llama_3b_v2
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
load_in_8bit: false
load_in_4bit: true
strict: false
push_dataset_to_hub:
datasets:
  - path: mhenrichsen/alpaca_2k_test
    type: alpaca
dataset_prepared_path:
val_set_size: 0.05
adapter: qlora
lora_model_dir:
sequence_len: 1024
sample_packing: true
lora_r: 8
lora_alpha: 32
lora_dropout: 0.05
lora_target_modules:
lora_target_linear: true
lora_fan_in_fan_out:
wandb_project:
wandb_entity:
wandb_watch:
wandb_name:
wandb_log_model:
output_dir: ./qlora-out
gradient_accumulation_steps: 1
micro_batch_size: 1
num_epochs: 1
optimizer: paged_adamw_32bit
torchdistx_path:
lr_scheduler: cosine
learning_rate: 0.0002
train_on_inputs: false
group_by_length: false
bf16: false
fp16: true
tf32: false
gradient_checkpointing: true
early_stopping_patience:
resume_from_checkpoint:
local_rank:
logging_steps: 1
xformers_attention:
flash_attention: true
gptq_groupsize:
gptq_model_v1:
warmup_steps: 20
evals_per_epoch: 4
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.1
fsdp:
fsdp_config:
special_tokens:
  bos_token: "<s>"
  eos_token: "</s>"
  unk_token: "<unk>"

Output

(PeftModelForCausalLM(   (base_model): LoraModel(     (model): LlamaForCausalLM(       (model): LlamaModel(         (embed_tokens): Embedding(32000, 3200, padding_idx=0)         (layers): ModuleList(           (0-25): 26 x LlamaDecoderLayer(             (self_attn): LlamaAttention(               (q_proj): lora.Linear4bit(                 (base_layer): Linear4bit(in_features=3200, out_features=3200, bias=False)                 (lora_dropout): ModuleDict(                   (default): Dropout(p=0.05, inplace=False)                 )                 (lora_A): ModuleDict(                   (default): Linear(in_features=3200, out_features=8, bias=False)                 )                 (lora_B): ModuleDict(                   (default): Linear(in_features=8, out_features=3200, bias=False)                 )                 (lora_embedding_A): ParameterDict()                 (lora_embedding_B): ParameterDict()               )               (k_proj): lora.Linear4bit(                 (base_layer): Linear4bit(in_features=3200, out_features=3200, bias=False)                 (lora_dropout): ModuleDict(                   (default): Dropout(p=0.05, inplace=False)                 )                 (lora_A): ModuleDict(                   (default): Linear(in_features=3200, out_features=8, bias=False)                 )                 (lora_B): ModuleDict(                   (default): Linear(in_features=8, out_features=3200, bias=False)                 )                 (lora_embedding_A): ParameterDict()                 (lora_embedding_B): ParameterDict()               )               (v_proj): lora.Linear4bit(                 (base_layer): Linear4bit(in_features=3200, out_features=3200, bias=False)                 (lora_dropout): ModuleDict(                   (default): Dropout(p=0.05, inplace=False)                 )                 (lora_A): ModuleDict(                   (default): Linear(in_features=3200, out_features=8, bias=False)                 )                 (lora_B): ModuleDict(                   (default): Linear(in_features=8, out_features=3200, bias=False)                 )                 (lora_embedding_A): ParameterDict()                 (lora_embedding_B): ParameterDict()               )               (o_proj): lora.Linear4bit(                 (base_layer): Linear4bit(in_features=3200, out_features=3200, bias=False)                 (lora_dropout): ModuleDict(                   (default): Dropout(p=0.05, inplace=False)                 )                 (lora_A): ModuleDict(                   (default): Linear(in_features=3200, out_features=8, bias=False)                 )                 (lora_B): ModuleDict(                   (default): Linear(in_features=8, out_features=3200, bias=False)                 )                 (lora_embedding_A): ParameterDict()                 (lora_embedding_B): ParameterDict()               )               (rotary_emb): LlamaRotaryEmbedding()             )             (mlp): LlamaMLP(               (gate_proj): lora.Linear4bit(                 (base_layer): Linear4bit(in_features=3200, out_features=8640, bias=False)                 (lora_dropout): ModuleDict(                   (default): Dropout(p=0.05, inplace=False)                 )                 (lora_A): ModuleDict(                   (default): Linear(in_features=3200, out_features=8, bias=False)                 )                 (lora_B): ModuleDict(                   (default): Linear(in_features=8, out_features=8640, bias=False)                 )                 (lora_embedding_A): 
ParameterDict()                 (lora_embedding_B): ParameterDict()               )               (up_proj): lora.Linear4bit(                 (base_layer): Linear4bit(in_features=3200, out_features=8640, bias=False)                 (lora_dropout): ModuleDict(                   (default): Dropout(p=0.05, inplace=False)                 )                 (lora_A): ModuleDict(                   (default): Linear(in_features=3200, out_features=8, bias=False)                 )                 (lora_B): ModuleDict(                   (default): Linear(in_features=8, out_features=8640, bias=False)                 )                 (lora_embedding_A): ParameterDict()                 (lora_embedding_B): ParameterDict()               )               (down_proj): lora.Linear4bit(                 (base_layer): Linear4bit(in_features=8640, out_features=3200, bias=False)                 (lora_dropout): ModuleDict(                   (default): Dropout(p=0.05, inplace=False)                 )                 (lora_A): ModuleDict(                   (default): Linear(in_features=8640, out_features=8, bias=False)                 )                 (lora_B): ModuleDict(                   (default): Linear(in_features=8, out_features=3200, bias=False)                 )                 (lora_embedding_A): ParameterDict()                 (lora_embedding_B): ParameterDict()               )               (act_fn): SiLU()             )             (input_layernorm): LlamaRMSNorm()             (post_attention_layernorm): LlamaRMSNorm()           )         )         (norm): LlamaRMSNorm()       )       (lm_head): Linear(in_features=3200, out_features=32000, bias=False)     )   ) ), LlamaTokenizer(name_or_path='openlm-research/open_llama_3b_v2', vocab_size=32000, model_max_length=2048, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '</s>'}, clean_up_tokenization_spaces=False),  added_tokens_decoder={       0: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),      1: AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),        2: AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True), })

Upload to Hugging Face

huggingface-cli upload USERNAME/MY-MODELNAME qlora-out
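For a quick local generation test before or after uploading, one option is to load the adapter from ./qlora-out with peft; a minimal sketch, assuming the adapter directory is accessible on the host and the base model can be downloaded:

from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

# Loads openlm-research/open_llama_3b_v2 plus the LoRA adapter saved in ./qlora-out
model = AutoPeftModelForCausalLM.from_pretrained("./qlora-out", device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("openlm-research/open_llama_3b_v2")

# Alpaca-style prompt, matching the training dataset format (example prompt is hypothetical)
prompt = "### Instruction:\nGive three tips for staying healthy.\n\n### Response:\n"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))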

Visualise OpenAI Embeddings

from openai import OpenAI
import pandas as pd
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import numpy as np
client = OpenAI()

# Function to fetch embeddings from OpenAI API
def get_embeddings(text):
    response = client.embeddings.create(
        input=text,
        model="text-embedding-3-small"
    )
    return response.data[0].embedding

# Load the text content from your file and simulate a dataframe similar to the loaded CSV
with open('content.txt', 'r', encoding='utf-8') as file:
    text_content = file.read()
    texts = text_content.split('.')  # Split the file's content at each period

# Keep only non-empty text segments
texts = [text.strip() for text in texts if text.strip() != '']

# Build a dataframe and compute an embedding for each segment
df = pd.DataFrame({'text': texts})
df['embedding'] = df['text'].apply(lambda x: get_embeddings(x.strip()))

# Convert embeddings into a format suitable for TSNE
matrix = np.array(df['embedding'].tolist())
print(matrix.shape)

tsne = TSNE(n_components=2, perplexity=5, random_state=42, init='random', learning_rate=200)
vis_dims = tsne.fit_transform(matrix)
print(vis_dims)
print(vis_dims.shape)

# Plotting
plt.figure(figsize=(10, 10))
x = vis_dims[:, 0]
y = vis_dims[:, 1]
plt.scatter(x, y, alpha=0.5)

plt.title("Text Embeddings Visualized with t-SNE")
plt.xlabel('Dimension 1')
plt.ylabel('Dimension 2')
plt.show()

content.txt

Text splitting in LangChain is a critical feature that facilitates the division of large texts into smaller, manageable segments. 
This capability is vital for improving comprehension and processing efficiency, especially in tasks that require detailed analysis or extraction of specific contexts.

ChatGPT, developed by OpenAI, represents a leap forward in natural language processing technologies.
It's a conversational AI model capable of understanding and generating human-like text, allowing for dynamic interactions and providing responses that are remarkably coherent and contextually relevant. ChatGPT has been integrated into a multitude of applications, revolutionizing the way we interact with machines and access information.

By leveraging LangChain for text splitting, users can efficiently navigate and analyze vast amounts of text data, facilitating a deeper understanding and more insightful conclusions.
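A possible extension, sketched under the assumption that matrix and vis_dims from the script above are still in memory: cluster the embeddings with KMeans and colour the t-SNE points by cluster.

from sklearn.cluster import KMeans

# Cluster the original high-dimensional embeddings (not the 2-D projection)
n_clusters = 3  # arbitrary choice for illustration
kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
labels = kmeans.fit_predict(matrix)

plt.figure(figsize=(10, 10))
plt.scatter(vis_dims[:, 0], vis_dims[:, 1], c=labels, cmap='viridis', alpha=0.6)
plt.title("Text Embeddings Clustered with KMeans (t-SNE projection)")
plt.xlabel('Dimension 1')
plt.ylabel('Dimension 2')
plt.show()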

Anthropic Tools

from tool_use_package.tools.base_tool import BaseTool
from tool_use_package.tool_user import ToolUser

# 1. Define the Tool
class AdditionTool(BaseTool):
    """Adds together two numbers, a + b."""

    def use_tool(self, a, b):
        print(f"Adding {a} and {b}")
        return a+b

# 2. Tool Description
addition_tool_name = "perform_addition"
addition_tool_description = """Add one number (a) to another (b), returning a+b.
Use this tool WHENEVER you need to perform any addition calculation, as it will ensure your answer is precise."""
addition_tool_parameters = [
    {"name": "a", "type": "float", "description": "The first number in your addition equation."},
    {"name": "b", "type": "float", "description": "The second number in your addition equation."}
]

addition_tool = AdditionTool(addition_tool_name, addition_tool_description, addition_tool_parameters)

# 3. Assign Tool and Ask Claude
tool_user = ToolUser([addition_tool])
messages = [
    {
        "role":"user", 
        "content":"""
            John has three apples. 
            Maggie has nine apples. 
            Tim has 4 bananas. 
            If John gives Maggie 1 apple and 
            Tim gives John 2 bananas, 
            how much total fruits does each person have?"""
    }
]
print(tool_user.use_tools(messages, execution_mode="automatic"))

Claude 3 Function Calling

pip install anthropic yfinance rich
export ANTHROPIC_API_KEY=xxxxxxxxxxxx

from anthropic import Anthropic
from rich import print
import re
import yfinance as yf

client = Anthropic()
MODEL_NAME = "claude-3-opus-20240229"

stock_message = {
    "role": "user", 
    "content": "Find the current price of Apple stock"
}

message = client.messages.create(
    model=MODEL_NAME,
    max_tokens=1024,
    messages=[stock_message]
).content[0].text
print("##### Before Function Calling ####\n\n" + message)

# 1. Define the stock price finding function
def get_stock_price(ticker_symbol):
    stock = yf.Ticker(ticker_symbol)
    hist = stock.history(period="1d")
    current_price = hist['Close'].iloc[0]
    return current_price

# 2. Construct Tool description
tool_description = """
<tool_description>
    <tool_name>get_stock_price</tool_name>
    <description>
        Function for finding the current price of a stock using its ticker symbol.
    </description>
    <parameters>
        <parameter>
            <name>ticker_symbol</name>
            <type>str</type>
            <description>Ticker symbol of the stock</description>
        </parameter>
    </parameters>
</tool_description>
"""

# 3. Ask Claude
system_prompt = f"""
In this environment you have access to a set of tools you can use to answer the 
user's question.

You may call them like this:
<function_calls>
    <invoke>
        <tool_name>$TOOL_NAME</tool_name>
        <parameters>
            <$PARAMETER_NAME>$PARAMETER_VALUE</$PARAMETER_NAME>
            ...
        </parameters>
    </invoke>
</function_calls>

Here are the tools available:
<tools>{tool_description}</tools>
"""

function_calling_message = client.messages.create(
    model=MODEL_NAME,
    max_tokens=1024,
    messages=[stock_message],
    system=system_prompt
).content[0].text

# print(function_calling_message)

# 4. Extract parameters from response & call the function
def extract_between_tags(tag, string, strip=False):
    ext_list = re.findall(f"<{tag}>(.+?)</{tag}>", string, re.DOTALL)
    return [e.strip() for e in ext_list] if strip else ext_list

function_params = {"ticker_symbol": extract_between_tags("ticker_symbol", function_calling_message)[0]}
function_name = extract_between_tags("tool_name", function_calling_message)[0]
names_to_functions = {
    'get_stock_price': get_stock_price,
}
price = names_to_functions[function_name](**function_params)

# Construct function results
function_results = f"""
<function_results>
  <result>
    <tool_name>get_stock_price</tool_name>
    <stdout>{price}</stdout>
  </result>
</function_results>"""

# 5. Send all messages back to Claude
partial_assistant_message = function_calling_message + function_results

final_message = client.messages.create(
    model=MODEL_NAME,
    max_tokens=1024,
    messages=[
        stock_message,
        {
            "role": "assistant",
            "content": partial_assistant_message
        }
    ],
    system=system_prompt
).content[0].text
print("\n\n##### After Function Calling #####"+ final_message)

Output

❯ python app.py
##### Before Function Calling ####

To find the current price of Apple stock, I'll use the stock ticker symbol "AAPL" and check a 
financial website or stock market data provider. As of my knowledge cutoff date of September 
2021, the stock price will likely have changed since then. For the most up-to-date price, I 
recommend checking a financial website or app directly.


##### After Function Calling #####

The current price of Apple (AAPL) stock is $175.10.

CrewAI Meeting Prep

❯ python main.py
## Welcome to the Meeting Prep Crew
-------------------------------
What are the emails for the participants (other than you) in the meeting?
jeff@amazon.com
What is the context of the meeting?
About AI
What is your objective for this meeting?
Launching a new AI Product