Categories
AutoGen

AutoGen Llama 3 Text to SQL Evaluation

pip install pyautogen spider-env

Using Groq

export GROQ_API_KEY=xxxxxxxxxxxx
# 1. Configuration
import json
import os
from typing import Annotated, Dict
from spider_env import SpiderEnv
from autogen import ConversableAgent, UserProxyAgent, config_list_from_json

os.environ["AUTOGEN_USE_DOCKER"] = "False"
llm_config = {
    "cache_seed": 48,
    "config_list": [{
        "model": os.environ.get("OPENAI_MODEL_NAME", "llama3-70b-8192"), 
        "api_key": os.environ["GROQ_API_KEY"], 
        "base_url": os.environ.get("OPENAI_API_BASE", "https://api.groq.com/openai/v1")}
    ],
}

# 2. Import Data
gym = SpiderEnv()
observation, info = gym.reset()
question = observation["instruction"]
print(question)
schema = info["schema"]
print(schema)

# 3. Create Agents
def check_termination(msg: Dict):
    if "tool_responses" not in msg:
        return False
    json_str = msg["tool_responses"][0]["content"]
    obj = json.loads(json_str)
    return "error" not in obj or obj["error"] is None and obj["reward"] == 1

sql_writer = ConversableAgent(
    "sql_writer",
    llm_config=llm_config,
    system_message="You are good at writing SQL queries. Always respond with a function call to execute_sql().",
    is_termination_msg=check_termination,
)

user_proxy = UserProxyAgent(
    "user_proxy", 
    human_input_mode="NEVER", 
    max_consecutive_auto_reply=5
)

# 4. Create Tools / Function Calling
@sql_writer.register_for_llm(description="Function for executing SQL query and returning a response")
@user_proxy.register_for_execution()
def execute_sql(reflection: Annotated[str, "Think about what to do"], sql: Annotated[str, "SQL query"]) -> Annotated[Dict[str, str], "Dictionary with keys 'result' and 'error'"]:
    observation, reward, _, _, info = gym.step(sql)
    error = observation["feedback"]["error"]
    if not error and reward == 0:
        error = "The SQL query returned an incorrect result"
    if error:
        return { "error": error, "wrong_result": observation["feedback"]["result"], "correct_result": info["gold_result"], }
    else:
        return { "result": observation["feedback"]["result"], }


# 5. Initiate Chat
prompt_template = f"""Below is the schema for a SQL database:
{schema}
Generate a SQL query to answer the following question:
{question}
"""

user_proxy.initiate_chat(sql_writer, message=prompt_template)

Using Ollama

export OPENAI_API_BASE=http://localhost:11434/v1
export OPENAI_MODEL_NAME=llama3

Note that the llm_config above still reads its api_key from GROQ_API_KEY, so keep that variable set to any placeholder value when pointing at a local Ollama server.
Categories
AutoGen

AutoGen AI Research Agents

import autogen
import os

llm_config = {
    "cache_seed": 47,
    "temperature": 0,
    "config_list": [{"model": os.environ.get("OPENAI_MODEL_NAME", "gpt-4-turbo"), "api_key": os.environ["OPENAI_API_KEY"], "base_url": os.environ.get("OPENAI_API_BASE", "https://api.openai.com/v1")}],
    "timeout": 120,
}

user_proxy = autogen.UserProxyAgent(
    name="Admin",
    system_message="A human admin. Interact with the planner to discuss the plan. Plan execution needs to be approved by this admin.",
    code_execution_config=False,
)

engineer = autogen.AssistantAgent(
    name="Engineer",
    llm_config=llm_config,
    system_message="""Engineer. You follow an approved plan. You write python/shell code to solve tasks. Wrap the code in a code block that specifies the script type. The user can't modify your code. So do not suggest incomplete code which requires others to modify. Don't use a code block if it's not intended to be executed by the executor.
Don't include multiple code blocks in one response. Do not ask others to copy and paste the result. Check the execution result returned by the executor.
If the result indicates there is an error, fix the error and output the code again. Suggest the full code instead of partial code or code changes. If the error can't be fixed or if the task is not solved even after the code is executed successfully, analyze the problem, revisit your assumption, collect additional info you need, and think of a different approach to try.
""",
)

scientist = autogen.AssistantAgent(
    name="Scientist",
    llm_config=llm_config,
    system_message="""Scientist. You follow an approved plan. You are able to categorize papers after seeing their abstracts printed. You don't write code.""",
)

planner = autogen.AssistantAgent(
    name="Planner",
    system_message="""Planner. Suggest a plan. Revise the plan based on feedback from admin and critic, until admin approval.
The plan may involve an engineer who can write code; the code is executed by the executor.
Explain the plan first. Be clear which step is performed by the engineer and which step is performed by the executor.
""",
    llm_config=llm_config,
)

executor = autogen.UserProxyAgent(
    name="Executor",
    system_message="Executor. Execute the code written by the engineer. If it fails try again with the fix. Finally report the result.",
    human_input_mode="NEVER",
    code_execution_config={
        "last_n_messages": 3,
        "work_dir": "paper",
        "use_docker": False,
    },  
)

critic = autogen.AssistantAgent(
    name="Critic",
    system_message="Critic. Double check plan, claims, code from other agents and provide feedback. Check whether the plan includes adding verifiable info such as source URL.",
    llm_config=llm_config,
)

groupchat = autogen.GroupChat(
    agents=[user_proxy, engineer, planner, scientist, executor, critic], messages=[], max_round=50
)
manager = autogen.GroupChatManager(groupchat=groupchat, llm_config=llm_config)

response = user_proxy.initiate_chat(
    manager,
    message="""
find papers on LLM applications from arxiv in the last week, create a markdown table of different domains. Generate code and make the executor run it to find the information.
""",
)
print("Response:")
print(response)
summary = response.summary if hasattr(response, 'summary') else "No summary available."
print("Summary:")
print(summary)
Categories
LLM

Best NVIDIA GPUs for LLMs

Here is a strong case for investing in NVIDIA GPUs, along with a list of recommended devices:

  1. For training larger LLMs like LLaMA 65B or Bloom, a multi-GPU setup with each GPU having at least 40GB of VRAM is recommended, such as NVIDIA’s A100 or the new H100 GPUs.
  2. For inference tasks with larger models, GPUs with substantial VRAM, like the NVIDIA RTX 6000 Ada (48GB VRAM) or RTX 4090 (24GB VRAM), are ideal choices, ensuring smooth and efficient performance.
  3. Smaller and more efficient LLMs, such as Alpaca, BERT, and some variants of Falcon and Zephyr, can be run on GPUs with 8GB to 16GB of VRAM, making them accessible on devices like the NVIDIA RTX 3080 or RTX 4080.

Here is a table of recommended NVIDIA GPUs to consider for testing and running LLMs locally, along with recommended LLMs for each GPU category:

| Purpose                                             | Model               | VRAM      | Recommended LLMs                 |
|-----------------------------------------------------|---------------------|-----------|----------------------------------|
| For Training Large LLMs                             | NVIDIA A100         | 40GB/80GB | LLaMA 65B, Bloom                 |
|                                                     | NVIDIA H100         | 80GB      | LLaMA 65B, Bloom                 |
| For Inference with Large LLMs                       | NVIDIA RTX 6000 Ada | 48GB      | Bloom                            |
|                                                     | NVIDIA RTX 4090     | 24GB      | LLaMA 7B, Falcon, Zephyr         |
| For Training and Inference with Smaller/Medium LLMs | NVIDIA RTX 4080     | 16GB      | Mistral 7B, MPT 7B, Alpaca, BERT |
|                                                     | NVIDIA RTX 3090     | 24GB      | Falcon, Phi 2, Zephyr            |
|                                                     | NVIDIA RTX 3080     | 10GB      | Alpaca, BERT                     |
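
Before committing to a model, it is worth checking how much VRAM your GPU actually exposes. A minimal sketch using PyTorch (assuming torch is installed with CUDA support):

import torch

# List each visible CUDA device and its total VRAM so it can be matched
# against the table above before choosing a model to run locally.
if torch.cuda.is_available():
    for i in range(torch.cuda.device_count()):
        props = torch.cuda.get_device_properties(i)
        print(f"GPU {i}: {props.name} - {props.total_memory / 1024**3:.1f} GB VRAM")
else:
    print("No CUDA-capable GPU detected")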
Categories
Script

Removing Comments in VSCode Instantly

For a single file in VSCode:

  1. Open the file.
  2. Press Ctrl+F to open the Find widget.
  3. Click the .* icon to enable regex search.
  4. In the Find field, enter ^#.*\n?.
  5. Press Alt+Enter to select all occurrences.
  6. Press Delete to remove all selected comment lines.

For all files in a folder (a script-based alternative is sketched after these steps):

  1. Open the command palette with Ctrl+Shift+P.
  2. Type “Replace” and select “Replace in Files”.
  3. In the “Find” field, enter ^#.*\n? and leave the “Replace” field empty.
  4. Click “Replace All” to remove all comment lines.
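
If you prefer to run the same cleanup outside VSCode, here is a minimal Python sketch that applies the same ^#.*\n? pattern to every .py file under a folder (the folder path is an example; adjust it to your project):

import re
from pathlib import Path

# Strip whole-line '#' comments from every .py file under the folder,
# mirroring the ^#.*\n? regex used in the VSCode Find widget above.
folder = Path("src")  # example path
for file in folder.rglob("*.py"):
    text = file.read_text(encoding="utf-8")
    cleaned = re.sub(r"^#.*\n?", "", text, flags=re.MULTILINE)
    file.write_text(cleaned, encoding="utf-8")
    print(f"Cleaned {file}")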
Categories
AutoGen

AutoGen Text to SQL Evaluation

pip install pyautogen spider-env
import json
import os
from typing import Annotated, Dict

from spider_env import SpiderEnv

from autogen import ConversableAgent, UserProxyAgent, config_list_from_json

gym = SpiderEnv()

observation, info = gym.reset()

question = observation["instruction"]
print(question)

schema = info["schema"]
print(schema)


os.environ["AUTOGEN_USE_DOCKER"] = "False"
config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST")


def check_termination(msg: Dict):
    if "tool_responses" not in msg:
        return False
    json_str = msg["tool_responses"][0]["content"]
    obj = json.loads(json_str)
    return "error" not in obj or obj["error"] is None and obj["reward"] == 1


sql_writer = ConversableAgent(
    "sql_writer",
    llm_config={"config_list": config_list},
    system_message="You are good at writing SQL queries. Always respond with a function call to execute_sql().",
    is_termination_msg=check_termination,
)
user_proxy = UserProxyAgent("user_proxy", human_input_mode="NEVER", max_consecutive_auto_reply=5)


@sql_writer.register_for_llm(description="Function for executing SQL query and returning a response")
@user_proxy.register_for_execution()
def execute_sql(
    reflection: Annotated[str, "Think about what to do"], sql: Annotated[str, "SQL query"]
) -> Annotated[Dict[str, str], "Dictionary with keys 'result' and 'error'"]:
    observation, reward, _, _, info = gym.step(sql)
    error = observation["feedback"]["error"]
    if not error and reward == 0:
        error = "The SQL query returned an incorrect result"
    if error:
        return {
            "error": error,
            "wrong_result": observation["feedback"]["result"],
            "correct_result": info["gold_result"],
        }
    else:
        return {
            "result": observation["feedback"]["result"],
        }


message = f"""Below is the schema for a SQL database:
{schema}
Generate a SQL query to answer the following question:
{question}
"""

user_proxy.initiate_chat(sql_writer, message=message)

OAI_CONFIG_LIST

[
    {
        "model": "gpt-3.5-turbo",
        "api_key": "sk-xxxxxxxxxxxxxxxx"
    }
]
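
If the OAI_CONFIG_LIST file holds entries for several models, config_list_from_json can narrow it down with filter_dict. A small sketch (the filter value is an example; use whatever models your file defines):

from autogen import config_list_from_json

# Keep only the gpt-3.5-turbo entries from OAI_CONFIG_LIST.
config_list = config_list_from_json(
    env_or_file="OAI_CONFIG_LIST",
    filter_dict={"model": ["gpt-3.5-turbo"]},
)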
Categories
RAG

Llama3 PDF RAG using PhiData

Groq PDF RAG using PhiData

~/phidata/cookbook/llms/groq/rag

1. Setup Environment and Install packages

git clone https://github.com/phidatahq/phidata
cd phidata/cookbook/llms/groq/rag
conda create -n phidata python=3.11 -y
conda activate phidata
pip install -r requirements.txt
export GROQ_API_KEY=xxxxxxxxxx

2. Start Database

Download Docker Desktop: https://www.docker.com/products/docker-desktop/

Verify that Docker is installed and running:

docker ps
docker run -d \
  -e POSTGRES_DB=ai \
  -e POSTGRES_USER=ai \
  -e POSTGRES_PASSWORD=ai \
  -e PGDATA=/var/lib/postgresql/data/pgdata \
  -v pgvolume:/var/lib/postgresql/data \
  -p 5532:5432 \
  --name pgvector \
  phidata/pgvector:16

Verify that the database container started:

docker ps
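
Optionally, confirm the database is reachable on the mapped port before starting the app. A minimal sketch using psycopg2 (assuming psycopg2-binary is installed; the credentials and port match the docker run command above):

import psycopg2

# Connect with the values from the docker run command above:
# user/password/database "ai", host port 5532 mapped to the container's 5432.
conn = psycopg2.connect(host="localhost", port=5532, dbname="ai", user="ai", password="ai")
with conn.cursor() as cur:
    cur.execute("SELECT version();")
    print(cur.fetchone()[0])
conn.close()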

3. Start Application

streamlit run app.py

Ollama PDF RAG using PhiData

~/phidata/cookbook/llms/ollama/rag
cd phidata/cookbook/llms/ollama/rag
pip install -r requirements.txt
ollama pull llama3
streamlit run app.py
Categories
AI Agents

CrewAI Groq Llama 3: Sports News Agency

Install

conda create -n crewai python=3.11 -y
conda activate crewai
pip install 'crewai[tools]' flask requests

Create DB Code (Optional)

This step is optional: skip it if you already have a database populated with data.

import sqlite3

def create_db():
    conn = sqlite3.connect('nba_games.db')
    c = conn.cursor()
    c.execute('''
        CREATE TABLE IF NOT EXISTS games (
            team_name TEXT,
            game_id TEXT,
            status TEXT,
            home_team TEXT,
            home_team_score INTEGER,
            away_team TEXT,
            away_team_score INTEGER
        )
    ''')
    games = [
        ("warriors", "401585601", "Final", "Los Angeles Lakers", 121, "Golden State Warriors", 128),
        ("lakers", "401585601", "Final", "Los Angeles Lakers", 121, "Golden State Warriors", 128),
        ("nuggets", "401585577", "Final", "Miami Heat", 88, "Denver Nuggets", 100),
        ("heat", "401585577", "Final", "Miami Heat", 88, "Denver Nuggets", 100)
    ]
    c.executemany('INSERT INTO games (team_name, game_id, status, home_team, home_team_score, away_team, away_team_score) VALUES (?, ?, ?, ?, ?, ?, ?)', games)
    conn.commit()
    conn.close()

create_db()

API Code

from flask import Flask, request, jsonify
import sqlite3

app = Flask(__name__)

def get_game_score(team_name):
    conn = sqlite3.connect('nba_games.db')
    c = conn.cursor()
    team_name = team_name.lower()
    c.execute('SELECT * FROM games WHERE team_name = ?', (team_name,))
    result = c.fetchone()
    conn.close()
    if result:
        keys = ["game_id", "status", "home_team", "home_team_score", "away_team", "away_team_score"]
        return dict(zip(keys, result[1:]))
    else:
        return {"team_name": team_name, "score": "unknown"}

@app.route('/')
def home():
    return jsonify({
        'message': 'Welcome to the NBA Scores API. Use /score?team=<team_name> to fetch game scores.'
    })

@app.route('/score', methods=['GET'])
def score():
    team_name = request.args.get('team', '')
    if not team_name:
        return jsonify({'error': 'Missing team name'}), 400
    score = get_game_score(team_name)
    return jsonify(score)

if __name__ == '__main__':
    app.run(debug=True)
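
The CrewAI tool below queries this API at http://127.0.0.1:5000/score, so start it first (e.g. python api.py, assuming that is the filename you saved it under) and sanity-check it with a quick request:

import requests

# Quick check that the Flask API above is serving scores before running the crew.
resp = requests.get("http://127.0.0.1:5000/score", params={"team": "warriors"})
print(resp.status_code)
print(resp.json())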

CrewAI Code

import os
import json
import requests
from crewai import Agent, Task, Crew
from crewai_tools import BaseTool

# 1. Create Custom Tool to Get Game Score from API
from crewai_tools import tool
@tool("Game Score Tool")
def game_score_tool(team_name: str) -> str:
    """Get the current score for a given NBA game by querying the Flask API. It accepts team_name"""
    url = f'http://127.0.0.1:5000/score?team={team_name}'
    response = requests.get(url)
    if response.status_code == 200:
        return json.dumps(response.json(), indent=2)
    else:
        return json.dumps({"error": "API request failed", "status_code": response.status_code}, indent=2)

# 2. Create Agents
researcher = Agent(
    role='Researcher',
    goal='Gather and analyze information on NBA game scores',
    verbose=True,
    backstory=(
        "As a seasoned researcher, you have a keen eye for detail and a "
        "deep understanding of sports analytics. You're adept at sifting through "
        "scores to find the most relevant and accurate data."
    ),
    tools=[game_score_tool],
    allow_delegation=False
)

writer = Agent(
    role='Sports Journalist',
    goal='Compose an engaging news article based on NBA game scores',
    verbose=True,
    backstory=(
        "With a talent for storytelling, you convert statistical data and game outcomes "
        "into engaging sports narratives. Your articles are insightful, capturing the excitement "
        "of the games and providing a deep analysis for sports enthusiasts."
    ),
    allow_delegation=False
)

# 3. Define Tasks
research_task = Task(
    description="Investigate the scores for the Warriors game.",
    expected_output='A detailed report summarizing the data.',
    tools=[game_score_tool],
    agent=researcher,
)

writing_task = Task(
    description="Write a detailed news article about an NBA game, focusing stats.",
    expected_output='An engaging and informative article suitable for publication in sports media.',
    context=[research_task],
    agent=writer,
)

# 4. Run the Crew
crew = Crew(
    agents=[researcher, writer],
    tasks=[research_task, writing_task]
)

result = crew.kickoff()
print(result)
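
As written, the agents fall back to CrewAI's default OpenAI configuration. To actually route them through Groq's Llama 3, one option is to pass a Groq-backed chat model to each agent; a minimal sketch using langchain_groq (assuming the langchain-groq package is installed and GROQ_API_KEY is set):

from langchain_groq import ChatGroq

# Groq-hosted Llama 3; reads GROQ_API_KEY from the environment.
groq_llm = ChatGroq(model="llama3-70b-8192", temperature=0)

# Then pass it when constructing the agents, e.g.:
# researcher = Agent(role='Researcher', ..., llm=groq_llm)
# writer = Agent(role='Sports Journalist', ..., llm=groq_llm)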
Categories
RAG

Llama 3 RAG using Ollama

Install

pip install ollama langchain beautifulsoup4 chromadb gradio
ollama pull llama3
ollama pull nomic-embed-text

Code

import ollama
import bs4
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# 1. Load the data
loader = WebBaseLoader(
    web_paths=("http://localhost/llm.html",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# 2. Create Ollama embeddings and vector store
embeddings = OllamaEmbeddings(model="llama3")
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)

# 3. Call Ollama Llama3 model
def ollama_llm(question, context):
    formatted_prompt = f"Question: {question}\n\nContext: {context}"
    response = ollama.chat(model='llama3', messages=[{'role': 'user', 'content': formatted_prompt}])
    return response['message']['content']

# 4. RAG Setup
retriever = vectorstore.as_retriever()
def combine_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)
def rag_chain(question):
    retrieved_docs = retriever.invoke(question)
    formatted_context = combine_docs(retrieved_docs)
    return ollama_llm(question, formatted_context)

# 5. Use the RAG App
result = rag_chain("What is Task Decomposition?")
print(result)
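
The install step pulls nomic-embed-text, but the code above embeds with the llama3 model itself. If you prefer the dedicated embedding model (as the Gradio UI below does), the only change is the embeddings line:

# Use the dedicated embedding model pulled during install instead of llama3
embeddings = OllamaEmbeddings(model="nomic-embed-text")
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)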

UI

import gradio as gr
import bs4
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings
import ollama

# Function to load, split, and retrieve documents
def load_and_retrieve_docs(url):
    loader = WebBaseLoader(
        web_paths=(url,),
        bs_kwargs=dict()  # no SoupStrainer filter: parse the whole page
    )
    docs = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_documents(docs)
    embeddings = OllamaEmbeddings(model="nomic-embed-text")
    vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
    return vectorstore.as_retriever()

# Function to format documents
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

# Function that defines the RAG chain
def rag_chain(url, question):
    retriever = load_and_retrieve_docs(url)
    retrieved_docs = retriever.invoke(question)
    formatted_context = format_docs(retrieved_docs)
    formatted_prompt = f"Question: {question}\n\nContext: {formatted_context}"
    response = ollama.chat(model='llama3', messages=[{'role': 'user', 'content': formatted_prompt}])
    return response['message']['content']

# Gradio interface
iface = gr.Interface(
    fn=rag_chain,
    inputs=["text", "text"],
    outputs="text",
    title="RAG Chain Question Answering",
    description="Enter a URL and a query to get answers from the RAG chain."
)

# Launch the app
iface.launch()
Categories
LLM Ollama

Llama 3 Run Locally: Ollama, LM Studio, Jan AI

Ollama

import ollama

stream = ollama.chat(
    model='llama3',
    messages=[{'role': 'user', 'content': 'Why is the sky blue?'}],
    stream=True,
)

for chunk in stream:
  print(chunk['message']['content'], end='', flush=True)

LM Studio

# Example: reuse your existing OpenAI setup
from openai import OpenAI

# Point to the local server
client = OpenAI(base_url="http://localhost:1234/v1", api_key="lm-studio")

completion = client.chat.completions.create(
  model="model-identifier",
  messages=[
    {"role": "system", "content": "Always answer in rhymes."},
    {"role": "user", "content": "Introduce yourself."}
  ],
  temperature=0.7,
)

print(completion.choices[0].message)

Jan AI

import requests
import json

# Endpoint URL
url = "http://localhost:1337/v1/chat/completions"

# Headers
headers = {
    "Content-Type": "application/json",
}

# Payload
payload = {
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Give me meal plan for today."},
    ],
    "model": "llama2-chat-7b-q4",
    "stream": False,
    "max_tokens": 2048,
    "stop": None,
    "frequency_penalty": 0,
    "presence_penalty": 0,
    "temperature": 0.7,
    "top_p": 0.95
}

# Make the request
response = requests.post(url, headers=headers, data=json.dumps(payload))

# Extracting 'content' value from response
result = response.json()
content = result['choices'][0]['message']['content']
print(content)
Categories
Tools

Groq Tools Llama3: Create AI App with API Integration

pip install groq requests flask
export GROQ_API_KEY=xxxxxxxxxxx
from groq import Groq
import os
import json
import requests

client = Groq(api_key = os.getenv('GROQ_API_KEY'))
MODEL = 'llama3-70b-8192'

def get_game_score(team_name):
    """Get the current score for a given NBA game by querying the Flask API."""
    url = f'http://127.0.0.1:5000/score?team={team_name}'
    response = requests.get(url)
    if response.status_code == 200:
        return json.dumps(response.json())
    else:
        return json.dumps({"error": "API request failed", "status_code": response.status_code})

def run_conversation(user_prompt):
    # Step 1: send the conversation and available functions to the model
    messages=[
        {
            "role": "system",
            "content": "You are a function calling LLM that uses the data extracted from the get_game_score function to answer questions around NBA game scores. Include the team and their opponent in your response."
        },
        {
            "role": "user",
            "content": user_prompt,
        }
    ]
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_game_score",
                "description": "Get the score for a given NBA game",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "team_name": {
                            "type": "string",
                            "description": "The name of the NBA team (e.g. 'Golden State Warriors')",
                        }
                    },
                    "required": ["team_name"],
                },
            },
        }
    ]
    response = client.chat.completions.create(
        model=MODEL,
        messages=messages,
        tools=tools,
        tool_choice="auto",  
        max_tokens=4096
    )

    response_message = response.choices[0].message
    tool_calls = response_message.tool_calls
    # Step 2: check if the model wanted to call a function
    if tool_calls:
        # Step 3: call the function
        # Note: the JSON response may not always be valid; be sure to handle errors
        available_functions = {
            "get_game_score": get_game_score,
        }  # only one function in this example, but you can have multiple
        messages.append(response_message)  # extend conversation with assistant's reply
        # Step 4: send the info for each function call and function response to the model
        for tool_call in tool_calls:
            function_name = tool_call.function.name
            function_to_call = available_functions[function_name]
            function_args = json.loads(tool_call.function.arguments)
            function_response = function_to_call(
                team_name=function_args.get("team_name")
            )
            messages.append(
                {
                    "tool_call_id": tool_call.id,
                    "role": "tool",
                    "name": function_name,
                    "content": function_response,
                }
            )  # extend conversation with function response
        second_response = client.chat.completions.create(
            model=MODEL,
            messages=messages
        )  # get a new response from the model where it can see the function response
        return second_response.choices[0].message.content
    # If the model answered directly without calling the tool, return its reply
    return response_message.content

user_prompt = "What was the score of the Warriors game?"
print(run_conversation(user_prompt))

API

from flask import Flask, request, jsonify
import json

app = Flask(__name__)

def get_game_score(team_name):
    """Get the current score for a given NBA game"""
    team_name = team_name.lower()
    if "warriors" in team_name:
        return {"game_id": "401585601", "status": 'Final', "home_team": "Los Angeles Lakers", "home_team_score": 121, "away_team": "Golden State Warriors", "away_team_score": 128}
    elif "lakers" in team_name:
        return {"game_id": "401585601", "status": 'Final', "home_team": "Los Angeles Lakers", "home_team_score": 121, "away_team": "Golden State Warriors", "away_team_score": 128}
    elif "nuggets" in team_name:
        return {"game_id": "401585577", "status": 'Final', "home_team": "Miami Heat", "home_team_score": 88, "away_team": "Denver Nuggets", "away_team_score": 100}
    elif "heat" in team_name:
        return {"game_id": "401585577", "status": 'Final', "home_team": "Miami Heat", "home_team_score": 88, "away_team": "Denver Nuggets", "away_team_score": 100}
    else:
        return {"team_name": team_name, "score": "unknown"}

@app.route('/score', methods=['GET'])
def score():
    team_name = request.args.get('team', '')
    if not team_name:
        return jsonify({'error': 'Missing team name'}), 400
    score = get_game_score(team_name)
    return jsonify(score)

if __name__ == '__main__':
    app.run(debug=True)
curl "http://127.0.0.1:5000/score?team=lakers"