Mervin Praison

Tools

Anthropic Tools: Stock Price Integration

Post author By praison
Post date April 6, 2024

import anthropic
import yfinance as yf

# Shared API client used by both messages.create calls below. It is built with
# no arguments — presumably credentials come from the environment; confirm.
client = anthropic.Anthropic()

# 1. Get Stock Price function / Tool
def get_stock_price(ticker_symbol):
    """Return the most recent closing price for ``ticker_symbol`` as a string.

    Args:
        ticker_symbol: Ticker to look up through yfinance, e.g. ``"AAPL"``.

    Returns:
        The newest ``Close`` value of the one-day history, converted with ``str``.

    Raises:
        ValueError: If the history request returns no rows for the ticker.
    """
    stock = yf.Ticker(ticker_symbol)
    hist = stock.history(period="1d")
    if hist.empty:
        # Indexing an empty frame would otherwise surface as an opaque IndexError.
        raise ValueError(f"No price data returned for ticker symbol {ticker_symbol!r}")
    # Use the last row so the newest close wins if more than one row comes back.
    current_price = hist['Close'].iloc[-1]
    return str(current_price)

# 2. Tool Definition 
# Tool definitions passed as `tools=` to the messages.create calls below.
# The entry's "name" is the string process_tool_call checks for, and the single
# required property, "ticker_symbol", is the key it reads from the tool input.
tools = [
    {
        "name": "get_stock_price",
        "description": "Get the current stock price for a given ticker symbol",
        "input_schema": {
            "type": "object",
            "properties": {
                "ticker_symbol": {
                    "type": "string",
                    "description": "The stock ticker symbol, e.g., AAPL for Apple Inc."
                }
            },
            "required": ["ticker_symbol"]
        }
    }
]

# 3. Ask Claude for the Stock Price
# First round trip: send the user question together with the tool definitions.
initial_response = client.beta.tools.messages.create(
    model="claude-3-sonnet-20240229",
    max_tokens=1024,
    tools=tools,
    messages=[{"role": "user", "content": "What is the Stock price of Apple?"}]
)

# stop_reason is compared with "tool_use" further down to decide whether a tool must be run.
print(f"Stop Reason: {initial_response.stop_reason}")
print(f"Initial Response: {initial_response.content}")

# 4. Parse the Tool Name and Run the Get Stock Price Tool
def process_tool_call(tool_name, tool_input):
    """Dispatch a tool request to the matching local function.

    Args:
        tool_name: Name of the requested tool.
        tool_input: Mapping of tool arguments; ``get_stock_price`` reads
            ``tool_input["ticker_symbol"]``.

    Returns:
        The value returned by the selected tool function.

    Raises:
        ValueError: If ``tool_name`` is not a known tool.
    """
    if tool_name == "get_stock_price":
        return get_stock_price(tool_input["ticker_symbol"])
    # An unknown name used to fall through and return None, which was then sent
    # back as the tool result; fail loudly instead.
    raise ValueError(f"Unknown tool: {tool_name!r}")

# Run the requested tool only when the model stopped in order to ask for one.
if initial_response.stop_reason == "tool_use":
    # NOTE: next() picks only the first tool_use block; any further tool_use
    # blocks in the same response are not answered by this script.
    tool_use = next(block for block in initial_response.content if block.type == "tool_use")
    tool_name = tool_use.name
    tool_input = tool_use.input

    print(f"\nTool Used: {tool_name}") # e.g. get_stock_price
    print(f"Tool Input: {tool_input}") # e.g. {'ticker_symbol': 'AAPL'}

    tool_result = process_tool_call(tool_name, tool_input)

    print(f"Tool Result: {tool_result}")

    # 5. Send the Tool Result back to the Assistant
    # The conversation is replayed in full: the original question, the
    # assistant's content blocks, then a tool_result tied to the tool_use id.
    response = client.beta.tools.messages.create(
        model="claude-3-sonnet-20240229",
        max_tokens=4096,
        messages=[
            {"role": "user", "content": "What is the Stock price of Apple?"},
            {"role": "assistant", "content": initial_response.content},
            {
                "role": "user",
                "content": [
                    {
                        "type": "tool_result",
                        "tool_use_id": tool_use.id,
                        "content": tool_result,
                    }
                ],
            },
        ],
        tools=tools,
    )
else:
    # No tool was requested, so the first response is already the final one.
    response = initial_response

# Take the text of the first content block that has a `text` attribute;
# final_response is None when no block has one.
final_response = next(
    (block.text for block in response.content if hasattr(block, "text")),
    None,
)
print(f"\nFinal Response: {final_response}")

❯ python app.py
Stop Reason: tool_use
Initial Response: [TextBlock(text="Okay, let's get the current stock price for Apple Inc. (AAPL).", type='text'), ToolUseBlock(id='toolu_012KPAkjCHG8GUYC3YdEJTTo', input={'ticker_symbol': 'AAPL'}, name='get_stock_price', type='tool_use')]

Tool Used: get_stock_price
Tool Input: {'ticker_symbol': 'AAPL'}
Tool Result: 169.5800018310547

Final Response: The current stock price of Apple Inc. (AAPL) is $169.58.

UI

import anthropic
import yfinance as yf
import gradio as gr

# Shared API client used by run_tool_interaction. It is built with no
# arguments — presumably credentials come from the environment; confirm.
client = anthropic.Anthropic()

# 1. Get Stock Price function / Tool
def get_stock_price(ticker_symbol):
    """Return the most recent closing price for ``ticker_symbol`` as a string.

    Args:
        ticker_symbol: Ticker to look up through yfinance, e.g. ``"AAPL"``.

    Returns:
        The newest ``Close`` value of the one-day history, converted with ``str``.

    Raises:
        ValueError: If the history request returns no rows for the ticker.
    """
    stock = yf.Ticker(ticker_symbol)
    hist = stock.history(period="1d")
    if hist.empty:
        # Indexing an empty frame would otherwise surface as an opaque IndexError.
        raise ValueError(f"No price data returned for ticker symbol {ticker_symbol!r}")
    # Use the last row so the newest close wins if more than one row comes back.
    current_price = hist['Close'].iloc[-1]
    return str(current_price)

# 2. Tool Definition
# Tool definitions passed as `tools=` to the messages.create calls in
# run_tool_interaction. The entry's "name" is the string process_tool_call
# checks for, and "ticker_symbol" is the key it reads from the tool input.
tools = [
    {
        "name": "get_stock_price",
        "description": "Get the current stock price for a given ticker symbol",
        "input_schema": {
            "type": "object",
            "properties": {
                "ticker_symbol": {
                    "type": "string",
                    "description": "The stock ticker symbol, e.g., AAPL for Apple Inc."
                }
            },
            "required": ["ticker_symbol"]
        }
    }
]

def process_tool_call(tool_name, tool_input):
    """Dispatch a tool request to the matching local function.

    Args:
        tool_name: Name of the requested tool.
        tool_input: Mapping of tool arguments; ``get_stock_price`` reads
            ``tool_input["ticker_symbol"]``.

    Returns:
        The value returned by the selected tool function.

    Raises:
        ValueError: If ``tool_name`` is not a known tool.
    """
    if tool_name == "get_stock_price":
        return get_stock_price(tool_input["ticker_symbol"])
    # An unknown name used to fall through and return None, which was then sent
    # back as the tool result; fail loudly instead.
    raise ValueError(f"Unknown tool: {tool_name!r}")


def run_tool_interaction(user_message, model="claude-3-sonnet-20240229"):
    """Answer ``user_message``, running any tools the model asks for.

    The message is sent once with the tool definitions. If the response stops
    with ``"tool_use"``, every ``tool_use`` block in it is executed through
    ``process_tool_call`` and the results are sent back in a second request.

    Args:
        user_message: The user's question.
        model: Model name passed to both requests.

    Returns:
        The text of the first content block that has a ``text`` attribute in
        the final response, or ``None`` if no block has one.
    """
    initial_response = client.beta.tools.messages.create(
        model=model,
        max_tokens=1024,
        tools=tools,
        messages=[{"role": "user", "content": user_message}]
    )

    print(f"Stop Reason: {initial_response.stop_reason}")
    print(f"Initial Response: {initial_response.content}")

    if initial_response.stop_reason != "tool_use":
        # No tool was requested, so the first response is already the final one.
        response = initial_response
    else:
        # Answer every tool_use block, not just the first: each one needs a
        # tool_result carrying its own id in the follow-up message.
        tool_results = []
        for block in initial_response.content:
            if block.type != "tool_use":
                continue

            print(f"\nTool Used: {block.name}")  # Example: get_stock_price
            print(f"Tool Input: {block.input}")  # Example: {'ticker_symbol': 'AAPL'}

            tool_result = process_tool_call(block.name, block.input)
            print(f"Tool Result: {tool_result}")

            tool_results.append(
                {
                    "type": "tool_result",
                    "tool_use_id": block.id,
                    "content": tool_result,
                }
            )

        # Replay the conversation so far and append the tool results.
        response = client.beta.tools.messages.create(
            model=model,
            max_tokens=4096,
            messages=[
                {"role": "user", "content": user_message},
                {"role": "assistant", "content": initial_response.content},
                {"role": "user", "content": tool_results},
            ],
            tools=tools,
        )

    # Extract and return the final response
    final_response = next(
        (block.text for block in response.content if hasattr(block, "text")),
        None,
    )
    return final_response

# Gradio UI: the single input textbox is passed to run_tool_interaction as
# user_message (the model parameter keeps its default) and the returned value
# is shown in the output textbox.
iface = gr.Interface(
    fn=run_tool_interaction,
    inputs=[
        gr.Textbox(label="User Message")
    ],
    outputs=gr.Textbox(label="Response"),
    title="Anthropic Tools",
    description="Get Stock Price"
)

# Runs at import time; there is no __main__ guard.
iface.launch()

AI Agents

Praison AI Tools

Post author By praison
Post date April 4, 2024

CodeDocsSearchTool
CSVSearchTool
DirectorySearchTool
DirectoryReadTool
DOCXSearchTool
FileReadTool
GithubSearchTool
SerperDevTool
TXTSearchTool
JSONSearchTool
MDXSearchTool
PDFSearchTool
PGSearchTool
RagTool
ScrapeElementFromWebsiteTool
ScrapeWebsiteTool
SeleniumScrapingTool
WebsiteSearchTool
XMLSearchTool
YoutubeChannelSearchTool
YoutubeVideoSearchTool

AutoGen

AutoGen Tools

Post author By praison
Post date April 1, 2024

from typing import Annotated, Literal
import os
from autogen import ConversableAgent

Operator = Literal["+", "-", "*", "/"]

def calculator(a: int, b: int, operator: Annotated[Operator, "operator"]) -> int:
    """Apply one of the four basic arithmetic operators to ``a`` and ``b``.

    Args:
        a: Left operand.
        b: Right operand.
        operator: One of ``"+"``, ``"-"``, ``"*"`` or ``"/"``.

    Returns:
        The result of the operation. Division is truncated toward zero.

    Raises:
        ValueError: If ``operator`` is not one of the four supported symbols.
        ZeroDivisionError: If ``operator`` is ``"/"`` and ``b`` is zero.
    """
    if operator == "+":
        return a + b
    elif operator == "-":
        return a - b
    elif operator == "*":
        return a * b
    elif operator == "/":
        if isinstance(a, int) and isinstance(b, int):
            # Stay in integer arithmetic: int(a / b) goes through float and
            # loses precision once the quotient exceeds 2**53.
            quotient = abs(a) // abs(b)
            # Re-apply the sign so the result truncates toward zero, as before.
            return quotient if (a < 0) == (b < 0) else -quotient
        # Non-int operands keep the original float-then-truncate behaviour.
        return int(a / b)
    else:
        raise ValueError("Invalid operator")

# LLM-backed agent; its system message tells it to reply 'TERMINATE' when done.
# os.environ["OPENAI_API_KEY"] raises KeyError here if the variable is not set.
assistant = ConversableAgent(
    name="Assistant",
    system_message="You are a helpful AI assistant. "
    "You can help with simple calculations. "
    "Return 'TERMINATE' when the task is done.",
    llm_config={"config_list": [{"model": "gpt-3.5-turbo", "api_key": os.environ["OPENAI_API_KEY"]}]},
)

# Agent configured without an LLM (llm_config=False) and with
# human_input_mode="NEVER". Its termination check matches any message whose
# content is not None and contains "TERMINATE".
user_proxy = ConversableAgent(
    name="User",
    llm_config=False,
    is_termination_msg=lambda msg: msg.get("content") is not None and "TERMINATE" in msg["content"],
    human_input_mode="NEVER",
)

from autogen import register_function
# Register the calculator function to the agent and user proxy to add a calculator tool.
register_function(
    calculator,
    caller=assistant,  # The assistant agent can suggest calls to the calculator.
    executor=user_proxy,  # The user proxy agent can execute the calculator calls.
    name="calculator",  # By default, the function name is used as the tool name.
    description="A simple calculator",  # A description of the tool.
)

# Start the chat from the user proxy with an arithmetic question; the result
# object is kept in chat_result but not used further in this snippet.
chat_result = user_proxy.initiate_chat(assistant, message="What is (44232 + 13312 / (232 - 32)) * 5?")

Praison AI

Praison AI Agents.yml

Post author By praison
Post date March 25, 2024

# Agent configuration: names the framework and a topic, then defines three
# roles (movie_concept_creator, screenwriter, editor). Each role has a
# backstory, goal and role title plus one task with a description and an
# expected_output.
framework: crewai
topic: Artificial Intelligence
roles:
  movie_concept_creator:
    backstory: 'Creative thinker with a deep understanding of cinematic storytelling,
      capable of using AI-generated storylines to create unique and compelling movie
      ideas.'
    goal: Generate engaging movie concepts using AI storylines
    role: Movie Concept Creator
    tasks:
      movie_concept_development:
        description: 'Develop movie concepts from AI-generated storylines, ensuring
          they are engaging and have strong narrative arcs.'
        expected_output: 'Well-structured movie concept document with character
          bios, settings, and plot outlines.'
  screenwriter:
    backstory: 'Expert in writing engaging dialogue and script structure, able to
      turn movie concepts into production-ready scripts.'
    goal: Write compelling scripts based on movie concepts
    role: Screenwriter
    tasks:
      scriptwriting_task:
        description: 'Turn movie concepts into polished scripts with well-developed
          characters, strong dialogue, and effective scene transitions.'
        expected_output: 'Production-ready script with a beginning, middle, and
          end, along with character development and engaging dialogues.'
  editor:
    backstory: 'Adept at identifying inconsistencies, improving language usage,
      and maintaining the overall flow of the script.'
    goal: Refine the scripts and ensure continuity of the movie storyline
    role: Editor
    tasks:
      editing_task:
        description: 'Review, edit, and refine the scripts to ensure they are cohesive
          and follow a well-structured narrative.'
        expected_output: 'A polished final draft of the script with no inconsistencies,
          strong character development, and effective dialogue.'
# Empty list: no dependencies are declared in this file.
dependencies: []

OpenAI

OpenAI API Key, Base URL and Model Hard Coded

Post author By praison
Post date March 25, 2024

from openai import OpenAI
# The API key and base URL are passed directly to the constructor rather than
# read from the environment; "sk-xxxxx" is presumably a placeholder to replace.
client = OpenAI(api_key="sk-xxxxx", base_url="https://api.openai.com/v1")

# NOTE(review): the system prompt mentions JSON output, but no response_format
# argument is passed in this call — confirm whether plain text is intended.
response = client.chat.completions.create(
  model="gpt-3.5-turbo-0125",
  messages=[
    {"role": "system", "content": "You are a helpful assistant designed to output JSON."},
    {"role": "user", "content": "Who won the world series in 2020?"}
  ]
)
# Print the message content of the first returned choice.
print(response.choices[0].message.content)

Ollama

Ollama JSON Response Code

Post author By praison
Post date March 25, 2024

from openai import OpenAI
from pydantic import BaseModel, Field
# client = OpenAI(api_key="xxx", base_url="http://localhost:11434/v1")

import instructor

# Payload model with a single required string field, `winner`.
class WinnerData(BaseModel):
    winner: str = Field(..., description="Winner")

# Top-level response model: wraps a WinnerData under the `data` field. It is
# the class passed as response_model in the create call further down.
class WinnerInfo(BaseModel):
    data: WinnerData

# enables `response_model` in create call
# The wrapped OpenAI client targets a local endpoint on port 11434
# (presumably an Ollama server — confirm); patching uses instructor's JSON mode.
client = instructor.patch(
    OpenAI(
        base_url="http://localhost:11434/v1",
        api_key="ollama",
    ),
    mode=instructor.Mode.JSON,
)

# Ask the "mistral" model the question, with WinnerInfo as the response_model.
response = client.chat.completions.create(
  model="mistral",
  response_model=WinnerInfo,
  messages=[
    {"role": "system", "content": "You are a helpful assistant designed to output JSON."},
    {"role": "user", "content": "Who won the world series in 2020?"}
  ]
)
# The result is used as a WinnerInfo: print the nested winner field, then the
# whole model as indented JSON.
print(response.data.winner)
print(response.model_dump_json(indent=2))

Python

Identify the File Causing an Error

Post author By praison
Post date March 25, 2024

import tracemalloc
import logging
import warnings
import sys
import traceback

# Enable Tracemalloc
# Runs at import time: start tracing memory allocations.
tracemalloc.start()

# Configure root logging at DEBUG level. Each record shows the timestamp, level,
# file name, function name, line number and message, in that order.
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(filename)s - %(funcName)s - %(lineno)d - %(message)s')

# Function to Log Warnings with Traceback
def warn_with_traceback(message, category, filename, lineno, file=None, line=None):
    """Write the current call stack followed by the formatted warning.

    The parameters mirror those of ``warnings.showwarning``. Output goes to
    ``file`` when it has a ``write`` attribute, otherwise to ``sys.stderr``.
    """
    if hasattr(file, 'write'):
        sink = file
    else:
        sink = sys.stderr

    # Stack first, so the warning text appears directly below the trace.
    traceback.print_stack(file=sink)
    formatted = warnings.formatwarning(message, category, filename, lineno, line)
    sink.write(formatted)

# Override showwarning: replace the warnings module's display hook so each
# warning is written together with the call stack that led to it.
warnings.showwarning = warn_with_traceback

RAG

AI Research RAG Using ChromaDB Ollama

Post author By praison
Post date March 23, 2024

import gradio as gr
import os
import time
import arxiv
from langchain_community.vectorstores import Qdrant
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_community.chat_models import ChatOllama
from langchain.prompts import ChatPromptTemplate
from langchain.pydantic_v1 import BaseModel
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableParallel, RunnablePassthrough
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.embeddings.ollama import OllamaEmbeddings

def init_or_load_chroma(path="./tmp/local_chroma", collection_name="llm_dataset_survey_papers"):
    """Open the persisted Chroma collection, if any, and build a retriever.

    Args:
        path: Directory the Chroma store is persisted in.
        collection_name: Collection to open inside that directory.

    Returns:
        A ``(chroma_instance, retriever)`` tuple. Both entries are ``None``
        when ``path`` does not exist yet or the store cannot be opened.
    """
    if not os.path.isdir(path):
        # Nothing has been persisted yet; do not report an empty store as loaded.
        print("No existing embeddings found, starting new Chroma instance.")
        return None, None

    try:
        chroma_instance = Chroma(
            persist_directory=path,
            collection_name=collection_name,
            # Same embedding model process_papers uses, so queries are embedded
            # the same way as the stored chunks.
            embedding_function=OllamaEmbeddings(model='nomic-embed-text'),
        )
        print("Existing embeddings loaded.")
    except Exception as e:
        # Best effort: an unreadable store is treated as "nothing stored yet".
        print("No existing embeddings found, starting new Chroma instance.", e)
        return None, None

    # The query chain needs a retriever, not the vector store object itself.
    return chroma_instance, chroma_instance.as_retriever()

# Attempt to load an existing Chroma instance at import time. `retriever` is the
# module-level name that perform_query reads and process_papers reassigns.
chroma, retriever = init_or_load_chroma()

def process_papers(query):
    """Download arXiv papers for ``query`` and index their text in Chroma.

    Up to 10 search results are downloaded as PDFs into
    ``llm_dataset_survey_papers``. Every PDF in that directory is loaded, the
    combined text is split (``chunk_size=500``, ``chunk_overlap=50``),
    embedded with the ``nomic-embed-text`` Ollama model and stored in the
    ``llm_dataset_survey_papers`` collection under ``./tmp/local_chroma``,
    which is the default location ``init_or_load_chroma`` reads. The
    module-level ``chroma`` and ``retriever`` are replaced with the new store
    and its retriever.

    Args:
        query: Search query sent to arXiv.

    Returns:
        A status message describing the outcome.
    """
    global chroma, retriever

    dirpath = "llm_dataset_survey_papers"
    os.makedirs(dirpath, exist_ok=True)

    client = arxiv.Client()
    search = arxiv.Search(
        query=query,
        max_results=10,
        sort_order=arxiv.SortOrder.Descending
    )

    # Retry transient download failures, but give up on a paper after a few
    # attempts instead of blocking the UI forever.
    max_attempts = 5
    for result in client.results(search):
        for attempt in range(1, max_attempts + 1):
            try:
                result.download_pdf(dirpath=dirpath)
            except (FileNotFoundError, ConnectionResetError) as e:
                print("Error occurred:", e)
                if attempt < max_attempts:
                    time.sleep(5)
            else:
                print(result)
                print(f"-> Paper id {result.get_short_id()} with title '{result.title}' is downloaded.")
                break
        else:
            print(f"-> Giving up on paper id {result.get_short_id()} after {max_attempts} attempts.")

    papers = []
    loader = DirectoryLoader(dirpath, glob="./*.pdf", loader_cls=PyPDFLoader)
    try:
        papers = loader.load()
    except Exception as e:
        print(f"Error loading file: {e}")

    # Concatenate all pages, then collapse line breaks and drop empty lines.
    full_text = "".join(paper.page_content for paper in papers)
    full_text = " ".join(line for line in full_text.splitlines() if line)
    if not full_text:
        # Nothing to embed; building a store from zero chunks would fail.
        return "No text could be extracted from the downloaded papers; nothing was indexed."

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    paper_chunks = text_splitter.create_documents([full_text])

    # Persist to the Chroma location that is loaded at startup, so the
    # embeddings are found again on the next run.
    chroma = Chroma.from_documents(
        documents=paper_chunks,
        embedding=OllamaEmbeddings(model='nomic-embed-text'),
        persist_directory="./tmp/local_chroma",
        collection_name="llm_dataset_survey_papers",
    )
    retriever = chroma.as_retriever()

    return "Papers processed and saved to embeddings database."

def perform_query(question_text):
    """Answer ``question_text`` from chunks fetched by the module-level retriever.

    Args:
        question_text: The question as plain text.

    Returns:
        The model's answer as a string, or an error message when no retriever
        is available yet.
    """
    if retriever is None:
        return "Error: No papers processed. Please process papers first."

    template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
    prompt = ChatPromptTemplate.from_template(template)

    ollama_llm = "llama2:7b-chat"
    model = ChatOllama(model=ollama_llm)

    # The same input feeds both branches: the retriever looks up context for
    # it and the passthrough forwards it unchanged as the question.
    chain = (
        RunnableParallel({"context": retriever, "question": RunnablePassthrough()})
        | prompt
        | model
        | StrOutputParser()
    )

    class Question(BaseModel):
        __root__: str

    chain = chain.with_types(input_type=Question)
    # Invoke with the raw string: both branches above need the question text
    # itself, not a Question wrapper object.
    result = chain.invoke(question_text)
    return result

# Two-tab Gradio UI: one tab runs process_papers on a search query, the other
# runs perform_query on a question; each shows the returned value in a text box.
with gr.Blocks() as demo:
    with gr.Tab("Process Papers"):
        query_input = gr.Text(label="Search Query")
        process_button = gr.Button("Process Papers")
        process_output = gr.Text(label="Process Output")
        # Clicking calls process_papers with the query text; the result goes to process_output.
        process_button.click(process_papers, inputs=query_input, outputs=process_output)

    with gr.Tab("Query Retriever"):
        question_input = gr.Text(label="Question")
        query_button = gr.Button("Query")
        query_output = gr.Text(label="Query Output")
        # Clicking calls perform_query with the question text; the result goes to query_output.
        query_button.click(perform_query, inputs=question_input, outputs=query_output)

# Runs at import time; there is no __main__ guard.
demo.launch()

Errors

Qdrant AI Research

Post author By praison
Post date March 22, 2024

Not Working

import gradio as gr
import os
import time
import arxiv
from langchain_community.vectorstores import Qdrant
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_community.chat_models import ChatOllama
from langchain.prompts import ChatPromptTemplate
from langchain.pydantic_v1 import BaseModel
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnableParallel, RunnablePassthrough
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.embeddings.ollama import OllamaEmbeddings

# Assuming Qdrant and embeddings setup done globally for simplicity
def init_or_load_qdrant(path="./tmp/local_qdrant", collection_name="arxiv_papers"):
    """Try to open a stored Qdrant collection and derive a retriever from it.

    Any exception raised while loading is printed and treated as "nothing
    stored yet", in which case ``(None, None)`` is returned.

    NOTE(review): confirm that ``Qdrant.load`` exists in the installed
    langchain_community version. If it does not, the broad ``except`` below
    hides the failure and this function always returns ``(None, None)``.
    """
    store = None
    try:
        store = Qdrant.load(
            path=path,
            collection_name=collection_name,
        )
        print("Existing embeddings loaded.")
    except Exception as e:
        print("No existing embeddings found, starting new Qdrant instance.", e)
        store = None

    # Only build a retriever when a store was actually loaded.
    if store:
        return store, store.as_retriever()
    return store, None

# Attempt to load an existing Qdrant instance and retriever at import time.
# `retriever` is the module-level name that perform_query reads and
# process_papers reassigns.
qdrant, retriever = init_or_load_qdrant()

def process_papers(query):
    """Download arXiv papers for ``query`` and index their text in Qdrant.

    Up to 5 search results are downloaded as PDFs into ``arxiv_papers``. Every
    PDF in that directory is loaded, the combined text is split
    (``chunk_size=500``, ``chunk_overlap=50``), embedded with the
    ``nomic-embed-text`` Ollama model and stored in the ``arxiv_papers``
    collection under ``./tmp/local_qdrant``. The module-level ``qdrant`` and
    ``retriever`` are replaced with the new store and its retriever.

    Args:
        query: Search query sent to arXiv.

    Returns:
        A status message describing the outcome.
    """
    global qdrant, retriever

    dirpath = "arxiv_papers"
    os.makedirs(dirpath, exist_ok=True)

    client = arxiv.Client()
    search = arxiv.Search(
        query=query,
        max_results=5,
        sort_order=arxiv.SortOrder.Descending
    )

    # Retry transient download failures, but give up on a paper after a few
    # attempts instead of blocking the UI forever.
    max_attempts = 5
    for result in client.results(search):
        for attempt in range(1, max_attempts + 1):
            try:
                result.download_pdf(dirpath=dirpath)
            except (FileNotFoundError, ConnectionResetError) as e:
                print("Error occurred:", e)
                if attempt < max_attempts:
                    time.sleep(5)
            else:
                print(result)
                print(f"-> Paper id {result.get_short_id()} with title '{result.title}' is downloaded.")
                break
        else:
            print(f"-> Giving up on paper id {result.get_short_id()} after {max_attempts} attempts.")

    papers = []
    loader = DirectoryLoader(dirpath, glob="./*.pdf", loader_cls=PyPDFLoader)
    try:
        papers = loader.load()
    except Exception as e:
        print(f"Error loading file: {e}")

    # Concatenate all pages, then collapse line breaks and drop empty lines.
    full_text = "".join(paper.page_content for paper in papers)
    full_text = " ".join(line for line in full_text.splitlines() if line)
    if not full_text:
        # Nothing to embed; building a store from zero chunks would fail.
        return "No text could be extracted from the downloaded papers; nothing was indexed."

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    paper_chunks = text_splitter.create_documents([full_text])

    qdrant = Qdrant.from_documents(
        documents=paper_chunks,
        embedding=OllamaEmbeddings(model='nomic-embed-text'),
        path="./tmp/local_qdrant",
        collection_name="arxiv_papers",
    )
    retriever = qdrant.as_retriever()

    return "Papers processed and saved to embeddings database."

def perform_query(question_text):
    """Answer ``question_text`` from chunks fetched by the module-level retriever.

    Args:
        question_text: The question as plain text.

    Returns:
        The model's answer as a string, or an error message when no retriever
        is available yet.
    """
    if retriever is None:
        return "Error: No papers processed. Please process papers first."

    template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
    prompt = ChatPromptTemplate.from_template(template)

    ollama_llm = "llama2:7b-chat"
    model = ChatOllama(model=ollama_llm)

    # The same input feeds both branches: the retriever looks up context for
    # it and the passthrough forwards it unchanged as the question.
    chain = (
        RunnableParallel({"context": retriever, "question": RunnablePassthrough()})
        | prompt
        | model
        | StrOutputParser()
    )

    class Question(BaseModel):
        __root__: str

    chain = chain.with_types(input_type=Question)
    # Invoke with the raw string: both branches above need the question text
    # itself, not a Question wrapper object.
    result = chain.invoke(question_text)
    return result

# Two-tab Gradio UI: one tab runs process_papers on a search query, the other
# runs perform_query on a question; each shows the returned value in a text box.
with gr.Blocks() as demo:
    with gr.Tab("Process Papers"):
        query_input = gr.Text(label="Search Query")
        process_button = gr.Button("Process Papers")
        process_output = gr.Text(label="Process Output")
        # Clicking calls process_papers with the query text; the result goes to process_output.
        process_button.click(process_papers, inputs=query_input, outputs=process_output)

    with gr.Tab("Query Retriever"):
        question_input = gr.Text(label="Question")
        query_button = gr.Button("Query")
        query_output = gr.Text(label="Query Output")
        # Clicking calls perform_query with the question text; the result goes to query_output.
        query_button.click(perform_query, inputs=question_input, outputs=query_output)

# Runs at import time; there is no __main__ guard.
demo.launch()

Errors

Fixing UTF-8 Encoding Surrogate Error in Python Embeddings

Post author By praison
Post date March 22, 2024

Error message

UnicodeEncodeError: 'utf-8' codec can't encode characters in position 100-103: surrogates not allowed

langchain_community/embeddings/gpt4all.py

from typing import Any, Dict, List

from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import BaseModel, root_validator


class GPT4AllEmbeddings(BaseModel, Embeddings):
    """GPT4All embedding models.

    To use, you should have the gpt4all python package installed

    Example:
        .. code-block:: python

            from langchain_community.embeddings import GPT4AllEmbeddings

            embeddings = GPT4AllEmbeddings()
    """

    client: Any  #: :meta private:

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that GPT4All library is installed."""

        try:
            from gpt4all import Embed4All

            values["client"] = Embed4All()
        except ImportError as exc:
            # Chain explicitly so the underlying import failure stays visible.
            raise ImportError(
                "Could not import gpt4all library. "
                "Please install the gpt4all library to "
                "use this embedding model: pip install gpt4all"
            ) from exc
        return values

    def sanitize_text(self, text: str) -> str:
        """Drop characters that cannot be encoded as UTF-8.

        Encoding with the ``'ignore'`` handler skips unencodable code points
        such as lone surrogates, which otherwise trigger
        ``UnicodeEncodeError: ... surrogates not allowed``.

        Args:
            text: The text to clean.

        Returns:
            ``text`` with every unencodable character removed.
        """
        return text.encode('utf-8', 'ignore').decode('utf-8')

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed a list of documents using GPT4All.

        Each text is passed through ``sanitize_text`` before it is embedded.

        Args:
            texts: The list of texts to embed.

        Returns:
            List of embeddings, one for each text.
        """
        texts_sanitized = [self.sanitize_text(text) for text in texts]
        embeddings = [self.client.embed(text) for text in texts_sanitized]
        return [list(map(float, e)) for e in embeddings]

    def embed_query(self, text: str) -> List[float]:
        """Embed a query using GPT4All.

        Args:
            text: The text to embed.

        Returns:
            Embeddings for the text.
        """
        return self.embed_documents([text])[0]