"""Interactive streaming chat CLI.

Connects to one of four OpenAI-compatible backends selected by the
``API_HOST`` environment variable (``azure``, ``ollama``, ``github``,
or anything else for OpenAI.com), then runs a read-ask-print loop that
streams the model's answer token by token and keeps the full
conversation history in ``messages``.

Required environment variables per backend (read via python-dotenv):
  azure  : AZURE_OPENAI_ENDPOINT, AZURE_OPENAI_CHAT_DEPLOYMENT
  ollama : OLLAMA_ENDPOINT, OLLAMA_MODEL
  github : GITHUB_TOKEN (GITHUB_MODEL optional, default openai/gpt-4o)
  other  : OPENAI_KEY, OPENAI_MODEL
"""

import os

import azure.identity
import openai
from dotenv import load_dotenv

# Set up the OpenAI client to use either Azure, OpenAI.com, Ollama, or GitHub Models.
load_dotenv(override=True)
API_HOST = os.getenv("API_HOST", "github")

if API_HOST == "azure":
    # Keyless auth: the provider fetches Entra ID tokens on demand.
    # NOTE(review): passing the callable as api_key relies on the Azure v1
    # endpoint pattern supported by recent openai SDK versions — confirm the
    # pinned openai version accepts a callable here.
    token_provider = azure.identity.get_bearer_token_provider(
        azure.identity.DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"
    )
    client = openai.OpenAI(
        base_url=os.environ["AZURE_OPENAI_ENDPOINT"],
        api_key=token_provider,
    )
    MODEL_NAME = os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT"]
elif API_HOST == "ollama":
    # Ollama's OpenAI-compatible endpoint does not check the API key.
    client = openai.OpenAI(base_url=os.environ["OLLAMA_ENDPOINT"], api_key="nokeyneeded")
    MODEL_NAME = os.environ["OLLAMA_MODEL"]
elif API_HOST == "github":
    client = openai.OpenAI(base_url="https://models.github.ai/inference", api_key=os.environ["GITHUB_TOKEN"])
    MODEL_NAME = os.getenv("GITHUB_MODEL", "openai/gpt-4o")
else:
    client = openai.OpenAI(api_key=os.environ["OPENAI_KEY"])
    MODEL_NAME = os.environ["OPENAI_MODEL"]

# Conversation history sent with every request; grows as the chat proceeds.
messages = [
    {"role": "system", "content": "I am a large language model."},
]

while True:
    try:
        question = input("\nYour question: ")
    except (EOFError, KeyboardInterrupt):
        # Fix: exit cleanly on Ctrl-D / Ctrl-C instead of dumping a traceback.
        print()
        break

    print("Sending question...")
    messages.append({"role": "user", "content": question})

    # Stream the completion so tokens can be printed as they arrive.
    response = client.chat.completions.create(
        model=MODEL_NAME,
        messages=messages,
        temperature=0.7,
        stream=True,
    )

    print("\nAnswer: ")
    bot_response = ""
    for event in response:
        # Guard: some stream chunks carry no choices or an empty delta.
        if event.choices and event.choices[0].delta.content:
            content = event.choices[0].delta.content
            print(content, end="", flush=True)
            bot_response += content
    print("\n")
    # Record the assistant's full reply so follow-up questions have context.
    messages.append({"role": "assistant", "content": bot_response})